getfem-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Getfem-commits] (no subject)


From: Konstantinos Poulios
Subject: [Getfem-commits] (no subject)
Date: Wed, 27 Dec 2023 10:15:42 -0500 (EST)

branch: overhaul-build-system
commit 4ff66802b9ffb055b6c4805f4c3b573ecf217fd3
Author: Konstantinos Poulios <logari81@gmail.com>
AuthorDate: Wed Dec 27 16:15:24 2023 +0100

    Remove old copy of SuperLU, overhaul configure.ac, treat MUMPS and SuperLU 
equally
    
      - Old SuperLU code removed from sources
      - Fixes the SuperLU interface to work with modern releases of SuperLU
      - Unused and duplicate preprocessor definitions removed from config
        headers
      - Removes the header-free interface to SuperLU in GetFEM and the files
        "getfem_superlu.cc" and "getfem_superlu.h". Now, the Gmm header
        "gmm_superlu_interface.h" is the single point of access to SuperLU.
      - Configuration preprocessor variables are now defined either in Gmm or
        in GetFEM, exclusively. Based on two manually maintained header file
        templates "gmm_arch_config.h.in" and "getfem_arch_config.h.in". The
        header file generated by autoheader is not used.
      - All configuration options are fixed at configure time in the generated
        headers "gmm_arch_config.h" and "getfem_arch_config.h", and cannot
        be overridden with the "-D" option in the command line
      - Both SuperLU and MUMPS are now treated equally in the configure script
      - At least one of the two solvers, MUMPS or SuperLU, is required in
        order to build GetFEM
      - All unit tests were made to run and pass with either SuperLU or MUMPS
      - Unit test "make_gmm_test.pl" now respects the lapack configuration of
        the project (as found in "gmm_arch_config.h")
---
 Makefile.am                                        |     3 +-
 configure.ac                                       |   458 +-
 contrib/crack_plate/crack_bilaplacian_problem.cc   |    17 +-
 contrib/icare/icare.cc                             |    43 +-
 contrib/xfem_contact/xfem_stokes.cc                |     4 +
 .../xfem_stab_unilat_contact.cc                    |     9 +-
 interface/src/getfemint_precond.h                  |    12 +-
 interface/src/gf_linsolve.cc                       |     6 +-
 interface/src/gf_precond.cc                        |     4 +
 interface/src/gfi_array.h                          |     2 +-
 interface/src/octave/gfi_array.h                   |     2 +-
 interface/src/python/getfem_python.c               |    14 +-
 src/Makefile.am                                    |     2 -
 src/getfem/bgeot_config.h                          |     1 +
 src/getfem/getfem_arch_config.h.in                 |    49 +
 src/getfem/getfem_config.h                         |    36 +-
 src/getfem/getfem_model_solvers.h                  |    25 +-
 src/getfem/getfem_superlu.h                        |   130 -
 src/getfem_superlu.cc                              |   430 -
 src/gmm/gmm_arch_config.h.in                       |    25 +
 src/gmm/gmm_solver_Schwarz_additive.h              |    10 +-
 src/gmm/gmm_superlu_interface.h                    |     9 +-
 superlu/BLAS.c                                     | 43902 -------------------
 superlu/BLAS/License.txt                           |    14 -
 superlu/BLAS/caxpy.f                               |   102 -
 superlu/BLAS/ccopy.f                               |    94 -
 superlu/BLAS/cdotc.f                               |   103 -
 superlu/BLAS/cdotu.f                               |   100 -
 superlu/BLAS/cgbmv.f                               |   390 -
 superlu/BLAS/cgemm.f                               |   483 -
 superlu/BLAS/cgemv.f                               |   350 -
 superlu/BLAS/cgerc.f                               |   227 -
 superlu/BLAS/cgeru.f                               |   227 -
 superlu/BLAS/chbmv.f                               |   380 -
 superlu/BLAS/chemm.f                               |   371 -
 superlu/BLAS/chemv.f                               |   337 -
 superlu/BLAS/cher.f                                |   278 -
 superlu/BLAS/cher2.f                               |   317 -
 superlu/BLAS/cher2k.f                              |   442 -
 superlu/BLAS/cherk.f                               |   396 -
 superlu/BLAS/chpmv.f                               |   338 -
 superlu/BLAS/chpr.f                                |   279 -
 superlu/BLAS/chpr2.f                               |   318 -
 superlu/BLAS/crotg.f                               |    74 -
 superlu/BLAS/cscal.f                               |    91 -
 superlu/BLAS/csrot.f                               |   153 -
 superlu/BLAS/csscal.f                              |    94 -
 superlu/BLAS/cswap.f                               |    98 -
 superlu/BLAS/csymm.f                               |   369 -
 superlu/BLAS/csyr2k.f                              |   396 -
 superlu/BLAS/csyrk.f                               |   363 -
 superlu/BLAS/ctbmv.f                               |   429 -
 superlu/BLAS/ctbsv.f                               |   432 -
 superlu/BLAS/ctpmv.f                               |   388 -
 superlu/BLAS/ctpsv.f                               |   390 -
 superlu/BLAS/ctrmm.f                               |   452 -
 superlu/BLAS/ctrmv.f                               |   373 -
 superlu/BLAS/ctrsm.f                               |   477 -
 superlu/BLAS/ctrsv.f                               |   375 -
 superlu/BLAS/dasum.f                               |   111 -
 superlu/BLAS/daxpy.f                               |   115 -
 superlu/BLAS/dcabs1.f                              |    58 -
 superlu/BLAS/dcopy.f                               |   115 -
 superlu/BLAS/ddot.f                                |   117 -
 superlu/BLAS/dgbmv.f                               |   370 -
 superlu/BLAS/dgemm.f                               |   384 -
 superlu/BLAS/dgemv.f                               |   330 -
 superlu/BLAS/dger.f                                |   227 -
 superlu/BLAS/dnrm2.f                               |   112 -
 superlu/BLAS/drot.f                                |   101 -
 superlu/BLAS/drotg.f                               |    86 -
 superlu/BLAS/drotm.f                               |   202 -
 superlu/BLAS/drotmg.f                              |   251 -
 superlu/BLAS/dsbmv.f                               |   375 -
 superlu/BLAS/dscal.f                               |   110 -
 superlu/BLAS/dsdot.f                               |   172 -
 superlu/BLAS/dspmv.f                               |   331 -
 superlu/BLAS/dspr.f                                |   261 -
 superlu/BLAS/dspr2.f                               |   296 -
 superlu/BLAS/dswap.f                               |   122 -
 superlu/BLAS/dsymm.f                               |   367 -
 superlu/BLAS/dsymv.f                               |   333 -
 superlu/BLAS/dsyr.f                                |   263 -
 superlu/BLAS/dsyr2.f                               |   298 -
 superlu/BLAS/dsyr2k.f                              |   399 -
 superlu/BLAS/dsyrk.f                               |   364 -
 superlu/BLAS/dtbmv.f                               |   398 -
 superlu/BLAS/dtbsv.f                               |   401 -
 superlu/BLAS/dtpmv.f                               |   352 -
 superlu/BLAS/dtpsv.f                               |   354 -
 superlu/BLAS/dtrmm.f                               |   415 -
 superlu/BLAS/dtrmv.f                               |   342 -
 superlu/BLAS/dtrsm.f                               |   443 -
 superlu/BLAS/dtrsv.f                               |   338 -
 superlu/BLAS/dzasum.f                              |    98 -
 superlu/BLAS/dznrm2.f                              |   119 -
 superlu/BLAS/icamax.f                              |   107 -
 superlu/BLAS/idamax.f                              |   106 -
 superlu/BLAS/isamax.f                              |   106 -
 superlu/BLAS/izamax.f                              |   107 -
 superlu/BLAS/lsame.f                               |   125 -
 superlu/BLAS/sasum.f                               |   112 -
 superlu/BLAS/saxpy.f                               |   115 -
 superlu/BLAS/scabs1.f                              |    57 -
 superlu/BLAS/scasum.f                              |    97 -
 superlu/BLAS/scnrm2.f                              |   119 -
 superlu/BLAS/scopy.f                               |   115 -
 superlu/BLAS/sdot.f                                |   117 -
 superlu/BLAS/sdsdot.f                              |   255 -
 superlu/BLAS/sgbmv.f                               |   370 -
 superlu/BLAS/sgemm.f                               |   384 -
 superlu/BLAS/sgemv.f                               |   330 -
 superlu/BLAS/sger.f                                |   227 -
 superlu/BLAS/snrm2.f                               |   112 -
 superlu/BLAS/srot.f                                |   101 -
 superlu/BLAS/srotg.f                               |    86 -
 superlu/BLAS/srotm.f                               |   203 -
 superlu/BLAS/srotmg.f                              |   251 -
 superlu/BLAS/ssbmv.f                               |   375 -
 superlu/BLAS/sscal.f                               |   110 -
 superlu/BLAS/sspmv.f                               |   331 -
 superlu/BLAS/sspr.f                                |   261 -
 superlu/BLAS/sspr2.f                               |   296 -
 superlu/BLAS/sswap.f                               |   122 -
 superlu/BLAS/ssymm.f                               |   367 -
 superlu/BLAS/ssymv.f                               |   333 -
 superlu/BLAS/ssyr.f                                |   263 -
 superlu/BLAS/ssyr2.f                               |   298 -
 superlu/BLAS/ssyr2k.f                              |   399 -
 superlu/BLAS/ssyrk.f                               |   364 -
 superlu/BLAS/stbmv.f                               |   398 -
 superlu/BLAS/stbsv.f                               |   401 -
 superlu/BLAS/stpmv.f                               |   352 -
 superlu/BLAS/stpsv.f                               |   354 -
 superlu/BLAS/strmm.f                               |   415 -
 superlu/BLAS/strmv.f                               |   342 -
 superlu/BLAS/strsm.f                               |   443 -
 superlu/BLAS/strsv.f                               |   344 -
 superlu/BLAS/xerbla.f                              |    89 -
 superlu/BLAS/xerbla_array.f                        |   119 -
 superlu/BLAS/zaxpy.f                               |   102 -
 superlu/BLAS/zcopy.f                               |    94 -
 superlu/BLAS/zdotc.f                               |   103 -
 superlu/BLAS/zdotu.f                               |   100 -
 superlu/BLAS/zdrot.f                               |   153 -
 superlu/BLAS/zdscal.f                              |    94 -
 superlu/BLAS/zgbmv.f                               |   390 -
 superlu/BLAS/zgemm.f                               |   483 -
 superlu/BLAS/zgemv.f                               |   350 -
 superlu/BLAS/zgerc.f                               |   227 -
 superlu/BLAS/zgeru.f                               |   227 -
 superlu/BLAS/zhbmv.f                               |   380 -
 superlu/BLAS/zhemm.f                               |   371 -
 superlu/BLAS/zhemv.f                               |   337 -
 superlu/BLAS/zher.f                                |   278 -
 superlu/BLAS/zher2.f                               |   317 -
 superlu/BLAS/zher2k.f                              |   443 -
 superlu/BLAS/zherk.f                               |   396 -
 superlu/BLAS/zhpmv.f                               |   338 -
 superlu/BLAS/zhpr.f                                |   279 -
 superlu/BLAS/zhpr2.f                               |   318 -
 superlu/BLAS/zrotg.f                               |    75 -
 superlu/BLAS/zscal.f                               |    91 -
 superlu/BLAS/zswap.f                               |    98 -
 superlu/BLAS/zsymm.f                               |   369 -
 superlu/BLAS/zsyr2k.f                              |   396 -
 superlu/BLAS/zsyrk.f                               |   363 -
 superlu/BLAS/ztbmv.f                               |   429 -
 superlu/BLAS/ztbsv.f                               |   432 -
 superlu/BLAS/ztpmv.f                               |   388 -
 superlu/BLAS/ztpsv.f                               |   390 -
 superlu/BLAS/ztrmm.f                               |   452 -
 superlu/BLAS/ztrmv.f                               |   373 -
 superlu/BLAS/ztrsm.f                               |   477 -
 superlu/BLAS/ztrsv.f                               |   375 -
 superlu/BLAS_f2c.h                                 |   236 -
 superlu/License.txt                                |    30 -
 superlu/Makefile.am                                |   329 -
 superlu/ccolumn_bmod.c                             |   362 -
 superlu/ccolumn_dfs.c                              |   266 -
 superlu/ccopy_to_ucol.c                            |   112 -
 superlu/cgscon.c                                   |   155 -
 superlu/cgsequ.c                                   |   205 -
 superlu/cgsrfs.c                                   |   457 -
 superlu/cgssv.c                                    |   231 -
 superlu/cgssvx.c                                   |   627 -
 superlu/cgstrf.c                                   |   444 -
 superlu/cgstrs.c                                   |   344 -
 superlu/clacon.c                                   |   236 -
 superlu/clangs.c                                   |   132 -
 superlu/claqgs.c                                   |   160 -
 superlu/cmemory.c                                  |   691 -
 superlu/cmyblas2.c                                 |   204 -
 superlu/colamd.c                                   |  3412 --
 superlu/colamd.h                                   |   246 -
 superlu/cpanel_bmod.c                              |   478 -
 superlu/cpanel_dfs.c                               |   256 -
 superlu/cpivotL.c                                  |   171 -
 superlu/cpivotgrowth.c                             |   130 -
 superlu/cpruneL.c                                  |   156 -
 superlu/creadhb.c                                  |   288 -
 superlu/csnode_bmod.c                              |   117 -
 superlu/csnode_dfs.c                               |   113 -
 superlu/csp_blas2.c                                |   577 -
 superlu/csp_blas3.c                                |   141 -
 superlu/cutil.c                                    |   482 -
 superlu/dcolumn_bmod.c                             |   354 -
 superlu/dcolumn_dfs.c                              |   267 -
 superlu/dcomplex.c                                 |   116 -
 superlu/dcopy_to_ucol.c                            |   112 -
 superlu/dgscon.c                                   |   156 -
 superlu/dgsequ.c                                   |   206 -
 superlu/dgsrfs.c                                   |   447 -
 superlu/dgssv.c                                    |   231 -
 superlu/dgssvx.c                                   |   626 -
 superlu/dgstrf.c                                   |   441 -
 superlu/dgstrs.c                                   |   330 -
 superlu/dgstrsL.c                                  |   230 -
 superlu/dlacon.c                                   |   250 -
 superlu/dlamch.c                                   |  1004 -
 superlu/dlangs.c                                   |   132 -
 superlu/dlaqgs.c                                   |   158 -
 superlu/dmemory.c                                  |   690 -
 superlu/dmyblas2.c                                 |   246 -
 superlu/dpanel_bmod.c                              |   449 -
 superlu/dpanel_dfs.c                               |   256 -
 superlu/dpivotL.c                                  |   170 -
 superlu/dpivotgrowth.c                             |   129 -
 superlu/dpruneL.c                                  |   156 -
 superlu/dreadhb.c                                  |   277 -
 superlu/dsnode_bmod.c                              |   114 -
 superlu/dsnode_dfs.c                               |   113 -
 superlu/dsp_blas2.c                                |   498 -
 superlu/dsp_blas3.c                                |   141 -
 superlu/dutil.c                                    |   479 -
 superlu/dzsum1.c                                   |   102 -
 superlu/f2c_lite.c                                 |   391 -
 superlu/get_perm_c.c                               |   472 -
 superlu/heap_relax_snode.c                         |   113 -
 superlu/icmax1.c                                   |   124 -
 superlu/izmax1.c                                   |   117 -
 superlu/lsame.c                                    |   111 -
 superlu/memory.c                                   |   230 -
 superlu/mmd.c                                      |  1021 -
 superlu/relax_snode.c                              |    80 -
 superlu/scolumn_bmod.c                             |   360 -
 superlu/scolumn_dfs.c                              |   278 -
 superlu/scomplex.c                                 |   127 -
 superlu/scopy_to_ucol.c                            |   112 -
 superlu/scsum1.c                                   |   111 -
 superlu/sgscon.c                                   |   155 -
 superlu/sgsequ.c                                   |   205 -
 superlu/sgsrfs.c                                   |   446 -
 superlu/sgssv.c                                    |   230 -
 superlu/sgssvx.c                                   |   623 -
 superlu/sgstrf.c                                   |   431 -
 superlu/sgstrs.c                                   |   331 -
 superlu/slacon.c                                   |   249 -
 superlu/slamch.c                                   |  1023 -
 superlu/slangs.c                                   |   131 -
 superlu/slaqgs.c                                   |   157 -
 superlu/slu_Cnames.h                               |   356 -
 superlu/slu_cdefs.h                                |   246 -
 superlu/slu_dcomplex.h                             |    93 -
 superlu/slu_ddefs.h                                |   243 -
 superlu/slu_scomplex.h                             |    93 -
 superlu/slu_sdefs.h                                |   243 -
 superlu/slu_util.h                                 |   287 -
 superlu/slu_zdefs.h                                |   246 -
 superlu/smemory.c                                  |   689 -
 superlu/smyblas2.c                                 |   245 -
 superlu/sp_coletree.c                              |   354 -
 superlu/sp_ienv.c                                  |    86 -
 superlu/sp_preorder.c                              |   224 -
 superlu/spanel_bmod.c                              |   462 -
 superlu/spanel_dfs.c                               |   256 -
 superlu/spivotL.c                                  |   182 -
 superlu/spivotgrowth.c                             |   129 -
 superlu/spruneL.c                                  |   156 -
 superlu/sreadhb.c                                  |   276 -
 superlu/ssnode_bmod.c                              |   115 -
 superlu/ssnode_dfs.c                               |   113 -
 superlu/ssp_blas2.c                                |   481 -
 superlu/ssp_blas3.c                                |   140 -
 superlu/superlu_timer.c                            |    76 -
 superlu/supermatrix.h                              |   165 -
 superlu/sutil.c                                    |   478 -
 superlu/util.c                                     |   405 -
 superlu/xerbla.c                                   |    83 -
 superlu/zcolumn_bmod.c                             |   363 -
 superlu/zcolumn_dfs.c                              |   266 -
 superlu/zcopy_to_ucol.c                            |   112 -
 superlu/zgscon.c                                   |   152 -
 superlu/zgsequ.c                                   |   205 -
 superlu/zgsrfs.c                                   |   456 -
 superlu/zgssv.c                                    |   230 -
 superlu/zgssvx.c                                   |   623 -
 superlu/zgstrf.c                                   |   432 -
 superlu/zgstrs.c                                   |   344 -
 superlu/zlacon.c                                   |   236 -
 superlu/zlangs.c                                   |   131 -
 superlu/zlaqgs.c                                   |   159 -
 superlu/zmemory.c                                  |   689 -
 superlu/zmyblas2.c                                 |   203 -
 superlu/zpanel_bmod.c                              |   477 -
 superlu/zpanel_dfs.c                               |   256 -
 superlu/zpivotL.c                                  |   171 -
 superlu/zpivotgrowth.c                             |   129 -
 superlu/zpruneL.c                                  |   156 -
 superlu/zreadhb.c                                  |   286 -
 superlu/zsnode_bmod.c                              |   129 -
 superlu/zsnode_dfs.c                               |   113 -
 superlu/zsp_blas2.c                                |   576 -
 superlu/zsp_blas3.c                                |   140 -
 superlu/zutil.c                                    |   482 -
 tests/laplacian.cc                                 |     3 +-
 tests/make_gmm_test.pl                             |    36 +-
 tests/schwarz_additive.cc                          |    78 +-
 tests/test_condensation.cc                         |    16 +-
 319 files changed, 494 insertions(+), 128462 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index 95b0d912..8049fdc3 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -18,11 +18,12 @@
 
 ACLOCAL_AMFLAGS = -I m4
 
-SUBDIRS = m4 cubature @SUPERLU_SRC@ src tests interface contrib bin doc
+SUBDIRS = m4 cubature src tests interface contrib bin doc
 
 EXTRA_DIST = GNU_LGPL_V3 GNU_GPL_V3 GNU_GCC_RUNTIME_EXCEPTION GNU_FDL_V3
 
 CLEANFILES = so_locations _configs.sed
+DISTCLEANFILES = config_autogenerated_not_used.h.in
 
 # DISTCHECK_CONFIGURE_FLAGS = --enable-matlab --enable-octave
 
diff --git a/configure.ac b/configure.ac
index 4fcf6c1f..8538453a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -31,15 +31,15 @@ AC_INIT(getfem, 5.4.2)
 MAJOR_VERSION="5"
 MINOR_VERSION="4"
 PATCH_VERSION="2"
-AC_DEFINE_UNQUOTED([MAJOR_VERSION],$MAJOR_VERSION,[Major version number])
-AC_DEFINE_UNQUOTED([MINOR_VERSION],$MINOR_VERSION,[Minor version number])
-AC_DEFINE_UNQUOTED([PATCH_VERSION],$PATCH_VERSION,[Patch version number])
+AC_DEFINE_UNQUOTED(GETFEM_PACKAGE_NAME,"${PACKAGE_NAME}",[GetFEM package name])
+AC_DEFINE_UNQUOTED(GETFEM_PACKAGE_STRING,"${PACKAGE_STRING}",[GetFEM package 
string])
+AC_DEFINE_UNQUOTED(GETFEM_PACKAGE_TARNAME,"${PACKAGE_TARNAME}",[GetFEM package 
tarname])
+AC_DEFINE_UNQUOTED(GETFEM_VERSION,"${PACKAGE_VERSION}",[GetFEM version])
+AC_DEFINE_UNQUOTED(GMM_VERSION,"${PACKAGE_VERSION}",[GMM version])
 
 AC_CONFIG_SRCDIR([install-sh])
 AC_CONFIG_MACRO_DIR([m4])
-AC_CONFIG_HEADER(config.h)
-AX_PREFIX_CONFIG_H(src/getfem/getfem_arch_config.h,GETFEM) 
-AX_PREFIX_CONFIG_H(src/gmm/gmm_arch_config.h,GMM) 
+AC_CONFIG_HEADERS([config_autogenerated_not_used.h 
src/getfem/getfem_arch_config.h src/gmm/gmm_arch_config.h])
 AC_PREREQ(2.61)
 AC_ARG_PROGRAM
 
@@ -75,7 +75,7 @@ AC_ARG_ENABLE(paralevel,
 ])
 
 if test $paralevel -ge 1; then
-  CPPFLAGS="$CPPFLAGS -DGETFEM_PARA_LEVEL=$paralevel"
+  AC_DEFINE_UNQUOTED(GETFEM_PARA_LEVEL,$paralevel,[Parallelization level 
(0|1|2)])
 fi;
 dnl ---------------------------END OF PARA LEVEL-------------------
 
@@ -87,12 +87,13 @@ dnl -----------------------------------------------
 
 USER_CXXFLAGS="$CXXFLAGS"
 USER_CFLAGS="$CFLAGS"
-AX_PROG_CXX_MPI([test $paralevel -ge 1],[usempi=yes],[usempi=no])
-AX_PROG_CC_MPI([test "x$usempi" = "xyes"],,[usempi=no])
-AX_PROG_FC_MPI([test "x$usempi" = "xyes"],[CPPFLAGS="$CPPFLAGS 
-DGMM_USES_MPI"],[usempi=no])
+AX_PROG_CXX_MPI(  [test $paralevel -ge 1],[usempi=yes],[usempi=no])
+AX_PROG_CC_MPI([test "x$usempi" = "xyes"],[usempi=yes],[usempi=no])
+AX_PROG_FC_MPI([test "x$usempi" = "xyes"],[usempi=yes],[usempi=no])
 
 if  test "x$usempi" = "xyes"; then
-AC_CHECK_LIB(mpi_cxx, _init,,)
+AC_CHECK_LIB(mpi_cxx, _init,
+             [AC_DEFINE(GMM_USES_MPI,,[defined if GMM uses MPI])],)
 fi
 
 AC_PROG_CXXCPP
@@ -311,7 +312,7 @@ if test x$use_rpc = xyes; then
               RPC_LIB="$withval")
   AC_SUBST(RPC_INC_DIR)
   AC_SUBST(RPC_LIB)
-  AC_DEFINE_UNQUOTED(USE_RPC, 1, [Use rpc for getfem communication with 
matlab])
+  AC_DEFINE(GETFEM_USE_RPC,,[Use rpc for getfem communication with matlab])
 fi;
 AC_SUBST(GETFEM_SERVER)
 AM_CONDITIONAL(BUILDMEXRPC, test x$matlab_rpc = xyes)
@@ -654,7 +655,7 @@ fi
 
 dnl ACX_BLAS([ echo "OK, You have working BLAS libs !"; HAVE_VENDOR_BLAS=1 
],[echo "YOU DONT HAVE BLAS! Using a cheap replacement" ; HAVE_VENDOR_BLAS=0])
 LIBS="$LIBS $BLAS_LIBS"
-CPPFLAGS="$CPPFLAGS -DGMM_USES_BLAS"
+AC_DEFINE(GMM_USES_BLAS,,[defined if GMM is linked to a blas library])
 
 useblas64support=NO
 AC_ARG_ENABLE(blas64-support,
@@ -684,7 +685,7 @@ if test x$useblas64support = xNO && test x$usematlab = 
xYES; then
 fi
 
 if test x$useblas64support = xYES; then
-  AC_DEFINE_UNQUOTED(USE_BLAS64_INTERFACE, 1, [Use blas with 64 bits integers])
+  AC_DEFINE(GMM_USE_BLAS64_INTERFACE,,[Use blas with 64 bits integers])
 fi
 
 dnl ------------------------------LAPACK TEST--------------------------------
@@ -704,8 +705,8 @@ if test x"$acx_blas_ok" = xyes; then
   fi
 
   if test x"$acx_lapack_ok" = xyes; then
-     CPPFLAGS="$CPPFLAGS -DGMM_USES_LAPACK"
-     LIBS="$LAPACK_LIBS $LIBS"
+    AC_DEFINE(GMM_USES_LAPACK,,[defined if GMM is linked to a lapack library])
+    LIBS="$LAPACK_LIBS $LIBS"
   fi
 fi
 
@@ -727,8 +728,8 @@ AC_ARG_ENABLE(multithread-blas,
 if test x$multithread_blas = xNO; then
   AC_CHECK_LIB(dl, dlsym, [acx_dl_ok=yes; DL_LIBS="-ldl"])
   if test x"$acx_dl_ok" = xyes; then
-     AC_DEFINE_UNQUOTED([FORCE_SINGLE_THREAD_BLAS],1,[enable openblas to be 
multithreaded])
-     LIBS="$DL_LIBS $LIBS"
+    AC_DEFINE(GETFEM_FORCE_SINGLE_THREAD_BLAS,,[enable openblas to be 
multithreaded])
+    LIBS="$DL_LIBS $LIBS"
   fi
 fi;
 dnl --------------------------end of multithread-blas--------------------
@@ -754,7 +755,7 @@ if test x$useopenmp = xYES; then
   AC_OPENMP
   if test "x$ac_cv_prog_cxx_openmp" != "xunsupported" && test 
"x$ac_cv_prog_cxx_openmp" != "x"; then
     AC_SUBST(AM_CXXFLAGS,"$OPENMP_CXXFLAGS")
-    CPPFLAGS="$CPPFLAGS -DGETFEM_HAS_OPENMP"
+    AC_DEFINE(GETFEM_HAS_OPENMP,,[defined if GetFEM is built with OpenMP 
parallelization])
   else
     AC_MSG_ERROR([OpenMP support not found. Use --enable-openmp=no flag to 
compile GetFEM without OpenMP]);
   fi
@@ -762,92 +763,215 @@ fi;
 dnl ---------------------------END OF OPENMP-----------------------
 
 
-dnl ------------------------------SuperLU config-------------------------
+dnl ------------------------------SUPERLU TEST---------------------------
+require_superlu="auto"
 AC_ARG_ENABLE(superlu,
- [AS_HELP_STRING([--enable-superlu],[turn on/off SuperLU support])],
- [case "${enableval}" in
-   yes) usesuperlu=YES ;;
-   no)  usesuperlu=NO ;;
-   *) AC_MSG_ERROR([bad value ${enableval} for --enable-superlu]) ;;
- esac],[usesuperlu=YES])
-
-SUPERLU_CPPFLAGS=""
-SUPERLU_SRC=""
-SUPERLU_LIBS=""
-SUPERLU_MAKEFILE=""
-
-if test x$usesuperlu = xYES; then
-  echo "Building with SuperLU support (use --enable-superlu=no to disable it)"
-  if test x"$FC" = "x"; then
-    sgemm="sgemm_"
+  [AS_HELP_STRING([--enable-superlu], [Enable SuperLU support])],
+  [require_superlu=$enableval],
+  [require_superlu="auto"])
+
+SUPERLU_LIBS="-lsuperlu"
+# the user can override these defaults using --with-superlu=
+AC_ARG_WITH(superlu,
+ [AS_HELP_STRING([--with-superlu=<lib>],[use SuperLU library <lib>])],
+ [case $with_superlu in
+   yes | "")
+     if test "x$require_superlu" = "xno"; then
+       AC_MSG_ERROR([Contradicting arguments between --enable-superlu and 
--with-superlu.])
+     elif test "x$require_superlu" = "xauto"; then
+       require_superlu="yes"
+     fi;;
+   no)
+     if test "x$require_superlu" = "xyes"; then
+       AC_MSG_ERROR([Contradicting arguments between --enable-superlu and 
--with-superlu.])
+     elif test "x$require_superlu" = "xauto"; then
+       require_superlu="no"
+     fi;;
+   -* | */* | *.a | *.so | *.so.* | *.o| builtin) 
SUPERLU_LIBS="$with_superlu";;
+   *) SUPERLU_LIBS=`echo $with_superlu | sed -e 's/^/-l/g;s/ \+/ -l/g'`;;
+  esac]
+)
+
+SUPERLUINC=""
+AC_ARG_WITH(superlu-include-dir,
+ [AS_HELP_STRING([--with-superlu-include-dir],[directory in which the 
superlu/sl*.h headers can be found])],
+ [ if test x$require_superlu = xno; then
+     AC_MSG_ERROR([Inconsistent options for --enable-superlu, --with-superlu 
and --with-superlu-include-dir.]);
+   else
+     require_superlu="yes"
+     case $withval in
+       -I* ) SUPERLUINC="$withval";;
+       * ) SUPERLUINC="-I$withval";;
+     esac
+   fi;],
+)
+CPPFLAGS="$CPPFLAGS $SUPERLUINC"
+
+if test "x$require_superlu" = "xno"; then
+  SUPERLU_LIBS=""
+  found_superlu="no"
+  echo "Building with SuperLU explicitly disabled";
+else
+  AC_CHECK_HEADERS(
+    [superlu/slu_Cnames.h superlu/slu_cdefs.h superlu/slu_ddefs.h 
superlu/slu_sdefs.h superlu/slu_zdefs.h \
+     superlu/slu_dcomplex.h superlu/slu_scomplex.h],
+    [found_superlu="yes"],
+    [ if test "x$require_superlu" = "xyes"; then
+        AC_MSG_ERROR([Header files of SuperLU not found.]);
+      else
+        found_superlu="no"
+      fi;
+    ])
+  if test x$found_superlu = xyes; then
+    save_LIBS="$LIBS";
+    AC_CHECK_LIB([superlu], [dCreate_CompCol_Matrix],
+                 [AC_DEFINE(GMM_USES_SUPERLU,,[defined if GMM is linked to the 
superlu library])],
+                 [if test "x$require_superlu" = "xyes"; then
+                    AC_MSG_ERROR([SuperLU library not found]);
+                  else
+                    found_superlu="no"
+                  fi;])
+    if test "x$found_superlu" = "xyes"; then
+      echo "Building with SuperLU (use --enable-superlu=no to disable it)"
+      LIBS="$SUPERLU_LIBS $save_LIBS"
+    else
+      SUPERLU_LIBS=""
+      LIBS="$save_LIBS"
+    fi
+  elif test "x$require_superlu" = "xyes"; then
+    AC_MSG_ERROR([SuperLU header files not found but required by the user. 
Aborting configure...]);
   else
-    AC_FC_FUNC(sgemm)
-    echo "FC=$FC"
+    echo "SuperLU header files not found, building without SuperLU"
+    SUPERLU_LIBS=""
   fi
-  case $sgemm in
-    sgemm)
-          F77_CALL_C="NOCHANGE";
-          ;;
-    sgemm_)
-          F77_CALL_C="ADD_";
-          ;;
-    SGEMM)
-          F77_CALL_C="UPCASE";
-          ;;
-    sgemm__)
-          F77_CALL_C="ADD__";
-          ;;
-    *)
-          AC_MSG_ERROR(["superlu won't handle this calling convention: sgemm 
-> $sgemm"])
-          ;;
-  esac
-  SUPERLU_CPPFLAGS="$CPPFLAGS -DUSE_VENDOR_BLAS -DF77_CALL_C=$F77_CALL_C"
-  SUPERLU_SRC="superlu"
-  case $host in
-    *apple*)
-        SUPERLU_LIBS="../$SUPERLU_SRC/libsuperlu.la"
-        ;;
-    *mingw*)
-        SUPERLU_LIBS="../$SUPERLU_SRC/.libs/libsuperlu.a"
-        ;;
-    *)
-        SUPERLU_LIBS="`readlink -f .`/$SUPERLU_SRC/libsuperlu.la"
-        ;;
-  esac
-  SUPERLU_MAKEFILE="$SUPERLU_SRC/Makefile"
-else
-  echo "Building without SuperLU support (use --enable-superlu=yes to enable 
it)"
-  AC_CHECK_LIB([superlu], [dCreate_CompCol_Matrix],[],
-               [AC_MSG_ERROR([SuperLU library not found])])
+fi
 
-  AC_CHECK_HEADERS(
-  [superlu/colamd.h superlu/slu_Cnames.h \
-   superlu/slu_cdefs.h superlu/slu_ddefs.h superlu/slu_sdefs.h 
superlu/slu_zdefs.h \
-   superlu/slu_dcomplex.h superlu/slu_scomplex.h],
-  [usesuperlu="YES"],
-  [
-    if test "x$usesuperlu" = "xYES"; then
-      AC_MSG_ERROR([header files of superlu not found. Use 
--enable-superlu=yes flag]);
+AM_CONDITIONAL(SUPERLU, test x$found_superlu = xyes)
+AC_SUBST([SUPERLU_LIBS])
+if test "x$found_superlu" = "xyes"; then
+  echo "Configuration of SuperLU done"
+fi
+dnl ---------------------------END OF SUPERLU TEST-----------------------
+
+dnl ------------------------------MUMPS TEST-----------------------------
+require_mumps="auto"
+AC_ARG_ENABLE(mumps,
+  [AS_HELP_STRING([--enable-mumps], [Enable MUMPS support])],
+  [require_mumps=$enableval],
+  [require_mumps="auto"])
+
+MUMPS_LIBS=""
+# the user can override these defaults using --with-mumps=
+if test $paralevel -le 1; then # default to the typical naming of the 
sequential libraries
+  MUMPS_LIBS="-lsmumps_seq -ldmumps_seq -lcmumps_seq -lzmumps_seq"
+else # default to the common name for the parallel libraries (the user can 
override this using --with-mumps=)
+  MUMPS_LIBS="-lsmumps -ldmumps -lcmumps -lzmumps"
+fi
+
+AC_ARG_WITH(mumps,
+ [AS_HELP_STRING([--with-mumps=<lib>],[use MUMPS library <lib>])],
+ [case $with_mumps in
+   yes | "")
+     if test "x$require_mumps" = "xno"; then
+       AC_MSG_ERROR([Contradicting arguments between --enable-mumps and 
--with-mumps.])
+     elif test "x$require_mumps" = "xauto"; then
+       require_mumps="yes"
+     fi;;
+   no)
+     if test "x$require_mumps" = "xyes"; then
+       AC_MSG_ERROR([Contradicting arguments between --enable-(par-)mumps and 
--with-mumps.])
+     elif test "x$require_mumps" = "xauto"; then
+       require_mumps="no"
+     fi;;
+   -* | */* | *.a | *.so | *.so.* | *.o| builtin) MUMPS_LIBS="$with_mumps";;
+   *) MUMPS_LIBS=`echo $with_mumps | sed -e 's/^/-l/g;s/ \+/ -l/g'`;;
+  esac]
+)
+
+MUMPSINC=""
+AC_ARG_WITH(mumps-include-dir,
+ [AS_HELP_STRING([--with-mumps-include-dir],[directory in which the dmumps.h 
header can be found])],
+ [case $withval in
+   -I* ) MUMPSINC="$withval";;
+   * ) MUMPSINC="-I$withval";;
+  esac],
+)
+CPPFLAGS="$CPPFLAGS $MUMPSINC"
+
+save_LIBS="$LIBS";
+if test "x$require_mumps" = "xno"; then
+  found_mumps="no"
+  echo "Building with MUMPS explicitly disabled";
+else
+ AC_SEARCH_LIBS(smumps_c, [`echo $MUMPS_LIBS | sed -e 's/^-l//g;s/ -l/ /g'`],
+   [found_mumps="yes"],
+   [if test "x$require_mumps" = "xyes"; then
+     AC_MSG_ERROR([The function smumps_c couldn't be found in the provided 
MUMPS libraries.]);
     fi;
-  ])
+    found_mumps="no"]
+ )
+ AC_SEARCH_LIBS(dmumps_c, [`echo $MUMPS_LIBS | sed -e 's/^-l//g;s/ -l/ /g'`],
+   [found_mumps="yes"],
+   [if test "x$require_mumps" = "xyes"; then
+     AC_MSG_ERROR([The function dmumps_c couldn't be found in the provided 
MUMPS libraries.]);
+    fi;
+    found_mumps="no"]
+ )
+ AC_SEARCH_LIBS(cmumps_c, [`echo $MUMPS_LIBS | sed -e 's/^-l//g;s/ -l/ /g'`],
+   [found_mumps="yes"],
+   [if test "x$require_mumps" = "xyes"; then
+     AC_MSG_ERROR([The function cmumps_c couldn't be found in the provided 
MUMPS libraries.]);
+    fi;
+    found_mumps="no"]
+ )
+ AC_SEARCH_LIBS(zmumps_c, [`echo $MUMPS_LIBS | sed -e 's/^-l//g;s/ -l/ /g'`],
+   [found_mumps="yes"],
+   [if test "x$require_mumps" = "xyes"; then
+     AC_MSG_ERROR([The function zmumps_c couldn't be found in the provided 
MUMPS libraries.]);
+    fi;
+    found_mumps="no"]
+ )
+ AC_CHECK_HEADERS([smumps_c.h dmumps_c.h cmumps_c.h zmumps_c.h],
+   [found_mumps="yes"],
+   [if test "x$require_mumps" = "xyes"; then
+     AC_MSG_ERROR([header file dmumps_c.h not found.]);
+    fi;
+    found_mumps="no"]
+ )
 
-  SUPERLU_LIBS="-lsuperlu"
-  LIBS="$SUPERLU_LIBS $LIBS"
+ if test "x$found_mumps" = "xyes"; then
+   echo "Building with MUMPS (use --enable-mumps=no to disable it)"
+   AC_DEFINE(GMM_USES_MUMPS,,[defined if GMM is linked to the mumps library])
+   LIBS="$MUMPS_LIBS $save_LIBS"
+ else
+   MUMPS_LIBS=""
+   LIBS="$save_LIBS"
+ fi;
+fi;
+
+AM_CONDITIONAL(MUMPS, test x$found_mumps = xyes)
+AC_SUBST([MUMPS_LIBS])
+if test "x$found_mumps" = "xyes"; then
+  echo "Configuration of MUMPS done"
+fi
+dnl ---------------------------END OF MUMPS TEST--------------------------
+
+
+
+if test "x$found_superlu" = "xno" -a "x$found_mumps" = "xno"; then
+  AC_MSG_ERROR([Neither MUMPS nor SuperLU was enabled. At least one linear 
solver is required.])
+  exit 1
 fi
 
-AC_SUBST([SUPERLU_CPPFLAGS])
-AC_SUBST([SUPERLU_SRC])
-AC_SUBST([SUPERLU_LIBS])
-AM_CONDITIONAL(USEBLASLITE, test x$HAVE_VENDOR_BLAS = x0)
-echo "Configuration of SuperLU done"
 
 
-dnl ----------------EXPERIMENTAL PARTS OF THE LIBRARY--------------------
+dnl ----------------EXPERIMENTAL PARTS OF THE LIBRARY---------------------
 EXPER=""
 AC_ARG_ENABLE(experimental,
         [AS_HELP_STRING([--enable-experimental],[compile experimental parts of 
the library])],
 [ if   test "x$enableval" = "xyes" ; then EXPER="-DEXPERIMENTAL_PURPOSE_ONLY"; 
fi], [EXPER=""])
 CPPFLAGS="$CPPFLAGS $EXPER"
+dnl ----------------END OF EXPERIMENTAL PARTS OF THE LIBRARY-------------
 
 dnl -----------------------------QD TESTS--------------------------------
 AC_ARG_WITH(qd-lib-dir,
@@ -891,10 +1015,10 @@ int main() {
   fpu_fix_end(&old_cw); return 1-ok;
 }
   ]])],[echo "checking if qd library is working...yes"],[ echo "QD library is 
not working (check config.log)"; exit 1],[])
-  AC_DEFINE_UNQUOTED([HAVE_QDLIB],1,[defined if the qd library was found and 
is working])
+  AC_DEFINE(GETFEM_HAVE_QDLIB,,[defined if the qd library was found and is 
working])
   HAVE_QDLIB=1;
   if test "x$QD_PREC" = "xquad"; then
-    AC_DEFINE_UNQUOTED([QDLIB_USE_QUAD],1,[defined if quad-doubles are to be 
used instead of double-double])
+    AC_DEFINE(GETFEM_QDLIB_USE_QUAD,,[defined if quad-doubles are to be used 
instead of double-double])
   fi;
 fi;
 dnl -----------------------------END QD TESTS--------------------------------
@@ -914,7 +1038,9 @@ else
   [
     AC_CHECK_LIB(qhullstatic_r, 
qh_new_qhull,[QHULL_LIBS="-lqhullstatic_r"],[QHULL_LIBS=""])
   ])
-  AC_CHECK_HEADERS(libqhull_r/qhull_ra.h,[useQHULL="yes"],
+  AC_CHECK_HEADERS(libqhull_r/qhull_ra.h,
+                   [useQHULL="yes"; 
AC_DEFINE(GETFEM_HAVE_LIBQHULL_R_QHULL_RA_H,,
+                                              [defined if the 
<libqhull_r/qhull_ra.h> header file is available])],
   [
     if test "x$useQHULL" = "xyes"; then
       AC_MSG_ERROR([header files libqhull_r/qhull_ra.h not found. Use 
--enable-qhull=no flag]);
@@ -930,116 +1056,6 @@ AC_SUBST([QHULL_LIBS])
 echo "Configuration of qhull done"
 dnl -----------------------------END OF QHULL TEST---------------------------
 
-dnl ------------------------------MUMPS TEST------------------------------
-MUMPSINC=""
-AC_ARG_WITH(mumps-include-dir,
- [AS_HELP_STRING([--with-mumps-include-dir],[directory in which the dmumps.h 
header can be found])],
- [case $withval in
-   -I* ) MUMPSINC="$withval";;
-   * ) MUMPSINC="-I$withval";;
-  esac],
- [MUMPSINC="-I$GFPREFIX/include"]
-)
-CPPFLAGS="$CPPFLAGS $MUMPSINC"
-
-MUMPS_LIBS=""
-case $host in
-    *mingw*)
-        MUMPS_SEQ_LIBS="-lsmumps -ldmumps -lcmumps -lzmumps -lmumps_common 
-lmpiseq -lpord"
-        ;;
-    *apple*)
-        MUMPS_SEQ_LIBS="-lsmumps -ldmumps -lcmumps -lzmumps -lmumps_common 
-lmpiseq -lpord -lgomp"
-        ;;
-    *)
-        MUMPS_SEQ_LIBS="-lsmumps_seq -ldmumps_seq -lcmumps_seq -lzmumps_seq"
-        ;;
-esac
-acx_mumps_ok="no"
-usemumps="no"
-AC_ARG_ENABLE(mumps,
- [AS_HELP_STRING([--enable-mumps],[enable the use of the (sequential) MUMPS 
library. A direct solver for large sparse linear systems.])],
- [case $enableval in
-   yes | "") usemumps="yes"; acx_mumps_ok="yes"; MUMPS_LIBS="$MUMPS_SEQ_LIBS";;
-   no) usemumps="no";;
-  esac],
- [usemumps="test"; acx_mumps_ok="test"; MUMPS_LIBS="$MUMPS_SEQ_LIBS"]
-)
-
-AC_ARG_ENABLE(par-mumps,
- [AS_HELP_STRING([--enable-par-mumps],[enable the use of the parrallel MUMPS 
library. A direct solver for large sparse linear systems.])],
- [case $enableval in
-   yes | "") usemumps="yes"; MUMPS_LIBS="-lsmumps -ldmumps -lcmumps -lzmumps";;
-   no) usemumps="no";;
-  esac],
- [if test $paralevel -ge 1; then
-    usemumps="test"; acx_mumps_ok="test"; MUMPS_LIBS="-lsmumps -ldmumps 
-lcmumps -lzmumps"
-  fi;]
-)
-
-AC_ARG_WITH(mumps,
- [AS_HELP_STRING([--with-mumps=<lib>],[use MUMPS library <lib>])],
- [case $with_mumps in
-   yes | "") usemumps="yes";;
-   no) acx_mumps_ok="no" ;;
-   -* | */* | *.a | *.so | *.so.* | *.o| builtin) MUMPS_LIBS="$with_mumps"; 
acx_mumps_ok="yes" ;;
-   *) MUMPS_LIBS=`echo $with_mumps | sed -e 's/^/-l/g;s/ \+/ -l/g'` ; 
usemumps="yes";;
-  esac]
-)
-
-save_LIBS="$LIBS";
-if test "x$usemumps" = "xno" -o "x$acx_mumps_ok" = "xno"; then
-  echo "Building with MUMPS explicitly disabled";
-else
- AC_SEARCH_LIBS(smumps_c, [`echo $MUMPS_LIBS | sed -e 's/^-l//g;s/ -l/ /g'`],
-   [usemumps="yes"],
-   [if test "x$acx_mumps_ok" = "xyes"; then
-     AC_MSG_ERROR([The function smumps_c couldn't be found in the provided 
MUMPS libraries.]);
-    fi;
-    usemumps="no"]
- )
- AC_SEARCH_LIBS(dmumps_c, [`echo $MUMPS_LIBS | sed -e 's/^-l//g;s/ -l/ /g'`],
-   [usemumps="yes"],
-   [if test "x$acx_mumps_ok" = "xyes"; then
-     AC_MSG_ERROR([The function dmumps_c couldn't be found in the provided 
MUMPS libraries.]);
-    fi;
-    usemumps="no"]
- )
- AC_SEARCH_LIBS(cmumps_c, [`echo $MUMPS_LIBS | sed -e 's/^-l//g;s/ -l/ /g'`],
-   [usemumps="yes"],
-   [if test "x$acx_mumps_ok" = "xyes"; then
-     AC_MSG_ERROR([The function cmumps_c couldn't be found in the provided 
MUMPS libraries.]);
-    fi;
-    usemumps="no"]
- )
- AC_SEARCH_LIBS(zmumps_c, [`echo $MUMPS_LIBS | sed -e 's/^-l//g;s/ -l/ /g'`],
-   [usemumps="yes"],
-   [if test "x$acx_mumps_ok" = "xyes"; then
-     AC_MSG_ERROR([The function zmumps_c couldn't be found in the provided 
MUMPS libraries.]);
-    fi;
-    usemumps="no"]
- )
- AC_CHECK_HEADERS([smumps_c.h dmumps_c.h cmumps_c.h zmumps_c.h],
-   [usemumps="yes"],
-   [if test "x$acx_mumps_ok" = "xyes"; then
-     AC_MSG_ERROR([header file dmumps_c.h not found.]);
-    fi;
-    usemumps="no"]
- )
-
- if test "x$usemumps" = "xyes"; then
-   echo "Building with MUMPS (use --enable-mumps=no to disable it)"
-   LIBS="$MUMPS_LIBS $save_LIBS"
- else
-   MUMPS_LIBS=""
-   LIBS="$save_LIBS"
- fi;
-fi;
-
-AM_CONDITIONAL(MUMPS, test x$usemumps = xyes)
-AC_SUBST([MUMPS_LIBS])
-echo "Configuration of MUMPS done"
-dnl ---------------------------END OF MUMPS TEST--------------------------
-
 dnl ---------------------------METIS--------------------------
 METIS_LIBS=""
 AC_ARG_ENABLE(metis,
@@ -1068,10 +1084,10 @@ else
 
     METIS_LIBS="-lmetis"
     LIBS="$LIBS $METIS_LIBS"
-    AC_DEFINE_UNQUOTED([HAVE_METIS],1,[defined if the Metis library was found 
and is working])
+    AC_DEFINE(GETFEM_HAVE_METIS,,[defined if the Metis library was found and 
is working])
     AC_CHECK_LIB(metis, METIS_SetDefaultOptions, [usemetisnew="yes"],
-                 [AC_DEFINE_UNQUOTED([HAVE_METIS_OLD_API],1,
-                                     [defined if the Metis library found is 
older than version 4])
+                 [AC_DEFINE(GETFEM_HAVE_METIS_OLD_API,,
+                            [defined if the Metis library found is older than 
version 4])
                  ])
     echo "Building with METIS (use --enable-metis=no to disable it)"
     if test "x$usemetisnew" = "xyes"; then
@@ -1091,8 +1107,9 @@ AC_SUBST([METIS_LIBS])
 dnl ---------------------------END OF METIS--------------------------
 
 
-AC_CHECK_HEADERS(sys/times.h,[],[SUPERLU_CPPFLAGS="$SUPERLU_CPPFLAGS 
-DNO_TIMER"])
-AC_CHECK_HEADERS(cxxabi.h)
+AC_CHECK_HEADERS(sys/times.h)
+AC_CHECK_HEADERS(cxxabi.h,
+                 AC_DEFINE(GETFEM_HAVE_CXXABI_H,,[defined if the cxxabi.h 
header file is available]))
 dnl ---------------------------- CHECK FOR __PRETTY_FUNCTION__ MACRO --------
 AC_CACHE_CHECK([for __PRETTY_FUNCTION__], ac_cv_have_pretty_function, [
         AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [
@@ -1100,7 +1117,7 @@ AC_CACHE_CHECK([for __PRETTY_FUNCTION__], 
ac_cv_have_pretty_function, [
                 [ ac_cv_have_pretty_function="yes" ],
                 [ ac_cv_have_pretty_function=="no"  ])])
 if test "x$ac_cv_have_pretty_function" = "xyes"; then
-        AC_DEFINE_UNQUOTED(HAVE_PRETTY_FUNCTION,1,[gcc style 
__PRETTY_FUNCTION__ macro])
+        AC_DEFINE(GMM_HAVE_PRETTY_FUNCTION,,[gcc style __PRETTY_FUNCTION__ 
macro])
 fi;     
 
 
@@ -1112,7 +1129,7 @@ AC_CACHE_CHECK([for execinfo.h and backtrace], 
ac_cv_have_backtrace, [
                 [ ac_cv_have_backtrace="yes" ],
                 [ ac_cv_have_backtrace="no"  ])])
 if test "x$ac_cv_have_backtrace" = "xyes"; then
-        AC_DEFINE_UNQUOTED(HAVE_BACKTRACE,1,[glibc backtrace function])
+        AC_DEFINE(GETFEM_HAVE_BACKTRACE,,[glibc backtrace function])
 fi;     
 
 dnl ---------------------------- CHECK FOR feenableexcept -----
@@ -1123,7 +1140,7 @@ AC_CACHE_CHECK([for fenv.h and feenableexcept], 
ac_cv_have_feenableexcept, [
                 [ ac_cv_have_feenableexcept="yes" ],
                 [ ac_cv_have_feenableexcept="no"  ])])
 if test "x$ac_cv_have_feenableexcept" = "xyes"; then
-        AC_DEFINE_UNQUOTED(HAVE_FEENABLEEXCEPT,1,[glibc floating point 
exceptions control])
+        AC_DEFINE(GETFEM_HAVE_FEENABLEEXCEPT,,[glibc floating point exceptions 
control])
 fi;
 
 BUILDER=`whoami`
@@ -1163,7 +1180,6 @@ AC_CONFIG_FILES(                                          
              \
 Makefile                                                                \
 m4/Makefile                                                             \
 cubature/Makefile                                                       \
-$SUPERLU_MAKEFILE                                                       \
 doc/Makefile                                                            \
 doc/sphinx/Makefile                                                     \
 src/Makefile                                                            \
@@ -1245,10 +1261,24 @@ else
   echo "- Qhull not found. Mesh generation will be disabled."
 fi;
 
-if test "x$usemumps" = "xyes"; then
+if test "x$found_superlu" = "xyes"; then
+  echo "- SuperLU found. A direct solver for large sparse linear systems."
+else
+  if test "x$require_superlu" = "xno"; then
+    echo "- Not using the SuperLU library for large sparse linear systems."
+  else
+    echo "- SuperLU not found. Not using the SuperLU library for large sparse 
linear systems."
+  fi
+fi;
+
+if test "x$found_mumps" = "xyes"; then
   echo "- Mumps found. A direct solver for large sparse linear systems."
 else
-  echo "- Mumps not found. Not using the MUMPS library for large sparse linear 
systems."
+  if test "x$require_superlu" = "xno"; then
+    echo "- Not using the MUMPS library for large sparse linear systems."
+  else
+    echo "- Mumps not found. Not using the MUMPS library for large sparse 
linear systems."
+  fi
 fi;
 
 if test x"$acx_lapack_ok" = xyes; then
diff --git a/contrib/crack_plate/crack_bilaplacian_problem.cc 
b/contrib/crack_plate/crack_bilaplacian_problem.cc
index f258a788..13ad0a13 100644
--- a/contrib/crack_plate/crack_bilaplacian_problem.cc
+++ b/contrib/crack_plate/crack_bilaplacian_problem.cc
@@ -24,8 +24,6 @@
 #include "getfem/getfem_assembling.h" /* import assembly methods (and norms 
comp.) */
 #include "getfem/getfem_fourth_order.h"
 #include "getfem/getfem_model_solvers.h"
-#include "getfem/getfem_superlu.h"
-
 using std::endl; using std::cout; using std::cerr;
 using std::ends; using std::cin;
 template <typename T> std::ostream &operator <<
@@ -1084,8 +1082,12 @@ bool bilaplacian_crack_problem::solve(plain_vector &U) {
     gmm::scale(b, -1.);
     plain_vector X(b);
     scalar_type condest;
-    SuperLU_solve(A, X, gmm::scaled(b, scalar_type(-1)), condest, 1);
+#if defined(GMM_USES_MUMPS)
+    gmm::MUMPS_solve(A, X, gmm::scaled(b, scalar_type(-1)));
+#else
+    gmm::SuperLU_solve(A, X, gmm::scaled(b, scalar_type(-1)), condest, 1);
     cout << "cond super LU = " << 1./condest << "\n";
+#endif
     cout << "X = " << gmm::sub_vector(X, gmm::sub_interval(0, 10)) << "\n";
     cout << "U = " << gmm::sub_vector(U, gmm::sub_interval(0, 10)) << "\n";
 
@@ -1159,10 +1161,15 @@ bool bilaplacian_crack_problem::solve(plain_vector &U) {
       b2[ind_sing[i]] = 0.;
     }
 
-    SuperLU_solve(A, X1, b1, condest, 1);
+#if defined(GMM_USES_MUMPS)
+    gmm::MUMPS_solve(A, X1, b1);
+    gmm::MUMPS_solve(A, X2, b2);
+#else
+    gmm::SuperLU_solve(A, X1, b1, condest, 1);
     cout << "solving for s1 OK, cond = " << 1./condest << "\n";
-    SuperLU_solve(A, X2, b2, condest, 1);
+    gmm::SuperLU_solve(A, X2, b2, condest, 1);
     cout << "solving for s2 OK, cond = " << 1./condest << "\n";
+#endif
     cout << "X1[ind_sing[0]] = " << X1[ind_sing[0]] << "\n";
     cout << "X1 = " << gmm::sub_vector(X1, gmm::sub_interval(0, 10)) << "\n";
     scalar_type max1 = 0., max2 = 0.;
diff --git a/contrib/icare/icare.cc b/contrib/icare/icare.cc
index 24d05b54..10846366 100644
--- a/contrib/icare/icare.cc
+++ b/contrib/icare/icare.cc
@@ -56,7 +56,6 @@ enum {
   NEUMANN_BOUNDARY_NUM};
 
 #if GETFEM_PARA_LEVEL > 1
-#ifdef GMM_USES_MPI
   template <typename VECT> inline void MPI_SUM_VECTOR2(VECT &V) {
     typedef typename gmm::linalg_traits<VECT>::value_type T;
     std::vector<T> W(gmm::vect_size(V));
@@ -65,7 +64,8 @@ enum {
                   MPI_SUM, MPI_COMM_WORLD);
     gmm::copy(W, V);
   }
-#endif
+#else
+  template <typename VECT> inline void MPI_SUM_VECTOR2(VECT &) {}
 #endif
 
 struct problem_definition;
@@ -994,8 +994,10 @@ void navier_stokes_problem::solve_PREDICTION_CORRECTION2() 
{
   }
   K2(nbdof_p,nbdof_p) = 0.0;
 
-  gmm::SuperLU_factor<double> SLUsys2, SLUsys3,SLUsys1;
+#if !defined(GMM_USES_MUMPS)
+  gmm::SuperLU_factor<double> SLUsys2;
   SLUsys2.build_with(K2);
+#endif
 
 
/////////////////////////////////////////////////////////////////////////////////////////////////
 // dynamic problem
@@ -1139,6 +1141,9 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
 //gmm::copy(X1,gmm::sub_vector(USTAR,gmm::sub_slice(0,nbdof_u,2)));
 //gmm::copy(X2,gmm::sub_vector(USTAR,gmm::sub_slice(1,nbdof_u,2)));
 
+#if !defined(GMM_USES_MUMPS)
+  gmm::SuperLU_factor<double> SLUsys3;
+#endif
   for (scalar_type t = Tinitial + dt; t <= T; t += dt) {
 
     //******* Construction of the Matrix for the 3rd system (to obtain 
velocity) **********//
@@ -1173,7 +1178,9 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
       //gmm::add(gmm::scaled(A2u,-1.0),A2v);
       //cout <<"A2 "<< gmm::mat_norminf(A2v) << endl;
 
+#if !defined(GMM_USES_MUMPS)
       SLUsys3.build_with(A2u);
+#endif
     }
 
     if (time_order == 2 && t == Tinitial+2*dt) { // computed once
@@ -1204,7 +1211,9 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
       gmm::copy(gmm::sub_matrix(A2,SUB_CT_Vu,SUB_CT_Vu),A2u);
       //gmm::copy(gmm::sub_matrix(A2,SUB_CT_Vv,SUB_CT_Vv),A2v);
 
+#if !defined(GMM_USES_MUMPS)
       SLUsys3.build_with(A2u);
+#endif
     }
 
     //******* The matrix of the 3rd system is constructed and factorized
@@ -1262,9 +1271,7 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
       gmm::add(gmm::scaled(Un0, 2.0/dt),gmm::scaled(Unm1,-0.5/dt),Ytmp);
       gmm::mult(M, Ytmp, gmm::sub_vector(Y, I1));
     }
-#if GETFEM_PARA_LEVEL > 1
     MPI_SUM_VECTOR2(Y);
-#endif
 
     // Volumic source term -- inutile d'assambler car F = 0
     //pdef->source_term(*this, t, F);
@@ -1304,18 +1311,14 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
       plain_vector VV(mf_mult.nb_dof());
       gmm::clear(VV);
       getfem::asm_source_term(VV, mim, mf_mult, mf_rhs, F, mpidirrg); // a 
optimiser indept time
-#if GETFEM_PARA_LEVEL > 1
       MPI_SUM_VECTOR2(VV);
-#endif
       gmm::copy(gmm::sub_vector(VV, SUB_CT_DIR), gmm::sub_vector(Y, I3));
     }
     {
       plain_vector VV(mf_mult.nb_dof());
       gmm::clear(VV);
       getfem::asm_source_term(VV, mim, mf_mult, mf_rhs, F, mpidircylrg); // a 
optimiser  indept time
-#if GETFEM_PARA_LEVEL > 1
       MPI_SUM_VECTOR2(VV);
-#endif
       gmm::copy(gmm::sub_vector(VV, SUB_CT_DIR_CYL), gmm::sub_vector(Y, I3C));
     }
 
@@ -1337,9 +1340,7 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
       } else {
          asm_improved_non_reflective_bc(VV, mim, mf_u, Un0, mf_mult, dt, nu, 
mpinonrefrg);
       }
-#if GETFEM_PARA_LEVEL > 1
       MPI_SUM_VECTOR2(VV);
-#endif
       gmm::copy(gmm::sub_vector(VV, SUB_CT_NONREF), gmm::sub_vector(Y, I4));
     }
 
@@ -1347,9 +1348,7 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
     gmm::clear(YY);
     gmm::mult(gmm::transposed(B), gmm::scaled(Pn0, -1.0), YY);
 
-#if GETFEM_PARA_LEVEL > 1
     MPI_SUM_VECTOR2(YY);
-#endif
 
     gmm::add(YY, gmm::sub_vector(Y, I1));
 
@@ -1357,9 +1356,8 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
     gmm::mult(gmm::transposed(Bbc), Pn0, YY);
 
 
-#if GETFEM_PARA_LEVEL > 1
     MPI_SUM_VECTOR2(YY);
-#endif
+
     gmm::add(YY, gmm::sub_vector(Y, I1));
 
     gmm::clear(A1u);
@@ -1381,11 +1379,11 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
 
 #if  (GETFEM_PARA_LEVEL > 1 && GETFEM_PARA_SOLVER == MUMPS_PARA_SOLVER)
       MUMPS_distributed_matrix_solve(A1,X,Y);
-#elif (GETFEM_PARA_LEVEL==1 && defined(GMM_USES_MUMPS))
-      MUMPS_solve(A1,X,Y);
+#elif defined(GMM_USES_MUMPS)
+      gmm::MUMPS_solve(A1,X,Y);
       //#elif (GETFEM_PARA_LEVEL==0 && GMM_USES_MUMPS)
       //MUMPS_solve(A1,X,Y);
-#elif (GETFEM_PARA_LEVEL==0)
+#else
       // SuperLU_solve(A1, X, Y, rcond);
 
       //gmm::copy(gmm::sub_vector(Y,SUB_CT_Vu),Yu);
@@ -1395,6 +1393,7 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
       //SuperLU_solve(A1u, Xv, Yv, rcond);
 
       // Factorisation LU
+      gmm::SuperLU_factor<double> SLUsys1;
       SLUsys1.build_with(A1u);
       //SLUsys1.solve(Xu,Yu);
       //SLUsys1.solve(Xv,Yv);
@@ -1513,11 +1512,11 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
 
 #if  (GETFEM_PARA_LEVEL > 1 && GETFEM_PARA_SOLVER == MUMPS_PARA_SOLVER)
       MUMPS_distributed_matrix_solve(K2,X,Z);
-#elif (GETFEM_PARA_LEVEL==1 && defined(GMM_USES_MUMPS))
+#elif defined(GMM_USES_MUMPS)
       MUMPS_solve(K2,X,Z);
       //#elif (GETFEM_PARA_LEVEL==0 && GMM_USES_MUMPS)
       //MUMPS_solve(K2,X,Z);
-#elif (GETFEM_PARA_LEVEL==0)
+#else
       //SuperLU_solve(K2,X,Z,rcond);
       if (time_order == 1 || t == Tinitial+dt) { //time_order = 1 or first 
iterations with time_order = 2
         SLUsys2.solve(X, Z);
@@ -1575,11 +1574,11 @@ void 
navier_stokes_problem::solve_PREDICTION_CORRECTION2() {
 
 #if  (GETFEM_PARA_LEVEL > 1 && GETFEM_PARA_SOLVER == MUMPS_PARA_SOLVER)
       MUMPS_distributed_matrix_solve(A2,X,Y);
-#elif (GETFEM_PARA_LEVEL==1 && defined(GMM_USES_MUMPS))
+#elif defined(GMM_USES_MUMPS)
       MUMPS_solve(A2,X,Y);
       //#elif (GETFEM_PARA_LEVEL==0 && defined(GMM_USES_MUMPS))
       //MUMPS_solve(A2,X,Y);
-#elif (GETFEM_PARA_LEVEL==0)
+#else
       //SuperLU_solve(A2, X, Y, rcond);
       //SLUsys3.solve(X, Y);
 
diff --git a/contrib/xfem_contact/xfem_stokes.cc 
b/contrib/xfem_contact/xfem_stokes.cc
index 3bda6b57..fc787ab7 100644
--- a/contrib/xfem_contact/xfem_stokes.cc
+++ b/contrib/xfem_contact/xfem_stokes.cc
@@ -647,9 +647,13 @@ int main(int argc, char *argv[]) {
       plain_vector BE(nb_dof), BS(nb_dof);
       for (dal::bv_visitor i(dof_black_list); !i.finished(); ++i) {
         BE[i] = scalar_type(1);
+#if defined(GMM_USES_SUPERLU)
         // TODO: store LU decomp.
         double rcond;
         gmm::SuperLU_solve(EO, BS, BE, rcond);
+#else
+        gmm::MUMPS_solve(EO, BS, BE);
+#endif
         gmm::mult(gmm::transposed(T1), BS, gmm::mat_row(E1, i));
         BE[i] = scalar_type(0);
       }
diff --git a/contrib/xfem_stab_unilat_contact/xfem_stab_unilat_contact.cc 
b/contrib/xfem_stab_unilat_contact/xfem_stab_unilat_contact.cc
index 2e150d1a..4f5a7ea4 100644
--- a/contrib/xfem_stab_unilat_contact/xfem_stab_unilat_contact.cc
+++ b/contrib/xfem_stab_unilat_contact/xfem_stab_unilat_contact.cc
@@ -950,22 +950,29 @@ struct matrix_G {
   const sparse_matrix &S;
   mutable plain_vector W1, W2;
 
+#if defined(GMM_USES_SUPERLU)
   gmm::SuperLU_factor<scalar_type> SLUF;
+#endif
   matrix_G(const sparse_matrix &BB, const sparse_matrix &SS)
     : B(BB), S(SS), W1(gmm::mat_nrows(SS)), W2(gmm::mat_nrows(SS)) {
+#if defined(GMM_USES_SUPERLU)
     SLUF.build_with(SS);
+#endif
   }
 
 };
 
 
-
 template <typename vector1, typename vector2>
 void mult(const matrix_G &G, const vector1 &X, vector2 &Y) {
   gmm::mult(gmm::transposed(G.B), X, G.W1);
   // gmm::iteration it(1E-6, 0);
   // gmm::cg(G.S, G.W2, G.W1,  gmm::identity_matrix(), it);
+#if defined(GMM_USES_SUPERLU)
   G.SLUF.solve(G.W2, G.W1);
+#else
+  gmm::MUMPS_solve(G.S, G.W2, G.W1);
+#endif
   gmm::mult(G.B, G.W2, Y);
 }
 
diff --git a/interface/src/getfemint_precond.h 
b/interface/src/getfemint_precond.h
index 2fcdf808..d9a7a8c5 100644
--- a/interface/src/getfemint_precond.h
+++ b/interface/src/getfemint_precond.h
@@ -39,7 +39,7 @@
 #include <gmm/gmm_precond_ildltt.h>
 #include <gmm/gmm_precond_ilu.h>
 #include <gmm/gmm_precond_ilut.h>
-#include <getfem/getfem_superlu.h>
+#include <gmm/gmm_superlu_interface.h>
 #include <getfemint_gsparse.h>
 
 namespace getfemint {
@@ -69,7 +69,9 @@ namespace getfemint {
     std::unique_ptr<gmm::ildltt_precond<cscmat> > ildltt;
     std::unique_ptr<gmm::ilu_precond<cscmat> > ilu;
     std::unique_ptr<gmm::ilut_precond<cscmat> > ilut;
+#if defined(GMM_USES_SUPERLU)
     std::unique_ptr<gmm::SuperLU_factor<T> > superlu;
+#endif
 
     virtual size_type memsize() const {
       size_type sz = sizeof(*this);
@@ -81,7 +83,11 @@ namespace getfemint {
       case ILDLT:   sz += ildlt->memsize(); break;
       case ILDLTT:  sz += ildltt->memsize(); break;
       case SUPERLU:
+#if defined(GMM_USES_SUPERLU)
         sz += size_type(superlu->memsize());
+#else
+        GMM_ASSERT1(false, "GetFEM built without SuperLU support");
+#endif
         break;
       case SPMAT:   sz += gsp->memsize(); break;
       }
@@ -140,8 +146,12 @@ namespace gmm {
         else gmm::transposed_mult(*precond.ilut, v, w);
         break;
       case getfemint::gprecond_base::SUPERLU:
+#if defined(GMM_USES_SUPERLU)
         if (do_mult) precond.superlu->solve(w,v);
         else precond.superlu->solve(w,v,gmm::SuperLU_factor<T>::LU_TRANSP);
+#else
+        GMM_ASSERT1(false, "GetFEM built without SuperLU support");
+#endif
         break;
       case getfemint::gprecond_base::SPMAT:
         precond.gsp->mult_or_transposed_mult(v, w, !do_mult);
diff --git a/interface/src/gf_linsolve.cc b/interface/src/gf_linsolve.cc
index c4c6f063..d3f7da65 100644
--- a/interface/src/gf_linsolve.cc
+++ b/interface/src/gf_linsolve.cc
@@ -24,7 +24,7 @@
 #include <getfemint.h>
 #include <gmm/gmm_iter_solvers.h>
 #include <getfemint_misc.h>
-#include <getfem/getfem_superlu.h>
+#include <gmm/gmm_superlu_interface.h>
 #include <gmm/gmm_MUMPS_interface.h>
 
 using namespace getfemint;
@@ -85,6 +85,7 @@ void iterative_gmm_solver(iterative_gmm_solver_type stype,
   else                  iterative_gmm_solver(stype, gsp, in, out, 
scalar_type());
 }
 
+#if defined(GMM_USES_SUPERLU)
 template <typename T> static void
 superlu_solver(gsparse &gsp,
                getfemint::mexargs_in& in, getfemint::mexargs_out& out, T) {
@@ -96,6 +97,7 @@ superlu_solver(gsparse &gsp,
   if (out.remaining())
     out.pop().from_scalar(rcond ? 1./rcond : 0.);
 }
+#endif
 
 #if defined(GMM_USES_MUMPS)
 template <typename T> static void
@@ -178,6 +180,7 @@ void gf_linsolve(getfemint::mexargs_in& m_in, 
getfemint::mexargs_out& m_out) {
        );
 
 
+#if defined(GMM_USES_SUPERLU)
     /*@FUNC @CELL{U, cond} = ('lu', @tsp M, @vec b)
       Alias for ::LINSOLVE('superlu',...)@*/
     sub_command
@@ -203,6 +206,7 @@ void gf_linsolve(getfemint::mexargs_in& m_in, 
getfemint::mexargs_out& m_out) {
        if (gsp.is_complex()) superlu_solver(gsp, in, out, complex_type());
        else                  superlu_solver(gsp, in, out, scalar_type());
        );
+#endif
 
 #if defined(GMM_USES_MUMPS)
     /*@FUNC @CELL{U, cond} = ('mumps', @tsp M, @vec b)
diff --git a/interface/src/gf_precond.cc b/interface/src/gf_precond.cc
index 970eeff7..dfd42514 100644
--- a/interface/src/gf_precond.cc
+++ b/interface/src/gf_precond.cc
@@ -68,6 +68,7 @@ precond_ilut(gsparse &M, int additional_fillin, double 
threshold, mexargs_out& o
   p.ilut = std::make_unique<gmm::ilut_precond<typename 
gprecond<T>::cscmat>>(M.csc(T()), additional_fillin, threshold);
 }
 
+#if defined(GMM_USES_SUPERLU)
 template <typename T> static void
 precond_superlu(gsparse &M, mexargs_out& out, T) {
   gprecond<T> &p = precond_new(out, T());
@@ -75,6 +76,7 @@ precond_superlu(gsparse &M, mexargs_out& out, T) {
   p.superlu = std::make_unique<gmm::SuperLU_factor<T>>();
   p.superlu.get()->build_with(M.csc(T()));
 }
+#endif
 
 static void precond_spmat(gsparse *gsp, mexargs_out& out) {
   if (gsp->is_complex()) {
@@ -213,6 +215,7 @@ void gf_precond(getfemint::mexargs_in& m_in, 
getfemint::mexargs_out& m_out) {
                                         out, scalar_type());
        );
 
+#if defined(GMM_USES_SUPERLU)
     /*@INIT PC = ('superlu', @tsp m)
       Uses SuperLU to build an exact factorization of the sparse matrix `m`.
       This preconditioner is only available if the getfem-interface was
@@ -224,6 +227,7 @@ void gf_precond(getfemint::mexargs_in& m_in, 
getfemint::mexargs_out& m_out) {
        if (M->is_complex()) precond_superlu(*M, out, complex_type());
        else                 precond_superlu(*M, out, scalar_type());
        );
+#endif
 
     /*@INIT PC = ('spmat', @tsp m)
       Preconditioner given explicitely by a sparse matrix.@*/
diff --git a/interface/src/gfi_array.h b/interface/src/gfi_array.h
index 835e8234..8173b657 100644
--- a/interface/src/gfi_array.h
+++ b/interface/src/gfi_array.h
@@ -33,7 +33,7 @@
 #define GFI_ARRAY
 
 #include <sys/types.h>
-#ifdef USE_RPC
+#ifdef GETFEM_USE_RPC
 # include <rpc/types.h>
 #else
 # ifndef __u_char_defined
diff --git a/interface/src/octave/gfi_array.h b/interface/src/octave/gfi_array.h
index e12c6b2c..990905fa 100644
--- a/interface/src/octave/gfi_array.h
+++ b/interface/src/octave/gfi_array.h
@@ -33,7 +33,7 @@
 #define GFI_ARRAY
 
 #include <sys/types.h>
-#ifdef USE_RPC
+#ifdef GETFEM_USE_RPC
 # include <rpc/types.h>
 #else
 # ifndef __u_char_defined
diff --git a/interface/src/python/getfem_python.c 
b/interface/src/python/getfem_python.c
index eb9a4641..0f5c6f5d 100644
--- a/interface/src/python/getfem_python.c
+++ b/interface/src/python/getfem_python.c
@@ -795,18 +795,16 @@ getfem_env(PyObject *self, PyObject *args) {
     word_out = PyUnicode_FromString("http://home.gna.org/getfem/";);
   } else if (strcmp(word_in,"license") == 0) {
     word_out = PyUnicode_FromString("GNU LGPL v3");
-  } else if (strcmp(word_in,"package") == 0) {
-    word_out = PyUnicode_FromString(GETFEM_PACKAGE);
-  } else if (strcmp(word_in,"package_name") == 0) {
+  } else if (strcmp(word_in,"package") == 0 ||
+             strcmp(word_in,"package_name") == 0) {
     word_out = PyUnicode_FromString(GETFEM_PACKAGE_NAME);
   } else if (strcmp(word_in,"package_string") == 0) {
     word_out = PyUnicode_FromString(GETFEM_PACKAGE_STRING);
-  } else if(strcmp(word_in,"package_tarname") == 0) {
+  } else if (strcmp(word_in,"package_tarname") == 0) {
     word_out = PyUnicode_FromString(GETFEM_PACKAGE_TARNAME);
-  } else if(strcmp(word_in,"package_version") == 0 ||
-            strcmp(word_in,"release") == 0) {
-    word_out = PyUnicode_FromString(GETFEM_PACKAGE_VERSION);
-  } else if(strcmp(word_in,"version") == 0) {
+  } else if (strcmp(word_in,"package_version") == 0 ||
+             strcmp(word_in,"release") == 0 ||
+             strcmp(word_in,"version") == 0) {
     word_out = PyUnicode_FromString(GETFEM_VERSION);
   } else {
     word_out = PyUnicode_FromString("");
diff --git a/src/Makefile.am b/src/Makefile.am
index 1e10d741..c002f042 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -158,7 +158,6 @@ nobase_include_HEADERS =                            \
        getfem/getfem_nonlinear_elasticity.h            \
        getfem/getfem_fourth_order.h                    \
        getfem/getfem_Navier_Stokes.h                   \
-       getfem/getfem_superlu.h                         \
        getfem/getfem_plasticity.h                      \
        getfem/getfem_omp.h                             \
        getfem/getfem_continuation.h                    \
@@ -190,7 +189,6 @@ SRC =                                               \
        bgeot_ftool.cc                                  \
        getfem_models.cc                                \
        getfem_model_solvers.cc                         \
-       getfem_superlu.cc                               \
        getfem_mesh.cc                                  \
        getfem_mesh_region.cc                           \
        getfem_context.cc                               \
diff --git a/src/getfem/bgeot_config.h b/src/getfem/bgeot_config.h
index ee5e7783..9c7ce5e9 100644
--- a/src/getfem/bgeot_config.h
+++ b/src/getfem/bgeot_config.h
@@ -37,6 +37,7 @@
 #ifndef BGEOT_CONFIG_H__
 #define BGEOT_CONFIG_H__
 
+#include "gmm/gmm_arch_config.h"
 #include "getfem/getfem_arch_config.h"
 
 #if defined(GETFEM_HAVE_FEENABLEEXCEPT)
diff --git a/src/getfem/getfem_arch_config.h.in 
b/src/getfem/getfem_arch_config.h.in
new file mode 100644
index 00000000..d50a5028
--- /dev/null
+++ b/src/getfem/getfem_arch_config.h.in
@@ -0,0 +1,49 @@
+/* manually created getfem_arch_config.h.in template to be used by autotools.  
*/
+
+/* enable openblas to be multithreaded */
+#undef GETFEM_FORCE_SINGLE_THREAD_BLAS
+
+/* defined if GetFEM is built with OpenMP parallelization */
+#undef GETFEM_HAS_OPENMP
+
+/* glibc backtrace function */
+#undef GETFEM_HAVE_BACKTRACE
+
+/* defined if the cxxabi.h header file is available */
+#undef GETFEM_HAVE_CXXABI_H
+
+/* glibc floating point exceptions control */
+#undef GETFEM_HAVE_FEENABLEEXCEPT
+
+/* defined if the <libqhull_r/qhull_ra.h> header file is available */
+#undef GETFEM_HAVE_LIBQHULL_R_QHULL_RA_H
+
+/* defined if the Metis library was found and is working */
+#undef GETFEM_HAVE_METIS
+
+/* defined if the Metis library found is older than version 4 */
+#undef GETFEM_HAVE_METIS_OLD_API
+
+/* defined if the qd library was found and is working */
+#undef GETFEM_HAVE_QDLIB
+
+/* GetFEM package name */
+#undef GETFEM_PACKAGE_NAME
+
+/* GetFEM package string */
+#undef GETFEM_PACKAGE_STRING
+
+/* GetFEM package tarname */
+#undef GETFEM_PACKAGE_TARNAME
+
+/* Parallelization level (0|1|2) */
+#undef GETFEM_PARA_LEVEL
+
+/* defined if quad-doubles are to be used instead of double-double */
+#undef GETFEM_QDLIB_USE_QUAD
+
+/* Use rpc for getfem communication with matlab */
+#undef GETFEM_USE_RPC
+
+/* GetFEM version */
+#undef GETFEM_VERSION
diff --git a/src/getfem/getfem_config.h b/src/getfem/getfem_config.h
index b04219fe..64bd5e1d 100644
--- a/src/getfem/getfem_config.h
+++ b/src/getfem/getfem_config.h
@@ -167,13 +167,6 @@
 #define GETFEM_MPI_INIT(argc, argv) {GMM_TRACE1("Running sequential Getfem");}
 #define GETFEM_MPI_FINALIZE {}
 
-#if defined(GETFEM_HAVE_DMUMPS_C_H)
-# if !defined(GMM_USES_MUMPS)
-#   define GMM_USES_MUMPS
-# endif
-#endif
-
-
 #if GMM_USES_MPI > 0
 # include <mpi.h>
 
@@ -190,28 +183,12 @@
 # undef GETFEM_MPI_FINALIZE 
 # define GETFEM_MPI_FINALIZE { MPI_Finalize(); }
 
-// GETFEM_PARA_SOLVER is the parallelisation solver used
-//    MUMPS       : use direct parallel solver MUMPS
-//    SCHWARZADD  : use a Schwarz additive method
-#define MUMPS_PARA_SOLVER 1
-#define SCHWARZADD_PARA_SOLVER 2
-
-# ifndef GETFEM_PARA_SOLVER
-#   define GETFEM_PARA_SOLVER MUMPS_PARA_SOLVER
-# endif
-
-# if GETFEM_PARA_SOLVER == MUMPS_PARA_SOLVER
-#  ifndef GMM_USES_MUMPS
-#    define GMM_USES_MUMPS
-#  endif
-# endif
-
 #endif
 
 
 #include "bgeot_tensor.h"
 #include "bgeot_poly.h"
-#include "getfem_superlu.h"
+
 
 /// GEneric Tool for Finite Element Methods.
 namespace getfem {
@@ -221,6 +198,17 @@ namespace getfem {
   using gmm::vref;
 
 #if GETFEM_PARA_LEVEL > 1
+
+// GETFEM_PARA_SOLVER is the parallelisation solver used
+//    MUMPS       : use direct parallel solver MUMPS
+//    SCHWARZADD  : use a Schwarz additive method
+# define MUMPS_PARA_SOLVER 1
+# define SCHWARZADD_PARA_SOLVER 2
+
+# ifndef GETFEM_PARA_SOLVER
+#   define GETFEM_PARA_SOLVER MUMPS_PARA_SOLVER
+# endif
+
   template <typename T> inline T MPI_SUM_SCALAR(T a) {
     T b; MPI_Allreduce(&a,&b,1,gmm::mpi_type(a), MPI_SUM, MPI_COMM_WORLD);
     return b;
diff --git a/src/getfem/getfem_model_solvers.h 
b/src/getfem/getfem_model_solvers.h
index 4472bb9e..da570c01 100644
--- a/src/getfem/getfem_model_solvers.h
+++ b/src/getfem/getfem_model_solvers.h
@@ -40,6 +40,7 @@
 #ifndef GETFEM_MODEL_SOLVERS_H__
 #define GETFEM_MODEL_SOLVERS_H__
 #include "getfem_models.h"
+#include "gmm/gmm_superlu_interface.h"
 #include "gmm/gmm_MUMPS_interface.h"
 #include "gmm/gmm_iter.h"
 #include "gmm/gmm_iter_solvers.h"
@@ -141,6 +142,7 @@ namespace getfem {
     }
   };
 
+#if defined(GMM_USES_SUPERLU)
   template <typename MAT, typename VECT>
   struct linear_solver_superlu
     : public abstract_linear_solver<MAT, VECT> {
@@ -150,11 +152,12 @@ namespace getfem {
       /*gmm::HarwellBoeing_IO::write("test.hb", M);
       std::fstream f("bbb", std::ios::out);
       for (unsigned i=0; i < gmm::vect_size(b); ++i) f << b[i] << "\n";*/
-      int info = SuperLU_solve(M, x, b, rcond);
+      int info = gmm::SuperLU_solve(M, x, b, rcond);
       iter.enforce_converged(info == 0);
       if (iter.get_noisy()) cout << "condition number: " << 1.0/rcond<< endl;
     }
   };
+#endif
 
   template <typename MAT, typename VECT>
   struct linear_solver_dense_lu : public abstract_linear_solver<MAT, VECT> {
@@ -640,8 +643,11 @@ namespace getfem {
         return std::make_shared<linear_solver_mumps_sym<MATRIX, VECTOR>>();
       else
         return std::make_shared<linear_solver_mumps<MATRIX, VECTOR>>();
-# else
+# elif defined(GMM_USES_SUPERLU)
       return std::make_shared<linear_solver_superlu<MATRIX, VECTOR>>();
+# else
+      static_assert(false,
+                    "At least one direct solver (MUMPS or SuperLU) is 
required");
 # endif
     }
     else {
@@ -665,8 +671,13 @@ namespace getfem {
   std::shared_ptr<abstract_linear_solver<MATRIX, VECTOR>>
   select_linear_solver(const model &md, const std::string &name) {
     std::shared_ptr<abstract_linear_solver<MATRIX, VECTOR>> p;
-    if (bgeot::casecmp(name, "superlu") == 0)
+    if (bgeot::casecmp(name, "superlu") == 0) {
+#if defined(GMM_USES_SUPERLU)
       return std::make_shared<linear_solver_superlu<MATRIX, VECTOR>>();
+#else
+      GMM_ASSERT1(false, "SuperLU is not interfaced");
+#endif
+    }
     else if (bgeot::casecmp(name, "dense_lu") == 0)
       return std::make_shared<linear_solver_dense_lu<MATRIX, VECTOR>>();
     else if (bgeot::casecmp(name, "mumps") == 0) {
@@ -723,10 +734,10 @@ namespace getfem {
   add a special traitement on the problem, etc ...  This is in
   fact a model for your own solver.
 
-  For small problems, a direct solver is used
-  (getfem::SuperLU_solve), for larger problems, a conjugate
-  gradient gmm::cg (if the problem is coercive) or a gmm::gmres is
-  used (preconditioned with an incomplete factorization).
+  For small problems, a direct solver is used (gmm::SuperLU_solve),
+  for larger problems, a conjugate gradient gmm::cg (if the problem is
+  coercive) or a gmm::gmres is used (preconditioned with an incomplete
+  factorization).
 
   When MPI/METIS is enabled, a partition is done via METIS, and a parallel
   solver can be used.
diff --git a/src/getfem/getfem_superlu.h b/src/getfem/getfem_superlu.h
deleted file mode 100644
index 06b7c738..00000000
--- a/src/getfem/getfem_superlu.h
+++ /dev/null
@@ -1,130 +0,0 @@
-/* -*- c++ -*- (enables emacs c++ mode) */
-/*===========================================================================
-
- Copyright (C) 2004-2020 Julien Pommier
-
- This file is a part of GetFEM
-
- GetFEM  is  free software;  you  can  redistribute  it  and/or modify it
- under  the  terms  of the  GNU  Lesser General Public License as published
- by  the  Free Software Foundation;  either version 3 of the License,  or
- (at your option) any later version along with the GCC Runtime Library
- Exception either version 3.1 or (at your option) any later version.
- This program  is  distributed  in  the  hope  that it will be useful,  but
- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
- License and GCC Runtime Library Exception for more details.
- You  should  have received a copy of the GNU Lesser General Public License
- along  with  this program;  if not, write to the Free Software Foundation,
- Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
-
- As a special exception, you  may use  this file  as it is a part of a free
- software  library  without  restriction.  Specifically,  if   other  files
- instantiate  templates  or  use macros or inline functions from this file,
- or  you compile this  file  and  link  it  with other files  to produce an
- executable, this file  does  not  by itself cause the resulting executable
- to be covered  by the GNU Lesser General Public License.  This   exception
- does not  however  invalidate  any  other  reasons why the executable file
- might be covered by the GNU Lesser General Public License.
-
-===========================================================================*/
-
-/**@file getfem_superlu.h
-   @author  Julien Pommier <Julien.Pommier@insa-toulouse.fr>
-   @date August 2004.
-   @brief SuperLU interface for getfem
-   
-   We do not use gmm_superlu_interface.h for a good reason. This file
-   does not include any of the superlu headers, hence when getfem is
-   installed, it does not need to install the superlu headers.
-*/
-
-#ifndef GETFEM_SUPERLU
-#define GETFEM_SUPERLU
-#ifndef GMM_USES_SUPERLU
-#define GMM_USES_SUPERLU
-#endif
-#include "getfem_config.h"
-#include "gmm/gmm_kernel.h"
-
-namespace gmm {
-
-  template<typename T>
-  int SuperLU_solve(const gmm::csc_matrix<T> &A, T *X_, T *B, double& rcond_, 
int permc_spec = 3);
-  /** solve a sparse linear system AX=B (float, double, complex<float>
-      or complex<double>) via SuperLU.
-
-      @param A the matrix (a copy is made if A is not a gmm::csc_matrix)
-      @param X the solution.
-      @param B the right hand side.
-      @param rcond_ contains on output an estimate of the condition number of 
A.
-      @param permc_spec specify the kind of renumbering than SuperLU should do.
-  */
-  template<typename MAT, typename V1, typename V2>
-  int SuperLU_solve(const MAT &A, const V1& X, const V2& B, double& rcond_, 
int permc_spec = 3) {
-    typedef typename gmm::linalg_traits<MAT>::value_type T;
-    
-    int m = int(mat_nrows(A)), n = int(mat_ncols(A));
-    gmm::csc_matrix<T> csc_A(m,n); 
-    gmm::copy(A,csc_A);
-    std::vector<T> rhs(m), sol(m);
-    gmm::copy(B, rhs);
-    
-    int info = SuperLU_solve(csc_A, &sol[0], &rhs[0], rcond_, permc_spec);
-    gmm::copy(sol, const_cast<V1 &>(X));
-    return info;
-  }
-  
-  struct SuperLU_factor_impl_common;
-
-  /** Factorization of a sparse matrix with SuperLU.
-      
-      This class can be used as a preconditioner for gmm iterative solvers.
-  */
-  template <class T> class SuperLU_factor {
-    std::shared_ptr<SuperLU_factor_impl_common> impl;
-  public :
-    enum { LU_NOTRANSP, LU_TRANSP, LU_CONJUGATED };
-
-    /** Do the factorization of the supplied sparse matrix. */
-    template <class MAT> void build_with(const MAT &A,  int permc_spec = 3) {
-      int m = int(mat_nrows(A)), n = int(mat_ncols(A));
-      gmm::csc_matrix<T> csc_A(m,n); 
-      gmm::copy(A,csc_A);
-      build_with(csc_A, permc_spec);
-    }
-    void build_with(const gmm::csc_matrix<T> &A, int permc_spec = 3);
-    template <typename VECTX, typename VECTB> 
-    /** After factorization, do the triangular solves.
-       transp = LU_NOTRANSP   -> solves Ax = B
-       transp = LU_TRANSP     -> solves A'x = B
-       transp = LU_CONJUGATED -> solves conj(A)X = B 
-    */
-    void solve(const VECTX &X, const VECTB &B, int transp=LU_NOTRANSP) const {
-      gmm::copy(B, rhs());
-      solve(transp);
-      gmm::copy(sol(),const_cast<VECTX &>(X));
-    }
-    void solve(int transp=LU_NOTRANSP) const;
-    std::vector<T> &sol() const;
-    std::vector<T> &rhs() const;
-    SuperLU_factor();
-    float memsize() const;
-    SuperLU_factor(const SuperLU_factor& other);
-    SuperLU_factor& operator=(const SuperLU_factor& other);
-  };
-
-  template <typename T, typename V1, typename V2> inline
-  void mult(const SuperLU_factor<T>& P, const V1 &v1, const V2 &v2) {
-    P.solve(v2,v1);
-  }
-
-  template <typename T, typename V1, typename V2> inline
-  void transposed_mult(const SuperLU_factor<T>& P,const V1 &v1,const V2 &v2) {
-    P.solve(v2, v1, SuperLU_factor<T>::LU_TRANSP);
-  }
-}
-
-extern "C" void set_superlu_callback(int (*cb)());
-
-#endif
diff --git a/src/getfem_superlu.cc b/src/getfem_superlu.cc
deleted file mode 100644
index fe7d2803..00000000
--- a/src/getfem_superlu.cc
+++ /dev/null
@@ -1,430 +0,0 @@
-/*===========================================================================
-
- Copyright (C) 2004-2020 Julien Pommier
-
- This file is a part of GetFEM
-
- GetFEM  is  free software;  you  can  redistribute  it  and/or modify it
- under  the  terms  of the  GNU  Lesser General Public License as published
- by  the  Free Software Foundation;  either version 3 of the License,  or
- (at your option) any later version along with the GCC Runtime Library
- Exception either version 3.1 or (at your option) any later version.
- This program  is  distributed  in  the  hope  that it will be useful,  but
- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
- License and GCC Runtime Library Exception for more details.
- You  should  have received a copy of the GNU Lesser General Public License
- along  with  this program;  if not, write to the Free Software Foundation,
- Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
-
-===========================================================================*/
-
-#include "getfem/getfem_superlu.h"
-
-typedef int int_t;
-
-/* because slu_util.h defines TRUE and FALSE ... */
-#ifdef TRUE
-# undef TRUE
-#endif
-#ifdef FALSE
-# undef FALSE
-#endif
-
-#include "superlu/slu_Cnames.h"
-#include "superlu/supermatrix.h"
-#include "superlu/slu_util.h"
-
-namespace SuperLU_S {
-#include "superlu/slu_sdefs.h"
-}
-namespace SuperLU_D {
-#include "superlu/slu_ddefs.h"
-}
-namespace SuperLU_C {
-#include "superlu/slu_cdefs.h"
-}
-namespace SuperLU_Z {
-#include "superlu/slu_zdefs.h"
-}
-
-
-
-namespace gmm {
-
-  /*  interface for Create_CompCol_Matrix */
-
-  inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
-                                    float *a, int *ir, int *jc) {
-    SuperLU_S::sCreate_CompCol_Matrix(A, m, n, nnz, a, ir, jc,
-                                      SLU_NC, SLU_S, SLU_GE);
-  }
-
-  inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
-                                    double *a, int *ir, int *jc) {
-    SuperLU_D::dCreate_CompCol_Matrix(A, m, n, nnz, a, ir, jc,
-                                      SLU_NC, SLU_D, SLU_GE);
-  }
-
-  inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
-                                    std::complex<float> *a, int *ir, int *jc) {
-    SuperLU_C::cCreate_CompCol_Matrix(A, m, n, nnz, (SuperLU_C::complex *)(a),
-                                      ir, jc, SLU_NC, SLU_C, SLU_GE);
-  }
-
-  inline void Create_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz,
-                             std::complex<double> *a, int *ir, int *jc) {
-    SuperLU_Z::zCreate_CompCol_Matrix(A, m, n, nnz,
-                                      (SuperLU_Z::doublecomplex *)(a), ir, jc,
-                                      SLU_NC, SLU_Z, SLU_GE);
-  }
-
-  /*  interface for Create_Dense_Matrix */
-
-  inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n, float *a, int 
k)
-  { SuperLU_S::sCreate_Dense_Matrix(A, m, n, a, k, SLU_DN, SLU_S, SLU_GE); }
-  inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n, double *a, int 
k)
-  { SuperLU_D::dCreate_Dense_Matrix(A, m, n, a, k, SLU_DN, SLU_D, SLU_GE); }
-  inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n,
-                                  std::complex<float> *a, int k) {
-    SuperLU_C::cCreate_Dense_Matrix(A, m, n, (SuperLU_C::complex *)(a),
-                                    k, SLU_DN, SLU_C, SLU_GE);
-  }
-  inline void Create_Dense_Matrix(SuperMatrix *A, int m, int n,
-                                  std::complex<double> *a, int k) {
-    SuperLU_Z::zCreate_Dense_Matrix(A, m, n, (SuperLU_Z::doublecomplex *)(a),
-                                    k, SLU_DN, SLU_Z, SLU_GE);
-  }
-
-  /*  interface for gssv */
-
-#define DECL_GSSV(NAMESPACE,FNAME,FLOATTYPE,KEYTYPE) \
-  inline void SuperLU_gssv(superlu_options_t *options, SuperMatrix *A, int *p, 
\
-  int *q, SuperMatrix *L, SuperMatrix *U, SuperMatrix *B,               \
-  SuperLUStat_t *stats, int *info, KEYTYPE) {                           \
-  NAMESPACE::FNAME(options, A, p, q, L, U, B, stats, info);             \
-  }
-
-  DECL_GSSV(SuperLU_S,sgssv,float,float)
-  DECL_GSSV(SuperLU_C,cgssv,float,std::complex<float>)
-  DECL_GSSV(SuperLU_D,dgssv,double,double)
-  DECL_GSSV(SuperLU_Z,zgssv,double,std::complex<double>)
-
-  /*  interface for gssvx */
-
-#define DECL_GSSVX(NAMESPACE,FNAME,FLOATTYPE,KEYTYPE) \
-    inline float SuperLU_gssvx(superlu_options_t *options, SuperMatrix *A,     
    \
-                               int *perm_c, int *perm_r, int *etree, char 
*equed,  \
-                               FLOATTYPE *R, FLOATTYPE *C, SuperMatrix *L,     
    \
-                               SuperMatrix *U, void *work, int lwork,          
    \
-                               SuperMatrix *B, SuperMatrix *X,                 
    \
-                               FLOATTYPE *recip_pivot_growth,                  
    \
-                               FLOATTYPE *rcond, FLOATTYPE *ferr, FLOATTYPE 
*berr, \
-                               SuperLUStat_t *stats, int *info, KEYTYPE) {     
    \
-    NAMESPACE::mem_usage_t mem_usage;                                    \
-    NAMESPACE::FNAME(options, A, perm_c, perm_r, etree, equed, R, C, L,  \
-                     U, work, lwork, B, X, recip_pivot_growth, rcond,    \
-                     ferr, berr, &mem_usage, stats, info);               \
-    return mem_usage.for_lu; /* bytes used by the factor storage */     \
-  }
-
-  DECL_GSSVX(SuperLU_S,sgssvx,float,float)
-  DECL_GSSVX(SuperLU_C,cgssvx,float,std::complex<float>)
-  DECL_GSSVX(SuperLU_D,dgssvx,double,double)
-  DECL_GSSVX(SuperLU_Z,zgssvx,double,std::complex<double>)
-
-  /* ********************************************************************* */
-  /*   SuperLU solve interface                                             */
-  /* ********************************************************************* */
-
-  template<typename T>
-  int SuperLU_solve(const gmm::csc_matrix<T> &csc_A, T *sol, T *rhs,
-                    double& rcond_, int permc_spec) {
-    /*
-     * Get column permutation vector perm_c[], according to permc_spec:
-     *   permc_spec = 0: use the natural ordering
-     *   permc_spec = 1: use minimum degree ordering on structure of A'*A
-     *   permc_spec = 2: use minimum degree ordering on structure of A'+A
-     *   permc_spec = 3: use approximate minimum degree column ordering
-     */
-    typedef typename gmm::number_traits<T>::magnitude_type R;
-
-    int m = int(mat_nrows(csc_A)), n = int(mat_ncols(csc_A));
-    int nrhs = 1, info = 0, nz = int(nnz(csc_A));
-
-    GMM_ASSERT1(nz != 0, "Cannot factor a matrix full of zeros!");
-    GMM_ASSERT1(n == m, "Cannot factor a non-square matrix");
-
-    if ((2 * nz / n) >= m)
-      GMM_WARNING2("CAUTION : it seems that SuperLU has a problem"
-                   " for nearly dense sparse matrices");
-
-    superlu_options_t options;
-    set_default_options(&options);
-    options.ColPerm = NATURAL;
-    options.PrintStat = NO;
-    options.ConditionNumber = YES;
-    switch (permc_spec) {
-    case 1 : options.ColPerm = MMD_ATA; break;
-    case 2 : options.ColPerm = MMD_AT_PLUS_A; break;
-    case 3 : options.ColPerm = COLAMD; break;
-    }
-    SuperLUStat_t stat;
-    StatInit(&stat);
-
-    SuperMatrix SA, SL, SU, SB, SX; // SuperLU format.
-    Create_CompCol_Matrix(&SA, m, n, nz, const_cast<T*>(&csc_A.pr[0]),
-                          const_cast<int *>((const int *)(&csc_A.ir[0])),
-                          const_cast<int *>((const int *)(&csc_A.jc[0])));
-    Create_Dense_Matrix(&SB, m, nrhs, &rhs[0], m);
-    Create_Dense_Matrix(&SX, m, nrhs, &sol[0], m);
-    memset(&SL,0,sizeof SL);
-    memset(&SU,0,sizeof SU);
-
-    std::vector<int> etree(n);
-    char equed[] = "B";
-    std::vector<R> Rscale(m),Cscale(n); // row scale factors
-    std::vector<R> ferr(nrhs), berr(nrhs);
-    R recip_pivot_gross, rcond;
-    std::vector<int> perm_r(m), perm_c(n);
-
-    SuperLU_gssvx(&options, &SA, &perm_c[0], &perm_r[0],
-                  &etree[0] /* output */, equed /* output         */,
-                  &Rscale[0] /* row scale factors (output)        */,
-                  &Cscale[0] /* col scale factors (output)        */,
-                  &SL /* fact L (output)*/, &SU /* fact U (output)*/,
-                  NULL /* work                                    */,
-                  0 /* lwork: superlu auto allocates (input)      */,
-                  &SB /* rhs */, &SX /* solution                  */,
-                  &recip_pivot_gross /* reciprocal pivot growth   */
-                  /* factor max_j( norm(A_j)/norm(U_j) ).         */,
-                  &rcond /*estimate of the reciprocal condition   */
-                  /* number of the matrix A after equilibration   */,
-                  &ferr[0] /* estimated forward error             */,
-                  &berr[0] /* relative backward error             */,
-                  &stat, &info, T());
-    rcond_ = rcond;
-    if (SB.Store) Destroy_SuperMatrix_Store(&SB);
-    if (SX.Store) Destroy_SuperMatrix_Store(&SX);
-    if (SA.Store) Destroy_SuperMatrix_Store(&SA);
-    if (SL.Store) Destroy_SuperNode_Matrix(&SL);
-    if (SU.Store) Destroy_CompCol_Matrix(&SU);
-    StatFree(&stat);
-    GMM_ASSERT1(info != -333333333, "SuperLU was cancelled."); // user 
interruption (for matlab interface)
-
-    GMM_ASSERT1(info >= 0, "SuperLU solve failed: info =" << info);
-    if (info > 0) GMM_WARNING1("SuperLU solve failed: info =" << info);
-    return info;
-  }
-
-  template int SuperLU_solve(const gmm::csc_matrix<float> &csc_A, float *sol, 
float *rhs, double& rcond_, int permc_spec);
-  template int SuperLU_solve(const gmm::csc_matrix<double> &csc_A, double 
*sol, double *rhs, double& rcond_, int permc_spec);
-  template int SuperLU_solve(const gmm::csc_matrix<std::complex<float> > 
&csc_A, std::complex<float> *sol, std::complex<float> *rhs, double& rcond_, int 
permc_spec);
-  template int SuperLU_solve(const gmm::csc_matrix<std::complex<double> > 
&csc_A, std::complex<double> *sol, std::complex<double> *rhs, double& rcond_, 
int permc_spec);
-
-  struct SuperLU_factor_impl_common {
-    mutable SuperMatrix SA, SL, SB, SU, SX;
-    mutable SuperLUStat_t stat;
-    mutable superlu_options_t options;
-    float memory_used;
-    mutable bool is_init;
-    mutable char equed;
-    void free_supermatrix() {
-      if (is_init) {
-        if (SB.Store) Destroy_SuperMatrix_Store(&SB);
-        if (SX.Store) Destroy_SuperMatrix_Store(&SX);
-        if (SA.Store) Destroy_SuperMatrix_Store(&SA);
-        if (SL.Store) Destroy_SuperNode_Matrix(&SL);
-        if (SU.Store) Destroy_CompCol_Matrix(&SU);
-      }
-    }
-    SuperLU_factor_impl_common() : is_init(false) {}
-    virtual ~SuperLU_factor_impl_common() { free_supermatrix(); }
-  };
-
-  template <typename T> struct SuperLU_factor_impl : public 
SuperLU_factor_impl_common {
-    typedef typename gmm::number_traits<T>::magnitude_type R;
-
-    std::vector<int> etree, perm_r, perm_c;
-    std::vector<R> Rscale, Cscale;
-    std::vector<R> ferr, berr;
-    std::vector<T> rhs;
-    std::vector<T> sol;
-    void build_with(const gmm::csc_matrix<T> &A, int permc_spec);
-    void solve(int transp);
-  };
-
-  template <typename T>
-  void SuperLU_factor_impl<T>::build_with(const gmm::csc_matrix<T> &A, int 
permc_spec) {
-    /*
-     * Get column permutation vector perm_c[], according to permc_spec:
-     *   permc_spec = 0: use the natural ordering
-     *   permc_spec = 1: use minimum degree ordering on structure of A'*A
-     *   permc_spec = 2: use minimum degree ordering on structure of A'+A
-     *   permc_spec = 3: use approximate minimum degree column ordering
-     */
-    free_supermatrix();
-    int n = int(mat_nrows(A)), m = int(mat_ncols(A)), info = 0;
-
-    rhs.resize(m); sol.resize(m);
-    gmm::clear(rhs);
-    int nz = int(nnz(A));
-
-    GMM_ASSERT1(nz != 0, "Cannot factor a matrix full of zeros!");
-    GMM_ASSERT1(n == m, "Cannot factor a non-square matrix");
-
-    set_default_options(&options);
-    options.ColPerm = NATURAL;
-    options.PrintStat = NO;
-    options.ConditionNumber = NO;
-    switch (permc_spec) {
-      case 1 : options.ColPerm = MMD_ATA; break;
-      case 2 : options.ColPerm = MMD_AT_PLUS_A; break;
-      case 3 : options.ColPerm = COLAMD; break;
-    }
-    StatInit(&stat);
-
-    Create_CompCol_Matrix(&SA, m, n, nz, const_cast<T*>(&A.pr[0]),
-                          const_cast<int *>((const int *)(&A.ir[0])),
-                          const_cast<int *>((const int *)(&A.jc[0])));
-    Create_Dense_Matrix(&SB, m, 0, &rhs[0], m);
-    Create_Dense_Matrix(&SX, m, 0, &sol[0], m);
-    memset(&SL,0,sizeof SL);
-    memset(&SU,0,sizeof SU);
-
-
-    equed = 'B';
-    Rscale.resize(m); Cscale.resize(n); etree.resize(n);
-    ferr.resize(1); berr.resize(1);
-    R recip_pivot_gross, rcond;
-    perm_r.resize(m); perm_c.resize(n);
-    memory_used = SuperLU_gssvx(&options, &SA, &perm_c[0], &perm_r[0],
-                                &etree[0] /* output */, &equed /* output       
 */,
-                                &Rscale[0] /* row scale factors (output)       
 */,
-                                &Cscale[0] /* col scale factors (output)       
 */,
-                                &SL /* fact L (output)*/, &SU /* fact U 
(output)*/,
-                                NULL /* work                                   
 */,
-                                0 /* lwork: superlu auto allocates (input)     
 */,
-                                &SB /* rhs */, &SX /* solution                 
 */,
-                                &recip_pivot_gross /* reciprocal pivot growth  
 */
-                                /* factor max_j( norm(A_j)/norm(U_j) ).        
 */,
-                                &rcond /*estimate of the reciprocal condition  
 */
-                                /* number of the matrix A after equilibration  
 */,
-                                &ferr[0] /* estimated forward error            
 */,
-                                &berr[0] /* relative backward error            
 */,
-                                &stat, &info, T());
-
-    Destroy_SuperMatrix_Store(&SB);
-    Destroy_SuperMatrix_Store(&SX);
-    Create_Dense_Matrix(&SB, m, 1, &rhs[0], m);
-    Create_Dense_Matrix(&SX, m, 1, &sol[0], m);
-    StatFree(&stat);
-
-    GMM_ASSERT1(info != -333333333, "SuperLU was cancelled.");
-    GMM_ASSERT1(info == 0, "SuperLU solve failed: info=" << info);
-    is_init = true;
-  }
-
-  template <typename T>
-  void SuperLU_factor_impl<T>::solve(int transp) {
-    options.Fact = FACTORED;
-    options.IterRefine = NOREFINE;
-    switch (transp) {
-      case SuperLU_factor<T>::LU_NOTRANSP: options.Trans = NOTRANS; break;
-      case SuperLU_factor<T>::LU_TRANSP: options.Trans = TRANS; break;
-      case SuperLU_factor<T>::LU_CONJUGATED: options.Trans = CONJ; break;
-      default: GMM_ASSERT1(false, "invalid value for transposition option");
-    }
-    StatInit(&stat);
-    int info = 0;
-    R recip_pivot_gross, rcond;
-    SuperLU_gssvx(&options, &SA, &perm_c[0], &perm_r[0],
-                  &etree[0] /* output */, &equed /* output        */,
-                  &Rscale[0] /* row scale factors (output)        */,
-                  &Cscale[0] /* col scale factors (output)        */,
-                  &SL /* fact L (output)*/, &SU /* fact U (output)*/,
-                  NULL /* work                                    */,
-                  0 /* lwork: superlu auto allocates (input)      */,
-                  &SB /* rhs */, &SX /* solution                  */,
-                  &recip_pivot_gross /* reciprocal pivot growth   */
-                  /* factor max_j( norm(A_j)/norm(U_j) ).         */,
-                  &rcond /*estimate of the reciprocal condition   */
-                  /* number of the matrix A after equilibration   */,
-                  &ferr[0] /* estimated forward error             */,
-                  &berr[0] /* relative backward error             */,
-                  &stat, &info, T());
-    StatFree(&stat);
-    GMM_ASSERT1(info == 0, "SuperLU solve failed: info=" << info);
-  }
-
-  template<typename T> void
-  SuperLU_factor<T>::build_with(const gmm::csc_matrix<T> &A, int permc_spec) {
-    ((SuperLU_factor_impl<T>*)impl.get())->build_with(A,permc_spec);
-  }
-
-  template<typename T> void
-  SuperLU_factor<T>::solve(int transp) const {
-    ((SuperLU_factor_impl<T>*)impl.get())->solve(transp);
-  }
-
-  template<typename T> std::vector<T> &
-  SuperLU_factor<T>::sol() const {
-    return ((SuperLU_factor_impl<T>*)impl.get())->sol;
-  }
-
-  template<typename T> std::vector<T> &
-  SuperLU_factor<T>::rhs() const {
-    return ((SuperLU_factor_impl<T>*)impl.get())->rhs;
-  }
-
-  template<typename T>
-  SuperLU_factor<T>::SuperLU_factor() {
-    impl = std::make_shared<SuperLU_factor_impl<T>>();
-  }
-
-  template<typename T>
-  SuperLU_factor<T>::SuperLU_factor(const SuperLU_factor& other) {
-    impl =  std::make_shared<SuperLU_factor_impl<T>>();
-    GMM_ASSERT1(!(other.impl->is_init),
-                "copy of initialized SuperLU_factor is forbidden");
-    other.impl->is_init = false;
-  }
-
-  template<typename T> SuperLU_factor<T>&
-  SuperLU_factor<T>::operator=(const SuperLU_factor& other) {
-    GMM_ASSERT1(!(other.impl->is_init) && !(impl->is_init),
-                "assignment of initialized SuperLU_factor is forbidden");
-    return *this;
-  }
-
-  template<typename T> float
-  SuperLU_factor<T>::memsize() const {
-    return impl->memory_used;
-  }
-
-  /*  void force_instantiation() {
-    SuperLU_factor<float> a;
-    SuperLU_factor<double> b;
-    SuperLU_factor<std::complex<float> > c;
-    SuperLU_factor<std::complex<double> > d;
-    //a = 0; b = 0; c = 0; d = 0;
-  }
-  */
-}
-
-template class gmm::SuperLU_factor<float>;
-template class gmm::SuperLU_factor<double>;
-template class gmm::SuperLU_factor<std::complex<float> >;
-template class gmm::SuperLU_factor<std::complex<double> >;
-
-static int (*superlu_callback)();
-
-/* this one is called from superlu (see dcolumn_bmod) */
-extern "C" int handle_getfem_callback() {
-  if (superlu_callback) return superlu_callback();
-  else return 0;
-}
-
-extern "C" void set_superlu_callback(int (*cb)()) {
-  superlu_callback = cb;
-}
diff --git a/src/gmm/gmm_arch_config.h.in b/src/gmm/gmm_arch_config.h.in
new file mode 100644
index 00000000..0d46b9ab
--- /dev/null
+++ b/src/gmm/gmm_arch_config.h.in
@@ -0,0 +1,25 @@
+/* manually created gmm_arch_config.h.in template to be used by autotools.  */
+
+/* gcc style __PRETTY_FUNCTION__ macro */
+#undef GMM_HAVE_PRETTY_FUNCTION
+
+/* defined if GMM is linked to a blas library */
+#undef GMM_USES_BLAS
+
+/* defined if GMM is linked to a lapack library */
+#undef GMM_USES_LAPACK
+
+/* defined if GMM uses MPI */
+#undef GMM_USES_MPI
+
+/* defined if GMM is linked to the mumps library */
+#undef GMM_USES_MUMPS
+
+/* defined if GMM is linked to the superlu library */
+#undef GMM_USES_SUPERLU
+
+/* Use blas with 64 bits integers */
+#undef GMM_USE_BLAS64_INTERFACE
+
+/* GMM version */
+#undef GMM_VERSION
diff --git a/src/gmm/gmm_solver_Schwarz_additive.h 
b/src/gmm/gmm_solver_Schwarz_additive.h
index 2d6e9fdd..378534e1 100644
--- a/src/gmm/gmm_solver_Schwarz_additive.h
+++ b/src/gmm/gmm_solver_Schwarz_additive.h
@@ -39,7 +39,11 @@
 #define GMM_SOLVERS_SCHWARZ_ADDITIVE_H__
 
 #include "gmm_kernel.h"
+#if defined(GMM_USES_SUPERLU)
 #include "gmm_superlu_interface.h"
+#else
+#include "gmm_MUMPS_interface.h"
+#endif
 #include "gmm_solver_cg.h"
 #include "gmm_solver_gmres.h"
 #include "gmm_solver_bicgstab.h"
@@ -568,8 +572,12 @@ namespace gmm {
       x.resize(gmm::mat_ncols(M.vB[i]));
       gmm::mult(M.vM[i], p, v);
       gmm::mult(gmm::transposed(M.vB[i]), v, w);
+#if defined(GMM_USES_SUPERLU)
       double rcond;
-      SuperLU_solve(M.vMloc[i], x, w, rcond);
+      gmm::SuperLU_solve(M.vMloc[i], x, w, rcond);
+#else
+      gmm::MUMPS_solve(M.vMloc[i], x, w);
+#endif
       // gmm::iteration iter(1E-10, 0, 100000);
       //gmm::gmres(M.vMloc[i], x, w, gmm::identity_matrix(), 50, iter);
       gmm::mult_add(M.vB[i], x, q);
diff --git a/src/gmm/gmm_superlu_interface.h b/src/gmm/gmm_superlu_interface.h
index 964efd43..aecaa875 100644
--- a/src/gmm/gmm_superlu_interface.h
+++ b/src/gmm/gmm_superlu_interface.h
@@ -34,7 +34,7 @@
    @date October 17, 2003.
    @brief Interface with SuperLU (LU direct solver for sparse matrices).
 */
-#if defined(GMM_USES_SUPERLU) && !defined(GETFEM_VERSION)
+#if defined(GMM_USES_SUPERLU)
 
 #ifndef GMM_SUPERLU_INTERFACE_H
 #define GMM_SUPERLU_INTERFACE_H
@@ -144,11 +144,12 @@ namespace gmm {
                                FLOATTYPE *recip_pivot_growth,                  
    \
                                FLOATTYPE *rcond, FLOATTYPE *ferr, FLOATTYPE 
*berr, \
                                SuperLUStat_t *stats, int *info, KEYTYPE) {     
    \
-    NAMESPACE::mem_usage_t mem_usage;                                    \
+    mem_usage_t mem_usage;                                                     
    \
+    GlobalLU_t Glu;                                                            
    \
     NAMESPACE::FNAME(options, A, perm_c, perm_r, etree, equed, R, C, L,  \
                      U, work, lwork, B, X, recip_pivot_growth, rcond,    \
-                     ferr, berr, &mem_usage, stats, info);               \
-    return mem_usage.for_lu; /* bytes used by the factor storage */     \
+                     ferr, berr, &Glu, &mem_usage, stats, info);         \
+    return mem_usage.for_lu; /* bytes used by the factor storage */      \
   }
 
   DECL_GSSVX(SuperLU_S,sgssvx,float,float)
diff --git a/superlu/BLAS.c b/superlu/BLAS.c
deleted file mode 100644
index 21df5005..00000000
--- a/superlu/BLAS.c
+++ /dev/null
@@ -1,43902 +0,0 @@
-/* BLAS.f -- translated by f2c
-   You must link the resulting object file with the libraries:
-       -lf2c -lm   (in that order)
-
-   the f2c-ed file has been slightly modified (removal of lsame_, added r_sign)
-
-   Original fortran source files are distributed along with this package in 
the sub-directory BLAS
-*/
-
-/*
-
-  The reference BLAS is a freely-available software package. It is available 
from netlib via anonymous ftp
-  and the World Wide Web. Thus, it can be included in commercial software 
packages (and has been). We only
-  ask that proper credit be given to the authors.
-
-  Like all software, it is copyrighted. It is not trademarked, but we do ask 
the following:
-
-  If you modify the source for these routines we ask that you change the name 
of the routine and comment
-  the changes made to the original.
-
-  We will gladly answer any questions regarding the software. If a 
modification is done, however, it is the
-  responsibility of the person who modified the routine to provide support.
-
-  see https://www.openhub.net/licenses/blas
-*/
-
-/* Copyright (C) 2004-2020 Julien Pommier
-
-  This file is a part of GetFEM++
-
-  GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
-  under  the  terms  of the  GNU  Lesser General Public License as published
-  by  the  Free Software Foundation;  either  version 3  of the License,  or
-  (at your option) any later version along with the GCC Runtime Library
-  Exception either version 3.1 or (at your option) any later version.
-  This program  is  distributed  in  the  hope  that it will be useful,  but
-  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-  or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-  License and GCC Runtime Library Exception for more details.
-  You  should  have received a copy of the GNU Lesser General Public License
-  along  with  this program;  if not, write to the Free Software Foundation,
-  Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
-
-*/
-
-#include "BLAS_f2c.h"
-
-/* Table of constant values */
-
-static complex c_b21 = {1.f,0.f};
-static doublereal c_b876 = 1.;
-static real c_b1543 = 1.f;
-static integer c__1 = 1;
-static doublecomplex c_b2094 = {1.,0.};
-
-/* Subroutine */ int caxpy_(integer *n, complex *ca, complex *cx, integer *
-       incx, complex *cy, integer *incy)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4;
-    real r__1, r__2;
-    complex q__1, q__2;
-
-    /* Builtin functions */
-    double r_imag(complex *);
-
-    /* Local variables */
-    static integer i__, ix, iy;
-
-
-/*     constant times a vector plus a vector. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --cy;
-    --cx;
-
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if ((r__1 = ca->r, dabs(r__1)) + (r__2 = r_imag(ca), dabs(r__2)) == 0.f) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = iy;
-       i__3 = iy;
-       i__4 = ix;
-       q__2.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__2.i = ca->r * cx[
-               i__4].i + ca->i * cx[i__4].r;
-       q__1.r = cy[i__3].r + q__2.r, q__1.i = cy[i__3].i + q__2.i;
-       cy[i__2].r = q__1.r, cy[i__2].i = q__1.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*        code for both increments equal to 1 */
-
-L20:
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       i__3 = i__;
-       i__4 = i__;
-       q__2.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__2.i = ca->r * cx[
-               i__4].i + ca->i * cx[i__4].r;
-       q__1.r = cy[i__3].r + q__2.r, q__1.i = cy[i__3].i + q__2.i;
-       cy[i__2].r = q__1.r, cy[i__2].i = q__1.i;
-/* L30: */
-    }
-    return 0;
-} /* caxpy_ */
-
-/* Subroutine */ int ccopy_(integer *n, complex *cx, integer *incx, complex *
-       cy, integer *incy)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-
-    /* Local variables */
-    static integer i__, ix, iy;
-
-
-/*     copies a vector, x, to a vector, y. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --cy;
-    --cx;
-
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = iy;
-       i__3 = ix;
-       cy[i__2].r = cx[i__3].r, cy[i__2].i = cx[i__3].i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*        code for both increments equal to 1 */
-
-L20:
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       i__3 = i__;
-       cy[i__2].r = cx[i__3].r, cy[i__2].i = cx[i__3].i;
-/* L30: */
-    }
-    return 0;
-} /* ccopy_ */
-
-/* Complex */ VOID cdotc_(complex * ret_val, integer *n, complex *cx, integer 
-       *incx, complex *cy, integer *incy)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-    complex q__1, q__2, q__3;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, ix, iy;
-    static complex ctemp;
-
-
-/*     forms the dot product of two vectors, conjugating the first */
-/*     vector. */
-/*     jack dongarra, linpack,  3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --cy;
-    --cx;
-
-    /* Function Body */
-    ctemp.r = 0.f, ctemp.i = 0.f;
-     ret_val->r = 0.f,  ret_val->i = 0.f;
-    if (*n <= 0) {
-       return ;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       r_cnjg(&q__3, &cx[ix]);
-       i__2 = iy;
-       q__2.r = q__3.r * cy[i__2].r - q__3.i * cy[i__2].i, q__2.i = q__3.r * 
-               cy[i__2].i + q__3.i * cy[i__2].r;
-       q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i;
-       ctemp.r = q__1.r, ctemp.i = q__1.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-     ret_val->r = ctemp.r,  ret_val->i = ctemp.i;
-    return ;
-
-/*        code for both increments equal to 1 */
-
-L20:
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       r_cnjg(&q__3, &cx[i__]);
-       i__2 = i__;
-       q__2.r = q__3.r * cy[i__2].r - q__3.i * cy[i__2].i, q__2.i = q__3.r * 
-               cy[i__2].i + q__3.i * cy[i__2].r;
-       q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i;
-       ctemp.r = q__1.r, ctemp.i = q__1.i;
-/* L30: */
-    }
-     ret_val->r = ctemp.r,  ret_val->i = ctemp.i;
-    return ;
-} /* cdotc_ */
-
-/* Complex */ VOID cdotu_(complex * ret_val, integer *n, complex *cx, integer 
-       *incx, complex *cy, integer *incy)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    complex q__1, q__2;
-
-    /* Local variables */
-    static integer i__, ix, iy;
-    static complex ctemp;
-
-
-/*     forms the dot product of two vectors. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --cy;
-    --cx;
-
-    /* Function Body */
-    ctemp.r = 0.f, ctemp.i = 0.f;
-     ret_val->r = 0.f,  ret_val->i = 0.f;
-    if (*n <= 0) {
-       return ;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       i__3 = iy;
-       q__2.r = cx[i__2].r * cy[i__3].r - cx[i__2].i * cy[i__3].i, q__2.i = 
-               cx[i__2].r * cy[i__3].i + cx[i__2].i * cy[i__3].r;
-       q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i;
-       ctemp.r = q__1.r, ctemp.i = q__1.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-     ret_val->r = ctemp.r,  ret_val->i = ctemp.i;
-    return ;
-
-/*        code for both increments equal to 1 */
-
-L20:
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       i__3 = i__;
-       q__2.r = cx[i__2].r * cy[i__3].r - cx[i__2].i * cy[i__3].i, q__2.i = 
-               cx[i__2].r * cy[i__3].i + cx[i__2].i * cy[i__3].r;
-       q__1.r = ctemp.r + q__2.r, q__1.i = ctemp.i + q__2.i;
-       ctemp.r = q__1.r, ctemp.i = q__1.i;
-/* L30: */
-    }
-     ret_val->r = ctemp.r,  ret_val->i = ctemp.i;
-    return ;
-} /* cdotu_ */
-
-/* Subroutine */ int cgbmv_(char *trans, integer *m, integer *n, integer *kl, 
-       integer *ku, complex *alpha, complex *a, integer *lda, complex *x, 
-       integer *incx, complex *beta, complex *y, integer *incy, ftnlen 
-       trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
-    complex q__1, q__2, q__3;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, k, ix, iy, jx, jy, kx, ky, kup1, info;
-    static complex temp;
-    static integer lenx, leny;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CGBMV  performs one of the matrix-vector operations */
-
-/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or */
-
-/*     y := alpha*conjg( A' )*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are vectors and A is an */
-/*  m by n band matrix, with kl sub-diagonals and ku super-diagonals. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
-
-/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
-
-/*              TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  KL     - INTEGER. */
-/*           On entry, KL specifies the number of sub-diagonals of the */
-/*           matrix A. KL must satisfy  0 .le. KL. */
-/*           Unchanged on exit. */
-
-/*  KU     - INTEGER. */
-/*           On entry, KU specifies the number of super-diagonals of the */
-/*           matrix A. KU must satisfy  0 .le. KU. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading ( kl + ku + 1 ) by n part of the */
-/*           array A must contain the matrix of coefficients, supplied */
-/*           column by column, with the leading diagonal of the matrix in */
-/*           row ( ku + 1 ) of the array, the first super-diagonal */
-/*           starting at position 2 in row ku, the first sub-diagonal */
-/*           starting at position 1 in row ( ku + 2 ), and so on. */
-/*           Elements in the array A that do not correspond to elements */
-/*           in the band matrix (such as the top left ku by ku triangle) */
-/*           are not referenced. */
-/*           The following program segment will transfer a band matrix */
-/*           from conventional full matrix storage to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    K = KU + 1 - J */
-/*                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) */
-/*                       A( K + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( kl + ku + 1 ). */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX          array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX         . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX          array of DIMENSION at least */
-/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
-
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "T", (
-           ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (ftnlen)1)
-           ) {
-       info = 1;
-    } else if (*m < 0) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*kl < 0) {
-       info = 4;
-    } else if (*ku < 0) {
-       info = 5;
-    } else if (*lda < *kl + *ku + 1) {
-       info = 8;
-    } else if (*incx == 0) {
-       info = 10;
-    } else if (*incy == 0) {
-       info = 13;
-    }
-    if (info != 0) {
-       xerbla_("CGBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r 
-           == 1.f && beta->i == 0.f)) {
-       return 0;
-    }
-
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       lenx = *n;
-       leny = *m;
-    } else {
-       lenx = *m;
-       leny = *n;
-    }
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (lenx - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (leny - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the band part of A. */
-
-/*     First form  y := beta*y. */
-
-    if (beta->r != 1.f || beta->i != 0.f) {
-       if (*incy == 1) {
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       return 0;
-    }
-    kup1 = *ku + 1;
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y := alpha*A*x + y. */
-
-       jx = kx;
-       if (*incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   i__2 = jx;
-                   q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   k = kup1 - j;
-/* Computing MAX */
-                   i__2 = 1, i__3 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__4 = min(i__5,i__6);
-                   for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                       i__2 = i__;
-                       i__3 = i__;
-                       i__5 = k + i__ + j * a_dim1;
-                       q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               q__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + 
-                               q__2.i;
-                       y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-/* L50: */
-                   }
-               }
-               jx += *incx;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__4 = jx;
-               if (x[i__4].r != 0.f || x[i__4].i != 0.f) {
-                   i__4 = jx;
-                   q__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, 
-                           q__1.i = alpha->r * x[i__4].i + alpha->i * x[i__4]
-                           .r;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   iy = ky;
-                   k = kup1 - j;
-/* Computing MAX */
-                   i__4 = 1, i__2 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__3 = min(i__5,i__6);
-                   for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                       i__4 = iy;
-                       i__2 = iy;
-                       i__5 = k + i__ + j * a_dim1;
-                       q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               q__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + 
-                               q__2.i;
-                       y[i__4].r = q__1.r, y[i__4].i = q__1.i;
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               if (j > *ku) {
-                   ky += *incy;
-               }
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y. */
-
-       jy = ky;
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp.r = 0.f, temp.i = 0.f;
-               k = kup1 - j;
-               if (noconj) {
-/* Computing MAX */
-                   i__3 = 1, i__4 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__2 = min(i__5,i__6);
-                   for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) {
-                       i__3 = k + i__ + j * a_dim1;
-                       i__4 = i__;
-                       q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4]
-                               .i, q__2.i = a[i__3].r * x[i__4].i + a[i__3]
-                               .i * x[i__4].r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L90: */
-                   }
-               } else {
-/* Computing MAX */
-                   i__2 = 1, i__3 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__4 = min(i__5,i__6);
-                   for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                       r_cnjg(&q__3, &a[k + i__ + j * a_dim1]);
-                       i__2 = i__;
-                       q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, 
-                               q__2.i = q__3.r * x[i__2].i + q__3.i * x[i__2]
-                               .r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L100: */
-                   }
-               }
-               i__4 = jy;
-               i__2 = jy;
-               q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = 
-                       alpha->r * temp.i + alpha->i * temp.r;
-               q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
-               y[i__4].r = q__1.r, y[i__4].i = q__1.i;
-               jy += *incy;
-/* L110: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp.r = 0.f, temp.i = 0.f;
-               ix = kx;
-               k = kup1 - j;
-               if (noconj) {
-/* Computing MAX */
-                   i__4 = 1, i__2 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__3 = min(i__5,i__6);
-                   for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                       i__4 = k + i__ + j * a_dim1;
-                       i__2 = ix;
-                       q__2.r = a[i__4].r * x[i__2].r - a[i__4].i * x[i__2]
-                               .i, q__2.i = a[i__4].r * x[i__2].i + a[i__4]
-                               .i * x[i__2].r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       ix += *incx;
-/* L120: */
-                   }
-               } else {
-/* Computing MAX */
-                   i__3 = 1, i__4 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__2 = min(i__5,i__6);
-                   for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) {
-                       r_cnjg(&q__3, &a[k + i__ + j * a_dim1]);
-                       i__3 = ix;
-                       q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                               q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3]
-                               .r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       ix += *incx;
-/* L130: */
-                   }
-               }
-               i__2 = jy;
-               i__3 = jy;
-               q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = 
-                       alpha->r * temp.i + alpha->i * temp.r;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               jy += *incy;
-               if (j > *ku) {
-                   kx += *incx;
-               }
-/* L140: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CGBMV . */
-
-} /* cgbmv_ */
-
-/* Subroutine */ int cgemm_(char *transa, char *transb, integer *m, integer *
-       n, integer *k, complex *alpha, complex *a, integer *lda, complex *b, 
-       integer *ldb, complex *beta, complex *c__, integer *ldc, ftnlen 
-       transa_len, ftnlen transb_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6;
-    complex q__1, q__2, q__3, q__4;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, l, info;
-    static logical nota, notb;
-    static complex temp;
-    static logical conja, conjb;
-    static integer ncola;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa, nrowb;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CGEMM  performs one of the matrix-matrix operations */
-
-/*     C := alpha*op( A )*op( B ) + beta*C, */
-
-/*  where  op( X ) is one of */
-
-/*     op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ), */
-
-/*  alpha and beta are scalars, and A, B and C are matrices, with op( A ) */
-/*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  TRANSA - CHARACTER*1. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-
-/*              TRANSA = 'N' or 'n',  op( A ) = A. */
-
-/*              TRANSA = 'T' or 't',  op( A ) = A'. */
-
-/*              TRANSA = 'C' or 'c',  op( A ) = conjg( A' ). */
-
-/*           Unchanged on exit. */
-
-/*  TRANSB - CHARACTER*1. */
-/*           On entry, TRANSB specifies the form of op( B ) to be used in */
-/*           the matrix multiplication as follows: */
-
-/*              TRANSB = 'N' or 'n',  op( B ) = B. */
-
-/*              TRANSB = 'T' or 't',  op( B ) = B'. */
-
-/*              TRANSB = 'C' or 'c',  op( B ) = conjg( B' ). */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies  the number  of rows  of the  matrix */
-/*           op( A )  and of the  matrix  C.  M  must  be at least  zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry,  N  specifies the number  of columns of the matrix */
-/*           op( B ) and the number of columns of the matrix C. N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry,  K  specifies  the number of columns of the matrix */
-/*           op( A ) and the number of rows of the matrix op( B ). K must */
-/*           be at least  zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise. */
-/*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by m  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  B      - COMPLEX          array of DIMENSION ( LDB, kb ), where kb is */
-/*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise. */
-/*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  n by k  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then */
-/*           LDB must be at least  max( 1, k ), otherwise  LDB must be at */
-/*           least  max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX         . */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  C      - COMPLEX          array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n  matrix */
-/*           ( alpha*op( A )*op( B ) + beta*C ). */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not */
-/*     conjugated or transposed, set  CONJA and CONJB  as true if  A  and */
-/*     B  respectively are to be  transposed but  not conjugated  and set */
-/*     NROWA, NCOLA and  NROWB  as the number of rows and  columns  of  A */
-/*     and the number of rows of  B  respectively. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    nota = lsame_(transa, "N", (ftnlen)1, (ftnlen)1);
-    notb = lsame_(transb, "N", (ftnlen)1, (ftnlen)1);
-    conja = lsame_(transa, "C", (ftnlen)1, (ftnlen)1);
-    conjb = lsame_(transb, "C", (ftnlen)1, (ftnlen)1);
-    if (nota) {
-       nrowa = *m;
-       ncola = *k;
-    } else {
-       nrowa = *k;
-       ncola = *m;
-    }
-    if (notb) {
-       nrowb = *k;
-    } else {
-       nrowb = *n;
-    }
-
-/*     Test the input parameters. */
-
-    info = 0;
-    if (! nota && ! conja && ! lsame_(transa, "T", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! notb && ! conjb && ! lsame_(transb, "T", (ftnlen)1, (ftnlen)
-           1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < max(1,nrowa)) {
-       info = 8;
-    } else if (*ldb < max(1,nrowb)) {
-       info = 10;
-    } else if (*ldc < max(1,*m)) {
-       info = 13;
-    }
-    if (info != 0) {
-       xerbla_("CGEMM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || (alpha->r == 0.f && alpha->i == 0.f || *k == 0) 
-           && (beta->r == 1.f && beta->i == 0.f)) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       if (beta->r == 0.f && beta->i == 0.f) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           q__1.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (notb) {
-       if (nota) {
-
-/*           Form  C := alpha*A*B + beta*C. */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0.f && beta->i == 0.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L50: */
-                   }
-               } else if (beta->r != 1.f || beta->i != 0.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L60: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = l + j * b_dim1;
-                   if (b[i__3].r != 0.f || b[i__3].i != 0.f) {
-                       i__3 = l + j * b_dim1;
-                       q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               q__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   q__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
-                                   .i + q__2.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L70: */
-                       }
-                   }
-/* L80: */
-               }
-/* L90: */
-           }
-       } else if (conja) {
-
-/*           Form  C := alpha*conjg( A' )*B + beta*C. */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       r_cnjg(&q__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * b_dim1;
-                       q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, 
-                               q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4]
-                               .r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L100: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L110: */
-               }
-/* L120: */
-           }
-       } else {
-
-/*           Form  C := alpha*A'*B + beta*C */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * b_dim1;
-                       q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
-                               .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4]
-                               .i * b[i__5].r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L130: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L140: */
-               }
-/* L150: */
-           }
-       }
-    } else if (nota) {
-       if (conjb) {
-
-/*           Form  C := alpha*A*conjg( B' ) + beta*C. */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0.f && beta->i == 0.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L160: */
-                   }
-               } else if (beta->r != 1.f || beta->i != 0.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L170: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * b_dim1;
-                   if (b[i__3].r != 0.f || b[i__3].i != 0.f) {
-                       r_cnjg(&q__2, &b[j + l * b_dim1]);
-                       q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, 
-                               q__1.i = alpha->r * q__2.i + alpha->i * 
-                               q__2.r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   q__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
-                                   .i + q__2.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L180: */
-                       }
-                   }
-/* L190: */
-               }
-/* L200: */
-           }
-       } else {
-
-/*           Form  C := alpha*A*B'          + beta*C */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0.f && beta->i == 0.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L210: */
-                   }
-               } else if (beta->r != 1.f || beta->i != 0.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L220: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * b_dim1;
-                   if (b[i__3].r != 0.f || b[i__3].i != 0.f) {
-                       i__3 = j + l * b_dim1;
-                       q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               q__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   q__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
-                                   .i + q__2.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L230: */
-                       }
-                   }
-/* L240: */
-               }
-/* L250: */
-           }
-       }
-    } else if (conja) {
-       if (conjb) {
-
-/*           Form  C := alpha*conjg( A' )*conjg( B' ) + beta*C. */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       r_cnjg(&q__3, &a[l + i__ * a_dim1]);
-                       r_cnjg(&q__4, &b[j + l * b_dim1]);
-                       q__2.r = q__3.r * q__4.r - q__3.i * q__4.i, q__2.i = 
-                               q__3.r * q__4.i + q__3.i * q__4.r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L260: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L270: */
-               }
-/* L280: */
-           }
-       } else {
-
-/*           Form  C := alpha*conjg( A' )*B' + beta*C */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       r_cnjg(&q__3, &a[l + i__ * a_dim1]);
-                       i__4 = j + l * b_dim1;
-                       q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, 
-                               q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4]
-                               .r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L290: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L300: */
-               }
-/* L310: */
-           }
-       }
-    } else {
-       if (conjb) {
-
-/*           Form  C := alpha*A'*conjg( B' ) + beta*C */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       r_cnjg(&q__3, &b[j + l * b_dim1]);
-                       q__2.r = a[i__4].r * q__3.r - a[i__4].i * q__3.i, 
-                               q__2.i = a[i__4].r * q__3.i + a[i__4].i * 
-                               q__3.r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L320: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L330: */
-               }
-/* L340: */
-           }
-       } else {
-
-/*           Form  C := alpha*A'*B' + beta*C */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = j + l * b_dim1;
-                       q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
-                               .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4]
-                               .i * b[i__5].r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L350: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L360: */
-               }
-/* L370: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CGEMM . */
-
-} /* cgemm_ */
-
-/* Subroutine */ int cgemv_(char *trans, integer *m, integer *n, complex *
-       alpha, complex *a, integer *lda, complex *x, integer *incx, complex *
-       beta, complex *y, integer *incy, ftnlen trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2, q__3;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static complex temp;
-    static integer lenx, leny;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CGEMV  performs one of the matrix-vector operations */
-
-/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or */
-
-/*     y := alpha*conjg( A' )*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are vectors and A is an */
-/*  m by n matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
-
-/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
-
-/*              TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX          array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX         . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX          array of DIMENSION at least */
-/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
-/*           Before entry with BETA non-zero, the incremented array Y */
-/*           must contain the vector y. On exit, Y is overwritten by the */
-/*           updated vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "T", (
-           ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (ftnlen)1)
-           ) {
-       info = 1;
-    } else if (*m < 0) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*lda < max(1,*m)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    } else if (*incy == 0) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("CGEMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r 
-           == 1.f && beta->i == 0.f)) {
-       return 0;
-    }
-
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       lenx = *n;
-       leny = *m;
-    } else {
-       lenx = *m;
-       leny = *n;
-    }
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (lenx - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (leny - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-/*     First form  y := beta*y. */
-
-    if (beta->r != 1.f || beta->i != 0.f) {
-       if (*incy == 1) {
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       return 0;
-    }
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y := alpha*A*x + y. */
-
-       jx = kx;
-       if (*incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   i__2 = jx;
-                   q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__;
-                       i__4 = i__;
-                       i__5 = i__ + j * a_dim1;
-                       q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               q__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + 
-                               q__2.i;
-                       y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-/* L50: */
-                   }
-               }
-               jx += *incx;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   i__2 = jx;
-                   q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   iy = ky;
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = iy;
-                       i__4 = iy;
-                       i__5 = i__ + j * a_dim1;
-                       q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               q__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + 
-                               q__2.i;
-                       y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y. */
-
-       jy = ky;
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp.r = 0.f, temp.i = 0.f;
-               if (noconj) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__;
-                       q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4]
-                               .i, q__2.i = a[i__3].r * x[i__4].i + a[i__3]
-                               .i * x[i__4].r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L90: */
-                   }
-               } else {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                       i__3 = i__;
-                       q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                               q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3]
-                               .r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L100: */
-                   }
-               }
-               i__2 = jy;
-               i__3 = jy;
-               q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = 
-                       alpha->r * temp.i + alpha->i * temp.r;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               jy += *incy;
-/* L110: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp.r = 0.f, temp.i = 0.f;
-               ix = kx;
-               if (noconj) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = ix;
-                       q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4]
-                               .i, q__2.i = a[i__3].r * x[i__4].i + a[i__3]
-                               .i * x[i__4].r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       ix += *incx;
-/* L120: */
-                   }
-               } else {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                       i__3 = ix;
-                       q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                               q__2.i = q__3.r * x[i__3].i + q__3.i * x[i__3]
-                               .r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       ix += *incx;
-/* L130: */
-                   }
-               }
-               i__2 = jy;
-               i__3 = jy;
-               q__2.r = alpha->r * temp.r - alpha->i * temp.i, q__2.i = 
-                       alpha->r * temp.i + alpha->i * temp.r;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               jy += *incy;
-/* L140: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CGEMV . */
-
-} /* cgemv_ */
-
-/* Subroutine */ int cgerc_(integer *m, integer *n, complex *alpha, complex *
-       x, integer *incx, complex *y, integer *incy, complex *a, integer *lda)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, ix, jy, kx, info;
-    static complex temp;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CGERC  performs the rank 1 operation */
-
-/*     A := alpha*x*conjg( y' ) + A, */
-
-/*  where alpha is a scalar, x is an m element vector, y is an n element */
-/*  vector and A is an m by n matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the m */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. On exit, A is */
-/*           overwritten by the updated matrix. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-
-    /* Function Body */
-    info = 0;
-    if (*m < 0) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*m)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("CGERC ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f) {
-       return 0;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (*incy > 0) {
-       jy = 1;
-    } else {
-       jy = 1 - (*n - 1) * *incy;
-    }
-    if (*incx == 1) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = jy;
-           if (y[i__2].r != 0.f || y[i__2].i != 0.f) {
-               r_cnjg(&q__2, &y[jy]);
-               q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                       alpha->r * q__2.i + alpha->i * q__2.r;
-               temp.r = q__1.r, temp.i = q__1.i;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * a_dim1;
-                   i__4 = i__ + j * a_dim1;
-                   i__5 = i__;
-                   q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i =
-                            x[i__5].r * temp.i + x[i__5].i * temp.r;
-                   q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i;
-                   a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-/* L10: */
-               }
-           }
-           jy += *incy;
-/* L20: */
-       }
-    } else {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*m - 1) * *incx;
-       }
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = jy;
-           if (y[i__2].r != 0.f || y[i__2].i != 0.f) {
-               r_cnjg(&q__2, &y[jy]);
-               q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                       alpha->r * q__2.i + alpha->i * q__2.r;
-               temp.r = q__1.r, temp.i = q__1.i;
-               ix = kx;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * a_dim1;
-                   i__4 = i__ + j * a_dim1;
-                   i__5 = ix;
-                   q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i =
-                            x[i__5].r * temp.i + x[i__5].i * temp.r;
-                   q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i;
-                   a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-                   ix += *incx;
-/* L30: */
-               }
-           }
-           jy += *incy;
-/* L40: */
-       }
-    }
-
-    return 0;
-
-/*     End of CGERC . */
-
-} /* cgerc_ */
-
-/* Subroutine */ int cgeru_(integer *m, integer *n, complex *alpha, complex *
-       x, integer *incx, complex *y, integer *incy, complex *a, integer *lda)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2;
-
-    /* Local variables */
-    static integer i__, j, ix, jy, kx, info;
-    static complex temp;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CGERU  performs the rank 1 operation */
-
-/*     A := alpha*x*y' + A, */
-
-/*  where alpha is a scalar, x is an m element vector, y is an n element */
-/*  vector and A is an m by n matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the m */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. On exit, A is */
-/*           overwritten by the updated matrix. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-
-    /* Function Body */
-    info = 0;
-    if (*m < 0) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*m)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("CGERU ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f) {
-       return 0;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (*incy > 0) {
-       jy = 1;
-    } else {
-       jy = 1 - (*n - 1) * *incy;
-    }
-    if (*incx == 1) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = jy;
-           if (y[i__2].r != 0.f || y[i__2].i != 0.f) {
-               i__2 = jy;
-               q__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, q__1.i =
-                        alpha->r * y[i__2].i + alpha->i * y[i__2].r;
-               temp.r = q__1.r, temp.i = q__1.i;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * a_dim1;
-                   i__4 = i__ + j * a_dim1;
-                   i__5 = i__;
-                   q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i =
-                            x[i__5].r * temp.i + x[i__5].i * temp.r;
-                   q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i;
-                   a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-/* L10: */
-               }
-           }
-           jy += *incy;
-/* L20: */
-       }
-    } else {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*m - 1) * *incx;
-       }
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = jy;
-           if (y[i__2].r != 0.f || y[i__2].i != 0.f) {
-               i__2 = jy;
-               q__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, q__1.i =
-                        alpha->r * y[i__2].i + alpha->i * y[i__2].r;
-               temp.r = q__1.r, temp.i = q__1.i;
-               ix = kx;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * a_dim1;
-                   i__4 = i__ + j * a_dim1;
-                   i__5 = ix;
-                   q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, q__2.i =
-                            x[i__5].r * temp.i + x[i__5].i * temp.r;
-                   q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + q__2.i;
-                   a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-                   ix += *incx;
-/* L30: */
-               }
-           }
-           jy += *incy;
-/* L40: */
-       }
-    }
-
-    return 0;
-
-/*     End of CGERU . */
-
-} /* cgeru_ */
-
-/* Subroutine */ int chbmv_(char *uplo, integer *n, integer *k, complex *
-       alpha, complex *a, integer *lda, complex *x, integer *incx, complex *
-       beta, complex *y, integer *incy, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    real r__1;
-    complex q__1, q__2, q__3, q__4;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CHBMV  performs the matrix-vector  operation */
-
-/*     y := alpha*A*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n hermitian band matrix, with k super-diagonals. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the band matrix A is being supplied as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  being supplied. */
-
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  being supplied. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry, K specifies the number of super-diagonals of the */
-/*           matrix A. K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the hermitian matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer the upper */
-/*           triangular part of a hermitian band matrix from conventional */
-/*           full matrix storage to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the hermitian matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer the lower */
-/*           triangular part of a hermitian band matrix from conventional */
-/*           full matrix storage to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set and are assumed to be zero. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX          array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX         . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX          array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*k < 0) {
-       info = 3;
-    } else if (*lda < *k + 1) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    } else if (*incy == 0) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("CHBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && 
-           beta->i == 0.f)) {
-       return 0;
-    }
-
-/*     Set up the start points in  X  and  Y. */
-
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of the array A */
-/*     are accessed sequentially with one pass through A. */
-
-/*     First form  y := beta*y. */
-
-    if (beta->r != 1.f || beta->i != 0.f) {
-       if (*incy == 1) {
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when upper triangle of A is stored. */
-
-       kplus1 = *k + 1;
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               l = kplus1 - j;
-/* Computing MAX */
-               i__2 = 1, i__3 = j - *k;
-               i__4 = j - 1;
-               for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   i__5 = l + i__ + j * a_dim1;
-                   q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-                   r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                   i__2 = i__;
-                   q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, q__2.i =
-                            q__3.r * x[i__2].i + q__3.i * x[i__2].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-/* L50: */
-               }
-               i__4 = j;
-               i__2 = j;
-               i__3 = kplus1 + j * a_dim1;
-               r__1 = a[i__3].r;
-               q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-               q__2.r = y[i__2].r + q__3.r, q__2.i = y[i__2].i + q__3.i;
-               q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-               y[i__4].r = q__1.r, y[i__4].i = q__1.i;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__4 = jx;
-               q__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, q__1.i =
-                        alpha->r * x[i__4].i + alpha->i * x[i__4].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               ix = kx;
-               iy = ky;
-               l = kplus1 - j;
-/* Computing MAX */
-               i__4 = 1, i__2 = j - *k;
-               i__3 = j - 1;
-               for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                   i__4 = iy;
-                   i__2 = iy;
-                   i__5 = l + i__ + j * a_dim1;
-                   q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
-                   y[i__4].r = q__1.r, y[i__4].i = q__1.i;
-                   r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                   i__4 = ix;
-                   q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i =
-                            q__3.r * x[i__4].i + q__3.i * x[i__4].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               i__3 = jy;
-               i__4 = jy;
-               i__2 = kplus1 + j * a_dim1;
-               r__1 = a[i__2].r;
-               q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-               q__2.r = y[i__4].r + q__3.r, q__2.i = y[i__4].i + q__3.i;
-               q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-               y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-               jx += *incx;
-               jy += *incy;
-               if (j > *k) {
-                   kx += *incx;
-                   ky += *incy;
-               }
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y  when lower triangle of A is stored. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__3 = j;
-               q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i =
-                        alpha->r * x[i__3].i + alpha->i * x[i__3].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               i__3 = j;
-               i__4 = j;
-               i__2 = j * a_dim1 + 1;
-               r__1 = a[i__2].r;
-               q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-               q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-               y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-               l = 1 - j;
-/* Computing MIN */
-               i__4 = *n, i__2 = j + *k;
-               i__3 = min(i__4,i__2);
-               for (i__ = j + 1; i__ <= i__3; ++i__) {
-                   i__4 = i__;
-                   i__2 = i__;
-                   i__5 = l + i__ + j * a_dim1;
-                   q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
-                   y[i__4].r = q__1.r, y[i__4].i = q__1.i;
-                   r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                   i__4 = i__;
-                   q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i =
-                            q__3.r * x[i__4].i + q__3.i * x[i__4].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-/* L90: */
-               }
-               i__3 = j;
-               i__4 = j;
-               q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-               y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__3 = jx;
-               q__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, q__1.i =
-                        alpha->r * x[i__3].i + alpha->i * x[i__3].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               i__3 = jy;
-               i__4 = jy;
-               i__2 = j * a_dim1 + 1;
-               r__1 = a[i__2].r;
-               q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-               q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-               y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-               l = 1 - j;
-               ix = jx;
-               iy = jy;
-/* Computing MIN */
-               i__4 = *n, i__2 = j + *k;
-               i__3 = min(i__4,i__2);
-               for (i__ = j + 1; i__ <= i__3; ++i__) {
-                   ix += *incx;
-                   iy += *incy;
-                   i__4 = iy;
-                   i__2 = iy;
-                   i__5 = l + i__ + j * a_dim1;
-                   q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   q__1.r = y[i__2].r + q__2.r, q__1.i = y[i__2].i + q__2.i;
-                   y[i__4].r = q__1.r, y[i__4].i = q__1.i;
-                   r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                   i__4 = ix;
-                   q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, q__2.i =
-                            q__3.r * x[i__4].i + q__3.i * x[i__4].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-/* L110: */
-               }
-               i__3 = jy;
-               i__4 = jy;
-               q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-               y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-               jx += *incx;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CHBMV . */
-
-} /* chbmv_ */
-
-/* Subroutine */ int chemm_(char *side, char *uplo, integer *m, integer *n, 
-       complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, 
-       complex *beta, complex *c__, integer *ldc, ftnlen side_len, ftnlen 
-       uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6;
-    real r__1;
-    complex q__1, q__2, q__3, q__4, q__5;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, k, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CHEMM  performs one of the matrix-matrix operations */
-
-/*     C := alpha*A*B + beta*C, */
-
-/*  or */
-
-/*     C := alpha*B*A + beta*C, */
-
-/*  where alpha and beta are scalars, A is an hermitian matrix and  B and */
-/*  C are m by n matrices. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE  specifies whether  the  hermitian matrix  A */
-/*           appears on the  left or right  in the  operation as follows: */
-
-/*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C, */
-
-/*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C, */
-
-/*           Unchanged on exit. */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of  the  hermitian  matrix   A  is  to  be */
-/*           referenced as follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of the */
-/*                                  hermitian matrix is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of the */
-/*                                  hermitian matrix is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies the number of rows of the matrix  C. */
-/*           M  must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix C. */
-/*           N  must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is */
-/*           m  when  SIDE = 'L' or 'l'  and is n  otherwise. */
-/*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of */
-/*           the array  A  must contain the  hermitian matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading m by m upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  hermitian matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  m by m  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  hermitian */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of */
-/*           the array  A  must contain the  hermitian matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading n by n upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  hermitian matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  n by n  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  hermitian */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Note that the imaginary parts  of the diagonal elements need */
-/*           not be set, they are assumed to be zero. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  B      - COMPLEX          array of DIMENSION ( LDB, n ). */
-/*           Before entry, the leading  m by n part of the array  B  must */
-/*           contain the matrix B. */
-/*           Unchanged on exit. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX         . */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  C      - COMPLEX          array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n updated */
-/*           matrix. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Set NROWA as the number of rows of A. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-/*     Test the input parameters. */
-
-    info = 0;
-    if (! lsame_(side, "L", (ftnlen)1, (ftnlen)1) && ! lsame_(side, "R", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,*m)) {
-       info = 9;
-    } else if (*ldc < max(1,*m)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("CHEMM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r 
-           == 1.f && beta->i == 0.f)) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       if (beta->r == 0.f && beta->i == 0.f) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           q__1.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*B + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * b_dim1;
-                   q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                           q__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   temp2.r = 0.f, temp2.i = 0.f;
-                   i__3 = i__ - 1;
-                   for (k = 1; k <= i__3; ++k) {
-                       i__4 = k + j * c_dim1;
-                       i__5 = k + j * c_dim1;
-                       i__6 = k + i__ * a_dim1;
-                       q__2.r = temp1.r * a[i__6].r - temp1.i * a[i__6].i, 
-                               q__2.i = temp1.r * a[i__6].i + temp1.i * a[
-                               i__6].r;
-                       q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5].i + 
-                               q__2.i;
-                       c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-                       i__4 = k + j * b_dim1;
-                       r_cnjg(&q__3, &a[k + i__ * a_dim1]);
-                       q__2.r = b[i__4].r * q__3.r - b[i__4].i * q__3.i, 
-                               q__2.i = b[i__4].r * q__3.i + b[i__4].i * 
-                               q__3.r;
-                       q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-/* L50: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + i__ * a_dim1;
-                       r__1 = a[i__4].r;
-                       q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-                       q__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       i__5 = i__ + i__ * a_dim1;
-                       r__1 = a[i__5].r;
-                       q__4.r = r__1 * temp1.r, q__4.i = r__1 * temp1.i;
-                       q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i;
-                       q__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L60: */
-               }
-/* L70: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               for (i__ = *m; i__ >= 1; --i__) {
-                   i__2 = i__ + j * b_dim1;
-                   q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, 
-                           q__1.i = alpha->r * b[i__2].i + alpha->i * b[i__2]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   temp2.r = 0.f, temp2.i = 0.f;
-                   i__2 = *m;
-                   for (k = i__ + 1; k <= i__2; ++k) {
-                       i__3 = k + j * c_dim1;
-                       i__4 = k + j * c_dim1;
-                       i__5 = k + i__ * a_dim1;
-                       q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                               q__2.i = temp1.r * a[i__5].i + temp1.i * a[
-                               i__5].r;
-                       q__1.r = c__[i__4].r + q__2.r, q__1.i = c__[i__4].i + 
-                               q__2.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                       i__3 = k + j * b_dim1;
-                       r_cnjg(&q__3, &a[k + i__ * a_dim1]);
-                       q__2.r = b[i__3].r * q__3.r - b[i__3].i * q__3.i, 
-                               q__2.i = b[i__3].r * q__3.i + b[i__3].i * 
-                               q__3.r;
-                       q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-/* L80: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__2 = i__ + j * c_dim1;
-                       i__3 = i__ + i__ * a_dim1;
-                       r__1 = a[i__3].r;
-                       q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-                       q__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
-                   } else {
-                       i__2 = i__ + j * c_dim1;
-                       i__3 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__3].r - beta->i * c__[i__3]
-                               .i, q__3.i = beta->r * c__[i__3].i + beta->i *
-                                c__[i__3].r;
-                       i__4 = i__ + i__ * a_dim1;
-                       r__1 = a[i__4].r;
-                       q__4.r = r__1 * temp1.r, q__4.i = r__1 * temp1.i;
-                       q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i;
-                       q__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i;
-                       c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
-                   }
-/* L90: */
-               }
-/* L100: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*B*A + beta*C. */
-
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = j + j * a_dim1;
-           r__1 = a[i__2].r;
-           q__1.r = r__1 * alpha->r, q__1.i = r__1 * alpha->i;
-           temp1.r = q__1.r, temp1.i = q__1.i;
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * b_dim1;
-                   q__1.r = temp1.r * b[i__4].r - temp1.i * b[i__4].i, 
-                           q__1.i = temp1.r * b[i__4].i + temp1.i * b[i__4]
-                           .r;
-                   c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L110: */
-               }
-           } else {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   q__2.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           q__2.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   i__5 = i__ + j * b_dim1;
-                   q__3.r = temp1.r * b[i__5].r - temp1.i * b[i__5].i, 
-                           q__3.i = temp1.r * b[i__5].i + temp1.i * b[i__5]
-                           .r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L120: */
-               }
-           }
-           i__2 = j - 1;
-           for (k = 1; k <= i__2; ++k) {
-               if (upper) {
-                   i__3 = k + j * a_dim1;
-                   q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           q__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-               } else {
-                   r_cnjg(&q__2, &a[j + k * a_dim1]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   i__4 = i__ + j * c_dim1;
-                   i__5 = i__ + j * c_dim1;
-                   i__6 = i__ + k * b_dim1;
-                   q__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, 
-                           q__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6]
-                           .r;
-                   q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5].i + 
-                           q__2.i;
-                   c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L130: */
-               }
-/* L140: */
-           }
-           i__2 = *n;
-           for (k = j + 1; k <= i__2; ++k) {
-               if (upper) {
-                   r_cnjg(&q__2, &a[j + k * a_dim1]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-               } else {
-                   i__3 = k + j * a_dim1;
-                   q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           q__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   i__4 = i__ + j * c_dim1;
-                   i__5 = i__ + j * c_dim1;
-                   i__6 = i__ + k * b_dim1;
-                   q__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, 
-                           q__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6]
-                           .r;
-                   q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5].i + 
-                           q__2.i;
-                   c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L150: */
-               }
-/* L160: */
-           }
-/* L170: */
-       }
-    }
-
-    return 0;
-
-/*     End of CHEMM . */
-
-} /* chemm_ */
-
-/* Subroutine */ int chemv_(char *uplo, integer *n, complex *alpha, complex *
-       a, integer *lda, complex *x, integer *incx, complex *beta, complex *y,
-        integer *incy, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    real r__1;
-    complex q__1, q__2, q__3, q__4;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CHEMV  performs the matrix-vector  operation */
-
-/*     y := alpha*A*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n hermitian matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           lower triangular part of A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           upper triangular part of A is not referenced. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set and are assumed to be zero. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX         . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*lda < max(1,*n)) {
-       info = 5;
-    } else if (*incx == 0) {
-       info = 7;
-    } else if (*incy == 0) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("CHEMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && 
-           beta->i == 0.f)) {
-       return 0;
-    }
-
-/*     Set up the start points in  X  and  Y. */
-
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-
-/*     First form  y := beta*y. */
-
-    if (beta->r != 1.f || beta->i != 0.f) {
-       if (*incy == 1) {
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when A is stored in upper triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__;
-                   i__4 = i__;
-                   i__5 = i__ + j * a_dim1;
-                   q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-                   y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                   r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                   i__3 = i__;
-                   q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-                            q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-/* L50: */
-               }
-               i__2 = j;
-               i__3 = j;
-               i__4 = j + j * a_dim1;
-               r__1 = a[i__4].r;
-               q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-               q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
-               q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               ix = kx;
-               iy = ky;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = iy;
-                   i__4 = iy;
-                   i__5 = i__ + j * a_dim1;
-                   q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-                   y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                   r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                   i__3 = ix;
-                   q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-                            q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               i__2 = jy;
-               i__3 = jy;
-               i__4 = j + j * a_dim1;
-               r__1 = a[i__4].r;
-               q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-               q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
-               q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               jx += *incx;
-               jy += *incy;
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y  when A is stored in lower triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               i__2 = j;
-               i__3 = j;
-               i__4 = j + j * a_dim1;
-               r__1 = a[i__4].r;
-               q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   i__3 = i__;
-                   i__4 = i__;
-                   i__5 = i__ + j * a_dim1;
-                   q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-                   y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                   r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                   i__3 = i__;
-                   q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-                            q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-/* L90: */
-               }
-               i__2 = j;
-               i__3 = j;
-               q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               i__2 = jy;
-               i__3 = jy;
-               i__4 = j + j * a_dim1;
-               r__1 = a[i__4].r;
-               q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               ix = jx;
-               iy = jy;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   ix += *incx;
-                   iy += *incy;
-                   i__3 = iy;
-                   i__4 = iy;
-                   i__5 = i__ + j * a_dim1;
-                   q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           q__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-                   y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                   r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                   i__3 = ix;
-                   q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-                            q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-/* L110: */
-               }
-               i__2 = jy;
-               i__3 = jy;
-               q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               jx += *incx;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CHEMV . */
-
-} /* chemv_ */
-
-/* Subroutine */ int cher_(char *uplo, integer *n, real *alpha, complex *x, 
-       integer *incx, complex *a, integer *lda, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    real r__1;
-    complex q__1, q__2;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CHER   performs the hermitian rank 1 operation */
-
-/*     A := alpha*x*conjg( x' ) + A, */
-
-/*  where alpha is a real scalar, x is an n element vector and A is an */
-/*  n by n hermitian matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           lower triangular part of A is not referenced. On exit, the */
-/*           upper triangular part of the array A is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           upper triangular part of A is not referenced. On exit, the */
-/*           lower triangular part of the array A is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set, they are assumed to be zero, and on exit they */
-/*           are set to zero. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*lda < max(1,*n)) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("CHER  ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0.f) {
-       return 0;
-    }
-
-/*     Set the start point in X if the increment is not unity. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  A  when A is stored in upper triangle. */
-
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   r_cnjg(&q__2, &x[j]);
-                   q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = i__;
-                       q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               q__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + 
-                               q__2.i;
-                       a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-/* L10: */
-                   }
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = j;
-                   q__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, q__1.i =
-                            x[i__4].r * temp.i + x[i__4].i * temp.r;
-                   r__1 = a[i__3].r + q__1.r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   r__1 = a[i__3].r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               }
-/* L20: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   r_cnjg(&q__2, &x[jx]);
-                   q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   ix = kx;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = ix;
-                       q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               q__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + 
-                               q__2.i;
-                       a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-                       ix += *incx;
-/* L30: */
-                   }
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = jx;
-                   q__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, q__1.i =
-                            x[i__4].r * temp.i + x[i__4].i * temp.r;
-                   r__1 = a[i__3].r + q__1.r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   r__1 = a[i__3].r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               }
-               jx += *incx;
-/* L40: */
-           }
-       }
-    } else {
-
-/*        Form  A  when A is stored in lower triangle. */
-
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   r_cnjg(&q__2, &x[j]);
-                   q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = j;
-                   q__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, q__1.i =
-                            temp.r * x[i__4].i + temp.i * x[i__4].r;
-                   r__1 = a[i__3].r + q__1.r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = i__;
-                       q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               q__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + 
-                               q__2.i;
-                       a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-/* L50: */
-                   }
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   r__1 = a[i__3].r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               }
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   r_cnjg(&q__2, &x[jx]);
-                   q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = jx;
-                   q__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, q__1.i =
-                            temp.r * x[i__4].i + temp.i * x[i__4].r;
-                   r__1 = a[i__3].r + q__1.r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-                   ix = jx;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       ix += *incx;
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = ix;
-                       q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               q__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       q__1.r = a[i__4].r + q__2.r, q__1.i = a[i__4].i + 
-                               q__2.i;
-                       a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-/* L70: */
-                   }
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   r__1 = a[i__3].r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               }
-               jx += *incx;
-/* L80: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CHER  . */
-
-} /* cher_ */
-
-/* Subroutine */ int cher2_(char *uplo, integer *n, complex *alpha, complex *
-       x, integer *incx, complex *y, integer *incy, complex *a, integer *lda,
-        ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
-    real r__1;
-    complex q__1, q__2, q__3, q__4;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CHER2  performs the hermitian rank 2 operation */
-
-/*     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, */
-
-/*  where alpha is a scalar, x and y are n element vectors and A is an n */
-/*  by n hermitian matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           lower triangular part of A is not referenced. On exit, the */
-/*           upper triangular part of the array A is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           upper triangular part of A is not referenced. On exit, the */
-/*           lower triangular part of the array A is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set, they are assumed to be zero, and on exit they */
-/*           are set to zero. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*n)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("CHER2 ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f) {
-       return 0;
-    }
-
-/*     Set up the start points in X and Y if the increments are not both */
-/*     unity. */
-
-    if (*incx != 1 || *incy != 1) {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*n - 1) * *incx;
-       }
-       if (*incy > 0) {
-           ky = 1;
-       } else {
-           ky = 1 - (*n - 1) * *incy;
-       }
-       jx = kx;
-       jy = ky;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  A  when A is stored in the upper triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               i__3 = j;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f 
-                       || y[i__3].i != 0.f)) {
-                   r_cnjg(&q__2, &y[j]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   i__2 = j;
-                   q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   r_cnjg(&q__1, &q__2);
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = i__;
-                       q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               q__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + 
-                               q__3.i;
-                       i__6 = i__;
-                       q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               q__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-                       a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-/* L10: */
-                   }
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = j;
-                   q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           q__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = j;
-                   q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           q__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   r__1 = a[i__3].r + q__1.r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   r__1 = a[i__3].r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               i__3 = jy;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f 
-                       || y[i__3].i != 0.f)) {
-                   r_cnjg(&q__2, &y[jy]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   i__2 = jx;
-                   q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   r_cnjg(&q__1, &q__2);
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   ix = kx;
-                   iy = ky;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = ix;
-                       q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               q__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + 
-                               q__3.i;
-                       i__6 = iy;
-                       q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               q__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-                       a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-                       ix += *incx;
-                       iy += *incy;
-/* L30: */
-                   }
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = jx;
-                   q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           q__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = jy;
-                   q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           q__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   r__1 = a[i__3].r + q__1.r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   r__1 = a[i__3].r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               }
-               jx += *incx;
-               jy += *incy;
-/* L40: */
-           }
-       }
-    } else {
-
-/*        Form  A  when A is stored in the lower triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               i__3 = j;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f 
-                       || y[i__3].i != 0.f)) {
-                   r_cnjg(&q__2, &y[j]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   i__2 = j;
-                   q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   r_cnjg(&q__1, &q__2);
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = j;
-                   q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           q__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = j;
-                   q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           q__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   r__1 = a[i__3].r + q__1.r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = i__;
-                       q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               q__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + 
-                               q__3.i;
-                       i__6 = i__;
-                       q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               q__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-                       a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-/* L50: */
-                   }
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   r__1 = a[i__3].r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               }
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               i__3 = jy;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f 
-                       || y[i__3].i != 0.f)) {
-                   r_cnjg(&q__2, &y[jy]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   i__2 = jx;
-                   q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   r_cnjg(&q__1, &q__2);
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = jx;
-                   q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           q__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = jy;
-                   q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           q__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   r__1 = a[i__3].r + q__1.r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-                   ix = jx;
-                   iy = jy;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       ix += *incx;
-                       iy += *incy;
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = ix;
-                       q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               q__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       q__2.r = a[i__4].r + q__3.r, q__2.i = a[i__4].i + 
-                               q__3.i;
-                       i__6 = iy;
-                       q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               q__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-                       a[i__3].r = q__1.r, a[i__3].i = q__1.i;
-/* L70: */
-                   }
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   r__1 = a[i__3].r;
-                   a[i__2].r = r__1, a[i__2].i = 0.f;
-               }
-               jx += *incx;
-               jy += *incy;
-/* L80: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CHER2 . */
-
-} /* cher2_ */
-
-/* Subroutine */ int cher2k_(char *uplo, char *trans, integer *n, integer *k, 
-       complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, 
-       real *beta, complex *c__, integer *ldc, ftnlen uplo_len, ftnlen 
-       trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6, i__7;
-    real r__1;
-    complex q__1, q__2, q__3, q__4, q__5, q__6;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, l, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CHER2K  performs one of the hermitian rank 2k operations */
-
-/*     C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C, */
-
-/*  or */
-
-/*     C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C, */
-
-/*  where  alpha and beta  are scalars with  beta  real,  C is an  n by n */
-/*  hermitian matrix and  A and B  are  n by k matrices in the first case */
-/*  and  k by n  matrices in the second case. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'    C := alpha*A*conjg( B' )          + */
-/*                                         conjg( alpha )*B*conjg( A' ) + */
-/*                                         beta*C. */
-
-/*              TRANS = 'C' or 'c'    C := alpha*conjg( A' )*B          + */
-/*                                         conjg( alpha )*conjg( B' )*A + */
-/*                                         beta*C. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns  of the  matrices  A and B,  and on  entry  with */
-/*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the */
-/*           matrices  A and B.  K must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  B      - COMPLEX          array of DIMENSION ( LDB, kb ), where kb is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  k by n  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDB must be at least  max( 1, n ), otherwise  LDB must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-
-/*  C      - COMPLEX          array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  hermitian matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  hermitian matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set,  they are assumed to be zero,  and on exit they */
-/*           are set to zero. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-/*  -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. */
-/*     Ed Anderson, Cray Research Inc. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "C", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldc < max(1,*n)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("CHER2K", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f || *k == 0) && *beta ==
-            1.f) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       if (upper) {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L30: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-/* L40: */
-               }
-           }
-       } else {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + */
-/*                   C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L90: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L100: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   i__4 = j + l * b_dim1;
-                   if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != 
-                           0.f || b[i__4].i != 0.f)) {
-                       r_cnjg(&q__2, &b[j + l * b_dim1]);
-                       q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, 
-                               q__1.i = alpha->r * q__2.i + alpha->i * 
-                               q__2.r;
-                       temp1.r = q__1.r, temp1.i = q__1.i;
-                       i__3 = j + l * a_dim1;
-                       q__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               q__2.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       r_cnjg(&q__1, &q__2);
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-                       i__3 = j - 1;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__3.r = a[i__6].r * temp1.r - a[i__6].i * 
-                                   temp1.i, q__3.i = a[i__6].r * temp1.i + a[
-                                   i__6].i * temp1.r;
-                           q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5]
-                                   .i + q__3.i;
-                           i__7 = i__ + l * b_dim1;
-                           q__4.r = b[i__7].r * temp2.r - b[i__7].i * 
-                                   temp2.i, q__4.i = b[i__7].r * temp2.i + b[
-                                   i__7].i * temp2.r;
-                           q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + 
-                                   q__4.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L110: */
-                       }
-                       i__3 = j + j * c_dim1;
-                       i__4 = j + j * c_dim1;
-                       i__5 = j + l * a_dim1;
-                       q__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, 
-                               q__2.i = a[i__5].r * temp1.i + a[i__5].i * 
-                               temp1.r;
-                       i__6 = j + l * b_dim1;
-                       q__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, 
-                               q__3.i = b[i__6].r * temp2.i + b[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       r__1 = c__[i__4].r + q__1.r;
-                       c__[i__3].r = r__1, c__[i__3].i = 0.f;
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L140: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L150: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   i__4 = j + l * b_dim1;
-                   if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != 
-                           0.f || b[i__4].i != 0.f)) {
-                       r_cnjg(&q__2, &b[j + l * b_dim1]);
-                       q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, 
-                               q__1.i = alpha->r * q__2.i + alpha->i * 
-                               q__2.r;
-                       temp1.r = q__1.r, temp1.i = q__1.i;
-                       i__3 = j + l * a_dim1;
-                       q__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               q__2.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       r_cnjg(&q__1, &q__2);
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-                       i__3 = *n;
-                       for (i__ = j + 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__3.r = a[i__6].r * temp1.r - a[i__6].i * 
-                                   temp1.i, q__3.i = a[i__6].r * temp1.i + a[
-                                   i__6].i * temp1.r;
-                           q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5]
-                                   .i + q__3.i;
-                           i__7 = i__ + l * b_dim1;
-                           q__4.r = b[i__7].r * temp2.r - b[i__7].i * 
-                                   temp2.i, q__4.i = b[i__7].r * temp2.i + b[
-                                   i__7].i * temp2.r;
-                           q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + 
-                                   q__4.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L160: */
-                       }
-                       i__3 = j + j * c_dim1;
-                       i__4 = j + j * c_dim1;
-                       i__5 = j + l * a_dim1;
-                       q__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, 
-                               q__2.i = a[i__5].r * temp1.i + a[i__5].i * 
-                               temp1.r;
-                       i__6 = j + l * b_dim1;
-                       q__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, 
-                               q__3.i = b[i__6].r * temp2.i + b[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       r__1 = c__[i__4].r + q__1.r;
-                       c__[i__3].r = r__1, c__[i__3].i = 0.f;
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + */
-/*                   C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp1.r = 0.f, temp1.i = 0.f;
-                   temp2.r = 0.f, temp2.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       r_cnjg(&q__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * b_dim1;
-                       q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, 
-                               q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4]
-                               .r;
-                       q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i;
-                       temp1.r = q__1.r, temp1.i = q__1.i;
-                       r_cnjg(&q__3, &b[l + i__ * b_dim1]);
-                       i__4 = l + j * a_dim1;
-                       q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, 
-                               q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4]
-                               .r;
-                       q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-/* L190: */
-                   }
-                   if (i__ == j) {
-                       if (*beta == 0.f) {
-                           i__3 = j + j * c_dim1;
-                           q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   q__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           r_cnjg(&q__4, alpha);
-                           q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, 
-                                   q__3.i = q__4.r * temp2.i + q__4.i * 
-                                   temp2.r;
-                           q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + 
-                                   q__3.i;
-                           r__1 = q__1.r;
-                           c__[i__3].r = r__1, c__[i__3].i = 0.f;
-                       } else {
-                           i__3 = j + j * c_dim1;
-                           i__4 = j + j * c_dim1;
-                           q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   q__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           r_cnjg(&q__4, alpha);
-                           q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, 
-                                   q__3.i = q__4.r * temp2.i + q__4.i * 
-                                   temp2.r;
-                           q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + 
-                                   q__3.i;
-                           r__1 = *beta * c__[i__4].r + q__1.r;
-                           c__[i__3].r = r__1, c__[i__3].i = 0.f;
-                       }
-                   } else {
-                       if (*beta == 0.f) {
-                           i__3 = i__ + j * c_dim1;
-                           q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   q__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           r_cnjg(&q__4, alpha);
-                           q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, 
-                                   q__3.i = q__4.r * temp2.i + q__4.i * 
-                                   temp2.r;
-                           q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + 
-                                   q__3.i;
-                           c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                       } else {
-                           i__3 = i__ + j * c_dim1;
-                           i__4 = i__ + j * c_dim1;
-                           q__3.r = *beta * c__[i__4].r, q__3.i = *beta * 
-                                   c__[i__4].i;
-                           q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   q__4.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + 
-                                   q__4.i;
-                           r_cnjg(&q__6, alpha);
-                           q__5.r = q__6.r * temp2.r - q__6.i * temp2.i, 
-                                   q__5.i = q__6.r * temp2.i + q__6.i * 
-                                   temp2.r;
-                           q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + 
-                                   q__5.i;
-                           c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                       }
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp1.r = 0.f, temp1.i = 0.f;
-                   temp2.r = 0.f, temp2.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       r_cnjg(&q__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * b_dim1;
-                       q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4].i, 
-                               q__2.i = q__3.r * b[i__4].i + q__3.i * b[i__4]
-                               .r;
-                       q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i;
-                       temp1.r = q__1.r, temp1.i = q__1.i;
-                       r_cnjg(&q__3, &b[l + i__ * b_dim1]);
-                       i__4 = l + j * a_dim1;
-                       q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, 
-                               q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4]
-                               .r;
-                       q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-/* L220: */
-                   }
-                   if (i__ == j) {
-                       if (*beta == 0.f) {
-                           i__3 = j + j * c_dim1;
-                           q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   q__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           r_cnjg(&q__4, alpha);
-                           q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, 
-                                   q__3.i = q__4.r * temp2.i + q__4.i * 
-                                   temp2.r;
-                           q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + 
-                                   q__3.i;
-                           r__1 = q__1.r;
-                           c__[i__3].r = r__1, c__[i__3].i = 0.f;
-                       } else {
-                           i__3 = j + j * c_dim1;
-                           i__4 = j + j * c_dim1;
-                           q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   q__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           r_cnjg(&q__4, alpha);
-                           q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, 
-                                   q__3.i = q__4.r * temp2.i + q__4.i * 
-                                   temp2.r;
-                           q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + 
-                                   q__3.i;
-                           r__1 = *beta * c__[i__4].r + q__1.r;
-                           c__[i__3].r = r__1, c__[i__3].i = 0.f;
-                       }
-                   } else {
-                       if (*beta == 0.f) {
-                           i__3 = i__ + j * c_dim1;
-                           q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   q__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           r_cnjg(&q__4, alpha);
-                           q__3.r = q__4.r * temp2.r - q__4.i * temp2.i, 
-                                   q__3.i = q__4.r * temp2.i + q__4.i * 
-                                   temp2.r;
-                           q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + 
-                                   q__3.i;
-                           c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                       } else {
-                           i__3 = i__ + j * c_dim1;
-                           i__4 = i__ + j * c_dim1;
-                           q__3.r = *beta * c__[i__4].r, q__3.i = *beta * 
-                                   c__[i__4].i;
-                           q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   q__4.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + 
-                                   q__4.i;
-                           r_cnjg(&q__6, alpha);
-                           q__5.r = q__6.r * temp2.r - q__6.i * temp2.i, 
-                                   q__5.i = q__6.r * temp2.i + q__6.i * 
-                                   temp2.r;
-                           q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + 
-                                   q__5.i;
-                           c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                       }
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CHER2K. */
-
-} /* cher2k_ */
-
-/* Subroutine */ int cherk_(char *uplo, char *trans, integer *n, integer *k, 
-       real *alpha, complex *a, integer *lda, real *beta, complex *c__, 
-       integer *ldc, ftnlen uplo_len, ftnlen trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, 
-           i__6;
-    real r__1;
-    complex q__1, q__2, q__3;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, l, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static real rtemp;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CHERK  performs one of the hermitian rank k operations */
-
-/*     C := alpha*A*conjg( A' ) + beta*C, */
-
-/*  or */
-
-/*     C := alpha*conjg( A' )*A + beta*C, */
-
-/*  where  alpha and beta  are  real scalars,  C is an  n by n  hermitian */
-/*  matrix and  A  is an  n by k  matrix in the  first case and a  k by n */
-/*  matrix in the second case. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   C := alpha*A*conjg( A' ) + beta*C. */
-
-/*              TRANS = 'C' or 'c'   C := alpha*conjg( A' )*A + beta*C. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns   of  the   matrix   A,   and  on   entry   with */
-/*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the */
-/*           matrix A.  K must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-
-/*  C      - COMPLEX          array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  hermitian matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  hermitian matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set,  they are assumed to be zero,  and on exit they */
-/*           are set to zero. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-/*  -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1. */
-/*     Ed Anderson, Cray Research Inc. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "C", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldc < max(1,*n)) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("CHERK ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (*alpha == 0.f) {
-       if (upper) {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L30: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-/* L40: */
-               }
-           }
-       } else {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*conjg( A' ) + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L90: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L100: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   if (a[i__3].r != 0.f || a[i__3].i != 0.f) {
-                       r_cnjg(&q__2, &a[j + l * a_dim1]);
-                       q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__3 = j - 1;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   q__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
-                                   .i + q__2.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L110: */
-                       }
-                       i__3 = j + j * c_dim1;
-                       i__4 = j + j * c_dim1;
-                       i__5 = i__ + l * a_dim1;
-                       q__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               q__1.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       r__1 = c__[i__4].r + q__1.r;
-                       c__[i__3].r = r__1, c__[i__3].i = 0.f;
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L140: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = *beta * c__[i__4].r, q__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L150: */
-                   }
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   if (a[i__3].r != 0.f || a[i__3].i != 0.f) {
-                       r_cnjg(&q__2, &a[j + l * a_dim1]);
-                       q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__3 = j + j * c_dim1;
-                       i__4 = j + j * c_dim1;
-                       i__5 = j + l * a_dim1;
-                       q__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               q__1.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       r__1 = c__[i__4].r + q__1.r;
-                       c__[i__3].r = r__1, c__[i__3].i = 0.f;
-                       i__3 = *n;
-                       for (i__ = j + 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   q__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
-                                   .i + q__2.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*conjg( A' )*A + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       r_cnjg(&q__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * a_dim1;
-                       q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, 
-                               q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4]
-                               .r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L190: */
-                   }
-                   if (*beta == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = *alpha * temp.r, q__1.i = *alpha * temp.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = *alpha * temp.r, q__2.i = *alpha * temp.i;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = *beta * c__[i__4].r, q__3.i = *beta * c__[
-                               i__4].i;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L200: */
-               }
-               rtemp = 0.f;
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   r_cnjg(&q__3, &a[l + j * a_dim1]);
-                   i__3 = l + j * a_dim1;
-                   q__2.r = q__3.r * a[i__3].r - q__3.i * a[i__3].i, q__2.i =
-                            q__3.r * a[i__3].i + q__3.i * a[i__3].r;
-                   q__1.r = rtemp + q__2.r, q__1.i = q__2.i;
-                   rtemp = q__1.r;
-/* L210: */
-               }
-               if (*beta == 0.f) {
-                   i__2 = j + j * c_dim1;
-                   r__1 = *alpha * rtemp;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *alpha * rtemp + *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               }
-/* L220: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               rtemp = 0.f;
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   r_cnjg(&q__3, &a[l + j * a_dim1]);
-                   i__3 = l + j * a_dim1;
-                   q__2.r = q__3.r * a[i__3].r - q__3.i * a[i__3].i, q__2.i =
-                            q__3.r * a[i__3].i + q__3.i * a[i__3].r;
-                   q__1.r = rtemp + q__2.r, q__1.i = q__2.i;
-                   rtemp = q__1.r;
-/* L230: */
-               }
-               if (*beta == 0.f) {
-                   i__2 = j + j * c_dim1;
-                   r__1 = *alpha * rtemp;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   r__1 = *alpha * rtemp + *beta * c__[i__3].r;
-                   c__[i__2].r = r__1, c__[i__2].i = 0.f;
-               }
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       r_cnjg(&q__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * a_dim1;
-                       q__2.r = q__3.r * a[i__4].r - q__3.i * a[i__4].i, 
-                               q__2.i = q__3.r * a[i__4].i + q__3.i * a[i__4]
-                               .r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L240: */
-                   }
-                   if (*beta == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = *alpha * temp.r, q__1.i = *alpha * temp.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = *alpha * temp.r, q__2.i = *alpha * temp.i;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = *beta * c__[i__4].r, q__3.i = *beta * c__[
-                               i__4].i;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L250: */
-               }
-/* L260: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CHERK . */
-
-} /* cherk_ */
-
-/* Subroutine */ int chpmv_(char *uplo, integer *n, complex *alpha, complex *
-       ap, complex *x, integer *incx, complex *beta, complex *y, integer *
-       incy, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    real r__1;
-    complex q__1, q__2, q__3, q__4;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CHPMV  performs the matrix-vector operation */
-
-/*     y := alpha*A*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n hermitian matrix, supplied in packed form. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  AP     - COMPLEX          array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set and are assumed to be zero. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX         . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --y;
-    --x;
-    --ap;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 6;
-    } else if (*incy == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("CHPMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r == 1.f && 
-           beta->i == 0.f)) {
-       return 0;
-    }
-
-/*     Set up the start points in  X  and  Y. */
-
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-
-/*     First form  y := beta*y. */
-
-    if (beta->r != 1.f || beta->i != 0.f) {
-       if (*incy == 1) {
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0.f, y[i__2].i = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   q__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           q__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       return 0;
-    }
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when AP contains the upper triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               k = kk;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__;
-                   i__4 = i__;
-                   i__5 = k;
-                   q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-                           q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-                           .r;
-                   q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-                   y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                   r_cnjg(&q__3, &ap[k]);
-                   i__3 = i__;
-                   q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-                            q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   ++k;
-/* L50: */
-               }
-               i__2 = j;
-               i__3 = j;
-               i__4 = kk + j - 1;
-               r__1 = ap[i__4].r;
-               q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-               q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
-               q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               kk += j;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               ix = kx;
-               iy = ky;
-               i__2 = kk + j - 2;
-               for (k = kk; k <= i__2; ++k) {
-                   i__3 = iy;
-                   i__4 = iy;
-                   i__5 = k;
-                   q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-                           q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-                           .r;
-                   q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-                   y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                   r_cnjg(&q__3, &ap[k]);
-                   i__3 = ix;
-                   q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-                            q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               i__2 = jy;
-               i__3 = jy;
-               i__4 = kk + j - 1;
-               r__1 = ap[i__4].r;
-               q__3.r = r__1 * temp1.r, q__3.i = r__1 * temp1.i;
-               q__2.r = y[i__3].r + q__3.r, q__2.i = y[i__3].i + q__3.i;
-               q__4.r = alpha->r * temp2.r - alpha->i * temp2.i, q__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               jx += *incx;
-               jy += *incy;
-               kk += j;
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y  when AP contains the lower triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               i__2 = j;
-               i__3 = j;
-               i__4 = kk;
-               r__1 = ap[i__4].r;
-               q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               k = kk + 1;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   i__3 = i__;
-                   i__4 = i__;
-                   i__5 = k;
-                   q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-                           q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-                           .r;
-                   q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-                   y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                   r_cnjg(&q__3, &ap[k]);
-                   i__3 = i__;
-                   q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-                            q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   ++k;
-/* L90: */
-               }
-               i__2 = j;
-               i__3 = j;
-               q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               kk += *n - j + 1;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               q__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, q__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = q__1.r, temp1.i = q__1.i;
-               temp2.r = 0.f, temp2.i = 0.f;
-               i__2 = jy;
-               i__3 = jy;
-               i__4 = kk;
-               r__1 = ap[i__4].r;
-               q__2.r = r__1 * temp1.r, q__2.i = r__1 * temp1.i;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               ix = jx;
-               iy = jy;
-               i__2 = kk + *n - j;
-               for (k = kk + 1; k <= i__2; ++k) {
-                   ix += *incx;
-                   iy += *incy;
-                   i__3 = iy;
-                   i__4 = iy;
-                   i__5 = k;
-                   q__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-                           q__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-                           .r;
-                   q__1.r = y[i__4].r + q__2.r, q__1.i = y[i__4].i + q__2.i;
-                   y[i__3].r = q__1.r, y[i__3].i = q__1.i;
-                   r_cnjg(&q__3, &ap[k]);
-                   i__3 = ix;
-                   q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, q__2.i =
-                            q__3.r * x[i__3].i + q__3.i * x[i__3].r;
-                   q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-/* L110: */
-               }
-               i__2 = jy;
-               i__3 = jy;
-               q__2.r = alpha->r * temp2.r - alpha->i * temp2.i, q__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               q__1.r = y[i__3].r + q__2.r, q__1.i = y[i__3].i + q__2.i;
-               y[i__2].r = q__1.r, y[i__2].i = q__1.i;
-               jx += *incx;
-               jy += *incy;
-               kk += *n - j + 1;
-/* L120: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CHPMV . */
-
-} /* chpmv_ */
-
-/* Subroutine */ int chpr_(char *uplo, integer *n, real *alpha, complex *x, 
-       integer *incx, complex *ap, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    real r__1;
-    complex q__1, q__2;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CHPR    performs the hermitian rank 1 operation */
-
-/*     A := alpha*x*conjg( x' ) + A, */
-
-/*  where alpha is a real scalar, x is an n element vector and A is an */
-/*  n by n hermitian matrix, supplied in packed form. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  AP     - COMPLEX          array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the upper triangular part of the */
-/*           updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the lower triangular part of the */
-/*           updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set, they are assumed to be zero, and on exit they */
-/*           are set to zero. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --ap;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    }
-    if (info != 0) {
-       xerbla_("CHPR  ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0.f) {
-       return 0;
-    }
-
-/*     Set the start point in X if the increment is not unity. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  A  when upper triangle is stored in AP. */
-
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   r_cnjg(&q__2, &x[j]);
-                   q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   k = kk;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = i__;
-                       q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               q__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       q__1.r = ap[i__4].r + q__2.r, q__1.i = ap[i__4].i + 
-                               q__2.i;
-                       ap[i__3].r = q__1.r, ap[i__3].i = q__1.i;
-                       ++k;
-/* L10: */
-                   }
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   i__4 = j;
-                   q__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, q__1.i =
-                            x[i__4].r * temp.i + x[i__4].i * temp.r;
-                   r__1 = ap[i__3].r + q__1.r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               } else {
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   r__1 = ap[i__3].r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               }
-               kk += j;
-/* L20: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   r_cnjg(&q__2, &x[jx]);
-                   q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   ix = kx;
-                   i__2 = kk + j - 2;
-                   for (k = kk; k <= i__2; ++k) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = ix;
-                       q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               q__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       q__1.r = ap[i__4].r + q__2.r, q__1.i = ap[i__4].i + 
-                               q__2.i;
-                       ap[i__3].r = q__1.r, ap[i__3].i = q__1.i;
-                       ix += *incx;
-/* L30: */
-                   }
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   i__4 = jx;
-                   q__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, q__1.i =
-                            x[i__4].r * temp.i + x[i__4].i * temp.r;
-                   r__1 = ap[i__3].r + q__1.r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               } else {
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   r__1 = ap[i__3].r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               }
-               jx += *incx;
-               kk += j;
-/* L40: */
-           }
-       }
-    } else {
-
-/*        Form  A  when lower triangle is stored in AP. */
-
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   r_cnjg(&q__2, &x[j]);
-                   q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   i__2 = kk;
-                   i__3 = kk;
-                   i__4 = j;
-                   q__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, q__1.i =
-                            temp.r * x[i__4].i + temp.i * x[i__4].r;
-                   r__1 = ap[i__3].r + q__1.r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-                   k = kk + 1;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = i__;
-                       q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               q__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       q__1.r = ap[i__4].r + q__2.r, q__1.i = ap[i__4].i + 
-                               q__2.i;
-                       ap[i__3].r = q__1.r, ap[i__3].i = q__1.i;
-                       ++k;
-/* L50: */
-                   }
-               } else {
-                   i__2 = kk;
-                   i__3 = kk;
-                   r__1 = ap[i__3].r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               }
-               kk = kk + *n - j + 1;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                   r_cnjg(&q__2, &x[jx]);
-                   q__1.r = *alpha * q__2.r, q__1.i = *alpha * q__2.i;
-                   temp.r = q__1.r, temp.i = q__1.i;
-                   i__2 = kk;
-                   i__3 = kk;
-                   i__4 = jx;
-                   q__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, q__1.i =
-                            temp.r * x[i__4].i + temp.i * x[i__4].r;
-                   r__1 = ap[i__3].r + q__1.r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-                   ix = jx;
-                   i__2 = kk + *n - j;
-                   for (k = kk + 1; k <= i__2; ++k) {
-                       ix += *incx;
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = ix;
-                       q__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               q__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       q__1.r = ap[i__4].r + q__2.r, q__1.i = ap[i__4].i + 
-                               q__2.i;
-                       ap[i__3].r = q__1.r, ap[i__3].i = q__1.i;
-/* L70: */
-                   }
-               } else {
-                   i__2 = kk;
-                   i__3 = kk;
-                   r__1 = ap[i__3].r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               }
-               jx += *incx;
-               kk = kk + *n - j + 1;
-/* L80: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CHPR  . */
-
-} /* chpr_ */
-
-/* Subroutine */ int chpr2_(char *uplo, integer *n, complex *alpha, complex *
-       x, integer *incx, complex *y, integer *incy, complex *ap, ftnlen 
-       uplo_len)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5, i__6;
-    real r__1;
-    complex q__1, q__2, q__3, q__4;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CHPR2  performs the hermitian rank 2 operation */
-
-/*     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, */
-
-/*  where alpha is a scalar, x and y are n element vectors and A is an */
-/*  n by n hermitian matrix, supplied in packed form. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-/*  AP     - COMPLEX          array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the upper triangular part of the */
-/*           updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the lower triangular part of the */
-/*           updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set, they are assumed to be zero, and on exit they */
-/*           are set to zero. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --ap;
-    --y;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("CHPR2 ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || alpha->r == 0.f && alpha->i == 0.f) {
-       return 0;
-    }
-
-/*     Set up the start points in X and Y if the increments are not both */
-/*     unity. */
-
-    if (*incx != 1 || *incy != 1) {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*n - 1) * *incx;
-       }
-       if (*incy > 0) {
-           ky = 1;
-       } else {
-           ky = 1 - (*n - 1) * *incy;
-       }
-       jx = kx;
-       jy = ky;
-    }
-
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  A  when upper triangle is stored in AP. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               i__3 = j;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f 
-                       || y[i__3].i != 0.f)) {
-                   r_cnjg(&q__2, &y[j]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   i__2 = j;
-                   q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   r_cnjg(&q__1, &q__2);
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   k = kk;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = i__;
-                       q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               q__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       q__2.r = ap[i__4].r + q__3.r, q__2.i = ap[i__4].i + 
-                               q__3.i;
-                       i__6 = i__;
-                       q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               q__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-                       ap[i__3].r = q__1.r, ap[i__3].i = q__1.i;
-                       ++k;
-/* L10: */
-                   }
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   i__4 = j;
-                   q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           q__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = j;
-                   q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           q__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   r__1 = ap[i__3].r + q__1.r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               } else {
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   r__1 = ap[i__3].r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               }
-               kk += j;
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               i__3 = jy;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f 
-                       || y[i__3].i != 0.f)) {
-                   r_cnjg(&q__2, &y[jy]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   i__2 = jx;
-                   q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   r_cnjg(&q__1, &q__2);
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   ix = kx;
-                   iy = ky;
-                   i__2 = kk + j - 2;
-                   for (k = kk; k <= i__2; ++k) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = ix;
-                       q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               q__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       q__2.r = ap[i__4].r + q__3.r, q__2.i = ap[i__4].i + 
-                               q__3.i;
-                       i__6 = iy;
-                       q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               q__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-                       ap[i__3].r = q__1.r, ap[i__3].i = q__1.i;
-                       ix += *incx;
-                       iy += *incy;
-/* L30: */
-                   }
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   i__4 = jx;
-                   q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           q__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = jy;
-                   q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           q__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   r__1 = ap[i__3].r + q__1.r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               } else {
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   r__1 = ap[i__3].r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               }
-               jx += *incx;
-               jy += *incy;
-               kk += j;
-/* L40: */
-           }
-       }
-    } else {
-
-/*        Form  A  when lower triangle is stored in AP. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               i__3 = j;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f 
-                       || y[i__3].i != 0.f)) {
-                   r_cnjg(&q__2, &y[j]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   i__2 = j;
-                   q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   r_cnjg(&q__1, &q__2);
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   i__2 = kk;
-                   i__3 = kk;
-                   i__4 = j;
-                   q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           q__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = j;
-                   q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           q__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   r__1 = ap[i__3].r + q__1.r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-                   k = kk + 1;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = i__;
-                       q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               q__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       q__2.r = ap[i__4].r + q__3.r, q__2.i = ap[i__4].i + 
-                               q__3.i;
-                       i__6 = i__;
-                       q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               q__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-                       ap[i__3].r = q__1.r, ap[i__3].i = q__1.i;
-                       ++k;
-/* L50: */
-                   }
-               } else {
-                   i__2 = kk;
-                   i__3 = kk;
-                   r__1 = ap[i__3].r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               }
-               kk = kk + *n - j + 1;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               i__3 = jy;
-               if (x[i__2].r != 0.f || x[i__2].i != 0.f || (y[i__3].r != 0.f 
-                       || y[i__3].i != 0.f)) {
-                   r_cnjg(&q__2, &y[jy]);
-                   q__1.r = alpha->r * q__2.r - alpha->i * q__2.i, q__1.i = 
-                           alpha->r * q__2.i + alpha->i * q__2.r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   i__2 = jx;
-                   q__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           q__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   r_cnjg(&q__1, &q__2);
-                   temp2.r = q__1.r, temp2.i = q__1.i;
-                   i__2 = kk;
-                   i__3 = kk;
-                   i__4 = jx;
-                   q__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           q__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = jy;
-                   q__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           q__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   r__1 = ap[i__3].r + q__1.r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-                   ix = jx;
-                   iy = jy;
-                   i__2 = kk + *n - j;
-                   for (k = kk + 1; k <= i__2; ++k) {
-                       ix += *incx;
-                       iy += *incy;
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = ix;
-                       q__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               q__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       q__2.r = ap[i__4].r + q__3.r, q__2.i = ap[i__4].i + 
-                               q__3.i;
-                       i__6 = iy;
-                       q__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               q__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + q__4.i;
-                       ap[i__3].r = q__1.r, ap[i__3].i = q__1.i;
-/* L70: */
-                   }
-               } else {
-                   i__2 = kk;
-                   i__3 = kk;
-                   r__1 = ap[i__3].r;
-                   ap[i__2].r = r__1, ap[i__2].i = 0.f;
-               }
-               jx += *incx;
-               jy += *incy;
-               kk = kk + *n - j + 1;
-/* L80: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CHPR2 . */
-
-} /* chpr2_ */
-
-/* Subroutine */ int crotg_(complex *ca, complex *cb, real *c__, complex *s)
-{
-    /* System generated locals */
-    real r__1, r__2;
-    complex q__1, q__2, q__3;
-
-    /* Builtin functions */
-    double c_abs(complex *), sqrt(doublereal);
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static real norm;
-    static complex alpha;
-    static real scale;
-
-    if (c_abs(ca) != 0.f) {
-       goto L10;
-    }
-    *c__ = 0.f;
-    s->r = 1.f, s->i = 0.f;
-    ca->r = cb->r, ca->i = cb->i;
-    goto L20;
-L10:
-    scale = c_abs(ca) + c_abs(cb);
-    q__1.r = ca->r / scale, q__1.i = ca->i / scale;
-/* Computing 2nd power */
-    r__1 = c_abs(&q__1);
-    q__2.r = cb->r / scale, q__2.i = cb->i / scale;
-/* Computing 2nd power */
-    r__2 = c_abs(&q__2);
-    norm = scale * sqrt(r__1 * r__1 + r__2 * r__2);
-    r__1 = c_abs(ca);
-    q__1.r = ca->r / r__1, q__1.i = ca->i / r__1;
-    alpha.r = q__1.r, alpha.i = q__1.i;
-    *c__ = c_abs(ca) / norm;
-    r_cnjg(&q__3, cb);
-    q__2.r = alpha.r * q__3.r - alpha.i * q__3.i, q__2.i = alpha.r * q__3.i + 
-           alpha.i * q__3.r;
-    q__1.r = q__2.r / norm, q__1.i = q__2.i / norm;
-    s->r = q__1.r, s->i = q__1.i;
-    q__1.r = norm * alpha.r, q__1.i = norm * alpha.i;
-    ca->r = q__1.r, ca->i = q__1.i;
-L20:
-    return 0;
-} /* crotg_ */
-
-/* Subroutine */ int cscal_(integer *n, complex *ca, complex *cx, integer *
-       incx)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4;
-    complex q__1;
-
-    /* Local variables */
-    static integer i__, nincx;
-
-
-/*     scales a vector by a constant. */
-/*     jack dongarra, linpack,  3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --cx;
-
-    /* Function Body */
-    if (*n <= 0 || *incx <= 0) {
-       return 0;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-
-/*        code for increment not equal to 1 */
-
-    nincx = *n * *incx;
-    i__1 = nincx;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       i__3 = i__;
-       i__4 = i__;
-       q__1.r = ca->r * cx[i__4].r - ca->i * cx[i__4].i, q__1.i = ca->r * cx[
-               i__4].i + ca->i * cx[i__4].r;
-       cx[i__3].r = q__1.r, cx[i__3].i = q__1.i;
-/* L10: */
-    }
-    return 0;
-
-/*        code for increment equal to 1 */
-
-L20:
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       i__1 = i__;
-       i__3 = i__;
-       q__1.r = ca->r * cx[i__3].r - ca->i * cx[i__3].i, q__1.i = ca->r * cx[
-               i__3].i + ca->i * cx[i__3].r;
-       cx[i__1].r = q__1.r, cx[i__1].i = q__1.i;
-/* L30: */
-    }
-    return 0;
-} /* cscal_ */
-
-/* Subroutine */ int csrot_(integer *n, complex *cx, integer *incx, complex *
-       cy, integer *incy, real *c__, real *s)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4;
-    complex q__1, q__2, q__3;
-
-    /* Local variables */
-    static integer i__, ix, iy;
-    static complex ctemp;
-
-
-/*     applies a plane rotation, where the cos and sin (c and s) are real */
-/*     and the vectors cx and cy are complex. */
-/*     jack dongarra, linpack, 3/11/78. */
-
-
-    /* Parameter adjustments */
-    --cy;
-    --cx;
-
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*       code for unequal increments or equal increments not equal */
-/*         to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i;
-       i__3 = iy;
-       q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i;
-       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-       ctemp.r = q__1.r, ctemp.i = q__1.i;
-       i__2 = iy;
-       i__3 = iy;
-       q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i;
-       i__4 = ix;
-       q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i;
-       q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i;
-       cy[i__2].r = q__1.r, cy[i__2].i = q__1.i;
-       i__2 = ix;
-       cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*       code for both increments equal to 1 */
-
-L20:
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       q__2.r = *c__ * cx[i__2].r, q__2.i = *c__ * cx[i__2].i;
-       i__3 = i__;
-       q__3.r = *s * cy[i__3].r, q__3.i = *s * cy[i__3].i;
-       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-       ctemp.r = q__1.r, ctemp.i = q__1.i;
-       i__2 = i__;
-       i__3 = i__;
-       q__2.r = *c__ * cy[i__3].r, q__2.i = *c__ * cy[i__3].i;
-       i__4 = i__;
-       q__3.r = *s * cx[i__4].r, q__3.i = *s * cx[i__4].i;
-       q__1.r = q__2.r - q__3.r, q__1.i = q__2.i - q__3.i;
-       cy[i__2].r = q__1.r, cy[i__2].i = q__1.i;
-       i__2 = i__;
-       cx[i__2].r = ctemp.r, cx[i__2].i = ctemp.i;
-/* L30: */
-    }
-    return 0;
-} /* csrot_ */
-
-/* Subroutine */ int csscal_(integer *n, real *sa, complex *cx, integer *incx)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4;
-    real r__1, r__2;
-    complex q__1;
-
-    /* Builtin functions */
-    double r_imag(complex *);
-
-    /* Local variables */
-    static integer i__, nincx;
-
-
-/*     scales a complex vector by a real constant. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --cx;
-
-    /* Function Body */
-    if (*n <= 0 || *incx <= 0) {
-       return 0;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-
-/*        code for increment not equal to 1 */
-
-    nincx = *n * *incx;
-    i__1 = nincx;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       i__3 = i__;
-       i__4 = i__;
-       r__1 = *sa * cx[i__4].r;
-       r__2 = *sa * r_imag(&cx[i__]);
-       q__1.r = r__1, q__1.i = r__2;
-       cx[i__3].r = q__1.r, cx[i__3].i = q__1.i;
-/* L10: */
-    }
-    return 0;
-
-/*        code for increment equal to 1 */
-
-L20:
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       i__1 = i__;
-       i__3 = i__;
-       r__1 = *sa * cx[i__3].r;
-       r__2 = *sa * r_imag(&cx[i__]);
-       q__1.r = r__1, q__1.i = r__2;
-       cx[i__1].r = q__1.r, cx[i__1].i = q__1.i;
-/* L30: */
-    }
-    return 0;
-} /* csscal_ */
-
-/* Subroutine */ int cswap_(integer *n, complex *cx, integer *incx, complex *
-       cy, integer *incy)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-
-    /* Local variables */
-    static integer i__, ix, iy;
-    static complex ctemp;
-
-
-/*     interchanges two vectors. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --cy;
-    --cx;
-
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*       code for unequal increments or equal increments not equal */
-/*         to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       ctemp.r = cx[i__2].r, ctemp.i = cx[i__2].i;
-       i__2 = ix;
-       i__3 = iy;
-       cx[i__2].r = cy[i__3].r, cx[i__2].i = cy[i__3].i;
-       i__2 = iy;
-       cy[i__2].r = ctemp.r, cy[i__2].i = ctemp.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*       code for both increments equal to 1 */
-L20:
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       ctemp.r = cx[i__2].r, ctemp.i = cx[i__2].i;
-       i__2 = i__;
-       i__3 = i__;
-       cx[i__2].r = cy[i__3].r, cx[i__2].i = cy[i__3].i;
-       i__2 = i__;
-       cy[i__2].r = ctemp.r, cy[i__2].i = ctemp.i;
-/* L30: */
-    }
-    return 0;
-} /* cswap_ */
-
-/* Subroutine */ int csymm_(char *side, char *uplo, integer *m, integer *n, 
-       complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, 
-       complex *beta, complex *c__, integer *ldc, ftnlen side_len, ftnlen 
-       uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6;
-    complex q__1, q__2, q__3, q__4, q__5;
-
-    /* Local variables */
-    static integer i__, j, k, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CSYMM  performs one of the matrix-matrix operations */
-
-/*     C := alpha*A*B + beta*C, */
-
-/*  or */
-
-/*     C := alpha*B*A + beta*C, */
-
-/*  where  alpha and beta are scalars, A is a symmetric matrix and  B and */
-/*  C are m by n matrices. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE  specifies whether  the  symmetric matrix  A */
-/*           appears on the  left or right  in the  operation as follows: */
-
-/*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C, */
-
-/*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C, */
-
-/*           Unchanged on exit. */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of  the  symmetric  matrix   A  is  to  be */
-/*           referenced as follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of the */
-/*                                  symmetric matrix is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of the */
-/*                                  symmetric matrix is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies the number of rows of the matrix  C. */
-/*           M  must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix C. */
-/*           N  must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is */
-/*           m  when  SIDE = 'L' or 'l'  and is n  otherwise. */
-/*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of */
-/*           the array  A  must contain the  symmetric matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading m by m upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  symmetric matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  m by m  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  symmetric */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of */
-/*           the array  A  must contain the  symmetric matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading n by n upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  symmetric matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  n by n  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  symmetric */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  B      - COMPLEX          array of DIMENSION ( LDB, n ). */
-/*           Before entry, the leading  m by n part of the array  B  must */
-/*           contain the matrix B. */
-/*           Unchanged on exit. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX         . */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  C      - COMPLEX          array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n updated */
-/*           matrix. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Set NROWA as the number of rows of A. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-/*     Test the input parameters. */
-
-    info = 0;
-    if (! lsame_(side, "L", (ftnlen)1, (ftnlen)1) && ! lsame_(side, "R", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,*m)) {
-       info = 9;
-    } else if (*ldc < max(1,*m)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("CSYMM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || alpha->r == 0.f && alpha->i == 0.f && (beta->r 
-           == 1.f && beta->i == 0.f)) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       if (beta->r == 0.f && beta->i == 0.f) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           q__1.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*B + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * b_dim1;
-                   q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                           q__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   temp2.r = 0.f, temp2.i = 0.f;
-                   i__3 = i__ - 1;
-                   for (k = 1; k <= i__3; ++k) {
-                       i__4 = k + j * c_dim1;
-                       i__5 = k + j * c_dim1;
-                       i__6 = k + i__ * a_dim1;
-                       q__2.r = temp1.r * a[i__6].r - temp1.i * a[i__6].i, 
-                               q__2.i = temp1.r * a[i__6].i + temp1.i * a[
-                               i__6].r;
-                       q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5].i + 
-                               q__2.i;
-                       c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-                       i__4 = k + j * b_dim1;
-                       i__5 = k + i__ * a_dim1;
-                       q__2.r = b[i__4].r * a[i__5].r - b[i__4].i * a[i__5]
-                               .i, q__2.i = b[i__4].r * a[i__5].i + b[i__4]
-                               .i * a[i__5].r;
-                       q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-/* L50: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + i__ * a_dim1;
-                       q__2.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, 
-                               q__2.i = temp1.r * a[i__4].i + temp1.i * a[
-                               i__4].r;
-                       q__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       i__5 = i__ + i__ * a_dim1;
-                       q__4.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                               q__4.i = temp1.r * a[i__5].i + temp1.i * a[
-                               i__5].r;
-                       q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i;
-                       q__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L60: */
-               }
-/* L70: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               for (i__ = *m; i__ >= 1; --i__) {
-                   i__2 = i__ + j * b_dim1;
-                   q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, 
-                           q__1.i = alpha->r * b[i__2].i + alpha->i * b[i__2]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-                   temp2.r = 0.f, temp2.i = 0.f;
-                   i__2 = *m;
-                   for (k = i__ + 1; k <= i__2; ++k) {
-                       i__3 = k + j * c_dim1;
-                       i__4 = k + j * c_dim1;
-                       i__5 = k + i__ * a_dim1;
-                       q__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                               q__2.i = temp1.r * a[i__5].i + temp1.i * a[
-                               i__5].r;
-                       q__1.r = c__[i__4].r + q__2.r, q__1.i = c__[i__4].i + 
-                               q__2.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                       i__3 = k + j * b_dim1;
-                       i__4 = k + i__ * a_dim1;
-                       q__2.r = b[i__3].r * a[i__4].r - b[i__3].i * a[i__4]
-                               .i, q__2.i = b[i__3].r * a[i__4].i + b[i__3]
-                               .i * a[i__4].r;
-                       q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-/* L80: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__2 = i__ + j * c_dim1;
-                       i__3 = i__ + i__ * a_dim1;
-                       q__2.r = temp1.r * a[i__3].r - temp1.i * a[i__3].i, 
-                               q__2.i = temp1.r * a[i__3].i + temp1.i * a[
-                               i__3].r;
-                       q__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
-                   } else {
-                       i__2 = i__ + j * c_dim1;
-                       i__3 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__3].r - beta->i * c__[i__3]
-                               .i, q__3.i = beta->r * c__[i__3].i + beta->i *
-                                c__[i__3].r;
-                       i__4 = i__ + i__ * a_dim1;
-                       q__4.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, 
-                               q__4.i = temp1.r * a[i__4].i + temp1.i * a[
-                               i__4].r;
-                       q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i;
-                       q__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i;
-                       c__[i__2].r = q__1.r, c__[i__2].i = q__1.i;
-                   }
-/* L90: */
-               }
-/* L100: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*B*A + beta*C. */
-
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = j + j * a_dim1;
-           q__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2].i, q__1.i = 
-                   alpha->r * a[i__2].i + alpha->i * a[i__2].r;
-           temp1.r = q__1.r, temp1.i = q__1.i;
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * b_dim1;
-                   q__1.r = temp1.r * b[i__4].r - temp1.i * b[i__4].i, 
-                           q__1.i = temp1.r * b[i__4].i + temp1.i * b[i__4]
-                           .r;
-                   c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L110: */
-               }
-           } else {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   q__2.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           q__2.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   i__5 = i__ + j * b_dim1;
-                   q__3.r = temp1.r * b[i__5].r - temp1.i * b[i__5].i, 
-                           q__3.i = temp1.r * b[i__5].i + temp1.i * b[i__5]
-                           .r;
-                   q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                   c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L120: */
-               }
-           }
-           i__2 = j - 1;
-           for (k = 1; k <= i__2; ++k) {
-               if (upper) {
-                   i__3 = k + j * a_dim1;
-                   q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           q__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-               } else {
-                   i__3 = j + k * a_dim1;
-                   q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           q__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   i__4 = i__ + j * c_dim1;
-                   i__5 = i__ + j * c_dim1;
-                   i__6 = i__ + k * b_dim1;
-                   q__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, 
-                           q__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6]
-                           .r;
-                   q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5].i + 
-                           q__2.i;
-                   c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L130: */
-               }
-/* L140: */
-           }
-           i__2 = *n;
-           for (k = j + 1; k <= i__2; ++k) {
-               if (upper) {
-                   i__3 = j + k * a_dim1;
-                   q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           q__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-               } else {
-                   i__3 = k + j * a_dim1;
-                   q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           q__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = q__1.r, temp1.i = q__1.i;
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   i__4 = i__ + j * c_dim1;
-                   i__5 = i__ + j * c_dim1;
-                   i__6 = i__ + k * b_dim1;
-                   q__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, 
-                           q__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6]
-                           .r;
-                   q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5].i + 
-                           q__2.i;
-                   c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L150: */
-               }
-/* L160: */
-           }
-/* L170: */
-       }
-    }
-
-    return 0;
-
-/*     End of CSYMM . */
-
-} /* csymm_ */
-
-/* Subroutine */ int csyr2k_(char *uplo, char *trans, integer *n, integer *k, 
-       complex *alpha, complex *a, integer *lda, complex *b, integer *ldb, 
-       complex *beta, complex *c__, integer *ldc, ftnlen uplo_len, ftnlen 
-       trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6, i__7;
-    complex q__1, q__2, q__3, q__4, q__5;
-
-    /* Local variables */
-    static integer i__, j, l, info;
-    static complex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CSYR2K  performs one of the symmetric rank 2k operations */
-
-/*     C := alpha*A*B' + alpha*B*A' + beta*C, */
-
-/*  or */
-
-/*     C := alpha*A'*B + alpha*B'*A + beta*C, */
-
-/*  where  alpha and beta  are scalars,  C is an  n by n symmetric matrix */
-/*  and  A and B  are  n by k  matrices  in the  first  case  and  k by n */
-/*  matrices in the second case. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'    C := alpha*A*B' + alpha*B*A' + */
-/*                                         beta*C. */
-
-/*              TRANS = 'T' or 't'    C := alpha*A'*B + alpha*B'*A + */
-/*                                         beta*C. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns  of the  matrices  A and B,  and on  entry  with */
-/*           TRANS = 'T' or 't',  K  specifies  the number of rows of the */
-/*           matrices  A and B.  K must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  B      - COMPLEX          array of DIMENSION ( LDB, kb ), where kb is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  k by n  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDB must be at least  max( 1, n ), otherwise  LDB must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX         . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-
-/*  C      - COMPLEX          array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldc < max(1,*n)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("CSYR2K", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f || *k == 0) && (
-           beta->r == 1.f && beta->i == 0.f)) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       if (upper) {
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L30: */
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*B' + alpha*B*A' + C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0.f && beta->i == 0.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L90: */
-                   }
-               } else if (beta->r != 1.f || beta->i != 0.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L100: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   i__4 = j + l * b_dim1;
-                   if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != 
-                           0.f || b[i__4].i != 0.f)) {
-                       i__3 = j + l * b_dim1;
-                       q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               q__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp1.r = q__1.r, temp1.i = q__1.i;
-                       i__3 = j + l * a_dim1;
-                       q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               q__1.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-                       i__3 = j;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__3.r = a[i__6].r * temp1.r - a[i__6].i * 
-                                   temp1.i, q__3.i = a[i__6].r * temp1.i + a[
-                                   i__6].i * temp1.r;
-                           q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5]
-                                   .i + q__3.i;
-                           i__7 = i__ + l * b_dim1;
-                           q__4.r = b[i__7].r * temp2.r - b[i__7].i * 
-                                   temp2.i, q__4.i = b[i__7].r * temp2.i + b[
-                                   i__7].i * temp2.r;
-                           q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + 
-                                   q__4.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L110: */
-                       }
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0.f && beta->i == 0.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L140: */
-                   }
-               } else if (beta->r != 1.f || beta->i != 0.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L150: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   i__4 = j + l * b_dim1;
-                   if (a[i__3].r != 0.f || a[i__3].i != 0.f || (b[i__4].r != 
-                           0.f || b[i__4].i != 0.f)) {
-                       i__3 = j + l * b_dim1;
-                       q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               q__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp1.r = q__1.r, temp1.i = q__1.i;
-                       i__3 = j + l * a_dim1;
-                       q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               q__1.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-                       i__3 = *n;
-                       for (i__ = j; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__3.r = a[i__6].r * temp1.r - a[i__6].i * 
-                                   temp1.i, q__3.i = a[i__6].r * temp1.i + a[
-                                   i__6].i * temp1.r;
-                           q__2.r = c__[i__5].r + q__3.r, q__2.i = c__[i__5]
-                                   .i + q__3.i;
-                           i__7 = i__ + l * b_dim1;
-                           q__4.r = b[i__7].r * temp2.r - b[i__7].i * 
-                                   temp2.i, q__4.i = b[i__7].r * temp2.i + b[
-                                   i__7].i * temp2.r;
-                           q__1.r = q__2.r + q__4.r, q__1.i = q__2.i + 
-                                   q__4.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*A'*B + alpha*B'*A + C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp1.r = 0.f, temp1.i = 0.f;
-                   temp2.r = 0.f, temp2.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * b_dim1;
-                       q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
-                               .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4]
-                               .i * b[i__5].r;
-                       q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i;
-                       temp1.r = q__1.r, temp1.i = q__1.i;
-                       i__4 = l + i__ * b_dim1;
-                       i__5 = l + j * a_dim1;
-                       q__2.r = b[i__4].r * a[i__5].r - b[i__4].i * a[i__5]
-                               .i, q__2.i = b[i__4].r * a[i__5].i + b[i__4]
-                               .i * a[i__5].r;
-                       q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-/* L190: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                               q__2.i = alpha->r * temp1.i + alpha->i * 
-                               temp1.r;
-                       q__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                               q__4.i = alpha->r * temp1.i + alpha->i * 
-                               temp1.r;
-                       q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i;
-                       q__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp1.r = 0.f, temp1.i = 0.f;
-                   temp2.r = 0.f, temp2.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * b_dim1;
-                       q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
-                               .i, q__2.i = a[i__4].r * b[i__5].i + a[i__4]
-                               .i * b[i__5].r;
-                       q__1.r = temp1.r + q__2.r, q__1.i = temp1.i + q__2.i;
-                       temp1.r = q__1.r, temp1.i = q__1.i;
-                       i__4 = l + i__ * b_dim1;
-                       i__5 = l + j * a_dim1;
-                       q__2.r = b[i__4].r * a[i__5].r - b[i__4].i * a[i__5]
-                               .i, q__2.i = b[i__4].r * a[i__5].i + b[i__4]
-                               .i * a[i__5].r;
-                       q__1.r = temp2.r + q__2.r, q__1.i = temp2.i + q__2.i;
-                       temp2.r = q__1.r, temp2.i = q__1.i;
-/* L220: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                               q__2.i = alpha->r * temp1.i + alpha->i * 
-                               temp1.r;
-                       q__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__4.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                               q__4.i = alpha->r * temp1.i + alpha->i * 
-                               temp1.r;
-                       q__2.r = q__3.r + q__4.r, q__2.i = q__3.i + q__4.i;
-                       q__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               q__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       q__1.r = q__2.r + q__5.r, q__1.i = q__2.i + q__5.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CSYR2K. */
-
-} /* csyr2k_ */
-
-/* Subroutine */ int csyrk_(char *uplo, char *trans, integer *n, integer *k, 
-       complex *alpha, complex *a, integer *lda, complex *beta, complex *c__,
-        integer *ldc, ftnlen uplo_len, ftnlen trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, 
-           i__6;
-    complex q__1, q__2, q__3;
-
-    /* Local variables */
-    static integer i__, j, l, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CSYRK  performs one of the symmetric rank k operations */
-
-/*     C := alpha*A*A' + beta*C, */
-
-/*  or */
-
-/*     C := alpha*A'*A + beta*C, */
-
-/*  where  alpha and beta  are scalars,  C is an  n by n symmetric matrix */
-/*  and  A  is an  n by k  matrix in the first case and a  k by n  matrix */
-/*  in the second case. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C. */
-
-/*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns   of  the   matrix   A,   and  on   entry   with */
-/*           TRANS = 'T' or 't',  K  specifies  the number of rows of the */
-/*           matrix A.  K must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX         . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-
-/*  C      - COMPLEX          array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldc < max(1,*n)) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("CSYRK ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || (alpha->r == 0.f && alpha->i == 0.f || *k == 0) && (
-           beta->r == 1.f && beta->i == 0.f)) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       if (upper) {
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L30: */
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (beta->r == 0.f && beta->i == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*A' + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0.f && beta->i == 0.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L90: */
-                   }
-               } else if (beta->r != 1.f || beta->i != 0.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L100: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   if (a[i__3].r != 0.f || a[i__3].i != 0.f) {
-                       i__3 = j + l * a_dim1;
-                       q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               q__1.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__3 = j;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   q__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
-                                   .i + q__2.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L110: */
-                       }
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0.f && beta->i == 0.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0.f, c__[i__3].i = 0.f;
-/* L140: */
-                   }
-               } else if (beta->r != 1.f || beta->i != 0.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       q__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-/* L150: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   if (a[i__3].r != 0.f || a[i__3].i != 0.f) {
-                       i__3 = j + l * a_dim1;
-                       q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               q__1.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__3 = *n;
-                       for (i__ = j; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           q__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   q__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           q__1.r = c__[i__5].r + q__2.r, q__1.i = c__[i__5]
-                                   .i + q__2.i;
-                           c__[i__4].r = q__1.r, c__[i__4].i = q__1.i;
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*A'*A + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * a_dim1;
-                       q__2.r = a[i__4].r * a[i__5].r - a[i__4].i * a[i__5]
-                               .i, q__2.i = a[i__4].r * a[i__5].i + a[i__4]
-                               .i * a[i__5].r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L190: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp.r = 0.f, temp.i = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * a_dim1;
-                       q__2.r = a[i__4].r * a[i__5].r - a[i__4].i * a[i__5]
-                               .i, q__2.i = a[i__4].r * a[i__5].i + a[i__4]
-                               .i * a[i__5].r;
-                       q__1.r = temp.r + q__2.r, q__1.i = temp.i + q__2.i;
-                       temp.r = q__1.r, temp.i = q__1.i;
-/* L220: */
-                   }
-                   if (beta->r == 0.f && beta->i == 0.f) {
-                       i__3 = i__ + j * c_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       q__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       q__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, q__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       q__1.r = q__2.r + q__3.r, q__1.i = q__2.i + q__3.i;
-                       c__[i__3].r = q__1.r, c__[i__3].i = q__1.i;
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CSYRK . */
-
-} /* csyrk_ */
-
-/* Subroutine */ int ctbmv_(char *uplo, char *trans, char *diag, integer *n, 
-       integer *k, complex *a, integer *lda, complex *x, integer *incx, 
-       ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2, q__3;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, l, ix, jx, kx, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CTBMV  performs one of the matrix-vector operations */
-
-/*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
-
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   x := A*x. */
-
-/*              TRANS = 'T' or 't'   x := A'*x. */
-
-/*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < *k + 1) {
-       info = 7;
-    } else if (*incx == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("CTBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX   too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*         Form  x := A*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       l = kplus1 - j;
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__4 = j - 1;
-                       for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                           i__2 = i__;
-                           i__3 = i__;
-                           i__5 = l + i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   q__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + 
-                                   q__2.i;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-/* L10: */
-                       }
-                       if (nounit) {
-                           i__4 = j;
-                           i__2 = j;
-                           i__3 = kplus1 + j * a_dim1;
-                           q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
-                                   i__3].i, q__1.i = x[i__2].r * a[i__3].i + 
-                                   x[i__2].i * a[i__3].r;
-                           x[i__4].r = q__1.r, x[i__4].i = q__1.i;
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__4 = jx;
-                   if (x[i__4].r != 0.f || x[i__4].i != 0.f) {
-                       i__4 = jx;
-                       temp.r = x[i__4].r, temp.i = x[i__4].i;
-                       ix = kx;
-                       l = kplus1 - j;
-/* Computing MAX */
-                       i__4 = 1, i__2 = j - *k;
-                       i__3 = j - 1;
-                       for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                           i__4 = ix;
-                           i__2 = ix;
-                           i__5 = l + i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   q__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           q__1.r = x[i__2].r + q__2.r, q__1.i = x[i__2].i + 
-                                   q__2.i;
-                           x[i__4].r = q__1.r, x[i__4].i = q__1.i;
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           i__3 = jx;
-                           i__4 = jx;
-                           i__2 = kplus1 + j * a_dim1;
-                           q__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[
-                                   i__2].i, q__1.i = x[i__4].r * a[i__2].i + 
-                                   x[i__4].i * a[i__2].r;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-                       }
-                   }
-                   jx += *incx;
-                   if (j > *k) {
-                       kx += *incx;
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       l = 1 - j;
-/* Computing MIN */
-                       i__1 = *n, i__3 = j + *k;
-                       i__4 = j + 1;
-                       for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
-                           i__1 = i__;
-                           i__3 = i__;
-                           i__2 = l + i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   q__2.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + 
-                                   q__2.i;
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-/* L50: */
-                       }
-                       if (nounit) {
-                           i__4 = j;
-                           i__1 = j;
-                           i__3 = j * a_dim1 + 1;
-                           q__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[
-                                   i__3].i, q__1.i = x[i__1].r * a[i__3].i + 
-                                   x[i__1].i * a[i__3].r;
-                           x[i__4].r = q__1.r, x[i__4].i = q__1.i;
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__4 = jx;
-                   if (x[i__4].r != 0.f || x[i__4].i != 0.f) {
-                       i__4 = jx;
-                       temp.r = x[i__4].r, temp.i = x[i__4].i;
-                       ix = kx;
-                       l = 1 - j;
-/* Computing MIN */
-                       i__4 = *n, i__1 = j + *k;
-                       i__3 = j + 1;
-                       for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
-                           i__4 = ix;
-                           i__1 = ix;
-                           i__2 = l + i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   q__2.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           q__1.r = x[i__1].r + q__2.r, q__1.i = x[i__1].i + 
-                                   q__2.i;
-                           x[i__4].r = q__1.r, x[i__4].i = q__1.i;
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           i__3 = jx;
-                           i__4 = jx;
-                           i__1 = j * a_dim1 + 1;
-                           q__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[
-                                   i__1].i, q__1.i = x[i__4].r * a[i__1].i + 
-                                   x[i__4].i * a[i__1].r;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-                       }
-                   }
-                   jx -= *incx;
-                   if (*n - j >= *k) {
-                       kx -= *incx;
-                   }
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := A'*x  or  x := conjg( A' )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__3 = j;
-                   temp.r = x[i__3].r, temp.i = x[i__3].i;
-                   l = kplus1 - j;
-                   if (noconj) {
-                       if (nounit) {
-                           i__3 = kplus1 + j * a_dim1;
-                           q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-                                   q__1.i = temp.r * a[i__3].i + temp.i * a[
-                                   i__3].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-/* Computing MAX */
-                       i__4 = 1, i__1 = j - *k;
-                       i__3 = max(i__4,i__1);
-                       for (i__ = j - 1; i__ >= i__3; --i__) {
-                           i__4 = l + i__ + j * a_dim1;
-                           i__1 = i__;
-                           q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[
-                                   i__1].i, q__2.i = a[i__4].r * x[i__1].i + 
-                                   a[i__4].i * x[i__1].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L90: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[kplus1 + j * a_dim1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-/* Computing MAX */
-                       i__4 = 1, i__1 = j - *k;
-                       i__3 = max(i__4,i__1);
-                       for (i__ = j - 1; i__ >= i__3; --i__) {
-                           r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                           i__4 = i__;
-                           q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, 
-                                   q__2.i = q__3.r * x[i__4].i + q__3.i * x[
-                                   i__4].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L100: */
-                       }
-                   }
-                   i__3 = j;
-                   x[i__3].r = temp.r, x[i__3].i = temp.i;
-/* L110: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__3 = jx;
-                   temp.r = x[i__3].r, temp.i = x[i__3].i;
-                   kx -= *incx;
-                   ix = kx;
-                   l = kplus1 - j;
-                   if (noconj) {
-                       if (nounit) {
-                           i__3 = kplus1 + j * a_dim1;
-                           q__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-                                   q__1.i = temp.r * a[i__3].i + temp.i * a[
-                                   i__3].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-/* Computing MAX */
-                       i__4 = 1, i__1 = j - *k;
-                       i__3 = max(i__4,i__1);
-                       for (i__ = j - 1; i__ >= i__3; --i__) {
-                           i__4 = l + i__ + j * a_dim1;
-                           i__1 = ix;
-                           q__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[
-                                   i__1].i, q__2.i = a[i__4].r * x[i__1].i + 
-                                   a[i__4].i * x[i__1].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix -= *incx;
-/* L120: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[kplus1 + j * a_dim1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-/* Computing MAX */
-                       i__4 = 1, i__1 = j - *k;
-                       i__3 = max(i__4,i__1);
-                       for (i__ = j - 1; i__ >= i__3; --i__) {
-                           r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                           i__4 = ix;
-                           q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, 
-                                   q__2.i = q__3.r * x[i__4].i + q__3.i * x[
-                                   i__4].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix -= *incx;
-/* L130: */
-                       }
-                   }
-                   i__3 = jx;
-                   x[i__3].r = temp.r, x[i__3].i = temp.i;
-                   jx -= *incx;
-/* L140: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__3 = *n;
-               for (j = 1; j <= i__3; ++j) {
-                   i__4 = j;
-                   temp.r = x[i__4].r, temp.i = x[i__4].i;
-                   l = 1 - j;
-                   if (noconj) {
-                       if (nounit) {
-                           i__4 = j * a_dim1 + 1;
-                           q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   q__1.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-/* Computing MIN */
-                       i__1 = *n, i__2 = j + *k;
-                       i__4 = min(i__1,i__2);
-                       for (i__ = j + 1; i__ <= i__4; ++i__) {
-                           i__1 = l + i__ + j * a_dim1;
-                           i__2 = i__;
-                           q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-                                   i__2].i, q__2.i = a[i__1].r * x[i__2].i + 
-                                   a[i__1].i * x[i__2].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L150: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j * a_dim1 + 1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-/* Computing MIN */
-                       i__1 = *n, i__2 = j + *k;
-                       i__4 = min(i__1,i__2);
-                       for (i__ = j + 1; i__ <= i__4; ++i__) {
-                           r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                           i__1 = i__;
-                           q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, 
-                                   q__2.i = q__3.r * x[i__1].i + q__3.i * x[
-                                   i__1].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L160: */
-                       }
-                   }
-                   i__4 = j;
-                   x[i__4].r = temp.r, x[i__4].i = temp.i;
-/* L170: */
-               }
-           } else {
-               jx = kx;
-               i__3 = *n;
-               for (j = 1; j <= i__3; ++j) {
-                   i__4 = jx;
-                   temp.r = x[i__4].r, temp.i = x[i__4].i;
-                   kx += *incx;
-                   ix = kx;
-                   l = 1 - j;
-                   if (noconj) {
-                       if (nounit) {
-                           i__4 = j * a_dim1 + 1;
-                           q__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   q__1.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-/* Computing MIN */
-                       i__1 = *n, i__2 = j + *k;
-                       i__4 = min(i__1,i__2);
-                       for (i__ = j + 1; i__ <= i__4; ++i__) {
-                           i__1 = l + i__ + j * a_dim1;
-                           i__2 = ix;
-                           q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-                                   i__2].i, q__2.i = a[i__1].r * x[i__2].i + 
-                                   a[i__1].i * x[i__2].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix += *incx;
-/* L180: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j * a_dim1 + 1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-/* Computing MIN */
-                       i__1 = *n, i__2 = j + *k;
-                       i__4 = min(i__1,i__2);
-                       for (i__ = j + 1; i__ <= i__4; ++i__) {
-                           r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                           i__1 = ix;
-                           q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, 
-                                   q__2.i = q__3.r * x[i__1].i + q__3.i * x[
-                                   i__1].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix += *incx;
-/* L190: */
-                       }
-                   }
-                   i__4 = jx;
-                   x[i__4].r = temp.r, x[i__4].i = temp.i;
-                   jx += *incx;
-/* L200: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CTBMV . */
-
-} /* ctbmv_ */
-
-/* Subroutine */ int ctbsv_(char *uplo, char *trans, char *diag, integer *n, 
-       integer *k, complex *a, integer *lda, complex *x, integer *incx, 
-       ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2, q__3;
-
-    /* Builtin functions */
-    void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, l, ix, jx, kx, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CTBSV  solves one of the systems of equations */
-
-/*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b, */
-
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular band matrix, with ( k + 1 ) */
-/*  diagonals. */
-
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   A*x = b. */
-
-/*              TRANS = 'T' or 't'   A'*x = b. */
-
-/*              TRANS = 'C' or 'c'   conjg( A' )*x = b. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < *k + 1) {
-       info = 7;
-    } else if (*incx == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("CTBSV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed by sequentially with one pass through A. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x := inv( A )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       l = kplus1 - j;
-                       if (nounit) {
-                           i__1 = j;
-                           c_div(&q__1, &x[j], &a[kplus1 + j * a_dim1]);
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__1 = max(i__2,i__3);
-                       for (i__ = j - 1; i__ >= i__1; --i__) {
-                           i__2 = i__;
-                           i__3 = i__;
-                           i__4 = l + i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   q__2.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           q__1.r = x[i__3].r - q__2.r, q__1.i = x[i__3].i - 
-                                   q__2.i;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-/* L10: */
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   kx -= *incx;
-                   i__1 = jx;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       ix = kx;
-                       l = kplus1 - j;
-                       if (nounit) {
-                           i__1 = jx;
-                           c_div(&q__1, &x[jx], &a[kplus1 + j * a_dim1]);
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__1 = max(i__2,i__3);
-                       for (i__ = j - 1; i__ >= i__1; --i__) {
-                           i__2 = ix;
-                           i__3 = ix;
-                           i__4 = l + i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   q__2.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           q__1.r = x[i__3].r - q__2.r, q__1.i = x[i__3].i - 
-                                   q__2.i;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                           ix -= *incx;
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       l = 1 - j;
-                       if (nounit) {
-                           i__2 = j;
-                           c_div(&q__1, &x[j], &a[j * a_dim1 + 1]);
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-/* Computing MIN */
-                       i__3 = *n, i__4 = j + *k;
-                       i__2 = min(i__3,i__4);
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = l + i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   q__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-/* L50: */
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   kx += *incx;
-                   i__2 = jx;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       ix = kx;
-                       l = 1 - j;
-                       if (nounit) {
-                           i__2 = jx;
-                           c_div(&q__1, &x[jx], &a[j * a_dim1 + 1]);
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-/* Computing MIN */
-                       i__3 = *n, i__4 = j + *k;
-                       i__2 = min(i__3,i__4);
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = l + i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   q__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-                           ix += *incx;
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := inv( A' )*x  or  x := inv( conjg( A') )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   l = kplus1 - j;
-                   if (noconj) {
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__4 = j - 1;
-                       for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                           i__2 = l + i__ + j * a_dim1;
-                           i__3 = i__;
-                           q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[
-                                   i__3].i, q__2.i = a[i__2].r * x[i__3].i + 
-                                   a[i__2].i * x[i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L90: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &a[kplus1 + j * a_dim1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-/* Computing MAX */
-                       i__4 = 1, i__2 = j - *k;
-                       i__3 = j - 1;
-                       for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                           r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                           i__4 = i__;
-                           q__2.r = q__3.r * x[i__4].r - q__3.i * x[i__4].i, 
-                                   q__2.i = q__3.r * x[i__4].i + q__3.i * x[
-                                   i__4].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L100: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[kplus1 + j * a_dim1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__3 = j;
-                   x[i__3].r = temp.r, x[i__3].i = temp.i;
-/* L110: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__3 = jx;
-                   temp.r = x[i__3].r, temp.i = x[i__3].i;
-                   ix = kx;
-                   l = kplus1 - j;
-                   if (noconj) {
-/* Computing MAX */
-                       i__3 = 1, i__4 = j - *k;
-                       i__2 = j - 1;
-                       for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) {
-                           i__3 = l + i__ + j * a_dim1;
-                           i__4 = ix;
-                           q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, q__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix += *incx;
-/* L120: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &a[kplus1 + j * a_dim1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__4 = j - 1;
-                       for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                           r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                           i__2 = ix;
-                           q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, 
-                                   q__2.i = q__3.r * x[i__2].i + q__3.i * x[
-                                   i__2].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix += *incx;
-/* L130: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[kplus1 + j * a_dim1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__4 = jx;
-                   x[i__4].r = temp.r, x[i__4].i = temp.i;
-                   jx += *incx;
-                   if (j > *k) {
-                       kx += *incx;
-                   }
-/* L140: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   l = 1 - j;
-                   if (noconj) {
-/* Computing MIN */
-                       i__1 = *n, i__4 = j + *k;
-                       i__2 = j + 1;
-                       for (i__ = min(i__1,i__4); i__ >= i__2; --i__) {
-                           i__1 = l + i__ + j * a_dim1;
-                           i__4 = i__;
-                           q__2.r = a[i__1].r * x[i__4].r - a[i__1].i * x[
-                                   i__4].i, q__2.i = a[i__1].r * x[i__4].i + 
-                                   a[i__1].i * x[i__4].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L150: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &a[j * a_dim1 + 1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-/* Computing MIN */
-                       i__2 = *n, i__1 = j + *k;
-                       i__4 = j + 1;
-                       for (i__ = min(i__2,i__1); i__ >= i__4; --i__) {
-                           r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                           i__2 = i__;
-                           q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, 
-                                   q__2.i = q__3.r * x[i__2].i + q__3.i * x[
-                                   i__2].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L160: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j * a_dim1 + 1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__4 = j;
-                   x[i__4].r = temp.r, x[i__4].i = temp.i;
-/* L170: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__4 = jx;
-                   temp.r = x[i__4].r, temp.i = x[i__4].i;
-                   ix = kx;
-                   l = 1 - j;
-                   if (noconj) {
-/* Computing MIN */
-                       i__4 = *n, i__2 = j + *k;
-                       i__1 = j + 1;
-                       for (i__ = min(i__4,i__2); i__ >= i__1; --i__) {
-                           i__4 = l + i__ + j * a_dim1;
-                           i__2 = ix;
-                           q__2.r = a[i__4].r * x[i__2].r - a[i__4].i * x[
-                                   i__2].i, q__2.i = a[i__4].r * x[i__2].i + 
-                                   a[i__4].i * x[i__2].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix -= *incx;
-/* L180: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &a[j * a_dim1 + 1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-/* Computing MIN */
-                       i__1 = *n, i__4 = j + *k;
-                       i__2 = j + 1;
-                       for (i__ = min(i__1,i__4); i__ >= i__2; --i__) {
-                           r_cnjg(&q__3, &a[l + i__ + j * a_dim1]);
-                           i__1 = ix;
-                           q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, 
-                                   q__2.i = q__3.r * x[i__1].i + q__3.i * x[
-                                   i__1].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix -= *incx;
-/* L190: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j * a_dim1 + 1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx -= *incx;
-                   if (*n - j >= *k) {
-                       kx -= *incx;
-                   }
-/* L200: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CTBSV . */
-
-} /* ctbsv_ */
-
-/* Subroutine */ int ctpmv_(char *uplo, char *trans, char *diag, integer *n, 
-       complex *ap, complex *x, integer *incx, ftnlen uplo_len, ftnlen 
-       trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2, q__3;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CTPMV  performs one of the matrix-vector operations */
-
-/*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
-
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular matrix, supplied in packed form. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   x := A*x. */
-
-/*              TRANS = 'T' or 't'   x := A'*x. */
-
-/*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  AP     - COMPLEX          array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */
-/*           respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */
-/*           respectively, and so on. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --ap;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*incx == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("CTPMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of AP are */
-/*     accessed sequentially with one pass through AP. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x:= A*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       k = kk;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = k;
-                           q__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5]
-                                   .i, q__2.i = temp.r * ap[i__5].i + temp.i 
-                                   * ap[i__5].r;
-                           q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-                           ++k;
-/* L10: */
-                       }
-                       if (nounit) {
-                           i__2 = j;
-                           i__3 = j;
-                           i__4 = kk + j - 1;
-                           q__1.r = x[i__3].r * ap[i__4].r - x[i__3].i * ap[
-                                   i__4].i, q__1.i = x[i__3].r * ap[i__4].i 
-                                   + x[i__3].i * ap[i__4].r;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                   }
-                   kk += j;
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       ix = kx;
-                       i__2 = kk + j - 2;
-                       for (k = kk; k <= i__2; ++k) {
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = k;
-                           q__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5]
-                                   .i, q__2.i = temp.r * ap[i__5].i + temp.i 
-                                   * ap[i__5].r;
-                           q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           i__2 = jx;
-                           i__3 = jx;
-                           i__4 = kk + j - 1;
-                           q__1.r = x[i__3].r * ap[i__4].r - x[i__3].i * ap[
-                                   i__4].i, q__1.i = x[i__3].r * ap[i__4].i 
-                                   + x[i__3].i * ap[i__4].r;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                   }
-                   jx += *incx;
-                   kk += j;
-/* L40: */
-               }
-           }
-       } else {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       k = kk;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = i__;
-                           i__3 = i__;
-                           i__4 = k;
-                           q__2.r = temp.r * ap[i__4].r - temp.i * ap[i__4]
-                                   .i, q__2.i = temp.r * ap[i__4].i + temp.i 
-                                   * ap[i__4].r;
-                           q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + 
-                                   q__2.i;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                           --k;
-/* L50: */
-                       }
-                       if (nounit) {
-                           i__1 = j;
-                           i__2 = j;
-                           i__3 = kk - *n + j;
-                           q__1.r = x[i__2].r * ap[i__3].r - x[i__2].i * ap[
-                                   i__3].i, q__1.i = x[i__2].r * ap[i__3].i 
-                                   + x[i__2].i * ap[i__3].r;
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                   }
-                   kk -= *n - j + 1;
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       ix = kx;
-                       i__1 = kk - (*n - (j + 1));
-                       for (k = kk; k >= i__1; --k) {
-                           i__2 = ix;
-                           i__3 = ix;
-                           i__4 = k;
-                           q__2.r = temp.r * ap[i__4].r - temp.i * ap[i__4]
-                                   .i, q__2.i = temp.r * ap[i__4].i + temp.i 
-                                   * ap[i__4].r;
-                           q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + 
-                                   q__2.i;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           i__1 = jx;
-                           i__2 = jx;
-                           i__3 = kk - *n + j;
-                           q__1.r = x[i__2].r * ap[i__3].r - x[i__2].i * ap[
-                                   i__3].i, q__1.i = x[i__2].r * ap[i__3].i 
-                                   + x[i__2].i * ap[i__3].r;
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                   }
-                   jx -= *incx;
-                   kk -= *n - j + 1;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := A'*x  or  x := conjg( A' )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   k = kk - 1;
-                   if (noconj) {
-                       if (nounit) {
-                           i__1 = kk;
-                           q__1.r = temp.r * ap[i__1].r - temp.i * ap[i__1]
-                                   .i, q__1.i = temp.r * ap[i__1].i + temp.i 
-                                   * ap[i__1].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           i__1 = k;
-                           i__2 = i__;
-                           q__2.r = ap[i__1].r * x[i__2].r - ap[i__1].i * x[
-                                   i__2].i, q__2.i = ap[i__1].r * x[i__2].i 
-                                   + ap[i__1].i * x[i__2].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           --k;
-/* L90: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &ap[kk]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           r_cnjg(&q__3, &ap[k]);
-                           i__1 = i__;
-                           q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, 
-                                   q__2.i = q__3.r * x[i__1].i + q__3.i * x[
-                                   i__1].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           --k;
-/* L100: */
-                       }
-                   }
-                   i__1 = j;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   kk -= j;
-/* L110: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   ix = jx;
-                   if (noconj) {
-                       if (nounit) {
-                           i__1 = kk;
-                           q__1.r = temp.r * ap[i__1].r - temp.i * ap[i__1]
-                                   .i, q__1.i = temp.r * ap[i__1].i + temp.i 
-                                   * ap[i__1].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__1 = kk - j + 1;
-                       for (k = kk - 1; k >= i__1; --k) {
-                           ix -= *incx;
-                           i__2 = k;
-                           i__3 = ix;
-                           q__2.r = ap[i__2].r * x[i__3].r - ap[i__2].i * x[
-                                   i__3].i, q__2.i = ap[i__2].r * x[i__3].i 
-                                   + ap[i__2].i * x[i__3].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L120: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &ap[kk]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__1 = kk - j + 1;
-                       for (k = kk - 1; k >= i__1; --k) {
-                           ix -= *incx;
-                           r_cnjg(&q__3, &ap[k]);
-                           i__2 = ix;
-                           q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, 
-                                   q__2.i = q__3.r * x[i__2].i + q__3.i * x[
-                                   i__2].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L130: */
-                       }
-                   }
-                   i__1 = jx;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   jx -= *incx;
-                   kk -= j;
-/* L140: */
-               }
-           }
-       } else {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   k = kk + 1;
-                   if (noconj) {
-                       if (nounit) {
-                           i__2 = kk;
-                           q__1.r = temp.r * ap[i__2].r - temp.i * ap[i__2]
-                                   .i, q__1.i = temp.r * ap[i__2].i + temp.i 
-                                   * ap[i__2].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = k;
-                           i__4 = i__;
-                           q__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[
-                                   i__4].i, q__2.i = ap[i__3].r * x[i__4].i 
-                                   + ap[i__3].i * x[i__4].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ++k;
-/* L150: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &ap[kk]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           r_cnjg(&q__3, &ap[k]);
-                           i__3 = i__;
-                           q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                                   q__2.i = q__3.r * x[i__3].i + q__3.i * x[
-                                   i__3].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ++k;
-/* L160: */
-                       }
-                   }
-                   i__2 = j;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   kk += *n - j + 1;
-/* L170: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   ix = jx;
-                   if (noconj) {
-                       if (nounit) {
-                           i__2 = kk;
-                           q__1.r = temp.r * ap[i__2].r - temp.i * ap[i__2]
-                                   .i, q__1.i = temp.r * ap[i__2].i + temp.i 
-                                   * ap[i__2].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = kk + *n - j;
-                       for (k = kk + 1; k <= i__2; ++k) {
-                           ix += *incx;
-                           i__3 = k;
-                           i__4 = ix;
-                           q__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[
-                                   i__4].i, q__2.i = ap[i__3].r * x[i__4].i 
-                                   + ap[i__3].i * x[i__4].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L180: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &ap[kk]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = kk + *n - j;
-                       for (k = kk + 1; k <= i__2; ++k) {
-                           ix += *incx;
-                           r_cnjg(&q__3, &ap[k]);
-                           i__3 = ix;
-                           q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                                   q__2.i = q__3.r * x[i__3].i + q__3.i * x[
-                                   i__3].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L190: */
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx += *incx;
-                   kk += *n - j + 1;
-/* L200: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CTPMV . */
-
-} /* ctpmv_ */
-
-/* Subroutine */ int ctpsv_(char *uplo, char *trans, char *diag, integer *n, 
-       complex *ap, complex *x, integer *incx, ftnlen uplo_len, ftnlen 
-       trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2, q__3;
-
-    /* Builtin functions */
-    void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CTPSV  solves one of the systems of equations */
-
-/*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b, */
-
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular matrix, supplied in packed form. */
-
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   A*x = b. */
-
-/*              TRANS = 'T' or 't'   A'*x = b. */
-
-/*              TRANS = 'C' or 'c'   conjg( A' )*x = b. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  AP     - COMPLEX          array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */
-/*           respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */
-/*           respectively, and so on. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --ap;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*incx == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("CTPSV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of AP are */
-/*     accessed sequentially with one pass through AP. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x := inv( A )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       if (nounit) {
-                           i__1 = j;
-                           c_div(&q__1, &x[j], &ap[kk]);
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       k = kk - 1;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           i__1 = i__;
-                           i__2 = i__;
-                           i__3 = k;
-                           q__2.r = temp.r * ap[i__3].r - temp.i * ap[i__3]
-                                   .i, q__2.i = temp.r * ap[i__3].i + temp.i 
-                                   * ap[i__3].r;
-                           q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i - 
-                                   q__2.i;
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                           --k;
-/* L10: */
-                       }
-                   }
-                   kk -= j;
-/* L20: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       if (nounit) {
-                           i__1 = jx;
-                           c_div(&q__1, &x[jx], &ap[kk]);
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       ix = jx;
-                       i__1 = kk - j + 1;
-                       for (k = kk - 1; k >= i__1; --k) {
-                           ix -= *incx;
-                           i__2 = ix;
-                           i__3 = ix;
-                           i__4 = k;
-                           q__2.r = temp.r * ap[i__4].r - temp.i * ap[i__4]
-                                   .i, q__2.i = temp.r * ap[i__4].i + temp.i 
-                                   * ap[i__4].r;
-                           q__1.r = x[i__3].r - q__2.r, q__1.i = x[i__3].i - 
-                                   q__2.i;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-                   kk -= j;
-/* L40: */
-               }
-           }
-       } else {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       if (nounit) {
-                           i__2 = j;
-                           c_div(&q__1, &x[j], &ap[kk]);
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       k = kk + 1;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = k;
-                           q__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5]
-                                   .i, q__2.i = temp.r * ap[i__5].i + temp.i 
-                                   * ap[i__5].r;
-                           q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-                           ++k;
-/* L50: */
-                       }
-                   }
-                   kk += *n - j + 1;
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       if (nounit) {
-                           i__2 = jx;
-                           c_div(&q__1, &x[jx], &ap[kk]);
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       ix = jx;
-                       i__2 = kk + *n - j;
-                       for (k = kk + 1; k <= i__2; ++k) {
-                           ix += *incx;
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = k;
-                           q__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5]
-                                   .i, q__2.i = temp.r * ap[i__5].i + temp.i 
-                                   * ap[i__5].r;
-                           q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-                   kk += *n - j + 1;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   k = kk;
-                   if (noconj) {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = k;
-                           i__4 = i__;
-                           q__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[
-                                   i__4].i, q__2.i = ap[i__3].r * x[i__4].i 
-                                   + ap[i__3].i * x[i__4].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ++k;
-/* L90: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &ap[kk + j - 1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           r_cnjg(&q__3, &ap[k]);
-                           i__3 = i__;
-                           q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                                   q__2.i = q__3.r * x[i__3].i + q__3.i * x[
-                                   i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ++k;
-/* L100: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &ap[kk + j - 1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__2 = j;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   kk += j;
-/* L110: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   ix = kx;
-                   if (noconj) {
-                       i__2 = kk + j - 2;
-                       for (k = kk; k <= i__2; ++k) {
-                           i__3 = k;
-                           i__4 = ix;
-                           q__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[
-                                   i__4].i, q__2.i = ap[i__3].r * x[i__4].i 
-                                   + ap[i__3].i * x[i__4].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix += *incx;
-/* L120: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &ap[kk + j - 1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-                       i__2 = kk + j - 2;
-                       for (k = kk; k <= i__2; ++k) {
-                           r_cnjg(&q__3, &ap[k]);
-                           i__3 = ix;
-                           q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                                   q__2.i = q__3.r * x[i__3].i + q__3.i * x[
-                                   i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix += *incx;
-/* L130: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &ap[kk + j - 1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx += *incx;
-                   kk += j;
-/* L140: */
-               }
-           }
-       } else {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   k = kk;
-                   if (noconj) {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = k;
-                           i__3 = i__;
-                           q__2.r = ap[i__2].r * x[i__3].r - ap[i__2].i * x[
-                                   i__3].i, q__2.i = ap[i__2].r * x[i__3].i 
-                                   + ap[i__2].i * x[i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           --k;
-/* L150: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &ap[kk - *n + j]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           r_cnjg(&q__3, &ap[k]);
-                           i__2 = i__;
-                           q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, 
-                                   q__2.i = q__3.r * x[i__2].i + q__3.i * x[
-                                   i__2].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           --k;
-/* L160: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &ap[kk - *n + j]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__1 = j;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   kk -= *n - j + 1;
-/* L170: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   ix = kx;
-                   if (noconj) {
-                       i__1 = kk - (*n - (j + 1));
-                       for (k = kk; k >= i__1; --k) {
-                           i__2 = k;
-                           i__3 = ix;
-                           q__2.r = ap[i__2].r * x[i__3].r - ap[i__2].i * x[
-                                   i__3].i, q__2.i = ap[i__2].r * x[i__3].i 
-                                   + ap[i__2].i * x[i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix -= *incx;
-/* L180: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &ap[kk - *n + j]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-                       i__1 = kk - (*n - (j + 1));
-                       for (k = kk; k >= i__1; --k) {
-                           r_cnjg(&q__3, &ap[k]);
-                           i__2 = ix;
-                           q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, 
-                                   q__2.i = q__3.r * x[i__2].i + q__3.i * x[
-                                   i__2].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix -= *incx;
-/* L190: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &ap[kk - *n + j]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__1 = jx;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   jx -= *incx;
-                   kk -= *n - j + 1;
-/* L200: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CTPSV . */
-
-} /* ctpsv_ */
-
-/* Subroutine */ int ctrmm_(char *side, char *uplo, char *transa, char *diag, 
-       integer *m, integer *n, complex *alpha, complex *a, integer *lda, 
-       complex *b, integer *ldb, ftnlen side_len, ftnlen uplo_len, ftnlen 
-       transa_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, 
-           i__6;
-    complex q__1, q__2, q__3;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, k, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static logical lside;
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CTRMM  performs one of the matrix-matrix operations */
-
-/*     B := alpha*op( A )*B,   or   B := alpha*B*op( A ) */
-
-/*  where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or */
-/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
-
-/*     op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ). */
-
-/*  Parameters */
-/*  ========== */
-
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE specifies whether  op( A ) multiplies B from */
-/*           the left or right as follows: */
-
-/*              SIDE = 'L' or 'l'   B := alpha*op( A )*B. */
-
-/*              SIDE = 'R' or 'r'   B := alpha*B*op( A ). */
-
-/*           Unchanged on exit. */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix A is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANSA - CHARACTER*1. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-
-/*              TRANSA = 'N' or 'n'   op( A ) = A. */
-
-/*              TRANSA = 'T' or 't'   op( A ) = A'. */
-
-/*              TRANSA = 'C' or 'c'   op( A ) = conjg( A' ). */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit triangular */
-/*           as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of B. M must be at */
-/*           least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of B.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX         . */
-/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
-/*           zero then  A is not referenced and  B need not be set before */
-/*           entry. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, k ), where k is m */
-/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
-/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
-/*           upper triangular part of the array  A must contain the upper */
-/*           triangular matrix  and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
-/*           lower triangular part of the array  A must contain the lower */
-/*           triangular matrix  and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
-/*           A  are not referenced either,  but are assumed to be  unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
-/*           then LDA must be at least max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  B      - COMPLEX          array of DIMENSION ( LDB, n ). */
-/*           Before entry,  the leading  m by n part of the array  B must */
-/*           contain the matrix  B,  and  on exit  is overwritten  by the */
-/*           transformed matrix. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-
-    /* Function Body */
-    lside = lsame_(side, "L", (ftnlen)1, (ftnlen)1);
-    if (lside) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    noconj = lsame_(transa, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! lside && ! lsame_(side, "R", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(transa, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa,
-            "T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 3;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 4;
-    } else if (*m < 0) {
-       info = 5;
-    } else if (*n < 0) {
-       info = 6;
-    } else if (*lda < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldb < max(1,*m)) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("CTRMM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = *m;
-           for (i__ = 1; i__ <= i__2; ++i__) {
-               i__3 = i__ + j * b_dim1;
-               b[i__3].r = 0.f, b[i__3].i = 0.f;
-/* L10: */
-           }
-/* L20: */
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lside) {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*           Form  B := alpha*A*B. */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (k = 1; k <= i__2; ++k) {
-                       i__3 = k + j * b_dim1;
-                       if (b[i__3].r != 0.f || b[i__3].i != 0.f) {
-                           i__3 = k + j * b_dim1;
-                           q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
-                                   .i, q__1.i = alpha->r * b[i__3].i + 
-                                   alpha->i * b[i__3].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           i__3 = k - 1;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = i__ + k * a_dim1;
-                               q__2.r = temp.r * a[i__6].r - temp.i * a[i__6]
-                                       .i, q__2.i = temp.r * a[i__6].i + 
-                                       temp.i * a[i__6].r;
-                               q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5]
-                                       .i + q__2.i;
-                               b[i__4].r = q__1.r, b[i__4].i = q__1.i;
-/* L30: */
-                           }
-                           if (nounit) {
-                               i__3 = k + k * a_dim1;
-                               q__1.r = temp.r * a[i__3].r - temp.i * a[i__3]
-                                       .i, q__1.i = temp.r * a[i__3].i + 
-                                       temp.i * a[i__3].r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__3 = k + j * b_dim1;
-                           b[i__3].r = temp.r, b[i__3].i = temp.i;
-                       }
-/* L40: */
-                   }
-/* L50: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (k = *m; k >= 1; --k) {
-                       i__2 = k + j * b_dim1;
-                       if (b[i__2].r != 0.f || b[i__2].i != 0.f) {
-                           i__2 = k + j * b_dim1;
-                           q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2]
-                                   .i, q__1.i = alpha->r * b[i__2].i + 
-                                   alpha->i * b[i__2].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           i__2 = k + j * b_dim1;
-                           b[i__2].r = temp.r, b[i__2].i = temp.i;
-                           if (nounit) {
-                               i__2 = k + j * b_dim1;
-                               i__3 = k + j * b_dim1;
-                               i__4 = k + k * a_dim1;
-                               q__1.r = b[i__3].r * a[i__4].r - b[i__3].i * 
-                                       a[i__4].i, q__1.i = b[i__3].r * a[
-                                       i__4].i + b[i__3].i * a[i__4].r;
-                               b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-                           }
-                           i__2 = *m;
-                           for (i__ = k + 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + k * a_dim1;
-                               q__2.r = temp.r * a[i__5].r - temp.i * a[i__5]
-                                       .i, q__2.i = temp.r * a[i__5].i + 
-                                       temp.i * a[i__5].r;
-                               q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4]
-                                       .i + q__2.i;
-                               b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L60: */
-                           }
-                       }
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       } else {
-
-/*           Form  B := alpha*A'*B   or   B := alpha*conjg( A' )*B. */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (i__ = *m; i__ >= 1; --i__) {
-                       i__2 = i__ + j * b_dim1;
-                       temp.r = b[i__2].r, temp.i = b[i__2].i;
-                       if (noconj) {
-                           if (nounit) {
-                               i__2 = i__ + i__ * a_dim1;
-                               q__1.r = temp.r * a[i__2].r - temp.i * a[i__2]
-                                       .i, q__1.i = temp.r * a[i__2].i + 
-                                       temp.i * a[i__2].r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__2 = i__ - 1;
-                           for (k = 1; k <= i__2; ++k) {
-                               i__3 = k + i__ * a_dim1;
-                               i__4 = k + j * b_dim1;
-                               q__2.r = a[i__3].r * b[i__4].r - a[i__3].i * 
-                                       b[i__4].i, q__2.i = a[i__3].r * b[
-                                       i__4].i + a[i__3].i * b[i__4].r;
-                               q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                       q__2.i;
-                               temp.r = q__1.r, temp.i = q__1.i;
-/* L90: */
-                           }
-                       } else {
-                           if (nounit) {
-                               r_cnjg(&q__2, &a[i__ + i__ * a_dim1]);
-                               q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                       q__1.i = temp.r * q__2.i + temp.i * 
-                                       q__2.r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__2 = i__ - 1;
-                           for (k = 1; k <= i__2; ++k) {
-                               r_cnjg(&q__3, &a[k + i__ * a_dim1]);
-                               i__3 = k + j * b_dim1;
-                               q__2.r = q__3.r * b[i__3].r - q__3.i * b[i__3]
-                                       .i, q__2.i = q__3.r * b[i__3].i + 
-                                       q__3.i * b[i__3].r;
-                               q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                       q__2.i;
-                               temp.r = q__1.r, temp.i = q__1.i;
-/* L100: */
-                           }
-                       }
-                       i__2 = i__ + j * b_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-/* L110: */
-                   }
-/* L120: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * b_dim1;
-                       temp.r = b[i__3].r, temp.i = b[i__3].i;
-                       if (noconj) {
-                           if (nounit) {
-                               i__3 = i__ + i__ * a_dim1;
-                               q__1.r = temp.r * a[i__3].r - temp.i * a[i__3]
-                                       .i, q__1.i = temp.r * a[i__3].i + 
-                                       temp.i * a[i__3].r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__3 = *m;
-                           for (k = i__ + 1; k <= i__3; ++k) {
-                               i__4 = k + i__ * a_dim1;
-                               i__5 = k + j * b_dim1;
-                               q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * 
-                                       b[i__5].i, q__2.i = a[i__4].r * b[
-                                       i__5].i + a[i__4].i * b[i__5].r;
-                               q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                       q__2.i;
-                               temp.r = q__1.r, temp.i = q__1.i;
-/* L130: */
-                           }
-                       } else {
-                           if (nounit) {
-                               r_cnjg(&q__2, &a[i__ + i__ * a_dim1]);
-                               q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                       q__1.i = temp.r * q__2.i + temp.i * 
-                                       q__2.r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__3 = *m;
-                           for (k = i__ + 1; k <= i__3; ++k) {
-                               r_cnjg(&q__3, &a[k + i__ * a_dim1]);
-                               i__4 = k + j * b_dim1;
-                               q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4]
-                                       .i, q__2.i = q__3.r * b[i__4].i + 
-                                       q__3.i * b[i__4].r;
-                               q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                       q__2.i;
-                               temp.r = q__1.r, temp.i = q__1.i;
-/* L140: */
-                           }
-                       }
-                       i__3 = i__ + j * b_dim1;
-                       q__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               q__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L150: */
-                   }
-/* L160: */
-               }
-           }
-       }
-    } else {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*           Form  B := alpha*B*A. */
-
-           if (upper) {
-               for (j = *n; j >= 1; --j) {
-                   temp.r = alpha->r, temp.i = alpha->i;
-                   if (nounit) {
-                       i__1 = j + j * a_dim1;
-                       q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, 
-                               q__1.i = temp.r * a[i__1].i + temp.i * a[i__1]
-                               .r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                   }
-                   i__1 = *m;
-                   for (i__ = 1; i__ <= i__1; ++i__) {
-                       i__2 = i__ + j * b_dim1;
-                       i__3 = i__ + j * b_dim1;
-                       q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, 
-                               q__1.i = temp.r * b[i__3].i + temp.i * b[i__3]
-                               .r;
-                       b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-/* L170: */
-                   }
-                   i__1 = j - 1;
-                   for (k = 1; k <= i__1; ++k) {
-                       i__2 = k + j * a_dim1;
-                       if (a[i__2].r != 0.f || a[i__2].i != 0.f) {
-                           i__2 = k + j * a_dim1;
-                           q__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2]
-                                   .i, q__1.i = alpha->r * a[i__2].i + 
-                                   alpha->i * a[i__2].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + k * b_dim1;
-                               q__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
-                                       .i, q__2.i = temp.r * b[i__5].i + 
-                                       temp.i * b[i__5].r;
-                               q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4]
-                                       .i + q__2.i;
-                               b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L180: */
-                           }
-                       }
-/* L190: */
-                   }
-/* L200: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp.r = alpha->r, temp.i = alpha->i;
-                   if (nounit) {
-                       i__2 = j + j * a_dim1;
-                       q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                               q__1.i = temp.r * a[i__2].i + temp.i * a[i__2]
-                               .r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                   }
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * b_dim1;
-                       i__4 = i__ + j * b_dim1;
-                       q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, 
-                               q__1.i = temp.r * b[i__4].i + temp.i * b[i__4]
-                               .r;
-                       b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L210: */
-                   }
-                   i__2 = *n;
-                   for (k = j + 1; k <= i__2; ++k) {
-                       i__3 = k + j * a_dim1;
-                       if (a[i__3].r != 0.f || a[i__3].i != 0.f) {
-                           i__3 = k + j * a_dim1;
-                           q__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3]
-                                   .i, q__1.i = alpha->r * a[i__3].i + 
-                                   alpha->i * a[i__3].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = i__ + k * b_dim1;
-                               q__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
-                                       .i, q__2.i = temp.r * b[i__6].i + 
-                                       temp.i * b[i__6].r;
-                               q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5]
-                                       .i + q__2.i;
-                               b[i__4].r = q__1.r, b[i__4].i = q__1.i;
-/* L220: */
-                           }
-                       }
-/* L230: */
-                   }
-/* L240: */
-               }
-           }
-       } else {
-
-/*           Form  B := alpha*B*A'   or   B := alpha*B*conjg( A' ). */
-
-           if (upper) {
-               i__1 = *n;
-               for (k = 1; k <= i__1; ++k) {
-                   i__2 = k - 1;
-                   for (j = 1; j <= i__2; ++j) {
-                       i__3 = j + k * a_dim1;
-                       if (a[i__3].r != 0.f || a[i__3].i != 0.f) {
-                           if (noconj) {
-                               i__3 = j + k * a_dim1;
-                               q__1.r = alpha->r * a[i__3].r - alpha->i * a[
-                                       i__3].i, q__1.i = alpha->r * a[i__3]
-                                       .i + alpha->i * a[i__3].r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           } else {
-                               r_cnjg(&q__2, &a[j + k * a_dim1]);
-                               q__1.r = alpha->r * q__2.r - alpha->i * 
-                                       q__2.i, q__1.i = alpha->r * q__2.i + 
-                                       alpha->i * q__2.r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = i__ + k * b_dim1;
-                               q__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
-                                       .i, q__2.i = temp.r * b[i__6].i + 
-                                       temp.i * b[i__6].r;
-                               q__1.r = b[i__5].r + q__2.r, q__1.i = b[i__5]
-                                       .i + q__2.i;
-                               b[i__4].r = q__1.r, b[i__4].i = q__1.i;
-/* L250: */
-                           }
-                       }
-/* L260: */
-                   }
-                   temp.r = alpha->r, temp.i = alpha->i;
-                   if (nounit) {
-                       if (noconj) {
-                           i__2 = k + k * a_dim1;
-                           q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   q__1.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       } else {
-                           r_cnjg(&q__2, &a[k + k * a_dim1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   if (temp.r != 1.f || temp.i != 0.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + k * b_dim1;
-                           i__4 = i__ + k * b_dim1;
-                           q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, 
-                                   q__1.i = temp.r * b[i__4].i + temp.i * b[
-                                   i__4].r;
-                           b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L270: */
-                       }
-                   }
-/* L280: */
-               }
-           } else {
-               for (k = *n; k >= 1; --k) {
-                   i__1 = *n;
-                   for (j = k + 1; j <= i__1; ++j) {
-                       i__2 = j + k * a_dim1;
-                       if (a[i__2].r != 0.f || a[i__2].i != 0.f) {
-                           if (noconj) {
-                               i__2 = j + k * a_dim1;
-                               q__1.r = alpha->r * a[i__2].r - alpha->i * a[
-                                       i__2].i, q__1.i = alpha->r * a[i__2]
-                                       .i + alpha->i * a[i__2].r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           } else {
-                               r_cnjg(&q__2, &a[j + k * a_dim1]);
-                               q__1.r = alpha->r * q__2.r - alpha->i * 
-                                       q__2.i, q__1.i = alpha->r * q__2.i + 
-                                       alpha->i * q__2.r;
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + k * b_dim1;
-                               q__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
-                                       .i, q__2.i = temp.r * b[i__5].i + 
-                                       temp.i * b[i__5].r;
-                               q__1.r = b[i__4].r + q__2.r, q__1.i = b[i__4]
-                                       .i + q__2.i;
-                               b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L290: */
-                           }
-                       }
-/* L300: */
-                   }
-                   temp.r = alpha->r, temp.i = alpha->i;
-                   if (nounit) {
-                       if (noconj) {
-                           i__1 = k + k * a_dim1;
-                           q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, 
-                                   q__1.i = temp.r * a[i__1].i + temp.i * a[
-                                   i__1].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       } else {
-                           r_cnjg(&q__2, &a[k + k * a_dim1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   if (temp.r != 1.f || temp.i != 0.f) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + k * b_dim1;
-                           i__3 = i__ + k * b_dim1;
-                           q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, 
-                                   q__1.i = temp.r * b[i__3].i + temp.i * b[
-                                   i__3].r;
-                           b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-/* L310: */
-                       }
-                   }
-/* L320: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CTRMM . */
-
-} /* ctrmm_ */
-
-/* Subroutine */ int ctrmv_(char *uplo, char *trans, char *diag, integer *n, 
-       complex *a, integer *lda, complex *x, integer *incx, ftnlen uplo_len, 
-       ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2, q__3;
-
-    /* Builtin functions */
-    void r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CTRMV  performs one of the matrix-vector operations */
-
-/*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
-
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   x := A*x. */
-
-/*              TRANS = 'T' or 't'   x := A'*x. */
-
-/*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular matrix and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular matrix and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced either, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,*n)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    }
-    if (info != 0) {
-       xerbla_("CTRMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x := A*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   q__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-/* L10: */
-                       }
-                       if (nounit) {
-                           i__2 = j;
-                           i__3 = j;
-                           i__4 = j + j * a_dim1;
-                           q__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[
-                                   i__4].i, q__1.i = x[i__3].r * a[i__4].i + 
-                                   x[i__3].i * a[i__4].r;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       ix = kx;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   q__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           q__1.r = x[i__4].r + q__2.r, q__1.i = x[i__4].i + 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           i__2 = jx;
-                           i__3 = jx;
-                           i__4 = j + j * a_dim1;
-                           q__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[
-                                   i__4].i, q__1.i = x[i__3].r * a[i__4].i + 
-                                   x[i__3].i * a[i__4].r;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                   }
-                   jx += *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = i__;
-                           i__3 = i__;
-                           i__4 = i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   q__2.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + 
-                                   q__2.i;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-/* L50: */
-                       }
-                       if (nounit) {
-                           i__1 = j;
-                           i__2 = j;
-                           i__3 = j + j * a_dim1;
-                           q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
-                                   i__3].i, q__1.i = x[i__2].r * a[i__3].i + 
-                                   x[i__2].i * a[i__3].r;
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       ix = kx;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = ix;
-                           i__3 = ix;
-                           i__4 = i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   q__2.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           q__1.r = x[i__3].r + q__2.r, q__1.i = x[i__3].i + 
-                                   q__2.i;
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           i__1 = jx;
-                           i__2 = jx;
-                           i__3 = j + j * a_dim1;
-                           q__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
-                                   i__3].i, q__1.i = x[i__2].r * a[i__3].i + 
-                                   x[i__2].i * a[i__3].r;
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                   }
-                   jx -= *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := A'*x  or  x := conjg( A' )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   if (noconj) {
-                       if (nounit) {
-                           i__1 = j + j * a_dim1;
-                           q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, 
-                                   q__1.i = temp.r * a[i__1].i + temp.i * a[
-                                   i__1].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           i__1 = i__ + j * a_dim1;
-                           i__2 = i__;
-                           q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-                                   i__2].i, q__2.i = a[i__1].r * x[i__2].i + 
-                                   a[i__1].i * x[i__2].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L90: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j + j * a_dim1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                           i__1 = i__;
-                           q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, 
-                                   q__2.i = q__3.r * x[i__1].i + q__3.i * x[
-                                   i__1].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L100: */
-                       }
-                   }
-                   i__1 = j;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-/* L110: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   ix = jx;
-                   if (noconj) {
-                       if (nounit) {
-                           i__1 = j + j * a_dim1;
-                           q__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, 
-                                   q__1.i = temp.r * a[i__1].i + temp.i * a[
-                                   i__1].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           ix -= *incx;
-                           i__1 = i__ + j * a_dim1;
-                           i__2 = ix;
-                           q__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-                                   i__2].i, q__2.i = a[i__1].r * x[i__2].i + 
-                                   a[i__1].i * x[i__2].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L120: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j + j * a_dim1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           ix -= *incx;
-                           r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                           i__1 = ix;
-                           q__2.r = q__3.r * x[i__1].r - q__3.i * x[i__1].i, 
-                                   q__2.i = q__3.r * x[i__1].i + q__3.i * x[
-                                   i__1].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L130: */
-                       }
-                   }
-                   i__1 = jx;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   jx -= *incx;
-/* L140: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   if (noconj) {
-                       if (nounit) {
-                           i__2 = j + j * a_dim1;
-                           q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   q__1.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * a_dim1;
-                           i__4 = i__;
-                           q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, q__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L150: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j + j * a_dim1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                           i__3 = i__;
-                           q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                                   q__2.i = q__3.r * x[i__3].i + q__3.i * x[
-                                   i__3].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L160: */
-                       }
-                   }
-                   i__2 = j;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-/* L170: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   ix = jx;
-                   if (noconj) {
-                       if (nounit) {
-                           i__2 = j + j * a_dim1;
-                           q__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   q__1.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           ix += *incx;
-                           i__3 = i__ + j * a_dim1;
-                           i__4 = ix;
-                           q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, q__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L180: */
-                       }
-                   } else {
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j + j * a_dim1]);
-                           q__1.r = temp.r * q__2.r - temp.i * q__2.i, 
-                                   q__1.i = temp.r * q__2.i + temp.i * 
-                                   q__2.r;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           ix += *incx;
-                           r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                           i__3 = ix;
-                           q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                                   q__2.i = q__3.r * x[i__3].i + q__3.i * x[
-                                   i__3].r;
-                           q__1.r = temp.r + q__2.r, q__1.i = temp.i + 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L190: */
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx += *incx;
-/* L200: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CTRMV . */
-
-} /* ctrmv_ */
-
-/* Subroutine */ int ctrsm_(char *side, char *uplo, char *transa, char *diag, 
-       integer *m, integer *n, complex *alpha, complex *a, integer *lda, 
-       complex *b, integer *ldb, ftnlen side_len, ftnlen uplo_len, ftnlen 
-       transa_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, 
-           i__6, i__7;
-    complex q__1, q__2, q__3;
-
-    /* Builtin functions */
-    void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, k, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static logical lside;
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CTRSM  solves one of the matrix equations */
-
-/*     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B, */
-
-/*  where alpha is a scalar, X and B are m by n matrices, A is a unit, or */
-/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
-
-/*     op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ). */
-
-/*  The matrix X is overwritten on B. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  SIDE   - CHARACTER*1. */
-/*           On entry, SIDE specifies whether op( A ) appears on the left */
-/*           or right of X as follows: */
-
-/*              SIDE = 'L' or 'l'   op( A )*X = alpha*B. */
-
-/*              SIDE = 'R' or 'r'   X*op( A ) = alpha*B. */
-
-/*           Unchanged on exit. */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix A is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANSA - CHARACTER*1. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-
-/*              TRANSA = 'N' or 'n'   op( A ) = A. */
-
-/*              TRANSA = 'T' or 't'   op( A ) = A'. */
-
-/*              TRANSA = 'C' or 'c'   op( A ) = conjg( A' ). */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit triangular */
-/*           as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of B. M must be at */
-/*           least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of B.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX         . */
-/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
-/*           zero then  A is not referenced and  B need not be set before */
-/*           entry. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, k ), where k is m */
-/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
-/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
-/*           upper triangular part of the array  A must contain the upper */
-/*           triangular matrix  and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
-/*           lower triangular part of the array  A must contain the lower */
-/*           triangular matrix  and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
-/*           A  are not referenced either,  but are assumed to be  unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
-/*           then LDA must be at least max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  B      - COMPLEX          array of DIMENSION ( LDB, n ). */
-/*           Before entry,  the leading  m by n part of the array  B must */
-/*           contain  the  right-hand  side  matrix  B,  and  on exit  is */
-/*           overwritten by the solution matrix  X. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-
-    /* Function Body */
-    lside = lsame_(side, "L", (ftnlen)1, (ftnlen)1);
-    if (lside) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    noconj = lsame_(transa, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! lside && ! lsame_(side, "R", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(transa, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa,
-            "T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 3;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 4;
-    } else if (*m < 0) {
-       info = 5;
-    } else if (*n < 0) {
-       info = 6;
-    } else if (*lda < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldb < max(1,*m)) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("CTRSM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (alpha->r == 0.f && alpha->i == 0.f) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = *m;
-           for (i__ = 1; i__ <= i__2; ++i__) {
-               i__3 = i__ + j * b_dim1;
-               b[i__3].r = 0.f, b[i__3].i = 0.f;
-/* L10: */
-           }
-/* L20: */
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lside) {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*           Form  B := alpha*inv( A )*B. */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (alpha->r != 1.f || alpha->i != 0.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * b_dim1;
-                           i__4 = i__ + j * b_dim1;
-                           q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
-                                   .i, q__1.i = alpha->r * b[i__4].i + 
-                                   alpha->i * b[i__4].r;
-                           b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L30: */
-                       }
-                   }
-                   for (k = *m; k >= 1; --k) {
-                       i__2 = k + j * b_dim1;
-                       if (b[i__2].r != 0.f || b[i__2].i != 0.f) {
-                           if (nounit) {
-                               i__2 = k + j * b_dim1;
-                               c_div(&q__1, &b[k + j * b_dim1], &a[k + k * 
-                                       a_dim1]);
-                               b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-                           }
-                           i__2 = k - 1;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = k + j * b_dim1;
-                               i__6 = i__ + k * a_dim1;
-                               q__2.r = b[i__5].r * a[i__6].r - b[i__5].i * 
-                                       a[i__6].i, q__2.i = b[i__5].r * a[
-                                       i__6].i + b[i__5].i * a[i__6].r;
-                               q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4]
-                                       .i - q__2.i;
-                               b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L40: */
-                           }
-                       }
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (alpha->r != 1.f || alpha->i != 0.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * b_dim1;
-                           i__4 = i__ + j * b_dim1;
-                           q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
-                                   .i, q__1.i = alpha->r * b[i__4].i + 
-                                   alpha->i * b[i__4].r;
-                           b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L70: */
-                       }
-                   }
-                   i__2 = *m;
-                   for (k = 1; k <= i__2; ++k) {
-                       i__3 = k + j * b_dim1;
-                       if (b[i__3].r != 0.f || b[i__3].i != 0.f) {
-                           if (nounit) {
-                               i__3 = k + j * b_dim1;
-                               c_div(&q__1, &b[k + j * b_dim1], &a[k + k * 
-                                       a_dim1]);
-                               b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-                           }
-                           i__3 = *m;
-                           for (i__ = k + 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = k + j * b_dim1;
-                               i__7 = i__ + k * a_dim1;
-                               q__2.r = b[i__6].r * a[i__7].r - b[i__6].i * 
-                                       a[i__7].i, q__2.i = b[i__6].r * a[
-                                       i__7].i + b[i__6].i * a[i__7].r;
-                               q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5]
-                                       .i - q__2.i;
-                               b[i__4].r = q__1.r, b[i__4].i = q__1.i;
-/* L80: */
-                           }
-                       }
-/* L90: */
-                   }
-/* L100: */
-               }
-           }
-       } else {
-
-/*           Form  B := alpha*inv( A' )*B */
-/*           or    B := alpha*inv( conjg( A' ) )*B. */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * b_dim1;
-                       q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               q__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       if (noconj) {
-                           i__3 = i__ - 1;
-                           for (k = 1; k <= i__3; ++k) {
-                               i__4 = k + i__ * a_dim1;
-                               i__5 = k + j * b_dim1;
-                               q__2.r = a[i__4].r * b[i__5].r - a[i__4].i * 
-                                       b[i__5].i, q__2.i = a[i__4].r * b[
-                                       i__5].i + a[i__4].i * b[i__5].r;
-                               q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                       q__2.i;
-                               temp.r = q__1.r, temp.i = q__1.i;
-/* L110: */
-                           }
-                           if (nounit) {
-                               c_div(&q__1, &temp, &a[i__ + i__ * a_dim1]);
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                       } else {
-                           i__3 = i__ - 1;
-                           for (k = 1; k <= i__3; ++k) {
-                               r_cnjg(&q__3, &a[k + i__ * a_dim1]);
-                               i__4 = k + j * b_dim1;
-                               q__2.r = q__3.r * b[i__4].r - q__3.i * b[i__4]
-                                       .i, q__2.i = q__3.r * b[i__4].i + 
-                                       q__3.i * b[i__4].r;
-                               q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                       q__2.i;
-                               temp.r = q__1.r, temp.i = q__1.i;
-/* L120: */
-                           }
-                           if (nounit) {
-                               r_cnjg(&q__2, &a[i__ + i__ * a_dim1]);
-                               c_div(&q__1, &temp, &q__2);
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                       }
-                       i__3 = i__ + j * b_dim1;
-                       b[i__3].r = temp.r, b[i__3].i = temp.i;
-/* L130: */
-                   }
-/* L140: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (i__ = *m; i__ >= 1; --i__) {
-                       i__2 = i__ + j * b_dim1;
-                       q__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, 
-                               q__1.i = alpha->r * b[i__2].i + alpha->i * b[
-                               i__2].r;
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       if (noconj) {
-                           i__2 = *m;
-                           for (k = i__ + 1; k <= i__2; ++k) {
-                               i__3 = k + i__ * a_dim1;
-                               i__4 = k + j * b_dim1;
-                               q__2.r = a[i__3].r * b[i__4].r - a[i__3].i * 
-                                       b[i__4].i, q__2.i = a[i__3].r * b[
-                                       i__4].i + a[i__3].i * b[i__4].r;
-                               q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                       q__2.i;
-                               temp.r = q__1.r, temp.i = q__1.i;
-/* L150: */
-                           }
-                           if (nounit) {
-                               c_div(&q__1, &temp, &a[i__ + i__ * a_dim1]);
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                       } else {
-                           i__2 = *m;
-                           for (k = i__ + 1; k <= i__2; ++k) {
-                               r_cnjg(&q__3, &a[k + i__ * a_dim1]);
-                               i__3 = k + j * b_dim1;
-                               q__2.r = q__3.r * b[i__3].r - q__3.i * b[i__3]
-                                       .i, q__2.i = q__3.r * b[i__3].i + 
-                                       q__3.i * b[i__3].r;
-                               q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                       q__2.i;
-                               temp.r = q__1.r, temp.i = q__1.i;
-/* L160: */
-                           }
-                           if (nounit) {
-                               r_cnjg(&q__2, &a[i__ + i__ * a_dim1]);
-                               c_div(&q__1, &temp, &q__2);
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                       }
-                       i__2 = i__ + j * b_dim1;
-                       b[i__2].r = temp.r, b[i__2].i = temp.i;
-/* L170: */
-                   }
-/* L180: */
-               }
-           }
-       }
-    } else {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*           Form  B := alpha*B*inv( A ). */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (alpha->r != 1.f || alpha->i != 0.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * b_dim1;
-                           i__4 = i__ + j * b_dim1;
-                           q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
-                                   .i, q__1.i = alpha->r * b[i__4].i + 
-                                   alpha->i * b[i__4].r;
-                           b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L190: */
-                       }
-                   }
-                   i__2 = j - 1;
-                   for (k = 1; k <= i__2; ++k) {
-                       i__3 = k + j * a_dim1;
-                       if (a[i__3].r != 0.f || a[i__3].i != 0.f) {
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = k + j * a_dim1;
-                               i__7 = i__ + k * b_dim1;
-                               q__2.r = a[i__6].r * b[i__7].r - a[i__6].i * 
-                                       b[i__7].i, q__2.i = a[i__6].r * b[
-                                       i__7].i + a[i__6].i * b[i__7].r;
-                               q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5]
-                                       .i - q__2.i;
-                               b[i__4].r = q__1.r, b[i__4].i = q__1.i;
-/* L200: */
-                           }
-                       }
-/* L210: */
-                   }
-                   if (nounit) {
-                       c_div(&q__1, &c_b21, &a[j + j * a_dim1]);
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * b_dim1;
-                           i__4 = i__ + j * b_dim1;
-                           q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, 
-                                   q__1.i = temp.r * b[i__4].i + temp.i * b[
-                                   i__4].r;
-                           b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L220: */
-                       }
-                   }
-/* L230: */
-               }
-           } else {
-               for (j = *n; j >= 1; --j) {
-                   if (alpha->r != 1.f || alpha->i != 0.f) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + j * b_dim1;
-                           i__3 = i__ + j * b_dim1;
-                           q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
-                                   .i, q__1.i = alpha->r * b[i__3].i + 
-                                   alpha->i * b[i__3].r;
-                           b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-/* L240: */
-                       }
-                   }
-                   i__1 = *n;
-                   for (k = j + 1; k <= i__1; ++k) {
-                       i__2 = k + j * a_dim1;
-                       if (a[i__2].r != 0.f || a[i__2].i != 0.f) {
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = k + j * a_dim1;
-                               i__6 = i__ + k * b_dim1;
-                               q__2.r = a[i__5].r * b[i__6].r - a[i__5].i * 
-                                       b[i__6].i, q__2.i = a[i__5].r * b[
-                                       i__6].i + a[i__5].i * b[i__6].r;
-                               q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4]
-                                       .i - q__2.i;
-                               b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L250: */
-                           }
-                       }
-/* L260: */
-                   }
-                   if (nounit) {
-                       c_div(&q__1, &c_b21, &a[j + j * a_dim1]);
-                       temp.r = q__1.r, temp.i = q__1.i;
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + j * b_dim1;
-                           i__3 = i__ + j * b_dim1;
-                           q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, 
-                                   q__1.i = temp.r * b[i__3].i + temp.i * b[
-                                   i__3].r;
-                           b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-/* L270: */
-                       }
-                   }
-/* L280: */
-               }
-           }
-       } else {
-
-/*           Form  B := alpha*B*inv( A' ) */
-/*           or    B := alpha*B*inv( conjg( A' ) ). */
-
-           if (upper) {
-               for (k = *n; k >= 1; --k) {
-                   if (nounit) {
-                       if (noconj) {
-                           c_div(&q__1, &c_b21, &a[k + k * a_dim1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       } else {
-                           r_cnjg(&q__2, &a[k + k * a_dim1]);
-                           c_div(&q__1, &c_b21, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + k * b_dim1;
-                           i__3 = i__ + k * b_dim1;
-                           q__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, 
-                                   q__1.i = temp.r * b[i__3].i + temp.i * b[
-                                   i__3].r;
-                           b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-/* L290: */
-                       }
-                   }
-                   i__1 = k - 1;
-                   for (j = 1; j <= i__1; ++j) {
-                       i__2 = j + k * a_dim1;
-                       if (a[i__2].r != 0.f || a[i__2].i != 0.f) {
-                           if (noconj) {
-                               i__2 = j + k * a_dim1;
-                               temp.r = a[i__2].r, temp.i = a[i__2].i;
-                           } else {
-                               r_cnjg(&q__1, &a[j + k * a_dim1]);
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + k * b_dim1;
-                               q__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
-                                       .i, q__2.i = temp.r * b[i__5].i + 
-                                       temp.i * b[i__5].r;
-                               q__1.r = b[i__4].r - q__2.r, q__1.i = b[i__4]
-                                       .i - q__2.i;
-                               b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L300: */
-                           }
-                       }
-/* L310: */
-                   }
-                   if (alpha->r != 1.f || alpha->i != 0.f) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + k * b_dim1;
-                           i__3 = i__ + k * b_dim1;
-                           q__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
-                                   .i, q__1.i = alpha->r * b[i__3].i + 
-                                   alpha->i * b[i__3].r;
-                           b[i__2].r = q__1.r, b[i__2].i = q__1.i;
-/* L320: */
-                       }
-                   }
-/* L330: */
-               }
-           } else {
-               i__1 = *n;
-               for (k = 1; k <= i__1; ++k) {
-                   if (nounit) {
-                       if (noconj) {
-                           c_div(&q__1, &c_b21, &a[k + k * a_dim1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       } else {
-                           r_cnjg(&q__2, &a[k + k * a_dim1]);
-                           c_div(&q__1, &c_b21, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + k * b_dim1;
-                           i__4 = i__ + k * b_dim1;
-                           q__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, 
-                                   q__1.i = temp.r * b[i__4].i + temp.i * b[
-                                   i__4].r;
-                           b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L340: */
-                       }
-                   }
-                   i__2 = *n;
-                   for (j = k + 1; j <= i__2; ++j) {
-                       i__3 = j + k * a_dim1;
-                       if (a[i__3].r != 0.f || a[i__3].i != 0.f) {
-                           if (noconj) {
-                               i__3 = j + k * a_dim1;
-                               temp.r = a[i__3].r, temp.i = a[i__3].i;
-                           } else {
-                               r_cnjg(&q__1, &a[j + k * a_dim1]);
-                               temp.r = q__1.r, temp.i = q__1.i;
-                           }
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = i__ + k * b_dim1;
-                               q__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
-                                       .i, q__2.i = temp.r * b[i__6].i + 
-                                       temp.i * b[i__6].r;
-                               q__1.r = b[i__5].r - q__2.r, q__1.i = b[i__5]
-                                       .i - q__2.i;
-                               b[i__4].r = q__1.r, b[i__4].i = q__1.i;
-/* L350: */
-                           }
-                       }
-/* L360: */
-                   }
-                   if (alpha->r != 1.f || alpha->i != 0.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + k * b_dim1;
-                           i__4 = i__ + k * b_dim1;
-                           q__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
-                                   .i, q__1.i = alpha->r * b[i__4].i + 
-                                   alpha->i * b[i__4].r;
-                           b[i__3].r = q__1.r, b[i__3].i = q__1.i;
-/* L370: */
-                       }
-                   }
-/* L380: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CTRSM . */
-
-} /* ctrsm_ */
-
-/* Subroutine */ int ctrsv_(char *uplo, char *trans, char *diag, integer *n, 
-       complex *a, integer *lda, complex *x, integer *incx, ftnlen uplo_len, 
-       ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    complex q__1, q__2, q__3;
-
-    /* Builtin functions */
-    void c_div(complex *, complex *, complex *), r_cnjg(complex *, complex *);
-
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static complex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  CTRSV  solves one of the systems of equations */
-
-/*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b, */
-
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular matrix. */
-
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   A*x = b. */
-
-/*              TRANS = 'T' or 't'   A'*x = b. */
-
-/*              TRANS = 'C' or 'c'   conjg( A' )*x = b. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX          array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular matrix and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular matrix and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced either, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX          array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,*n)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    }
-    if (info != 0) {
-       xerbla_("CTRSV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x := inv( A )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       if (nounit) {
-                           i__1 = j;
-                           c_div(&q__1, &x[j], &a[j + j * a_dim1]);
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           i__1 = i__;
-                           i__2 = i__;
-                           i__3 = i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-                                   q__2.i = temp.r * a[i__3].i + temp.i * a[
-                                   i__3].r;
-                           q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i - 
-                                   q__2.i;
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-/* L10: */
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   if (x[i__1].r != 0.f || x[i__1].i != 0.f) {
-                       if (nounit) {
-                           i__1 = jx;
-                           c_div(&q__1, &x[jx], &a[j + j * a_dim1]);
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-                       }
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       ix = jx;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           ix -= *incx;
-                           i__1 = ix;
-                           i__2 = ix;
-                           i__3 = i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-                                   q__2.i = temp.r * a[i__3].i + temp.i * a[
-                                   i__3].r;
-                           q__1.r = x[i__2].r - q__2.r, q__1.i = x[i__2].i - 
-                                   q__2.i;
-                           x[i__1].r = q__1.r, x[i__1].i = q__1.i;
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       if (nounit) {
-                           i__2 = j;
-                           c_div(&q__1, &x[j], &a[j + j * a_dim1]);
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   q__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-/* L50: */
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   if (x[i__2].r != 0.f || x[i__2].i != 0.f) {
-                       if (nounit) {
-                           i__2 = jx;
-                           c_div(&q__1, &x[jx], &a[j + j * a_dim1]);
-                           x[i__2].r = q__1.r, x[i__2].i = q__1.i;
-                       }
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       ix = jx;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           ix += *incx;
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = i__ + j * a_dim1;
-                           q__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   q__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           q__1.r = x[i__4].r - q__2.r, q__1.i = x[i__4].i - 
-                                   q__2.i;
-                           x[i__3].r = q__1.r, x[i__3].i = q__1.i;
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   if (noconj) {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * a_dim1;
-                           i__4 = i__;
-                           q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, q__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L90: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &a[j + j * a_dim1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                           i__3 = i__;
-                           q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                                   q__2.i = q__3.r * x[i__3].i + q__3.i * x[
-                                   i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L100: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j + j * a_dim1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__2 = j;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-/* L110: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   ix = kx;
-                   i__2 = jx;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   if (noconj) {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * a_dim1;
-                           i__4 = ix;
-                           q__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, q__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix += *incx;
-/* L120: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &a[j + j * a_dim1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                           i__3 = ix;
-                           q__2.r = q__3.r * x[i__3].r - q__3.i * x[i__3].i, 
-                                   q__2.i = q__3.r * x[i__3].i + q__3.i * x[
-                                   i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix += *incx;
-/* L130: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j + j * a_dim1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx += *incx;
-/* L140: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   if (noconj) {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = i__ + j * a_dim1;
-                           i__3 = i__;
-                           q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[
-                                   i__3].i, q__2.i = a[i__2].r * x[i__3].i + 
-                                   a[i__2].i * x[i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L150: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &a[j + j * a_dim1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                           i__2 = i__;
-                           q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, 
-                                   q__2.i = q__3.r * x[i__2].i + q__3.i * x[
-                                   i__2].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-/* L160: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j + j * a_dim1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__1 = j;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-/* L170: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   ix = kx;
-                   i__1 = jx;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   if (noconj) {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = i__ + j * a_dim1;
-                           i__3 = ix;
-                           q__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[
-                                   i__3].i, q__2.i = a[i__2].r * x[i__3].i + 
-                                   a[i__2].i * x[i__3].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix -= *incx;
-/* L180: */
-                       }
-                       if (nounit) {
-                           c_div(&q__1, &temp, &a[j + j * a_dim1]);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   } else {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           r_cnjg(&q__3, &a[i__ + j * a_dim1]);
-                           i__2 = ix;
-                           q__2.r = q__3.r * x[i__2].r - q__3.i * x[i__2].i, 
-                                   q__2.i = q__3.r * x[i__2].i + q__3.i * x[
-                                   i__2].r;
-                           q__1.r = temp.r - q__2.r, q__1.i = temp.i - 
-                                   q__2.i;
-                           temp.r = q__1.r, temp.i = q__1.i;
-                           ix -= *incx;
-/* L190: */
-                       }
-                       if (nounit) {
-                           r_cnjg(&q__2, &a[j + j * a_dim1]);
-                           c_div(&q__1, &temp, &q__2);
-                           temp.r = q__1.r, temp.i = q__1.i;
-                       }
-                   }
-                   i__1 = jx;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   jx -= *incx;
-/* L200: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of CTRSV . */
-
-} /* ctrsv_ */
-
-doublereal dasum_(integer *n, doublereal *dx, integer *incx)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-    doublereal ret_val, d__1, d__2, d__3, d__4, d__5, d__6;
-
-    /* Local variables */
-    static integer i__, m, mp1;
-    static doublereal dtemp;
-    static integer nincx;
-
-
-/*     takes the sum of the absolute values. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --dx;
-
-    /* Function Body */
-    ret_val = 0.;
-    dtemp = 0.;
-    if (*n <= 0 || *incx <= 0) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-
-/*        code for increment not equal to 1 */
-
-    nincx = *n * *incx;
-    i__1 = nincx;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       dtemp += (d__1 = dx[i__], abs(d__1));
-/* L10: */
-    }
-    ret_val = dtemp;
-    return ret_val;
-
-/*        code for increment equal to 1 */
-
-
-/*        clean-up loop */
-
-L20:
-    m = *n % 6;
-    if (m == 0) {
-       goto L40;
-    }
-    i__2 = m;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       dtemp += (d__1 = dx[i__], abs(d__1));
-/* L30: */
-    }
-    if (*n < 6) {
-       goto L60;
-    }
-L40:
-    mp1 = m + 1;
-    i__2 = *n;
-    for (i__ = mp1; i__ <= i__2; i__ += 6) {
-       dtemp = dtemp + (d__1 = dx[i__], abs(d__1)) + (d__2 = dx[i__ + 1], 
-               abs(d__2)) + (d__3 = dx[i__ + 2], abs(d__3)) + (d__4 = dx[i__ 
-               + 3], abs(d__4)) + (d__5 = dx[i__ + 4], abs(d__5)) + (d__6 = 
-               dx[i__ + 5], abs(d__6));
-/* L50: */
-    }
-L60:
-    ret_val = dtemp;
-    return ret_val;
-} /* dasum_ */
-
-/* Subroutine */ int daxpy_(integer *n, doublereal *da, doublereal *dx, 
-       integer *incx, doublereal *dy, integer *incy)
-{
-    /* System generated locals */
-    integer i__1;
-
-    /* Local variables */
-    static integer i__, m, ix, iy, mp1;
-
-
-/*     constant times a vector plus a vector. */
-/*     uses unrolled loops for increments equal to one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --dy;
-    --dx;
-
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*da == 0.) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dy[iy] += *da * dx[ix];
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*        code for both increments equal to 1 */
-
-
-/*        clean-up loop */
-
-L20:
-    m = *n % 4;
-    if (m == 0) {
-       goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dy[i__] += *da * dx[i__];
-/* L30: */
-    }
-    if (*n < 4) {
-       return 0;
-    }
-L40:
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 4) {
-       dy[i__] += *da * dx[i__];
-       dy[i__ + 1] += *da * dx[i__ + 1];
-       dy[i__ + 2] += *da * dx[i__ + 2];
-       dy[i__ + 3] += *da * dx[i__ + 3];
-/* L50: */
-    }
-    return 0;
-} /* daxpy_ */
-
-doublereal dcabs1_(doublecomplex *z__)
-{
-    /* System generated locals */
-    doublereal ret_val;
-    static doublecomplex equiv_0[1];
-
-    /* Local variables */
-#define t ((doublereal *)equiv_0)
-#define zz (equiv_0)
-
-    zz->r = z__->r, zz->i = z__->i;
-    ret_val = abs(t[0]) + abs(t[1]);
-    return ret_val;
-} /* dcabs1_ */
-
-#undef zz
-#undef t
-
-
-/* Subroutine */ int dcopy_(integer *n, doublereal *dx, integer *incx, 
-       doublereal *dy, integer *incy)
-{
-    /* System generated locals */
-    integer i__1;
-
-    /* Local variables */
-    static integer i__, m, ix, iy, mp1;
-
-
-/*     copies a vector, x, to a vector, y. */
-/*     uses unrolled loops for increments equal to one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --dy;
-    --dx;
-
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dy[iy] = dx[ix];
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*        code for both increments equal to 1 */
-
-
-/*        clean-up loop */
-
-L20:
-    m = *n % 7;
-    if (m == 0) {
-       goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dy[i__] = dx[i__];
-/* L30: */
-    }
-    if (*n < 7) {
-       return 0;
-    }
-L40:
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 7) {
-       dy[i__] = dx[i__];
-       dy[i__ + 1] = dx[i__ + 1];
-       dy[i__ + 2] = dx[i__ + 2];
-       dy[i__ + 3] = dx[i__ + 3];
-       dy[i__ + 4] = dx[i__ + 4];
-       dy[i__ + 5] = dx[i__ + 5];
-       dy[i__ + 6] = dx[i__ + 6];
-/* L50: */
-    }
-    return 0;
-} /* dcopy_ */
-
-doublereal ddot_(integer *n, doublereal *dx, integer *incx, doublereal *dy, 
-       integer *incy)
-{
-    /* System generated locals */
-    integer i__1;
-    doublereal ret_val;
-
-    /* Local variables */
-    static integer i__, m, ix, iy, mp1;
-    static doublereal dtemp;
-
-
-/*     forms the dot product of two vectors. */
-/*     uses unrolled loops for increments equal to one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --dy;
-    --dx;
-
-    /* Function Body */
-    ret_val = 0.;
-    dtemp = 0.;
-    if (*n <= 0) {
-       return ret_val;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dtemp += dx[ix] * dy[iy];
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    ret_val = dtemp;
-    return ret_val;
-
-/*        code for both increments equal to 1 */
-
-
-/*        clean-up loop */
-
-L20:
-    m = *n % 5;
-    if (m == 0) {
-       goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dtemp += dx[i__] * dy[i__];
-/* L30: */
-    }
-    if (*n < 5) {
-       goto L60;
-    }
-L40:
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 5) {
-       dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1] + dx[
-               i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] + dx[i__ + 
-               4] * dy[i__ + 4];
-/* L50: */
-    }
-L60:
-    ret_val = dtemp;
-    return ret_val;
-} /* ddot_ */
-
-/* Subroutine */ int dgbmv_(char *trans, integer *m, integer *n, integer *kl, 
-       integer *ku, doublereal *alpha, doublereal *a, integer *lda, 
-       doublereal *x, integer *incx, doublereal *beta, doublereal *y, 
-       integer *incy, ftnlen trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
-
-    /* Local variables */
-    static integer i__, j, k, ix, iy, jx, jy, kx, ky, kup1, info;
-    static doublereal temp;
-    static integer lenx, leny;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DGBMV  performs one of the matrix-vector operations */
-
-/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are vectors and A is an */
-/*  m by n band matrix, with kl sub-diagonals and ku super-diagonals. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
-
-/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
-
-/*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  KL     - INTEGER. */
-/*           On entry, KL specifies the number of sub-diagonals of the */
-/*           matrix A. KL must satisfy  0 .le. KL. */
-/*           Unchanged on exit. */
-
-/*  KU     - INTEGER. */
-/*           On entry, KU specifies the number of super-diagonals of the */
-/*           matrix A. KU must satisfy  0 .le. KU. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading ( kl + ku + 1 ) by n part of the */
-/*           array A must contain the matrix of coefficients, supplied */
-/*           column by column, with the leading diagonal of the matrix in */
-/*           row ( ku + 1 ) of the array, the first super-diagonal */
-/*           starting at position 2 in row ku, the first sub-diagonal */
-/*           starting at position 1 in row ( ku + 2 ), and so on. */
-/*           Elements in the array A that do not correspond to elements */
-/*           in the band matrix (such as the top left ku by ku triangle) */
-/*           are not referenced. */
-/*           The following program segment will transfer a band matrix */
-/*           from conventional full matrix storage to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    K = KU + 1 - J */
-/*                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) */
-/*                       A( K + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( kl + ku + 1 ). */
-/*           Unchanged on exit. */
-
-/*  X      - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - DOUBLE PRECISION. */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  Y      - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "T", (
-           ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (ftnlen)1)
-           ) {
-       info = 1;
-    } else if (*m < 0) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*kl < 0) {
-       info = 4;
-    } else if (*ku < 0) {
-       info = 5;
-    } else if (*lda < *kl + *ku + 1) {
-       info = 8;
-    } else if (*incx == 0) {
-       info = 10;
-    } else if (*incy == 0) {
-       info = 13;
-    }
-    if (info != 0) {
-       xerbla_("DGBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) {
-       return 0;
-    }
-
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       lenx = *n;
-       leny = *m;
-    } else {
-       lenx = *m;
-       leny = *n;
-    }
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (lenx - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (leny - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the band part of A. */
-
-/*     First form  y := beta*y. */
-
-    if (*beta != 1.) {
-       if (*incy == 1) {
-           if (*beta == 0.) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.) {
-       return 0;
-    }
-    kup1 = *ku + 1;
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y := alpha*A*x + y. */
-
-       jx = kx;
-       if (*incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.) {
-                   temp = *alpha * x[jx];
-                   k = kup1 - j;
-/* Computing MAX */
-                   i__2 = 1, i__3 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__4 = min(i__5,i__6);
-                   for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                       y[i__] += temp * a[k + i__ + j * a_dim1];
-/* L50: */
-                   }
-               }
-               jx += *incx;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.) {
-                   temp = *alpha * x[jx];
-                   iy = ky;
-                   k = kup1 - j;
-/* Computing MAX */
-                   i__4 = 1, i__2 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__3 = min(i__5,i__6);
-                   for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                       y[iy] += temp * a[k + i__ + j * a_dim1];
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               if (j > *ku) {
-                   ky += *incy;
-               }
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y := alpha*A'*x + y. */
-
-       jy = ky;
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp = 0.;
-               k = kup1 - j;
-/* Computing MAX */
-               i__3 = 1, i__4 = j - *ku;
-/* Computing MIN */
-               i__5 = *m, i__6 = j + *kl;
-               i__2 = min(i__5,i__6);
-               for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) {
-                   temp += a[k + i__ + j * a_dim1] * x[i__];
-/* L90: */
-               }
-               y[jy] += *alpha * temp;
-               jy += *incy;
-/* L100: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp = 0.;
-               ix = kx;
-               k = kup1 - j;
-/* Computing MAX */
-               i__2 = 1, i__3 = j - *ku;
-/* Computing MIN */
-               i__5 = *m, i__6 = j + *kl;
-               i__4 = min(i__5,i__6);
-               for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                   temp += a[k + i__ + j * a_dim1] * x[ix];
-                   ix += *incx;
-/* L110: */
-               }
-               y[jy] += *alpha * temp;
-               jy += *incy;
-               if (j > *ku) {
-                   kx += *incx;
-               }
-/* L120: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DGBMV . */
-
-} /* dgbmv_ */
-
-/* Subroutine */ int dgemm_(char *transa, char *transb, integer *m, integer *
-       n, integer *k, doublereal *alpha, doublereal *a, integer *lda, 
-       doublereal *b, integer *ldb, doublereal *beta, doublereal *c__, 
-       integer *ldc, ftnlen transa_len, ftnlen transb_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3;
-
-    /* Local variables */
-    static integer i__, j, l, info;
-    static logical nota, notb;
-    static doublereal temp;
-    static integer ncola;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa, nrowb;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DGEMM  performs one of the matrix-matrix operations */
-
-/*     C := alpha*op( A )*op( B ) + beta*C, */
-
-/*  where  op( X ) is one of */
-
-/*     op( X ) = X   or   op( X ) = X', */
-
-/*  alpha and beta are scalars, and A, B and C are matrices, with op( A ) */
-/*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  TRANSA - CHARACTER*1. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-
-/*              TRANSA = 'N' or 'n',  op( A ) = A. */
-
-/*              TRANSA = 'T' or 't',  op( A ) = A'. */
-
-/*              TRANSA = 'C' or 'c',  op( A ) = A'. */
-
-/*           Unchanged on exit. */
-
-/*  TRANSB - CHARACTER*1. */
-/*           On entry, TRANSB specifies the form of op( B ) to be used in */
-/*           the matrix multiplication as follows: */
-
-/*              TRANSB = 'N' or 'n',  op( B ) = B. */
-
-/*              TRANSB = 'T' or 't',  op( B ) = B'. */
-
-/*              TRANSB = 'C' or 'c',  op( B ) = B'. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies  the number  of rows  of the  matrix */
-/*           op( A )  and of the  matrix  C.  M  must  be at least  zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry,  N  specifies the number  of columns of the matrix */
-/*           op( B ) and the number of columns of the matrix C. N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry,  K  specifies  the number of columns of the matrix */
-/*           op( A ) and the number of rows of the matrix op( B ). K must */
-/*           be at least  zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise. */
-/*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by m  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is */
-/*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise. */
-/*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  n by k  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then */
-/*           LDB must be at least  max( 1, k ), otherwise  LDB must be at */
-/*           least  max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - DOUBLE PRECISION. */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n  matrix */
-/*           ( alpha*op( A )*op( B ) + beta*C ). */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not */
-/*     transposed and set  NROWA, NCOLA and  NROWB  as the number of rows */
-/*     and  columns of  A  and the  number of  rows  of  B  respectively. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    nota = lsame_(transa, "N", (ftnlen)1, (ftnlen)1);
-    notb = lsame_(transb, "N", (ftnlen)1, (ftnlen)1);
-    if (nota) {
-       nrowa = *m;
-       ncola = *k;
-    } else {
-       nrowa = *k;
-       ncola = *m;
-    }
-    if (notb) {
-       nrowb = *k;
-    } else {
-       nrowb = *n;
-    }
-
-/*     Test the input parameters. */
-
-    info = 0;
-    if (! nota && ! lsame_(transa, "C", (ftnlen)1, (ftnlen)1) && ! lsame_(
-           transa, "T", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! notb && ! lsame_(transb, "C", (ftnlen)1, (ftnlen)1) && ! 
-           lsame_(transb, "T", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < max(1,nrowa)) {
-       info = 8;
-    } else if (*ldb < max(1,nrowb)) {
-       info = 10;
-    } else if (*ldc < max(1,*m)) {
-       info = 13;
-    }
-    if (info != 0) {
-       xerbla_("DGEMM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
-       return 0;
-    }
-
-/*     And if  alpha.eq.zero. */
-
-    if (*alpha == 0.) {
-       if (*beta == 0.) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = 0.;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (notb) {
-       if (nota) {
-
-/*           Form  C := alpha*A*B + beta*C. */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L50: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L60: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (b[l + j * b_dim1] != 0.) {
-                       temp = *alpha * b[l + j * b_dim1];
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] += temp * a[i__ + l * 
-                                   a_dim1];
-/* L70: */
-                       }
-                   }
-/* L80: */
-               }
-/* L90: */
-           }
-       } else {
-
-/*           Form  C := alpha*A'*B + beta*C */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp += a[l + i__ * a_dim1] * b[l + j * b_dim1];
-/* L100: */
-                   }
-                   if (*beta == 0.) {
-                       c__[i__ + j * c_dim1] = *alpha * temp;
-                   } else {
-                       c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
-                               i__ + j * c_dim1];
-                   }
-/* L110: */
-               }
-/* L120: */
-           }
-       }
-    } else {
-       if (nota) {
-
-/*           Form  C := alpha*A*B' + beta*C */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L130: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L140: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (b[j + l * b_dim1] != 0.) {
-                       temp = *alpha * b[j + l * b_dim1];
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] += temp * a[i__ + l * 
-                                   a_dim1];
-/* L150: */
-                       }
-                   }
-/* L160: */
-               }
-/* L170: */
-           }
-       } else {
-
-/*           Form  C := alpha*A'*B' + beta*C */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp += a[l + i__ * a_dim1] * b[j + l * b_dim1];
-/* L180: */
-                   }
-                   if (*beta == 0.) {
-                       c__[i__ + j * c_dim1] = *alpha * temp;
-                   } else {
-                       c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
-                               i__ + j * c_dim1];
-                   }
-/* L190: */
-               }
-/* L200: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DGEMM . */
-
-} /* dgemm_ */
-
-/* Subroutine */ int dgemv_(char *trans, integer *m, integer *n, doublereal *
-       alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, 
-       doublereal *beta, doublereal *y, integer *incy, ftnlen trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static doublereal temp;
-    static integer lenx, leny;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DGEMV  performs one of the matrix-vector operations */
-
-/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are vectors and A is an */
-/*  m by n matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
-
-/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
-
-/*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-/*  X      - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - DOUBLE PRECISION. */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  Y      - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
-/*           Before entry with BETA non-zero, the incremented array Y */
-/*           must contain the vector y. On exit, Y is overwritten by the */
-/*           updated vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "T", (
-           ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (ftnlen)1)
-           ) {
-       info = 1;
-    } else if (*m < 0) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*lda < max(1,*m)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    } else if (*incy == 0) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("DGEMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) {
-       return 0;
-    }
-
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       lenx = *n;
-       leny = *m;
-    } else {
-       lenx = *m;
-       leny = *n;
-    }
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (lenx - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (leny - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-/*     First form  y := beta*y. */
-
-    if (*beta != 1.) {
-       if (*incy == 1) {
-           if (*beta == 0.) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.) {
-       return 0;
-    }
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y := alpha*A*x + y. */
-
-       jx = kx;
-       if (*incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.) {
-                   temp = *alpha * x[jx];
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       y[i__] += temp * a[i__ + j * a_dim1];
-/* L50: */
-                   }
-               }
-               jx += *incx;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.) {
-                   temp = *alpha * x[jx];
-                   iy = ky;
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       y[iy] += temp * a[i__ + j * a_dim1];
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y := alpha*A'*x + y. */
-
-       jy = ky;
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp = 0.;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp += a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-               }
-               y[jy] += *alpha * temp;
-               jy += *incy;
-/* L100: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp = 0.;
-               ix = kx;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp += a[i__ + j * a_dim1] * x[ix];
-                   ix += *incx;
-/* L110: */
-               }
-               y[jy] += *alpha * temp;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DGEMV . */
-
-} /* dgemv_ */
-
-/* Subroutine */ int dger_(integer *m, integer *n, doublereal *alpha, 
-       doublereal *x, integer *incx, doublereal *y, integer *incy, 
-       doublereal *a, integer *lda)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, ix, jy, kx, info;
-    static doublereal temp;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DGER   performs the rank 1 operation */
-
-/*     A := alpha*x*y' + A, */
-
-/*  where alpha is a scalar, x is an m element vector, y is an n element */
-/*  vector and A is an m by n matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the m */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  Y      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. On exit, A is */
-/*           overwritten by the updated matrix. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-
-    /* Function Body */
-    info = 0;
-    if (*m < 0) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*m)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("DGER  ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || *alpha == 0.) {
-       return 0;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (*incy > 0) {
-       jy = 1;
-    } else {
-       jy = 1 - (*n - 1) * *incy;
-    }
-    if (*incx == 1) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           if (y[jy] != 0.) {
-               temp = *alpha * y[jy];
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   a[i__ + j * a_dim1] += x[i__] * temp;
-/* L10: */
-               }
-           }
-           jy += *incy;
-/* L20: */
-       }
-    } else {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*m - 1) * *incx;
-       }
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           if (y[jy] != 0.) {
-               temp = *alpha * y[jy];
-               ix = kx;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   a[i__ + j * a_dim1] += x[ix] * temp;
-                   ix += *incx;
-/* L30: */
-               }
-           }
-           jy += *incy;
-/* L40: */
-       }
-    }
-
-    return 0;
-
-/*     End of DGER  . */
-
-} /* dger_ */
-
-doublereal dnrm2_(integer *n, doublereal *x, integer *incx)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-    doublereal ret_val, d__1;
-
-    /* Builtin functions */
-    double sqrt(doublereal);
-
-    /* Local variables */
-    static integer ix;
-    static doublereal ssq, norm, scale, absxi;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  DNRM2 returns the euclidean norm of a vector via the function */
-/*  name, so that */
-
-/*     DNRM2 := sqrt( x'*x ) */
-
-
-
-/*  -- This version written on 25-October-1982. */
-/*     Modified on 14-October-1993 to inline the call to DLASSQ. */
-/*     Sven Hammarling, Nag Ltd. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-    /* Parameter adjustments */
-    --x;
-
-    /* Function Body */
-    if (*n < 1 || *incx < 1) {
-       norm = 0.;
-    } else if (*n == 1) {
-       norm = abs(x[1]);
-    } else {
-       scale = 0.;
-       ssq = 1.;
-/*        The following loop is equivalent to this call to the LAPACK */
-/*        auxiliary routine: */
-/*        CALL DLASSQ( N, X, INCX, SCALE, SSQ ) */
-
-       i__1 = (*n - 1) * *incx + 1;
-       i__2 = *incx;
-       for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
-           if (x[ix] != 0.) {
-               absxi = (d__1 = x[ix], abs(d__1));
-               if (scale < absxi) {
-/* Computing 2nd power */
-                   d__1 = scale / absxi;
-                   ssq = ssq * (d__1 * d__1) + 1.;
-                   scale = absxi;
-               } else {
-/* Computing 2nd power */
-                   d__1 = absxi / scale;
-                   ssq += d__1 * d__1;
-               }
-           }
-/* L10: */
-       }
-       norm = scale * sqrt(ssq);
-    }
-
-    ret_val = norm;
-    return ret_val;
-
-/*     End of DNRM2. */
-
-} /* dnrm2_ */
-
-/* Subroutine */ int drot_(integer *n, doublereal *dx, integer *incx, 
-       doublereal *dy, integer *incy, doublereal *c__, doublereal *s)
-{
-    /* System generated locals */
-    integer i__1;
-
-    /* Local variables */
-    static integer i__, ix, iy;
-    static doublereal dtemp;
-
-
-/*     applies a plane rotation. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --dy;
-    --dx;
-
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*       code for unequal increments or equal increments not equal */
-/*         to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dtemp = *c__ * dx[ix] + *s * dy[iy];
-       dy[iy] = *c__ * dy[iy] - *s * dx[ix];
-       dx[ix] = dtemp;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*       code for both increments equal to 1 */
-
-L20:
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dtemp = *c__ * dx[i__] + *s * dy[i__];
-       dy[i__] = *c__ * dy[i__] - *s * dx[i__];
-       dx[i__] = dtemp;
-/* L30: */
-    }
-    return 0;
-} /* drot_ */
-
-/* Subroutine */ int drotg_(doublereal *da, doublereal *db, doublereal *c__, 
-       doublereal *s)
-{
-    /* System generated locals */
-    doublereal d__1, d__2;
-
-    /* Builtin functions */
-    double sqrt(doublereal), d_sign(doublereal *, doublereal *);
-
-    /* Local variables */
-    static doublereal r__, z__, roe, scale;
-
-
-/*     construct givens plane rotation. */
-/*     jack dongarra, linpack, 3/11/78. */
-
-
-    roe = *db;
-    if (abs(*da) > abs(*db)) {
-       roe = *da;
-    }
-    scale = abs(*da) + abs(*db);
-    if (scale != 0.) {
-       goto L10;
-    }
-    *c__ = 1.;
-    *s = 0.;
-    r__ = 0.;
-    z__ = 0.;
-    goto L20;
-L10:
-/* Computing 2nd power */
-    d__1 = *da / scale;
-/* Computing 2nd power */
-    d__2 = *db / scale;
-    r__ = scale * sqrt(d__1 * d__1 + d__2 * d__2);
-    r__ = d_sign(&c_b876, &roe) * r__;
-    *c__ = *da / r__;
-    *s = *db / r__;
-    z__ = 1.;
-    if (abs(*da) > abs(*db)) {
-       z__ = *s;
-    }
-    if (abs(*db) >= abs(*da) && *c__ != 0.) {
-       z__ = 1. / *c__;
-    }
-L20:
-    *da = r__;
-    *db = z__;
-    return 0;
-} /* drotg_ */
-
-/* Subroutine */ int drotm_(integer *n, doublereal *dx, integer *incx, 
-       doublereal *dy, integer *incy, doublereal *dparam)
-{
-    /* Initialized data */
-
-    static doublereal zero = 0.;
-    static doublereal two = 2.;
-
-    /* System generated locals */
-    integer i__1, i__2;
-
-    /* Local variables */
-    static integer i__;
-    static doublereal w, z__;
-    static integer kx, ky;
-    static doublereal dh11, dh12, dh22, dh21, dflag;
-    static integer nsteps;
-
-
-/*     APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */
-
-/*     (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN */
-/*     (DY**T) */
-
-/*     DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */
-/*     LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY. */
-/*     WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
-
-/*     DFLAG=-1.D0     DFLAG=0.D0        DFLAG=1.D0     DFLAG=-2.D0 */
-
-/*       (DH11  DH12)    (1.D0  DH12)    (DH11  1.D0)    (1.D0  0.D0) */
-/*     H=(          )    (          )    (          )    (          ) */
-/*       (DH21  DH22),   (DH21  1.D0),   (-1.D0 DH22),   (0.D0  1.D0). */
-/*     SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM. */
-
-    /* Parameter adjustments */
-    --dparam;
-    --dy;
-    --dx;
-
-    /* Function Body */
-
-    dflag = dparam[1];
-    if (*n <= 0 || dflag + two == zero) {
-       goto L140;
-    }
-    if (! (*incx == *incy && *incx > 0)) {
-       goto L70;
-    }
-
-    nsteps = *n * *incx;
-    if (dflag < 0.) {
-       goto L50;
-    } else if (dflag == 0) {
-       goto L10;
-    } else {
-       goto L30;
-    }
-L10:
-    dh12 = dparam[4];
-    dh21 = dparam[3];
-    i__1 = nsteps;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       w = dx[i__];
-       z__ = dy[i__];
-       dx[i__] = w + z__ * dh12;
-       dy[i__] = w * dh21 + z__;
-/* L20: */
-    }
-    goto L140;
-L30:
-    dh11 = dparam[2];
-    dh22 = dparam[5];
-    i__2 = nsteps;
-    i__1 = *incx;
-    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
-       w = dx[i__];
-       z__ = dy[i__];
-       dx[i__] = w * dh11 + z__;
-       dy[i__] = -w + dh22 * z__;
-/* L40: */
-    }
-    goto L140;
-L50:
-    dh11 = dparam[2];
-    dh12 = dparam[4];
-    dh21 = dparam[3];
-    dh22 = dparam[5];
-    i__1 = nsteps;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       w = dx[i__];
-       z__ = dy[i__];
-       dx[i__] = w * dh11 + z__ * dh12;
-       dy[i__] = w * dh21 + z__ * dh22;
-/* L60: */
-    }
-    goto L140;
-L70:
-    kx = 1;
-    ky = 1;
-    if (*incx < 0) {
-       kx = (1 - *n) * *incx + 1;
-    }
-    if (*incy < 0) {
-       ky = (1 - *n) * *incy + 1;
-    }
-
-    if (dflag < 0.) {
-       goto L120;
-    } else if (dflag == 0) {
-       goto L80;
-    } else {
-       goto L100;
-    }
-L80:
-    dh12 = dparam[4];
-    dh21 = dparam[3];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       w = dx[kx];
-       z__ = dy[ky];
-       dx[kx] = w + z__ * dh12;
-       dy[ky] = w * dh21 + z__;
-       kx += *incx;
-       ky += *incy;
-/* L90: */
-    }
-    goto L140;
-L100:
-    dh11 = dparam[2];
-    dh22 = dparam[5];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       w = dx[kx];
-       z__ = dy[ky];
-       dx[kx] = w * dh11 + z__;
-       dy[ky] = -w + dh22 * z__;
-       kx += *incx;
-       ky += *incy;
-/* L110: */
-    }
-    goto L140;
-L120:
-    dh11 = dparam[2];
-    dh12 = dparam[4];
-    dh21 = dparam[3];
-    dh22 = dparam[5];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       w = dx[kx];
-       z__ = dy[ky];
-       dx[kx] = w * dh11 + z__ * dh12;
-       dy[ky] = w * dh21 + z__ * dh22;
-       kx += *incx;
-       ky += *incy;
-/* L130: */
-    }
-L140:
-    return 0;
-} /* drotm_ */
-
-/* Subroutine */ int drotmg_(doublereal *dd1, doublereal *dd2, doublereal *
-       dx1, doublereal *dy1, doublereal *dparam)
-{
-    /* Initialized data */
-
-    static doublereal zero = 0.;
-    static doublereal one = 1.;
-    static doublereal two = 2.;
-    static doublereal gam = 4096.;
-    static doublereal gamsq = 16777216.;
-    static doublereal rgamsq = 5.9604645e-8;
-
-    /* Format strings */
-    static char fmt_120[] = "";
-    static char fmt_150[] = "";
-    static char fmt_180[] = "";
-    static char fmt_210[] = "";
-
-    /* System generated locals */
-    doublereal d__1;
-
-    /* Local variables */
-    static doublereal du, dp1, dp2, dq2, dq1, dh11, dh21, dh12, dh22;
-    static integer igo;
-    static doublereal dflag, dtemp;
-
-    /* Assigned format variables */
-    static char *igo_fmt;
-
-
-/*     CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */
-/*     THE SECOND COMPONENT OF THE 2-VECTOR  (DSQRT(DD1)*DX1,DSQRT(DD2)* */
-/*     DY2)**T. */
-/*     WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
-
-/*     DFLAG=-1.D0     DFLAG=0.D0        DFLAG=1.D0     DFLAG=-2.D0 */
-
-/*       (DH11  DH12)    (1.D0  DH12)    (DH11  1.D0)    (1.D0  0.D0) */
-/*     H=(          )    (          )    (          )    (          ) */
-/*       (DH21  DH22),   (DH21  1.D0),   (-1.D0 DH22),   (0.D0  1.D0). */
-/*     LOCATIONS 2-4 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22 */
-/*     RESPECTIVELY. (VALUES OF 1.D0, -1.D0, OR 0.D0 IMPLIED BY THE */
-/*     VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.) */
-
-/*     THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */
-/*     INEXACT.  THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */
-/*     OF DD1 AND DD2.  ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */
-
-
-    /* Parameter adjustments */
-    --dparam;
-
-    /* Function Body */
-    if (! (*dd1 < zero)) {
-       goto L10;
-    }
-/*       GO ZERO-H-D-AND-DX1.. */
-    goto L60;
-L10:
-/*     CASE-DD1-NONNEGATIVE */
-    dp2 = *dd2 * *dy1;
-    if (! (dp2 == zero)) {
-       goto L20;
-    }
-    dflag = -two;
-    goto L260;
-/*     REGULAR-CASE.. */
-L20:
-    dp1 = *dd1 * *dx1;
-    dq2 = dp2 * *dy1;
-    dq1 = dp1 * *dx1;
-
-    if (! (abs(dq1) > abs(dq2))) {
-       goto L40;
-    }
-    dh21 = -(*dy1) / *dx1;
-    dh12 = dp2 / dp1;
-
-    du = one - dh12 * dh21;
-
-    if (! (du <= zero)) {
-       goto L30;
-    }
-/*         GO ZERO-H-D-AND-DX1.. */
-    goto L60;
-L30:
-    dflag = zero;
-    *dd1 /= du;
-    *dd2 /= du;
-    *dx1 *= du;
-/*         GO SCALE-CHECK.. */
-    goto L100;
-L40:
-    if (! (dq2 < zero)) {
-       goto L50;
-    }
-/*         GO ZERO-H-D-AND-DX1.. */
-    goto L60;
-L50:
-    dflag = one;
-    dh11 = dp1 / dp2;
-    dh22 = *dx1 / *dy1;
-    du = one + dh11 * dh22;
-    dtemp = *dd2 / du;
-    *dd2 = *dd1 / du;
-    *dd1 = dtemp;
-    *dx1 = *dy1 * du;
-/*         GO SCALE-CHECK */
-    goto L100;
-/*     PROCEDURE..ZERO-H-D-AND-DX1.. */
-L60:
-    dflag = -one;
-    dh11 = zero;
-    dh12 = zero;
-    dh21 = zero;
-    dh22 = zero;
-
-    *dd1 = zero;
-    *dd2 = zero;
-    *dx1 = zero;
-/*         RETURN.. */
-    goto L220;
-/*     PROCEDURE..FIX-H.. */
-L70:
-    if (! (dflag >= zero)) {
-       goto L90;
-    }
-
-    if (! (dflag == zero)) {
-       goto L80;
-    }
-    dh11 = one;
-    dh22 = one;
-    dflag = -one;
-    goto L90;
-L80:
-    dh21 = -one;
-    dh12 = one;
-    dflag = -one;
-L90:
-    switch (igo) {
-       case 0: goto L120;
-       case 1: goto L150;
-       case 2: goto L180;
-       case 3: goto L210;
-    }
-/*     PROCEDURE..SCALE-CHECK */
-L100:
-L110:
-    if (! (*dd1 <= rgamsq)) {
-       goto L130;
-    }
-    if (*dd1 == zero) {
-       goto L160;
-    }
-    igo = 0;
-    igo_fmt = fmt_120;
-/*              FIX-H.. */
-    goto L70;
-L120:
-/* Computing 2nd power */
-    d__1 = gam;
-    *dd1 *= d__1 * d__1;
-    *dx1 /= gam;
-    dh11 /= gam;
-    dh12 /= gam;
-    goto L110;
-L130:
-L140:
-    if (! (*dd1 >= gamsq)) {
-       goto L160;
-    }
-    igo = 1;
-    igo_fmt = fmt_150;
-/*              FIX-H.. */
-    goto L70;
-L150:
-/* Computing 2nd power */
-    d__1 = gam;
-    *dd1 /= d__1 * d__1;
-    *dx1 *= gam;
-    dh11 *= gam;
-    dh12 *= gam;
-    goto L140;
-L160:
-L170:
-    if (! (abs(*dd2) <= rgamsq)) {
-       goto L190;
-    }
-    if (*dd2 == zero) {
-       goto L220;
-    }
-    igo = 2;
-    igo_fmt = fmt_180;
-/*              FIX-H.. */
-    goto L70;
-L180:
-/* Computing 2nd power */
-    d__1 = gam;
-    *dd2 *= d__1 * d__1;
-    dh21 /= gam;
-    dh22 /= gam;
-    goto L170;
-L190:
-L200:
-    if (! (abs(*dd2) >= gamsq)) {
-       goto L220;
-    }
-    igo = 3;
-    igo_fmt = fmt_210;
-/*              FIX-H.. */
-    goto L70;
-L210:
-/* Computing 2nd power */
-    d__1 = gam;
-    *dd2 /= d__1 * d__1;
-    dh21 *= gam;
-    dh22 *= gam;
-    goto L200;
-L220:
-    if (dflag < 0.) {
-       goto L250;
-    } else if (dflag == 0) {
-       goto L230;
-    } else {
-       goto L240;
-    }
-L230:
-    dparam[3] = dh21;
-    dparam[4] = dh12;
-    goto L260;
-L240:
-    dparam[2] = dh11;
-    dparam[5] = dh22;
-    goto L260;
-L250:
-    dparam[2] = dh11;
-    dparam[3] = dh21;
-    dparam[4] = dh12;
-    dparam[5] = dh22;
-L260:
-    dparam[1] = dflag;
-    return 0;
-} /* drotmg_ */
-
-/* Subroutine */ int dsbmv_(char *uplo, integer *n, integer *k, doublereal *
-       alpha, doublereal *a, integer *lda, doublereal *x, integer *incx, 
-       doublereal *beta, doublereal *y, integer *incy, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
-
-    /* Local variables */
-    static integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
-    static doublereal temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DSBMV  performs the matrix-vector  operation */
-
-/*     y := alpha*A*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n symmetric band matrix, with k super-diagonals. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the band matrix A is being supplied as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  being supplied. */
-
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  being supplied. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry, K specifies the number of super-diagonals of the */
-/*           matrix A. K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the symmetric matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer the upper */
-/*           triangular part of a symmetric band matrix from conventional */
-/*           full matrix storage to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the symmetric matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer the lower */
-/*           triangular part of a symmetric band matrix from conventional */
-/*           full matrix storage to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-
-/*  X      - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - DOUBLE PRECISION. */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-
-/*  Y      - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*k < 0) {
-       info = 3;
-    } else if (*lda < *k + 1) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    } else if (*incy == 0) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("DSBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0. && *beta == 1.) {
-       return 0;
-    }
-
-/*     Set up the start points in  X  and  Y. */
-
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of the array A */
-/*     are accessed sequentially with one pass through A. */
-
-/*     First form  y := beta*y. */
-
-    if (*beta != 1.) {
-       if (*incy == 1) {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.) {
-       return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when upper triangle of A is stored. */
-
-       kplus1 = *k + 1;
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.;
-               l = kplus1 - j;
-/* Computing MAX */
-               i__2 = 1, i__3 = j - *k;
-               i__4 = j - 1;
-               for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                   y[i__] += temp1 * a[l + i__ + j * a_dim1];
-                   temp2 += a[l + i__ + j * a_dim1] * x[i__];
-/* L50: */
-               }
-               y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.;
-               ix = kx;
-               iy = ky;
-               l = kplus1 - j;
-/* Computing MAX */
-               i__4 = 1, i__2 = j - *k;
-               i__3 = j - 1;
-               for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                   y[iy] += temp1 * a[l + i__ + j * a_dim1];
-                   temp2 += a[l + i__ + j * a_dim1] * x[ix];
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * 
-                       temp2;
-               jx += *incx;
-               jy += *incy;
-               if (j > *k) {
-                   kx += *incx;
-                   ky += *incy;
-               }
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y  when lower triangle of A is stored. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.;
-               y[j] += temp1 * a[j * a_dim1 + 1];
-               l = 1 - j;
-/* Computing MIN */
-               i__4 = *n, i__2 = j + *k;
-               i__3 = min(i__4,i__2);
-               for (i__ = j + 1; i__ <= i__3; ++i__) {
-                   y[i__] += temp1 * a[l + i__ + j * a_dim1];
-                   temp2 += a[l + i__ + j * a_dim1] * x[i__];
-/* L90: */
-               }
-               y[j] += *alpha * temp2;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.;
-               y[jy] += temp1 * a[j * a_dim1 + 1];
-               l = 1 - j;
-               ix = jx;
-               iy = jy;
-/* Computing MIN */
-               i__4 = *n, i__2 = j + *k;
-               i__3 = min(i__4,i__2);
-               for (i__ = j + 1; i__ <= i__3; ++i__) {
-                   ix += *incx;
-                   iy += *incy;
-                   y[iy] += temp1 * a[l + i__ + j * a_dim1];
-                   temp2 += a[l + i__ + j * a_dim1] * x[ix];
-/* L110: */
-               }
-               y[jy] += *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DSBMV . */
-
-} /* dsbmv_ */
-
-/* Subroutine */ int dscal_(integer *n, doublereal *da, doublereal *dx, 
-       integer *incx)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-
-    /* Local variables */
-    static integer i__, m, mp1, nincx;
-
-
-/*     scales a vector by a constant. */
-/*     uses unrolled loops for increment equal to one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --dx;
-
-    /* Function Body */
-    if (*n <= 0 || *incx <= 0) {
-       return 0;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-
-/*        code for increment not equal to 1 */
-
-    nincx = *n * *incx;
-    i__1 = nincx;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       dx[i__] = *da * dx[i__];
-/* L10: */
-    }
-    return 0;
-
-/*        code for increment equal to 1 */
-
-
-/*        clean-up loop */
-
-L20:
-    m = *n % 5;
-    if (m == 0) {
-       goto L40;
-    }
-    i__2 = m;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       dx[i__] = *da * dx[i__];
-/* L30: */
-    }
-    if (*n < 5) {
-       return 0;
-    }
-L40:
-    mp1 = m + 1;
-    i__2 = *n;
-    for (i__ = mp1; i__ <= i__2; i__ += 5) {
-       dx[i__] = *da * dx[i__];
-       dx[i__ + 1] = *da * dx[i__ + 1];
-       dx[i__ + 2] = *da * dx[i__ + 2];
-       dx[i__ + 3] = *da * dx[i__ + 3];
-       dx[i__ + 4] = *da * dx[i__ + 4];
-/* L50: */
-    }
-    return 0;
-} /* dscal_ */
-
-/* DECK DSDOT */
-doublereal dsdot_(integer *n, real *sx, integer *incx, real *sy, integer *
-       incy)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-    doublereal ret_val;
-
-    /* Local variables */
-    static integer i__, ns, kx, ky;
-
-/* ***BEGIN PROLOGUE  DSDOT */
-/* ***PURPOSE  Compute the inner product of two vectors with extended */
-/*            precision accumulation and result. */
-/* ***LIBRARY   SLATEC (BLAS) */
-/* ***CATEGORY  D1A4 */
-/* ***TYPE      DOUBLE PRECISION (DSDOT-D, DCDOT-C) */
-/* ***KEYWORDS  BLAS, COMPLEX VECTORS, DOT PRODUCT, INNER PRODUCT, */
-/*             LINEAR ALGEBRA, VECTOR */
-/* ***AUTHOR  Lawson, C. L., (JPL) */
-/*           Hanson, R. J., (SNLA) */
-/*           Kincaid, D. R., (U. of Texas) */
-/*           Krogh, F. T., (JPL) */
-/* ***DESCRIPTION */
-
-/*                B L A S  Subprogram */
-/*    Description of Parameters */
-
-/*     --Input-- */
-/*        N  number of elements in input vector(s) */
-/*       SX  single precision vector with N elements */
-/*     INCX  storage spacing between elements of SX */
-/*       SY  single precision vector with N elements */
-/*     INCY  storage spacing between elements of SY */
-
-/*     --Output-- */
-/*    DSDOT  double precision dot product (zero if N.LE.0) */
-
-/*     Returns D.P. dot product accumulated in D.P., for S.P. SX and SY */
-/*     DSDOT = sum for I = 0 to N-1 of  SX(LX+I*INCX) * SY(LY+I*INCY), */
-/*     where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is */
-/*     defined in a similar way using INCY. */
-
-/* ***REFERENCES  C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T. */
-/*                 Krogh, Basic linear algebra subprograms for Fortran */
-/*                 usage, Algorithm No. 539, Transactions on Mathematical */
-/*                 Software 5, 3 (September 1979), pp. 308-323. */
-/* ***ROUTINES CALLED  (NONE) */
-/* ***REVISION HISTORY  (YYMMDD) */
-/*   791001  DATE WRITTEN */
-/*   890831  Modified array declarations.  (WRB) */
-/*   890831  REVISION DATE from Version 3.2 */
-/*   891214  Prologue converted to Version 4.0 format.  (BAB) */
-/*   920310  Corrected definition of LX in DESCRIPTION.  (WRB) */
-/*   920501  Reformatted the REFERENCES section.  (WRB) */
-/* ***END PROLOGUE  DSDOT */
-/* ***FIRST EXECUTABLE STATEMENT  DSDOT */
-    /* Parameter adjustments */
-    --sy;
-    --sx;
-
-    /* Function Body */
-    ret_val = 0.;
-    if (*n <= 0) {
-       return ret_val;
-    }
-    if (*incx == *incy && *incx > 0) {
-       goto L20;
-    }
-
-/*     Code for unequal or nonpositive increments. */
-
-    kx = 1;
-    ky = 1;
-    if (*incx < 0) {
-       kx = (1 - *n) * *incx + 1;
-    }
-    if (*incy < 0) {
-       ky = (1 - *n) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       ret_val += (doublereal) sx[kx] * (doublereal) sy[ky];
-       kx += *incx;
-       ky += *incy;
-/* L10: */
-    }
-    return ret_val;
-
-/*     Code for equal, positive, non-unit increments. */
-
-L20:
-    ns = *n * *incx;
-    i__1 = ns;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       ret_val += (doublereal) sx[i__] * (doublereal) sy[i__];
-/* L30: */
-    }
-    return ret_val;
-} /* dsdot_ */
-
-/* Subroutine */ int dspmv_(char *uplo, integer *n, doublereal *alpha, 
-       doublereal *ap, doublereal *x, integer *incx, doublereal *beta, 
-       doublereal *y, integer *incy, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    static doublereal temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DSPMV  performs the matrix-vector operation */
-
-/*     y := alpha*A*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n symmetric matrix, supplied in packed form. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  AP     - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. */
-/*           Unchanged on exit. */
-
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - DOUBLE PRECISION. */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  Y      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --y;
-    --x;
-    --ap;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 6;
-    } else if (*incy == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("DSPMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0. && *beta == 1.) {
-       return 0;
-    }
-
-/*     Set up the start points in  X  and  Y. */
-
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-
-/*     First form  y := beta*y. */
-
-    if (*beta != 1.) {
-       if (*incy == 1) {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.) {
-       return 0;
-    }
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when AP contains the upper triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.;
-               k = kk;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   y[i__] += temp1 * ap[k];
-                   temp2 += ap[k] * x[i__];
-                   ++k;
-/* L50: */
-               }
-               y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2;
-               kk += j;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.;
-               ix = kx;
-               iy = ky;
-               i__2 = kk + j - 2;
-               for (k = kk; k <= i__2; ++k) {
-                   y[iy] += temp1 * ap[k];
-                   temp2 += ap[k] * x[ix];
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-               kk += j;
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y  when AP contains the lower triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.;
-               y[j] += temp1 * ap[kk];
-               k = kk + 1;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   y[i__] += temp1 * ap[k];
-                   temp2 += ap[k] * x[i__];
-                   ++k;
-/* L90: */
-               }
-               y[j] += *alpha * temp2;
-               kk += *n - j + 1;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.;
-               y[jy] += temp1 * ap[kk];
-               ix = jx;
-               iy = jy;
-               i__2 = kk + *n - j;
-               for (k = kk + 1; k <= i__2; ++k) {
-                   ix += *incx;
-                   iy += *incy;
-                   y[iy] += temp1 * ap[k];
-                   temp2 += ap[k] * x[ix];
-/* L110: */
-               }
-               y[jy] += *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-               kk += *n - j + 1;
-/* L120: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DSPMV . */
-
-} /* dspmv_ */
-
-/* Subroutine */ int dspr_(char *uplo, integer *n, doublereal *alpha, 
-       doublereal *x, integer *incx, doublereal *ap, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DSPR    performs the symmetric rank 1 operation */
-
-/*     A := alpha*x*x' + A, */
-
-/*  where alpha is a real scalar, x is an n element vector and A is an */
-/*  n by n symmetric matrix, supplied in packed form. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  AP     - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the upper triangular part of the */
-/*           updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the lower triangular part of the */
-/*           updated matrix. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --ap;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    }
-    if (info != 0) {
-       xerbla_("DSPR  ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0.) {
-       return 0;
-    }
-
-/*     Set the start point in X if the increment is not unity. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  A  when upper triangle is stored in AP. */
-
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.) {
-                   temp = *alpha * x[j];
-                   k = kk;
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       ap[k] += x[i__] * temp;
-                       ++k;
-/* L10: */
-                   }
-               }
-               kk += j;
-/* L20: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.) {
-                   temp = *alpha * x[jx];
-                   ix = kx;
-                   i__2 = kk + j - 1;
-                   for (k = kk; k <= i__2; ++k) {
-                       ap[k] += x[ix] * temp;
-                       ix += *incx;
-/* L30: */
-                   }
-               }
-               jx += *incx;
-               kk += j;
-/* L40: */
-           }
-       }
-    } else {
-
-/*        Form  A  when lower triangle is stored in AP. */
-
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.) {
-                   temp = *alpha * x[j];
-                   k = kk;
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       ap[k] += x[i__] * temp;
-                       ++k;
-/* L50: */
-                   }
-               }
-               kk = kk + *n - j + 1;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.) {
-                   temp = *alpha * x[jx];
-                   ix = jx;
-                   i__2 = kk + *n - j;
-                   for (k = kk; k <= i__2; ++k) {
-                       ap[k] += x[ix] * temp;
-                       ix += *incx;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               kk = kk + *n - j + 1;
-/* L80: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DSPR  . */
-
-} /* dspr_ */
-
-/* Subroutine */ int dspr2_(char *uplo, integer *n, doublereal *alpha, 
-       doublereal *x, integer *incx, doublereal *y, integer *incy, 
-       doublereal *ap, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    static doublereal temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DSPR2  performs the symmetric rank 2 operation */
-
-/*     A := alpha*x*y' + alpha*y*x' + A, */
-
-/*  where alpha is a scalar, x and y are n element vectors and A is an */
-/*  n by n symmetric matrix, supplied in packed form. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  Y      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-/*  AP     - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the upper triangular part of the */
-/*           updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the lower triangular part of the */
-/*           updated matrix. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --ap;
-    --y;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("DSPR2 ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0.) {
-       return 0;
-    }
-
-/*     Set up the start points in X and Y if the increments are not both */
-/*     unity. */
-
-    if (*incx != 1 || *incy != 1) {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*n - 1) * *incx;
-       }
-       if (*incy > 0) {
-           ky = 1;
-       } else {
-           ky = 1 - (*n - 1) * *incy;
-       }
-       jx = kx;
-       jy = ky;
-    }
-
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  A  when upper triangle is stored in AP. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0. || y[j] != 0.) {
-                   temp1 = *alpha * y[j];
-                   temp2 = *alpha * x[j];
-                   k = kk;
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       ap[k] = ap[k] + x[i__] * temp1 + y[i__] * temp2;
-                       ++k;
-/* L10: */
-                   }
-               }
-               kk += j;
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0. || y[jy] != 0.) {
-                   temp1 = *alpha * y[jy];
-                   temp2 = *alpha * x[jx];
-                   ix = kx;
-                   iy = ky;
-                   i__2 = kk + j - 1;
-                   for (k = kk; k <= i__2; ++k) {
-                       ap[k] = ap[k] + x[ix] * temp1 + y[iy] * temp2;
-                       ix += *incx;
-                       iy += *incy;
-/* L30: */
-                   }
-               }
-               jx += *incx;
-               jy += *incy;
-               kk += j;
-/* L40: */
-           }
-       }
-    } else {
-
-/*        Form  A  when lower triangle is stored in AP. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0. || y[j] != 0.) {
-                   temp1 = *alpha * y[j];
-                   temp2 = *alpha * x[j];
-                   k = kk;
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       ap[k] = ap[k] + x[i__] * temp1 + y[i__] * temp2;
-                       ++k;
-/* L50: */
-                   }
-               }
-               kk = kk + *n - j + 1;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0. || y[jy] != 0.) {
-                   temp1 = *alpha * y[jy];
-                   temp2 = *alpha * x[jx];
-                   ix = jx;
-                   iy = jy;
-                   i__2 = kk + *n - j;
-                   for (k = kk; k <= i__2; ++k) {
-                       ap[k] = ap[k] + x[ix] * temp1 + y[iy] * temp2;
-                       ix += *incx;
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               jy += *incy;
-               kk = kk + *n - j + 1;
-/* L80: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DSPR2 . */
-
-} /* dspr2_ */
-
-/* Subroutine */ int dswap_(integer *n, doublereal *dx, integer *incx, 
-       doublereal *dy, integer *incy)
-{
-    /* System generated locals */
-    integer i__1;
-
-    /* Local variables */
-    static integer i__, m, ix, iy, mp1;
-    static doublereal dtemp;
-
-
-/*     interchanges two vectors. */
-/*     uses unrolled loops for increments equal one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --dy;
-    --dx;
-
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*       code for unequal increments or equal increments not equal */
-/*         to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dtemp = dx[ix];
-       dx[ix] = dy[iy];
-       dy[iy] = dtemp;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*       code for both increments equal to 1 */
-
-
-/*       clean-up loop */
-
-L20:
-    m = *n % 3;
-    if (m == 0) {
-       goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dtemp = dx[i__];
-       dx[i__] = dy[i__];
-       dy[i__] = dtemp;
-/* L30: */
-    }
-    if (*n < 3) {
-       return 0;
-    }
-L40:
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 3) {
-       dtemp = dx[i__];
-       dx[i__] = dy[i__];
-       dy[i__] = dtemp;
-       dtemp = dx[i__ + 1];
-       dx[i__ + 1] = dy[i__ + 1];
-       dy[i__ + 1] = dtemp;
-       dtemp = dx[i__ + 2];
-       dx[i__ + 2] = dy[i__ + 2];
-       dy[i__ + 2] = dtemp;
-/* L50: */
-    }
-    return 0;
-} /* dswap_ */
-
-/* Subroutine */ int dsymm_(char *side, char *uplo, integer *m, integer *n, 
-       doublereal *alpha, doublereal *a, integer *lda, doublereal *b, 
-       integer *ldb, doublereal *beta, doublereal *c__, integer *ldc, ftnlen 
-       side_len, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3;
-
-    /* Local variables */
-    static integer i__, j, k, info;
-    static doublereal temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DSYMM  performs one of the matrix-matrix operations */
-
-/*     C := alpha*A*B + beta*C, */
-
-/*  or */
-
-/*     C := alpha*B*A + beta*C, */
-
-/*  where alpha and beta are scalars,  A is a symmetric matrix and  B and */
-/*  C are  m by n matrices. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE  specifies whether  the  symmetric matrix  A */
-/*           appears on the  left or right  in the  operation as follows: */
-
-/*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C, */
-
-/*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C, */
-
-/*           Unchanged on exit. */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of  the  symmetric  matrix   A  is  to  be */
-/*           referenced as follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of the */
-/*                                  symmetric matrix is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of the */
-/*                                  symmetric matrix is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies the number of rows of the matrix  C. */
-/*           M  must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix C. */
-/*           N  must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */
-/*           m  when  SIDE = 'L' or 'l'  and is  n otherwise. */
-/*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of */
-/*           the array  A  must contain the  symmetric matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading m by m upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  symmetric matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  m by m  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  symmetric */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of */
-/*           the array  A  must contain the  symmetric matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading n by n upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  symmetric matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  n by n  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  symmetric */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least  max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */
-/*           Before entry, the leading  m by n part of the array  B  must */
-/*           contain the matrix B. */
-/*           Unchanged on exit. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - DOUBLE PRECISION. */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n updated */
-/*           matrix. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Set NROWA as the number of rows of A. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-/*     Test the input parameters. */
-
-    info = 0;
-    if (! lsame_(side, "L", (ftnlen)1, (ftnlen)1) && ! lsame_(side, "R", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,*m)) {
-       info = 9;
-    } else if (*ldc < max(1,*m)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("DSYMM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || *alpha == 0. && *beta == 1.) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (*alpha == 0.) {
-       if (*beta == 0.) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = 0.;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*B + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp1 = *alpha * b[i__ + j * b_dim1];
-                   temp2 = 0.;
-                   i__3 = i__ - 1;
-                   for (k = 1; k <= i__3; ++k) {
-                       c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1];
-                       temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1];
-/* L50: */
-                   }
-                   if (*beta == 0.) {
-                       c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] 
-                               + *alpha * temp2;
-                   } else {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] 
-                               + temp1 * a[i__ + i__ * a_dim1] + *alpha * 
-                               temp2;
-                   }
-/* L60: */
-               }
-/* L70: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               for (i__ = *m; i__ >= 1; --i__) {
-                   temp1 = *alpha * b[i__ + j * b_dim1];
-                   temp2 = 0.;
-                   i__2 = *m;
-                   for (k = i__ + 1; k <= i__2; ++k) {
-                       c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1];
-                       temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1];
-/* L80: */
-                   }
-                   if (*beta == 0.) {
-                       c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] 
-                               + *alpha * temp2;
-                   } else {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] 
-                               + temp1 * a[i__ + i__ * a_dim1] + *alpha * 
-                               temp2;
-                   }
-/* L90: */
-               }
-/* L100: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*B*A + beta*C. */
-
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           temp1 = *alpha * a[j + j * a_dim1];
-           if (*beta == 0.) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = temp1 * b[i__ + j * b_dim1];
-/* L110: */
-               }
-           } else {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + 
-                           temp1 * b[i__ + j * b_dim1];
-/* L120: */
-               }
-           }
-           i__2 = j - 1;
-           for (k = 1; k <= i__2; ++k) {
-               if (upper) {
-                   temp1 = *alpha * a[k + j * a_dim1];
-               } else {
-                   temp1 = *alpha * a[j + k * a_dim1];
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1];
-/* L130: */
-               }
-/* L140: */
-           }
-           i__2 = *n;
-           for (k = j + 1; k <= i__2; ++k) {
-               if (upper) {
-                   temp1 = *alpha * a[j + k * a_dim1];
-               } else {
-                   temp1 = *alpha * a[k + j * a_dim1];
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1];
-/* L150: */
-               }
-/* L160: */
-           }
-/* L170: */
-       }
-    }
-
-    return 0;
-
-/*     End of DSYMM . */
-
-} /* dsymm_ */
-
-/* Subroutine */ int dsymv_(char *uplo, integer *n, doublereal *alpha, 
-       doublereal *a, integer *lda, doublereal *x, integer *incx, doublereal 
-       *beta, doublereal *y, integer *incy, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static doublereal temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DSYMV  performs the matrix-vector  operation */
-
-/*     y := alpha*A*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n symmetric matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           lower triangular part of A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           upper triangular part of A is not referenced. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - DOUBLE PRECISION. */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  Y      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*lda < max(1,*n)) {
-       info = 5;
-    } else if (*incx == 0) {
-       info = 7;
-    } else if (*incy == 0) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("DSYMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0. && *beta == 1.) {
-       return 0;
-    }
-
-/*     Set up the start points in  X  and  Y. */
-
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-
-/*     First form  y := beta*y. */
-
-    if (*beta != 1.) {
-       if (*incy == 1) {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.) {
-       return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when A is stored in upper triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   y[i__] += temp1 * a[i__ + j * a_dim1];
-                   temp2 += a[i__ + j * a_dim1] * x[i__];
-/* L50: */
-               }
-               y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.;
-               ix = kx;
-               iy = ky;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   y[iy] += temp1 * a[i__ + j * a_dim1];
-                   temp2 += a[i__ + j * a_dim1] * x[ix];
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y  when A is stored in lower triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.;
-               y[j] += temp1 * a[j + j * a_dim1];
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   y[i__] += temp1 * a[i__ + j * a_dim1];
-                   temp2 += a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-               }
-               y[j] += *alpha * temp2;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.;
-               y[jy] += temp1 * a[j + j * a_dim1];
-               ix = jx;
-               iy = jy;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   ix += *incx;
-                   iy += *incy;
-                   y[iy] += temp1 * a[i__ + j * a_dim1];
-                   temp2 += a[i__ + j * a_dim1] * x[ix];
-/* L110: */
-               }
-               y[jy] += *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DSYMV . */
-
-} /* dsymv_ */
-
-/* Subroutine */ int dsyr_(char *uplo, integer *n, doublereal *alpha, 
-       doublereal *x, integer *incx, doublereal *a, integer *lda, ftnlen 
-       uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DSYR   performs the symmetric rank 1 operation */
-
-/*     A := alpha*x*x' + A, */
-
-/*  where alpha is a real scalar, x is an n element vector and A is an */
-/*  n by n symmetric matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           lower triangular part of A is not referenced. On exit, the */
-/*           upper triangular part of the array A is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           upper triangular part of A is not referenced. On exit, the */
-/*           lower triangular part of the array A is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*lda < max(1,*n)) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("DSYR  ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0.) {
-       return 0;
-    }
-
-/*     Set the start point in X if the increment is not unity. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  A  when A is stored in upper triangle. */
-
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.) {
-                   temp = *alpha * x[j];
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] += x[i__] * temp;
-/* L10: */
-                   }
-               }
-/* L20: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.) {
-                   temp = *alpha * x[jx];
-                   ix = kx;
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] += x[ix] * temp;
-                       ix += *incx;
-/* L30: */
-                   }
-               }
-               jx += *incx;
-/* L40: */
-           }
-       }
-    } else {
-
-/*        Form  A  when A is stored in lower triangle. */
-
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.) {
-                   temp = *alpha * x[j];
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] += x[i__] * temp;
-/* L50: */
-                   }
-               }
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.) {
-                   temp = *alpha * x[jx];
-                   ix = jx;
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] += x[ix] * temp;
-                       ix += *incx;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-/* L80: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DSYR  . */
-
-} /* dsyr_ */
-
-/* Subroutine */ int dsyr2_(char *uplo, integer *n, doublereal *alpha, 
-       doublereal *x, integer *incx, doublereal *y, integer *incy, 
-       doublereal *a, integer *lda, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static doublereal temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DSYR2  performs the symmetric rank 2 operation */
-
-/*     A := alpha*x*y' + alpha*y*x' + A, */
-
-/*  where alpha is a scalar, x and y are n element vectors and A is an n */
-/*  by n symmetric matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  Y      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           lower triangular part of A is not referenced. On exit, the */
-/*           upper triangular part of the array A is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           upper triangular part of A is not referenced. On exit, the */
-/*           lower triangular part of the array A is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*n)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("DSYR2 ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0.) {
-       return 0;
-    }
-
-/*     Set up the start points in X and Y if the increments are not both */
-/*     unity. */
-
-    if (*incx != 1 || *incy != 1) {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*n - 1) * *incx;
-       }
-       if (*incy > 0) {
-           ky = 1;
-       } else {
-           ky = 1 - (*n - 1) * *incy;
-       }
-       jx = kx;
-       jy = ky;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  A  when A is stored in the upper triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0. || y[j] != 0.) {
-                   temp1 = *alpha * y[j];
-                   temp2 = *alpha * x[j];
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * 
-                               temp1 + y[i__] * temp2;
-/* L10: */
-                   }
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0. || y[jy] != 0.) {
-                   temp1 = *alpha * y[jy];
-                   temp2 = *alpha * x[jx];
-                   ix = kx;
-                   iy = ky;
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * 
-                               temp1 + y[iy] * temp2;
-                       ix += *incx;
-                       iy += *incy;
-/* L30: */
-                   }
-               }
-               jx += *incx;
-               jy += *incy;
-/* L40: */
-           }
-       }
-    } else {
-
-/*        Form  A  when A is stored in the lower triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0. || y[j] != 0.) {
-                   temp1 = *alpha * y[j];
-                   temp2 = *alpha * x[j];
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * 
-                               temp1 + y[i__] * temp2;
-/* L50: */
-                   }
-               }
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0. || y[jy] != 0.) {
-                   temp1 = *alpha * y[jy];
-                   temp2 = *alpha * x[jx];
-                   ix = jx;
-                   iy = jy;
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * 
-                               temp1 + y[iy] * temp2;
-                       ix += *incx;
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               jy += *incy;
-/* L80: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DSYR2 . */
-
-} /* dsyr2_ */
-
-/* Subroutine */ int dsyr2k_(char *uplo, char *trans, integer *n, integer *k, 
-       doublereal *alpha, doublereal *a, integer *lda, doublereal *b, 
-       integer *ldb, doublereal *beta, doublereal *c__, integer *ldc, ftnlen 
-       uplo_len, ftnlen trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3;
-
-    /* Local variables */
-    static integer i__, j, l, info;
-    static doublereal temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DSYR2K  performs one of the symmetric rank 2k operations */
-
-/*     C := alpha*A*B' + alpha*B*A' + beta*C, */
-
-/*  or */
-
-/*     C := alpha*A'*B + alpha*B'*A + beta*C, */
-
-/*  where  alpha and beta  are scalars, C is an  n by n  symmetric matrix */
-/*  and  A and B  are  n by k  matrices  in the  first  case  and  k by n */
-/*  matrices in the second case. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   C := alpha*A*B' + alpha*B*A' + */
-/*                                        beta*C. */
-
-/*              TRANS = 'T' or 't'   C := alpha*A'*B + alpha*B'*A + */
-/*                                        beta*C. */
-
-/*              TRANS = 'C' or 'c'   C := alpha*A'*B + alpha*B'*A + */
-/*                                        beta*C. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns  of the  matrices  A and B,  and on  entry  with */
-/*           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number */
-/*           of rows of the matrices  A and B.  K must be at least  zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  k by n  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDB must be at least  max( 1, n ), otherwise  LDB must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - DOUBLE PRECISION. */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-
-/*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldc < max(1,*n)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("DSYR2K", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (*alpha == 0.) {
-       if (upper) {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L30: */
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*B' + alpha*B*A' + C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L90: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L100: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) {
-                       temp1 = *alpha * b[j + l * b_dim1];
-                       temp2 = *alpha * a[j + l * a_dim1];
-                       i__3 = j;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[
-                                   i__ + l * a_dim1] * temp1 + b[i__ + l * 
-                                   b_dim1] * temp2;
-/* L110: */
-                       }
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L140: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L150: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (a[j + l * a_dim1] != 0. || b[j + l * b_dim1] != 0.) {
-                       temp1 = *alpha * b[j + l * b_dim1];
-                       temp2 = *alpha * a[j + l * a_dim1];
-                       i__3 = *n;
-                       for (i__ = j; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[
-                                   i__ + l * a_dim1] * temp1 + b[i__ + l * 
-                                   b_dim1] * temp2;
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*A'*B + alpha*B'*A + C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp1 = 0.;
-                   temp2 = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1];
-                       temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1];
-/* L190: */
-                   }
-                   if (*beta == 0.) {
-                       c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * 
-                               temp2;
-                   } else {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] 
-                               + *alpha * temp1 + *alpha * temp2;
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp1 = 0.;
-                   temp2 = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1];
-                       temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1];
-/* L220: */
-                   }
-                   if (*beta == 0.) {
-                       c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * 
-                               temp2;
-                   } else {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] 
-                               + *alpha * temp1 + *alpha * temp2;
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DSYR2K. */
-
-} /* dsyr2k_ */
-
-/* Subroutine */ int dsyrk_(char *uplo, char *trans, integer *n, integer *k, 
-       doublereal *alpha, doublereal *a, integer *lda, doublereal *beta, 
-       doublereal *c__, integer *ldc, ftnlen uplo_len, ftnlen trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3;
-
-    /* Local variables */
-    static integer i__, j, l, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DSYRK  performs one of the symmetric rank k operations */
-
-/*     C := alpha*A*A' + beta*C, */
-
-/*  or */
-
-/*     C := alpha*A'*A + beta*C, */
-
-/*  where  alpha and beta  are scalars, C is an  n by n  symmetric matrix */
-/*  and  A  is an  n by k  matrix in the first case and a  k by n  matrix */
-/*  in the second case. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C. */
-
-/*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C. */
-
-/*              TRANS = 'C' or 'c'   C := alpha*A'*A + beta*C. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns   of  the   matrix   A,   and  on   entry   with */
-/*           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number */
-/*           of rows of the matrix  A.  K must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - DOUBLE PRECISION. */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-
-/*  C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldc < max(1,*n)) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("DSYRK ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (*alpha == 0.) {
-       if (upper) {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L30: */
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*A' + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L90: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L100: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (a[j + l * a_dim1] != 0.) {
-                       temp = *alpha * a[j + l * a_dim1];
-                       i__3 = j;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] += temp * a[i__ + l * 
-                                   a_dim1];
-/* L110: */
-                       }
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.;
-/* L140: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L150: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (a[j + l * a_dim1] != 0.) {
-                       temp = *alpha * a[j + l * a_dim1];
-                       i__3 = *n;
-                       for (i__ = j; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] += temp * a[i__ + l * 
-                                   a_dim1];
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*A'*A + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp += a[l + i__ * a_dim1] * a[l + j * a_dim1];
-/* L190: */
-                   }
-                   if (*beta == 0.) {
-                       c__[i__ + j * c_dim1] = *alpha * temp;
-                   } else {
-                       c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
-                               i__ + j * c_dim1];
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp += a[l + i__ * a_dim1] * a[l + j * a_dim1];
-/* L220: */
-                   }
-                   if (*beta == 0.) {
-                       c__[i__ + j * c_dim1] = *alpha * temp;
-                   } else {
-                       c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
-                               i__ + j * c_dim1];
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DSYRK . */
-
-} /* dsyrk_ */
-
-/* Subroutine */ int dtbmv_(char *uplo, char *trans, char *diag, integer *n, 
-       integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx,
-        ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
-
-    /* Local variables */
-    static integer i__, j, l, ix, jx, kx, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DTBMV  performs one of the matrix-vector operations */
-
-/*     x := A*x,   or   x := A'*x, */
-
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   x := A*x. */
-
-/*              TRANS = 'T' or 't'   x := A'*x. */
-
-/*              TRANS = 'C' or 'c'   x := A'*x. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < *k + 1) {
-       info = 7;
-    } else if (*incx == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("DTBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX   too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*         Form  x := A*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.) {
-                       temp = x[j];
-                       l = kplus1 - j;
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__4 = j - 1;
-                       for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                           x[i__] += temp * a[l + i__ + j * a_dim1];
-/* L10: */
-                       }
-                       if (nounit) {
-                           x[j] *= a[kplus1 + j * a_dim1];
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.) {
-                       temp = x[jx];
-                       ix = kx;
-                       l = kplus1 - j;
-/* Computing MAX */
-                       i__4 = 1, i__2 = j - *k;
-                       i__3 = j - 1;
-                       for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                           x[ix] += temp * a[l + i__ + j * a_dim1];
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           x[jx] *= a[kplus1 + j * a_dim1];
-                       }
-                   }
-                   jx += *incx;
-                   if (j > *k) {
-                       kx += *incx;
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.) {
-                       temp = x[j];
-                       l = 1 - j;
-/* Computing MIN */
-                       i__1 = *n, i__3 = j + *k;
-                       i__4 = j + 1;
-                       for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
-                           x[i__] += temp * a[l + i__ + j * a_dim1];
-/* L50: */
-                       }
-                       if (nounit) {
-                           x[j] *= a[j * a_dim1 + 1];
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.) {
-                       temp = x[jx];
-                       ix = kx;
-                       l = 1 - j;
-/* Computing MIN */
-                       i__4 = *n, i__1 = j + *k;
-                       i__3 = j + 1;
-                       for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
-                           x[ix] += temp * a[l + i__ + j * a_dim1];
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           x[jx] *= a[j * a_dim1 + 1];
-                       }
-                   }
-                   jx -= *incx;
-                   if (*n - j >= *k) {
-                       kx -= *incx;
-                   }
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := A'*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   l = kplus1 - j;
-                   if (nounit) {
-                       temp *= a[kplus1 + j * a_dim1];
-                   }
-/* Computing MAX */
-                   i__4 = 1, i__1 = j - *k;
-                   i__3 = max(i__4,i__1);
-                   for (i__ = j - 1; i__ >= i__3; --i__) {
-                       temp += a[l + i__ + j * a_dim1] * x[i__];
-/* L90: */
-                   }
-                   x[j] = temp;
-/* L100: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   kx -= *incx;
-                   ix = kx;
-                   l = kplus1 - j;
-                   if (nounit) {
-                       temp *= a[kplus1 + j * a_dim1];
-                   }
-/* Computing MAX */
-                   i__4 = 1, i__1 = j - *k;
-                   i__3 = max(i__4,i__1);
-                   for (i__ = j - 1; i__ >= i__3; --i__) {
-                       temp += a[l + i__ + j * a_dim1] * x[ix];
-                       ix -= *incx;
-/* L110: */
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-/* L120: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__3 = *n;
-               for (j = 1; j <= i__3; ++j) {
-                   temp = x[j];
-                   l = 1 - j;
-                   if (nounit) {
-                       temp *= a[j * a_dim1 + 1];
-                   }
-/* Computing MIN */
-                   i__1 = *n, i__2 = j + *k;
-                   i__4 = min(i__1,i__2);
-                   for (i__ = j + 1; i__ <= i__4; ++i__) {
-                       temp += a[l + i__ + j * a_dim1] * x[i__];
-/* L130: */
-                   }
-                   x[j] = temp;
-/* L140: */
-               }
-           } else {
-               jx = kx;
-               i__3 = *n;
-               for (j = 1; j <= i__3; ++j) {
-                   temp = x[jx];
-                   kx += *incx;
-                   ix = kx;
-                   l = 1 - j;
-                   if (nounit) {
-                       temp *= a[j * a_dim1 + 1];
-                   }
-/* Computing MIN */
-                   i__1 = *n, i__2 = j + *k;
-                   i__4 = min(i__1,i__2);
-                   for (i__ = j + 1; i__ <= i__4; ++i__) {
-                       temp += a[l + i__ + j * a_dim1] * x[ix];
-                       ix += *incx;
-/* L150: */
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-/* L160: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DTBMV . */
-
-} /* dtbmv_ */
-
-/* Subroutine */ int dtbsv_(char *uplo, char *trans, char *diag, integer *n, 
-       integer *k, doublereal *a, integer *lda, doublereal *x, integer *incx,
-        ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
-
-    /* Local variables */
-    static integer i__, j, l, ix, jx, kx, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DTBSV  solves one of the systems of equations */
-
-/*     A*x = b,   or   A'*x = b, */
-
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular band matrix, with ( k + 1 ) */
-/*  diagonals. */
-
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   A*x = b. */
-
-/*              TRANS = 'T' or 't'   A'*x = b. */
-
-/*              TRANS = 'C' or 'c'   A'*x = b. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < *k + 1) {
-       info = 7;
-    } else if (*incx == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("DTBSV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed by sequentially with one pass through A. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x := inv( A )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.) {
-                       l = kplus1 - j;
-                       if (nounit) {
-                           x[j] /= a[kplus1 + j * a_dim1];
-                       }
-                       temp = x[j];
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__1 = max(i__2,i__3);
-                       for (i__ = j - 1; i__ >= i__1; --i__) {
-                           x[i__] -= temp * a[l + i__ + j * a_dim1];
-/* L10: */
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   kx -= *incx;
-                   if (x[jx] != 0.) {
-                       ix = kx;
-                       l = kplus1 - j;
-                       if (nounit) {
-                           x[jx] /= a[kplus1 + j * a_dim1];
-                       }
-                       temp = x[jx];
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__1 = max(i__2,i__3);
-                       for (i__ = j - 1; i__ >= i__1; --i__) {
-                           x[ix] -= temp * a[l + i__ + j * a_dim1];
-                           ix -= *incx;
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.) {
-                       l = 1 - j;
-                       if (nounit) {
-                           x[j] /= a[j * a_dim1 + 1];
-                       }
-                       temp = x[j];
-/* Computing MIN */
-                       i__3 = *n, i__4 = j + *k;
-                       i__2 = min(i__3,i__4);
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           x[i__] -= temp * a[l + i__ + j * a_dim1];
-/* L50: */
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   kx += *incx;
-                   if (x[jx] != 0.) {
-                       ix = kx;
-                       l = 1 - j;
-                       if (nounit) {
-                           x[jx] /= a[j * a_dim1 + 1];
-                       }
-                       temp = x[jx];
-/* Computing MIN */
-                       i__3 = *n, i__4 = j + *k;
-                       i__2 = min(i__3,i__4);
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           x[ix] -= temp * a[l + i__ + j * a_dim1];
-                           ix += *incx;
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := inv( A')*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   l = kplus1 - j;
-/* Computing MAX */
-                   i__2 = 1, i__3 = j - *k;
-                   i__4 = j - 1;
-                   for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                       temp -= a[l + i__ + j * a_dim1] * x[i__];
-/* L90: */
-                   }
-                   if (nounit) {
-                       temp /= a[kplus1 + j * a_dim1];
-                   }
-                   x[j] = temp;
-/* L100: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = kx;
-                   l = kplus1 - j;
-/* Computing MAX */
-                   i__4 = 1, i__2 = j - *k;
-                   i__3 = j - 1;
-                   for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                       temp -= a[l + i__ + j * a_dim1] * x[ix];
-                       ix += *incx;
-/* L110: */
-                   }
-                   if (nounit) {
-                       temp /= a[kplus1 + j * a_dim1];
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-                   if (j > *k) {
-                       kx += *incx;
-                   }
-/* L120: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   l = 1 - j;
-/* Computing MIN */
-                   i__1 = *n, i__3 = j + *k;
-                   i__4 = j + 1;
-                   for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
-                       temp -= a[l + i__ + j * a_dim1] * x[i__];
-/* L130: */
-                   }
-                   if (nounit) {
-                       temp /= a[j * a_dim1 + 1];
-                   }
-                   x[j] = temp;
-/* L140: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = kx;
-                   l = 1 - j;
-/* Computing MIN */
-                   i__4 = *n, i__1 = j + *k;
-                   i__3 = j + 1;
-                   for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
-                       temp -= a[l + i__ + j * a_dim1] * x[ix];
-                       ix -= *incx;
-/* L150: */
-                   }
-                   if (nounit) {
-                       temp /= a[j * a_dim1 + 1];
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-                   if (*n - j >= *k) {
-                       kx -= *incx;
-                   }
-/* L160: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DTBSV . */
-
-} /* dtbsv_ */
-
-/* Subroutine */ int dtpmv_(char *uplo, char *trans, char *diag, integer *n, 
-       doublereal *ap, doublereal *x, integer *incx, ftnlen uplo_len, ftnlen 
-       trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DTPMV  performs one of the matrix-vector operations */
-
-/*     x := A*x,   or   x := A'*x, */
-
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular matrix, supplied in packed form. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   x := A*x. */
-
-/*              TRANS = 'T' or 't'   x := A'*x. */
-
-/*              TRANS = 'C' or 'c'   x := A'*x. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  AP     - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */
-/*           respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */
-/*           respectively, and so on. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --ap;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*incx == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("DTPMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of AP are */
-/*     accessed sequentially with one pass through AP. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x:= A*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.) {
-                       temp = x[j];
-                       k = kk;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           x[i__] += temp * ap[k];
-                           ++k;
-/* L10: */
-                       }
-                       if (nounit) {
-                           x[j] *= ap[kk + j - 1];
-                       }
-                   }
-                   kk += j;
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.) {
-                       temp = x[jx];
-                       ix = kx;
-                       i__2 = kk + j - 2;
-                       for (k = kk; k <= i__2; ++k) {
-                           x[ix] += temp * ap[k];
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           x[jx] *= ap[kk + j - 1];
-                       }
-                   }
-                   jx += *incx;
-                   kk += j;
-/* L40: */
-               }
-           }
-       } else {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.) {
-                       temp = x[j];
-                       k = kk;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           x[i__] += temp * ap[k];
-                           --k;
-/* L50: */
-                       }
-                       if (nounit) {
-                           x[j] *= ap[kk - *n + j];
-                       }
-                   }
-                   kk -= *n - j + 1;
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.) {
-                       temp = x[jx];
-                       ix = kx;
-                       i__1 = kk - (*n - (j + 1));
-                       for (k = kk; k >= i__1; --k) {
-                           x[ix] += temp * ap[k];
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           x[jx] *= ap[kk - *n + j];
-                       }
-                   }
-                   jx -= *incx;
-                   kk -= *n - j + 1;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := A'*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   if (nounit) {
-                       temp *= ap[kk];
-                   }
-                   k = kk - 1;
-                   for (i__ = j - 1; i__ >= 1; --i__) {
-                       temp += ap[k] * x[i__];
-                       --k;
-/* L90: */
-                   }
-                   x[j] = temp;
-                   kk -= j;
-/* L100: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = jx;
-                   if (nounit) {
-                       temp *= ap[kk];
-                   }
-                   i__1 = kk - j + 1;
-                   for (k = kk - 1; k >= i__1; --k) {
-                       ix -= *incx;
-                       temp += ap[k] * x[ix];
-/* L110: */
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-                   kk -= j;
-/* L120: */
-               }
-           }
-       } else {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   if (nounit) {
-                       temp *= ap[kk];
-                   }
-                   k = kk + 1;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       temp += ap[k] * x[i__];
-                       ++k;
-/* L130: */
-                   }
-                   x[j] = temp;
-                   kk += *n - j + 1;
-/* L140: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = jx;
-                   if (nounit) {
-                       temp *= ap[kk];
-                   }
-                   i__2 = kk + *n - j;
-                   for (k = kk + 1; k <= i__2; ++k) {
-                       ix += *incx;
-                       temp += ap[k] * x[ix];
-/* L150: */
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-                   kk += *n - j + 1;
-/* L160: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DTPMV . */
-
-} /* dtpmv_ */
-
-/* Subroutine */ int dtpsv_(char *uplo, char *trans, char *diag, integer *n, 
-       doublereal *ap, doublereal *x, integer *incx, ftnlen uplo_len, ftnlen 
-       trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DTPSV  solves one of the systems of equations */
-
-/*     A*x = b,   or   A'*x = b, */
-
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular matrix, supplied in packed form. */
-
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   A*x = b. */
-
-/*              TRANS = 'T' or 't'   A'*x = b. */
-
-/*              TRANS = 'C' or 'c'   A'*x = b. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  AP     - DOUBLE PRECISION array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */
-/*           respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */
-/*           respectively, and so on. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --ap;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*incx == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("DTPSV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of AP are */
-/*     accessed sequentially with one pass through AP. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x := inv( A )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.) {
-                       if (nounit) {
-                           x[j] /= ap[kk];
-                       }
-                       temp = x[j];
-                       k = kk - 1;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           x[i__] -= temp * ap[k];
-                           --k;
-/* L10: */
-                       }
-                   }
-                   kk -= j;
-/* L20: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.) {
-                       if (nounit) {
-                           x[jx] /= ap[kk];
-                       }
-                       temp = x[jx];
-                       ix = jx;
-                       i__1 = kk - j + 1;
-                       for (k = kk - 1; k >= i__1; --k) {
-                           ix -= *incx;
-                           x[ix] -= temp * ap[k];
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-                   kk -= j;
-/* L40: */
-               }
-           }
-       } else {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.) {
-                       if (nounit) {
-                           x[j] /= ap[kk];
-                       }
-                       temp = x[j];
-                       k = kk + 1;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           x[i__] -= temp * ap[k];
-                           ++k;
-/* L50: */
-                       }
-                   }
-                   kk += *n - j + 1;
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.) {
-                       if (nounit) {
-                           x[jx] /= ap[kk];
-                       }
-                       temp = x[jx];
-                       ix = jx;
-                       i__2 = kk + *n - j;
-                       for (k = kk + 1; k <= i__2; ++k) {
-                           ix += *incx;
-                           x[ix] -= temp * ap[k];
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-                   kk += *n - j + 1;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := inv( A' )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   k = kk;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp -= ap[k] * x[i__];
-                       ++k;
-/* L90: */
-                   }
-                   if (nounit) {
-                       temp /= ap[kk + j - 1];
-                   }
-                   x[j] = temp;
-                   kk += j;
-/* L100: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = kx;
-                   i__2 = kk + j - 2;
-                   for (k = kk; k <= i__2; ++k) {
-                       temp -= ap[k] * x[ix];
-                       ix += *incx;
-/* L110: */
-                   }
-                   if (nounit) {
-                       temp /= ap[kk + j - 1];
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-                   kk += j;
-/* L120: */
-               }
-           }
-       } else {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   k = kk;
-                   i__1 = j + 1;
-                   for (i__ = *n; i__ >= i__1; --i__) {
-                       temp -= ap[k] * x[i__];
-                       --k;
-/* L130: */
-                   }
-                   if (nounit) {
-                       temp /= ap[kk - *n + j];
-                   }
-                   x[j] = temp;
-                   kk -= *n - j + 1;
-/* L140: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = kx;
-                   i__1 = kk - (*n - (j + 1));
-                   for (k = kk; k >= i__1; --k) {
-                       temp -= ap[k] * x[ix];
-                       ix -= *incx;
-/* L150: */
-                   }
-                   if (nounit) {
-                       temp /= ap[kk - *n + j];
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-                   kk -= *n - j + 1;
-/* L160: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DTPSV . */
-
-} /* dtpsv_ */
-
-/* Subroutine */ int dtrmm_(char *side, char *uplo, char *transa, char *diag, 
-       integer *m, integer *n, doublereal *alpha, doublereal *a, integer *
-       lda, doublereal *b, integer *ldb, ftnlen side_len, ftnlen uplo_len, 
-       ftnlen transa_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
-
-    /* Local variables */
-    static integer i__, j, k, info;
-    static doublereal temp;
-    static logical lside;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DTRMM  performs one of the matrix-matrix operations */
-
-/*     B := alpha*op( A )*B,   or   B := alpha*B*op( A ), */
-
-/*  where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or */
-/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
-
-/*     op( A ) = A   or   op( A ) = A'. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE specifies whether  op( A ) multiplies B from */
-/*           the left or right as follows: */
-
-/*              SIDE = 'L' or 'l'   B := alpha*op( A )*B. */
-
-/*              SIDE = 'R' or 'r'   B := alpha*B*op( A ). */
-
-/*           Unchanged on exit. */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix A is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANSA - CHARACTER*1. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-
-/*              TRANSA = 'N' or 'n'   op( A ) = A. */
-
-/*              TRANSA = 'T' or 't'   op( A ) = A'. */
-
-/*              TRANSA = 'C' or 'c'   op( A ) = A'. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit triangular */
-/*           as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of B. M must be at */
-/*           least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of B.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
-/*           zero then  A is not referenced and  B need not be set before */
-/*           entry. */
-/*           Unchanged on exit. */
-
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m */
-/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
-/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
-/*           upper triangular part of the array  A must contain the upper */
-/*           triangular matrix  and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
-/*           lower triangular part of the array  A must contain the lower */
-/*           triangular matrix  and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
-/*           A  are not referenced either,  but are assumed to be  unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
-/*           then LDA must be at least max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */
-/*           Before entry,  the leading  m by n part of the array  B must */
-/*           contain the matrix  B,  and  on exit  is overwritten  by the */
-/*           transformed matrix. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-
-    /* Function Body */
-    lside = lsame_(side, "L", (ftnlen)1, (ftnlen)1);
-    if (lside) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! lside && ! lsame_(side, "R", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(transa, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa,
-            "T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 3;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 4;
-    } else if (*m < 0) {
-       info = 5;
-    } else if (*n < 0) {
-       info = 6;
-    } else if (*lda < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldb < max(1,*m)) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("DTRMM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (*alpha == 0.) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = *m;
-           for (i__ = 1; i__ <= i__2; ++i__) {
-               b[i__ + j * b_dim1] = 0.;
-/* L10: */
-           }
-/* L20: */
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lside) {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*           Form  B := alpha*A*B. */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (k = 1; k <= i__2; ++k) {
-                       if (b[k + j * b_dim1] != 0.) {
-                           temp = *alpha * b[k + j * b_dim1];
-                           i__3 = k - 1;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] += temp * a[i__ + k * 
-                                       a_dim1];
-/* L30: */
-                           }
-                           if (nounit) {
-                               temp *= a[k + k * a_dim1];
-                           }
-                           b[k + j * b_dim1] = temp;
-                       }
-/* L40: */
-                   }
-/* L50: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (k = *m; k >= 1; --k) {
-                       if (b[k + j * b_dim1] != 0.) {
-                           temp = *alpha * b[k + j * b_dim1];
-                           b[k + j * b_dim1] = temp;
-                           if (nounit) {
-                               b[k + j * b_dim1] *= a[k + k * a_dim1];
-                           }
-                           i__2 = *m;
-                           for (i__ = k + 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] += temp * a[i__ + k * 
-                                       a_dim1];
-/* L60: */
-                           }
-                       }
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       } else {
-
-/*           Form  B := alpha*A'*B. */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (i__ = *m; i__ >= 1; --i__) {
-                       temp = b[i__ + j * b_dim1];
-                       if (nounit) {
-                           temp *= a[i__ + i__ * a_dim1];
-                       }
-                       i__2 = i__ - 1;
-                       for (k = 1; k <= i__2; ++k) {
-                           temp += a[k + i__ * a_dim1] * b[k + j * b_dim1];
-/* L90: */
-                       }
-                       b[i__ + j * b_dim1] = *alpha * temp;
-/* L100: */
-                   }
-/* L110: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp = b[i__ + j * b_dim1];
-                       if (nounit) {
-                           temp *= a[i__ + i__ * a_dim1];
-                       }
-                       i__3 = *m;
-                       for (k = i__ + 1; k <= i__3; ++k) {
-                           temp += a[k + i__ * a_dim1] * b[k + j * b_dim1];
-/* L120: */
-                       }
-                       b[i__ + j * b_dim1] = *alpha * temp;
-/* L130: */
-                   }
-/* L140: */
-               }
-           }
-       }
-    } else {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*           Form  B := alpha*B*A. */
-
-           if (upper) {
-               for (j = *n; j >= 1; --j) {
-                   temp = *alpha;
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   i__1 = *m;
-                   for (i__ = 1; i__ <= i__1; ++i__) {
-                       b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
-/* L150: */
-                   }
-                   i__1 = j - 1;
-                   for (k = 1; k <= i__1; ++k) {
-                       if (a[k + j * a_dim1] != 0.) {
-                           temp = *alpha * a[k + j * a_dim1];
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] += temp * b[i__ + k * 
-                                       b_dim1];
-/* L160: */
-                           }
-                       }
-/* L170: */
-                   }
-/* L180: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = *alpha;
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
-/* L190: */
-                   }
-                   i__2 = *n;
-                   for (k = j + 1; k <= i__2; ++k) {
-                       if (a[k + j * a_dim1] != 0.) {
-                           temp = *alpha * a[k + j * a_dim1];
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] += temp * b[i__ + k * 
-                                       b_dim1];
-/* L200: */
-                           }
-                       }
-/* L210: */
-                   }
-/* L220: */
-               }
-           }
-       } else {
-
-/*           Form  B := alpha*B*A'. */
-
-           if (upper) {
-               i__1 = *n;
-               for (k = 1; k <= i__1; ++k) {
-                   i__2 = k - 1;
-                   for (j = 1; j <= i__2; ++j) {
-                       if (a[j + k * a_dim1] != 0.) {
-                           temp = *alpha * a[j + k * a_dim1];
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] += temp * b[i__ + k * 
-                                       b_dim1];
-/* L230: */
-                           }
-                       }
-/* L240: */
-                   }
-                   temp = *alpha;
-                   if (nounit) {
-                       temp *= a[k + k * a_dim1];
-                   }
-                   if (temp != 1.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
-/* L250: */
-                       }
-                   }
-/* L260: */
-               }
-           } else {
-               for (k = *n; k >= 1; --k) {
-                   i__1 = *n;
-                   for (j = k + 1; j <= i__1; ++j) {
-                       if (a[j + k * a_dim1] != 0.) {
-                           temp = *alpha * a[j + k * a_dim1];
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] += temp * b[i__ + k * 
-                                       b_dim1];
-/* L270: */
-                           }
-                       }
-/* L280: */
-                   }
-                   temp = *alpha;
-                   if (nounit) {
-                       temp *= a[k + k * a_dim1];
-                   }
-                   if (temp != 1.) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
-/* L290: */
-                       }
-                   }
-/* L300: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DTRMM . */
-
-} /* dtrmm_ */
-
-/* Subroutine */ int dtrmv_(char *uplo, char *trans, char *diag, integer *n, 
-       doublereal *a, integer *lda, doublereal *x, integer *incx, ftnlen 
-       uplo_len, ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DTRMV  performs one of the matrix-vector operations */
-
-/*     x := A*x,   or   x := A'*x, */
-
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   x := A*x. */
-
-/*              TRANS = 'T' or 't'   x := A'*x. */
-
-/*              TRANS = 'C' or 'c'   x := A'*x. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular matrix and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular matrix and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced either, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,*n)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    }
-    if (info != 0) {
-       xerbla_("DTRMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x := A*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.) {
-                       temp = x[j];
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           x[i__] += temp * a[i__ + j * a_dim1];
-/* L10: */
-                       }
-                       if (nounit) {
-                           x[j] *= a[j + j * a_dim1];
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.) {
-                       temp = x[jx];
-                       ix = kx;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           x[ix] += temp * a[i__ + j * a_dim1];
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           x[jx] *= a[j + j * a_dim1];
-                       }
-                   }
-                   jx += *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.) {
-                       temp = x[j];
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           x[i__] += temp * a[i__ + j * a_dim1];
-/* L50: */
-                       }
-                       if (nounit) {
-                           x[j] *= a[j + j * a_dim1];
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.) {
-                       temp = x[jx];
-                       ix = kx;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           x[ix] += temp * a[i__ + j * a_dim1];
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           x[jx] *= a[j + j * a_dim1];
-                       }
-                   }
-                   jx -= *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := A'*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   for (i__ = j - 1; i__ >= 1; --i__) {
-                       temp += a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-                   }
-                   x[j] = temp;
-/* L100: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = jx;
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   for (i__ = j - 1; i__ >= 1; --i__) {
-                       ix -= *incx;
-                       temp += a[i__ + j * a_dim1] * x[ix];
-/* L110: */
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-/* L120: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       temp += a[i__ + j * a_dim1] * x[i__];
-/* L130: */
-                   }
-                   x[j] = temp;
-/* L140: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = jx;
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       ix += *incx;
-                       temp += a[i__ + j * a_dim1] * x[ix];
-/* L150: */
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-/* L160: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DTRMV . */
-
-} /* dtrmv_ */
-
-/* Subroutine */ int dtrsm_(char *side, char *uplo, char *transa, char *diag, 
-       integer *m, integer *n, doublereal *alpha, doublereal *a, integer *
-       lda, doublereal *b, integer *ldb, ftnlen side_len, ftnlen uplo_len, 
-       ftnlen transa_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
-
-    /* Local variables */
-    static integer i__, j, k, info;
-    static doublereal temp;
-    static logical lside;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DTRSM  solves one of the matrix equations */
-
-/*     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B, */
-
-/*  where alpha is a scalar, X and B are m by n matrices, A is a unit, or */
-/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
-
-/*     op( A ) = A   or   op( A ) = A'. */
-
-/*  The matrix X is overwritten on B. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  SIDE   - CHARACTER*1. */
-/*           On entry, SIDE specifies whether op( A ) appears on the left */
-/*           or right of X as follows: */
-
-/*              SIDE = 'L' or 'l'   op( A )*X = alpha*B. */
-
-/*              SIDE = 'R' or 'r'   X*op( A ) = alpha*B. */
-
-/*           Unchanged on exit. */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix A is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANSA - CHARACTER*1. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-
-/*              TRANSA = 'N' or 'n'   op( A ) = A. */
-
-/*              TRANSA = 'T' or 't'   op( A ) = A'. */
-
-/*              TRANSA = 'C' or 'c'   op( A ) = A'. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit triangular */
-/*           as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of B. M must be at */
-/*           least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of B.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
-/*           zero then  A is not referenced and  B need not be set before */
-/*           entry. */
-/*           Unchanged on exit. */
-
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m */
-/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
-/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
-/*           upper triangular part of the array  A must contain the upper */
-/*           triangular matrix  and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
-/*           lower triangular part of the array  A must contain the lower */
-/*           triangular matrix  and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
-/*           A  are not referenced either,  but are assumed to be  unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
-/*           then LDA must be at least max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  B      - DOUBLE PRECISION array of DIMENSION ( LDB, n ). */
-/*           Before entry,  the leading  m by n part of the array  B must */
-/*           contain  the  right-hand  side  matrix  B,  and  on exit  is */
-/*           overwritten by the solution matrix  X. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-
-    /* Function Body */
-    lside = lsame_(side, "L", (ftnlen)1, (ftnlen)1);
-    if (lside) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! lside && ! lsame_(side, "R", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(transa, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa,
-            "T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 3;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 4;
-    } else if (*m < 0) {
-       info = 5;
-    } else if (*n < 0) {
-       info = 6;
-    } else if (*lda < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldb < max(1,*m)) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("DTRSM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (*alpha == 0.) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = *m;
-           for (i__ = 1; i__ <= i__2; ++i__) {
-               b[i__ + j * b_dim1] = 0.;
-/* L10: */
-           }
-/* L20: */
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lside) {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*           Form  B := alpha*inv( A )*B. */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (*alpha != 1.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
-                                   ;
-/* L30: */
-                       }
-                   }
-                   for (k = *m; k >= 1; --k) {
-                       if (b[k + j * b_dim1] != 0.) {
-                           if (nounit) {
-                               b[k + j * b_dim1] /= a[k + k * a_dim1];
-                           }
-                           i__2 = k - 1;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[
-                                       i__ + k * a_dim1];
-/* L40: */
-                           }
-                       }
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (*alpha != 1.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
-                                   ;
-/* L70: */
-                       }
-                   }
-                   i__2 = *m;
-                   for (k = 1; k <= i__2; ++k) {
-                       if (b[k + j * b_dim1] != 0.) {
-                           if (nounit) {
-                               b[k + j * b_dim1] /= a[k + k * a_dim1];
-                           }
-                           i__3 = *m;
-                           for (i__ = k + 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[
-                                       i__ + k * a_dim1];
-/* L80: */
-                           }
-                       }
-/* L90: */
-                   }
-/* L100: */
-               }
-           }
-       } else {
-
-/*           Form  B := alpha*inv( A' )*B. */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp = *alpha * b[i__ + j * b_dim1];
-                       i__3 = i__ - 1;
-                       for (k = 1; k <= i__3; ++k) {
-                           temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1];
-/* L110: */
-                       }
-                       if (nounit) {
-                           temp /= a[i__ + i__ * a_dim1];
-                       }
-                       b[i__ + j * b_dim1] = temp;
-/* L120: */
-                   }
-/* L130: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (i__ = *m; i__ >= 1; --i__) {
-                       temp = *alpha * b[i__ + j * b_dim1];
-                       i__2 = *m;
-                       for (k = i__ + 1; k <= i__2; ++k) {
-                           temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1];
-/* L140: */
-                       }
-                       if (nounit) {
-                           temp /= a[i__ + i__ * a_dim1];
-                       }
-                       b[i__ + j * b_dim1] = temp;
-/* L150: */
-                   }
-/* L160: */
-               }
-           }
-       }
-    } else {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*           Form  B := alpha*B*inv( A ). */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (*alpha != 1.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
-                                   ;
-/* L170: */
-                       }
-                   }
-                   i__2 = j - 1;
-                   for (k = 1; k <= i__2; ++k) {
-                       if (a[k + j * a_dim1] != 0.) {
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[
-                                       i__ + k * b_dim1];
-/* L180: */
-                           }
-                       }
-/* L190: */
-                   }
-                   if (nounit) {
-                       temp = 1. / a[j + j * a_dim1];
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
-/* L200: */
-                       }
-                   }
-/* L210: */
-               }
-           } else {
-               for (j = *n; j >= 1; --j) {
-                   if (*alpha != 1.) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
-                                   ;
-/* L220: */
-                       }
-                   }
-                   i__1 = *n;
-                   for (k = j + 1; k <= i__1; ++k) {
-                       if (a[k + j * a_dim1] != 0.) {
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[
-                                       i__ + k * b_dim1];
-/* L230: */
-                           }
-                       }
-/* L240: */
-                   }
-                   if (nounit) {
-                       temp = 1. / a[j + j * a_dim1];
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
-/* L250: */
-                       }
-                   }
-/* L260: */
-               }
-           }
-       } else {
-
-/*           Form  B := alpha*B*inv( A' ). */
-
-           if (upper) {
-               for (k = *n; k >= 1; --k) {
-                   if (nounit) {
-                       temp = 1. / a[k + k * a_dim1];
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
-/* L270: */
-                       }
-                   }
-                   i__1 = k - 1;
-                   for (j = 1; j <= i__1; ++j) {
-                       if (a[j + k * a_dim1] != 0.) {
-                           temp = a[j + k * a_dim1];
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] -= temp * b[i__ + k * 
-                                       b_dim1];
-/* L280: */
-                           }
-                       }
-/* L290: */
-                   }
-                   if (*alpha != 1.) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1]
-                                   ;
-/* L300: */
-                       }
-                   }
-/* L310: */
-               }
-           } else {
-               i__1 = *n;
-               for (k = 1; k <= i__1; ++k) {
-                   if (nounit) {
-                       temp = 1. / a[k + k * a_dim1];
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
-/* L320: */
-                       }
-                   }
-                   i__2 = *n;
-                   for (j = k + 1; j <= i__2; ++j) {
-                       if (a[j + k * a_dim1] != 0.) {
-                           temp = a[j + k * a_dim1];
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] -= temp * b[i__ + k * 
-                                       b_dim1];
-/* L330: */
-                           }
-                       }
-/* L340: */
-                   }
-                   if (*alpha != 1.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1]
-                                   ;
-/* L350: */
-                       }
-                   }
-/* L360: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DTRSM . */
-
-} /* dtrsm_ */
-
-/* Subroutine */ int dtrsv_(char *uplo, char *trans, char *diag, integer *n, 
-       doublereal *a, integer *lda, doublereal *x, integer *incx, ftnlen 
-       uplo_len, ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static doublereal temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  DTRSV  solves one of the systems of equations */
-
-/*     A*x = b,   or   A'*x = b, */
-
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular matrix. */
-
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   A*x = b. */
-
-/*              TRANS = 'T' or 't'   A'*x = b. */
-
-/*              TRANS = 'C' or 'c'   A'*x = b. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  A      - DOUBLE PRECISION array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular matrix and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular matrix and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced either, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  X      - DOUBLE PRECISION array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,*n)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    }
-    if (info != 0) {
-       xerbla_("DTRSV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x := inv( A )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.) {
-                       if (nounit) {
-                           x[j] /= a[j + j * a_dim1];
-                       }
-                       temp = x[j];
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           x[i__] -= temp * a[i__ + j * a_dim1];
-/* L10: */
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.) {
-                       if (nounit) {
-                           x[jx] /= a[j + j * a_dim1];
-                       }
-                       temp = x[jx];
-                       ix = jx;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           ix -= *incx;
-                           x[ix] -= temp * a[i__ + j * a_dim1];
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.) {
-                       if (nounit) {
-                           x[j] /= a[j + j * a_dim1];
-                       }
-                       temp = x[j];
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           x[i__] -= temp * a[i__ + j * a_dim1];
-/* L50: */
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.) {
-                       if (nounit) {
-                           x[jx] /= a[j + j * a_dim1];
-                       }
-                       temp = x[jx];
-                       ix = jx;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           ix += *incx;
-                           x[ix] -= temp * a[i__ + j * a_dim1];
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := inv( A' )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp -= a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-                   }
-                   if (nounit) {
-                       temp /= a[j + j * a_dim1];
-                   }
-                   x[j] = temp;
-/* L100: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = kx;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp -= a[i__ + j * a_dim1] * x[ix];
-                       ix += *incx;
-/* L110: */
-                   }
-                   if (nounit) {
-                       temp /= a[j + j * a_dim1];
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-/* L120: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   i__1 = j + 1;
-                   for (i__ = *n; i__ >= i__1; --i__) {
-                       temp -= a[i__ + j * a_dim1] * x[i__];
-/* L130: */
-                   }
-                   if (nounit) {
-                       temp /= a[j + j * a_dim1];
-                   }
-                   x[j] = temp;
-/* L140: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = kx;
-                   i__1 = j + 1;
-                   for (i__ = *n; i__ >= i__1; --i__) {
-                       temp -= a[i__ + j * a_dim1] * x[ix];
-                       ix -= *incx;
-/* L150: */
-                   }
-                   if (nounit) {
-                       temp /= a[j + j * a_dim1];
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-/* L160: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of DTRSV . */
-
-} /* dtrsv_ */
-
-doublereal dzasum_(integer *n, doublecomplex *zx, integer *incx)
-{
-    /* System generated locals */
-    integer i__1;
-    doublereal ret_val;
-
-    /* Local variables */
-    static integer i__, ix;
-    static doublereal stemp;
-    extern doublereal dcabs1_(doublecomplex *);
-
-
-/*     takes the sum of the absolute values. */
-/*     jack dongarra, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --zx;
-
-    /* Function Body */
-    ret_val = 0.;
-    stemp = 0.;
-    if (*n <= 0 || *incx <= 0) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-
-/*        code for increment not equal to 1 */
-
-    ix = 1;
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       stemp += dcabs1_(&zx[ix]);
-       ix += *incx;
-/* L10: */
-    }
-    ret_val = stemp;
-    return ret_val;
-
-/*        code for increment equal to 1 */
-
-L20:
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       stemp += dcabs1_(&zx[i__]);
-/* L30: */
-    }
-    ret_val = stemp;
-    return ret_val;
-} /* dzasum_ */
-
-doublereal dznrm2_(integer *n, doublecomplex *x, integer *incx)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    doublereal ret_val, d__1;
-
-    /* Builtin functions */
-    double d_imag(doublecomplex *), sqrt(doublereal);
-
-    /* Local variables */
-    static integer ix;
-    static doublereal ssq, temp, norm, scale;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  DZNRM2 returns the euclidean norm of a vector via the function */
-/*  name, so that */
-
-/*     DZNRM2 := sqrt( conjg( x' )*x ) */
-
-
-
-/*  -- This version written on 25-October-1982. */
-/*     Modified on 14-October-1993 to inline the call to ZLASSQ. */
-/*     Sven Hammarling, Nag Ltd. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-    /* Parameter adjustments */
-    --x;
-
-    /* Function Body */
-    if (*n < 1 || *incx < 1) {
-       norm = 0.;
-    } else {
-       scale = 0.;
-       ssq = 1.;
-/*        The following loop is equivalent to this call to the LAPACK */
-/*        auxiliary routine: */
-/*        CALL ZLASSQ( N, X, INCX, SCALE, SSQ ) */
-
-       i__1 = (*n - 1) * *incx + 1;
-       i__2 = *incx;
-       for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
-           i__3 = ix;
-           if (x[i__3].r != 0.) {
-               i__3 = ix;
-               temp = (d__1 = x[i__3].r, abs(d__1));
-               if (scale < temp) {
-/* Computing 2nd power */
-                   d__1 = scale / temp;
-                   ssq = ssq * (d__1 * d__1) + 1.;
-                   scale = temp;
-               } else {
-/* Computing 2nd power */
-                   d__1 = temp / scale;
-                   ssq += d__1 * d__1;
-               }
-           }
-           if (d_imag(&x[ix]) != 0.) {
-               temp = (d__1 = d_imag(&x[ix]), abs(d__1));
-               if (scale < temp) {
-/* Computing 2nd power */
-                   d__1 = scale / temp;
-                   ssq = ssq * (d__1 * d__1) + 1.;
-                   scale = temp;
-               } else {
-/* Computing 2nd power */
-                   d__1 = temp / scale;
-                   ssq += d__1 * d__1;
-               }
-           }
-/* L10: */
-       }
-       norm = scale * sqrt(ssq);
-    }
-
-    ret_val = norm;
-    return ret_val;
-
-/*     End of DZNRM2. */
-
-} /* dznrm2_ */
-
-integer icamax_(integer *n, complex *cx, integer *incx)
-{
-    /* System generated locals */
-    integer ret_val, i__1, i__2;
-    real r__1, r__2;
-
-    /* Builtin functions */
-    double r_imag(complex *);
-
-    /* Local variables */
-    static integer i__, ix;
-    static real smax;
-
-
-/*     finds the index of element having max. absolute value. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --cx;
-
-    /* Function Body */
-    ret_val = 0;
-    if (*n < 1 || *incx <= 0) {
-       return ret_val;
-    }
-    ret_val = 1;
-    if (*n == 1) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-
-/*        code for increment not equal to 1 */
-
-    ix = 1;
-    smax = (r__1 = cx[1].r, dabs(r__1)) + (r__2 = r_imag(&cx[1]), dabs(r__2));
-    ix += *incx;
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       if ((r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[ix]), dabs(
-               r__2)) <= smax) {
-           goto L5;
-       }
-       ret_val = i__;
-       i__2 = ix;
-       smax = (r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[ix]), 
-               dabs(r__2));
-L5:
-       ix += *incx;
-/* L10: */
-    }
-    return ret_val;
-
-/*        code for increment equal to 1 */
-
-L20:
-    smax = (r__1 = cx[1].r, dabs(r__1)) + (r__2 = r_imag(&cx[1]), dabs(r__2));
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       if ((r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[i__]), dabs(
-               r__2)) <= smax) {
-           goto L30;
-       }
-       ret_val = i__;
-       i__2 = i__;
-       smax = (r__1 = cx[i__2].r, dabs(r__1)) + (r__2 = r_imag(&cx[i__]), 
-               dabs(r__2));
-L30:
-       ;
-    }
-    return ret_val;
-} /* icamax_ */
-
-integer idamax_(integer *n, doublereal *dx, integer *incx)
-{
-    /* System generated locals */
-    integer ret_val, i__1;
-    doublereal d__1;
-
-    /* Local variables */
-    static integer i__, ix;
-    static doublereal dmax__;
-
-
-/*     finds the index of element having max. absolute value. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --dx;
-
-    /* Function Body */
-    ret_val = 0;
-    if (*n < 1 || *incx <= 0) {
-       return ret_val;
-    }
-    ret_val = 1;
-    if (*n == 1) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-
-/*        code for increment not equal to 1 */
-
-    ix = 1;
-    dmax__ = abs(dx[1]);
-    ix += *incx;
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-       if ((d__1 = dx[ix], abs(d__1)) <= dmax__) {
-           goto L5;
-       }
-       ret_val = i__;
-       dmax__ = (d__1 = dx[ix], abs(d__1));
-L5:
-       ix += *incx;
-/* L10: */
-    }
-    return ret_val;
-
-/*        code for increment equal to 1 */
-
-L20:
-    dmax__ = abs(dx[1]);
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-       if ((d__1 = dx[i__], abs(d__1)) <= dmax__) {
-           goto L30;
-       }
-       ret_val = i__;
-       dmax__ = (d__1 = dx[i__], abs(d__1));
-L30:
-       ;
-    }
-    return ret_val;
-} /* idamax_ */
-
-integer isamax_(integer *n, real *sx, integer *incx)
-{
-    /* System generated locals */
-    integer ret_val, i__1;
-    real r__1;
-
-    /* Local variables */
-    static integer i__, ix;
-    static real smax;
-
-
-/*     finds the index of element having max. absolute value. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --sx;
-
-    /* Function Body */
-    ret_val = 0;
-    if (*n < 1 || *incx <= 0) {
-       return ret_val;
-    }
-    ret_val = 1;
-    if (*n == 1) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-
-/*        code for increment not equal to 1 */
-
-    ix = 1;
-    smax = dabs(sx[1]);
-    ix += *incx;
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-       if ((r__1 = sx[ix], dabs(r__1)) <= smax) {
-           goto L5;
-       }
-       ret_val = i__;
-       smax = (r__1 = sx[ix], dabs(r__1));
-L5:
-       ix += *incx;
-/* L10: */
-    }
-    return ret_val;
-
-/*        code for increment equal to 1 */
-
-L20:
-    smax = dabs(sx[1]);
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-       if ((r__1 = sx[i__], dabs(r__1)) <= smax) {
-           goto L30;
-       }
-       ret_val = i__;
-       smax = (r__1 = sx[i__], dabs(r__1));
-L30:
-       ;
-    }
-    return ret_val;
-} /* isamax_ */
-
-integer izamax_(integer *n, doublecomplex *zx, integer *incx)
-{
-    /* System generated locals */
-    integer ret_val, i__1;
-
-    /* Local variables */
-    static integer i__, ix;
-    static doublereal smax;
-    extern doublereal dcabs1_(doublecomplex *);
-
-
-/*     finds the index of element having max. absolute value. */
-/*     jack dongarra, 1/15/85. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --zx;
-
-    /* Function Body */
-    ret_val = 0;
-    if (*n < 1 || *incx <= 0) {
-       return ret_val;
-    }
-    ret_val = 1;
-    if (*n == 1) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-
-/*        code for increment not equal to 1 */
-
-    ix = 1;
-    smax = dcabs1_(&zx[1]);
-    ix += *incx;
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-       if (dcabs1_(&zx[ix]) <= smax) {
-           goto L5;
-       }
-       ret_val = i__;
-       smax = dcabs1_(&zx[ix]);
-L5:
-       ix += *incx;
-/* L10: */
-    }
-    return ret_val;
-
-/*        code for increment equal to 1 */
-
-L20:
-    smax = dcabs1_(&zx[1]);
-    i__1 = *n;
-    for (i__ = 2; i__ <= i__1; ++i__) {
-       if (dcabs1_(&zx[i__]) <= smax) {
-           goto L30;
-       }
-       ret_val = i__;
-       smax = dcabs1_(&zx[i__]);
-L30:
-       ;
-    }
-    return ret_val;
-} /* izamax_ */
-
-
-doublereal sasum_(integer *n, real *sx, integer *incx)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-    real ret_val, r__1, r__2, r__3, r__4, r__5, r__6;
-
-    /* Local variables */
-    static integer i__, m, mp1, nincx;
-    static real stemp;
-
-
-/*     takes the sum of the absolute values. */
-/*     uses unrolled loops for increment equal to one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --sx;
-
-    /* Function Body */
-    ret_val = 0.f;
-    stemp = 0.f;
-    if (*n <= 0 || *incx <= 0) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-
-/*        code for increment not equal to 1 */
-
-    nincx = *n * *incx;
-    i__1 = nincx;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       stemp += (r__1 = sx[i__], dabs(r__1));
-/* L10: */
-    }
-    ret_val = stemp;
-    return ret_val;
-
-/*        code for increment equal to 1 */
-
-
-/*        clean-up loop */
-
-L20:
-    m = *n % 6;
-    if (m == 0) {
-       goto L40;
-    }
-    i__2 = m;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       stemp += (r__1 = sx[i__], dabs(r__1));
-/* L30: */
-    }
-    if (*n < 6) {
-       goto L60;
-    }
-L40:
-    mp1 = m + 1;
-    i__2 = *n;
-    for (i__ = mp1; i__ <= i__2; i__ += 6) {
-       stemp = stemp + (r__1 = sx[i__], dabs(r__1)) + (r__2 = sx[i__ + 1], 
-               dabs(r__2)) + (r__3 = sx[i__ + 2], dabs(r__3)) + (r__4 = sx[
-               i__ + 3], dabs(r__4)) + (r__5 = sx[i__ + 4], dabs(r__5)) + (
-               r__6 = sx[i__ + 5], dabs(r__6));
-/* L50: */
-    }
-L60:
-    ret_val = stemp;
-    return ret_val;
-} /* sasum_ */
-
-/* Subroutine */ int saxpy_(integer *n, real *sa, real *sx, integer *incx, 
-       real *sy, integer *incy)
-{
-    /* System generated locals */
-    integer i__1;
-
-    /* Local variables */
-    static integer i__, m, ix, iy, mp1;
-
-
-/*     constant times a vector plus a vector. */
-/*     uses unrolled loop for increments equal to one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --sy;
-    --sx;
-
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*sa == 0.f) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       sy[iy] += *sa * sx[ix];
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*        code for both increments equal to 1 */
-
-
-/*        clean-up loop */
-
-L20:
-    m = *n % 4;
-    if (m == 0) {
-       goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       sy[i__] += *sa * sx[i__];
-/* L30: */
-    }
-    if (*n < 4) {
-       return 0;
-    }
-L40:
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 4) {
-       sy[i__] += *sa * sx[i__];
-       sy[i__ + 1] += *sa * sx[i__ + 1];
-       sy[i__ + 2] += *sa * sx[i__ + 2];
-       sy[i__ + 3] += *sa * sx[i__ + 3];
-/* L50: */
-    }
-    return 0;
-} /* saxpy_ */
-
-doublereal scasum_(integer *n, complex *cx, integer *incx)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    real ret_val, r__1, r__2;
-
-    /* Builtin functions */
-    double r_imag(complex *);
-
-    /* Local variables */
-    static integer i__, nincx;
-    static real stemp;
-
-
-/*     takes the sum of the absolute values of a complex vector and */
-/*     returns a single precision result. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --cx;
-
-    /* Function Body */
-    ret_val = 0.f;
-    stemp = 0.f;
-    if (*n <= 0 || *incx <= 0) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-
-/*        code for increment not equal to 1 */
-
-    nincx = *n * *incx;
-    i__1 = nincx;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       i__3 = i__;
-       stemp = stemp + (r__1 = cx[i__3].r, dabs(r__1)) + (r__2 = r_imag(&cx[
-               i__]), dabs(r__2));
-/* L10: */
-    }
-    ret_val = stemp;
-    return ret_val;
-
-/*        code for increment equal to 1 */
-
-L20:
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       i__1 = i__;
-       stemp = stemp + (r__1 = cx[i__1].r, dabs(r__1)) + (r__2 = r_imag(&cx[
-               i__]), dabs(r__2));
-/* L30: */
-    }
-    ret_val = stemp;
-    return ret_val;
-} /* scasum_ */
-
-doublereal scnrm2_(integer *n, complex *x, integer *incx)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    real ret_val, r__1;
-
-    /* Builtin functions */
-    double r_imag(complex *), sqrt(doublereal);
-
-    /* Local variables */
-    static integer ix;
-    static real ssq, temp, norm, scale;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  SCNRM2 returns the euclidean norm of a vector via the function */
-/*  name, so that */
-
-/*     SCNRM2 := sqrt( conjg( x' )*x ) */
-
-
-
-/*  -- This version written on 25-October-1982. */
-/*     Modified on 14-October-1993 to inline the call to CLASSQ. */
-/*     Sven Hammarling, Nag Ltd. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-    /* Parameter adjustments */
-    --x;
-
-    /* Function Body */
-    if (*n < 1 || *incx < 1) {
-       norm = 0.f;
-    } else {
-       scale = 0.f;
-       ssq = 1.f;
-/*        The following loop is equivalent to this call to the LAPACK */
-/*        auxiliary routine: */
-/*        CALL CLASSQ( N, X, INCX, SCALE, SSQ ) */
-
-       i__1 = (*n - 1) * *incx + 1;
-       i__2 = *incx;
-       for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
-           i__3 = ix;
-           if (x[i__3].r != 0.f) {
-               i__3 = ix;
-               temp = (r__1 = x[i__3].r, dabs(r__1));
-               if (scale < temp) {
-/* Computing 2nd power */
-                   r__1 = scale / temp;
-                   ssq = ssq * (r__1 * r__1) + 1.f;
-                   scale = temp;
-               } else {
-/* Computing 2nd power */
-                   r__1 = temp / scale;
-                   ssq += r__1 * r__1;
-               }
-           }
-           if (r_imag(&x[ix]) != 0.f) {
-               temp = (r__1 = r_imag(&x[ix]), dabs(r__1));
-               if (scale < temp) {
-/* Computing 2nd power */
-                   r__1 = scale / temp;
-                   ssq = ssq * (r__1 * r__1) + 1.f;
-                   scale = temp;
-               } else {
-/* Computing 2nd power */
-                   r__1 = temp / scale;
-                   ssq += r__1 * r__1;
-               }
-           }
-/* L10: */
-       }
-       norm = scale * sqrt(ssq);
-    }
-
-    ret_val = norm;
-    return ret_val;
-
-/*     End of SCNRM2. */
-
-} /* scnrm2_ */
-
-/* Subroutine */ int scopy_(integer *n, real *sx, integer *incx, real *sy, 
-       integer *incy)
-{
-    /* System generated locals */
-    integer i__1;
-
-    /* Local variables */
-    static integer i__, m, ix, iy, mp1;
-
-
-/*     copies a vector, x, to a vector, y. */
-/*     uses unrolled loops for increments equal to 1. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --sy;
-    --sx;
-
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       sy[iy] = sx[ix];
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*        code for both increments equal to 1 */
-
-
-/*        clean-up loop */
-
-L20:
-    m = *n % 7;
-    if (m == 0) {
-       goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       sy[i__] = sx[i__];
-/* L30: */
-    }
-    if (*n < 7) {
-       return 0;
-    }
-L40:
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 7) {
-       sy[i__] = sx[i__];
-       sy[i__ + 1] = sx[i__ + 1];
-       sy[i__ + 2] = sx[i__ + 2];
-       sy[i__ + 3] = sx[i__ + 3];
-       sy[i__ + 4] = sx[i__ + 4];
-       sy[i__ + 5] = sx[i__ + 5];
-       sy[i__ + 6] = sx[i__ + 6];
-/* L50: */
-    }
-    return 0;
-} /* scopy_ */
-
-doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy)
-{
-    /* System generated locals */
-    integer i__1;
-    real ret_val;
-
-    /* Local variables */
-    static integer i__, m, ix, iy, mp1;
-    static real stemp;
-
-
-/*     forms the dot product of two vectors. */
-/*     uses unrolled loops for increments equal to one. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --sy;
-    --sx;
-
-    /* Function Body */
-    stemp = 0.f;
-    ret_val = 0.f;
-    if (*n <= 0) {
-       return ret_val;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       stemp += sx[ix] * sy[iy];
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    ret_val = stemp;
-    return ret_val;
-
-/*        code for both increments equal to 1 */
-
-
-/*        clean-up loop */
-
-L20:
-    m = *n % 5;
-    if (m == 0) {
-       goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       stemp += sx[i__] * sy[i__];
-/* L30: */
-    }
-    if (*n < 5) {
-       goto L60;
-    }
-L40:
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 5) {
-       stemp = stemp + sx[i__] * sy[i__] + sx[i__ + 1] * sy[i__ + 1] + sx[
-               i__ + 2] * sy[i__ + 2] + sx[i__ + 3] * sy[i__ + 3] + sx[i__ + 
-               4] * sy[i__ + 4];
-/* L50: */
-    }
-L60:
-    ret_val = stemp;
-    return ret_val;
-} /* sdot_ */
-
-/* DECK SDSDOT */
-doublereal sdsdot_(integer *n, real *sb, real *sx, integer *incx, real *sy, 
-       integer *incy)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-    real ret_val;
-
-    /* Local variables */
-    static integer i__, ns, kx, ky;
-    static doublereal dsdot;
-
-/* ***BEGIN PROLOGUE  SDSDOT */
-/* ***PURPOSE  Compute the inner product of two vectors with extended */
-/*            precision accumulation. */
-/* ***LIBRARY   SLATEC (BLAS) */
-/* ***CATEGORY  D1A4 */
-/* ***TYPE      SINGLE PRECISION (SDSDOT-S, CDCDOT-C) */
-/* ***KEYWORDS  BLAS, DOT PRODUCT, INNER PRODUCT, LINEAR ALGEBRA, VECTOR */
-/* ***AUTHOR  Lawson, C. L., (JPL) */
-/*           Hanson, R. J., (SNLA) */
-/*           Kincaid, D. R., (U. of Texas) */
-/*           Krogh, F. T., (JPL) */
-/* ***DESCRIPTION */
-
-/*                B L A S  Subprogram */
-/*    Description of Parameters */
-
-/*     --Input-- */
-/*        N  number of elements in input vector(s) */
-/*       SB  single precision scalar to be added to inner product */
-/*       SX  single precision vector with N elements */
-/*     INCX  storage spacing between elements of SX */
-/*       SY  single precision vector with N elements */
-/*     INCY  storage spacing between elements of SY */
-
-/*     --Output-- */
-/*   SDSDOT  single precision dot product (SB if N .LE. 0) */
-
-/*     Returns S.P. result with dot product accumulated in D.P. */
-/*     SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY), */
-/*     where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is */
-/*     defined in a similar way using INCY. */
-
-/* ***REFERENCES  C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T. */
-/*                 Krogh, Basic linear algebra subprograms for Fortran */
-/*                 usage, Algorithm No. 539, Transactions on Mathematical */
-/*                 Software 5, 3 (September 1979), pp. 308-323. */
-/* ***ROUTINES CALLED  (NONE) */
-/* ***REVISION HISTORY  (YYMMDD) */
-/*   791001  DATE WRITTEN */
-/*   890531  Changed all specific intrinsics to generic.  (WRB) */
-/*   890831  Modified array declarations.  (WRB) */
-/*   890831  REVISION DATE from Version 3.2 */
-/*   891214  Prologue converted to Version 4.0 format.  (BAB) */
-/*   920310  Corrected definition of LX in DESCRIPTION.  (WRB) */
-/*   920501  Reformatted the REFERENCES section.  (WRB) */
-/* ***END PROLOGUE  SDSDOT */
-/* ***FIRST EXECUTABLE STATEMENT  SDSDOT */
-    /* Parameter adjustments */
-    --sy;
-    --sx;
-
-    /* Function Body */
-    dsdot = *sb;
-    if (*n <= 0) {
-       goto L30;
-    }
-    if (*incx == *incy && *incx > 0) {
-       goto L40;
-    }
-
-/*     Code for unequal or nonpositive increments. */
-
-    kx = 1;
-    ky = 1;
-    if (*incx < 0) {
-       kx = (1 - *n) * *incx + 1;
-    }
-    if (*incy < 0) {
-       ky = (1 - *n) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       dsdot += (doublereal) sx[kx] * (doublereal) sy[ky];
-       kx += *incx;
-       ky += *incy;
-/* L10: */
-    }
-L30:
-    ret_val = dsdot;
-    return ret_val;
-
-/*     Code for equal and positive increments. */
-
-L40:
-    ns = *n * *incx;
-    i__1 = ns;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       dsdot += (doublereal) sx[i__] * (doublereal) sy[i__];
-/* L50: */
-    }
-    ret_val = dsdot;
-    return ret_val;
-} /* sdsdot_ */
-
-/* Subroutine */ int sgbmv_(char *trans, integer *m, integer *n, integer *kl, 
-       integer *ku, real *alpha, real *a, integer *lda, real *x, integer *
-       incx, real *beta, real *y, integer *incy, ftnlen trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
-
-    /* Local variables */
-    static integer i__, j, k, ix, iy, jx, jy, kx, ky, kup1, info;
-    static real temp;
-    static integer lenx, leny;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SGBMV  performs one of the matrix-vector operations */
-
-/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are vectors and A is an */
-/*  m by n band matrix, with kl sub-diagonals and ku super-diagonals. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
-
-/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
-
-/*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  KL     - INTEGER. */
-/*           On entry, KL specifies the number of sub-diagonals of the */
-/*           matrix A. KL must satisfy  0 .le. KL. */
-/*           Unchanged on exit. */
-
-/*  KU     - INTEGER. */
-/*           On entry, KU specifies the number of super-diagonals of the */
-/*           matrix A. KU must satisfy  0 .le. KU. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading ( kl + ku + 1 ) by n part of the */
-/*           array A must contain the matrix of coefficients, supplied */
-/*           column by column, with the leading diagonal of the matrix in */
-/*           row ( ku + 1 ) of the array, the first super-diagonal */
-/*           starting at position 2 in row ku, the first sub-diagonal */
-/*           starting at position 1 in row ( ku + 2 ), and so on. */
-/*           Elements in the array A that do not correspond to elements */
-/*           in the band matrix (such as the top left ku by ku triangle) */
-/*           are not referenced. */
-/*           The following program segment will transfer a band matrix */
-/*           from conventional full matrix storage to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    K = KU + 1 - J */
-/*                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) */
-/*                       A( K + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( kl + ku + 1 ). */
-/*           Unchanged on exit. */
-
-/*  X      - REAL             array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  Y      - REAL             array of DIMENSION at least */
-/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "T", (
-           ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (ftnlen)1)
-           ) {
-       info = 1;
-    } else if (*m < 0) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*kl < 0) {
-       info = 4;
-    } else if (*ku < 0) {
-       info = 5;
-    } else if (*lda < *kl + *ku + 1) {
-       info = 8;
-    } else if (*incx == 0) {
-       info = 10;
-    } else if (*incy == 0) {
-       info = 13;
-    }
-    if (info != 0) {
-       xerbla_("SGBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) {
-       return 0;
-    }
-
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       lenx = *n;
-       leny = *m;
-    } else {
-       lenx = *m;
-       leny = *n;
-    }
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (lenx - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (leny - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the band part of A. */
-
-/*     First form  y := beta*y. */
-
-    if (*beta != 1.f) {
-       if (*incy == 1) {
-           if (*beta == 0.f) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.f) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.f) {
-       return 0;
-    }
-    kup1 = *ku + 1;
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y := alpha*A*x + y. */
-
-       jx = kx;
-       if (*incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f) {
-                   temp = *alpha * x[jx];
-                   k = kup1 - j;
-/* Computing MAX */
-                   i__2 = 1, i__3 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__4 = min(i__5,i__6);
-                   for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                       y[i__] += temp * a[k + i__ + j * a_dim1];
-/* L50: */
-                   }
-               }
-               jx += *incx;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f) {
-                   temp = *alpha * x[jx];
-                   iy = ky;
-                   k = kup1 - j;
-/* Computing MAX */
-                   i__4 = 1, i__2 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__3 = min(i__5,i__6);
-                   for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                       y[iy] += temp * a[k + i__ + j * a_dim1];
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               if (j > *ku) {
-                   ky += *incy;
-               }
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y := alpha*A'*x + y. */
-
-       jy = ky;
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp = 0.f;
-               k = kup1 - j;
-/* Computing MAX */
-               i__3 = 1, i__4 = j - *ku;
-/* Computing MIN */
-               i__5 = *m, i__6 = j + *kl;
-               i__2 = min(i__5,i__6);
-               for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) {
-                   temp += a[k + i__ + j * a_dim1] * x[i__];
-/* L90: */
-               }
-               y[jy] += *alpha * temp;
-               jy += *incy;
-/* L100: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp = 0.f;
-               ix = kx;
-               k = kup1 - j;
-/* Computing MAX */
-               i__2 = 1, i__3 = j - *ku;
-/* Computing MIN */
-               i__5 = *m, i__6 = j + *kl;
-               i__4 = min(i__5,i__6);
-               for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                   temp += a[k + i__ + j * a_dim1] * x[ix];
-                   ix += *incx;
-/* L110: */
-               }
-               y[jy] += *alpha * temp;
-               jy += *incy;
-               if (j > *ku) {
-                   kx += *incx;
-               }
-/* L120: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of SGBMV . */
-
-} /* sgbmv_ */
-
-/* Subroutine */ int sgemm_(char *transa, char *transb, integer *m, integer *
-       n, integer *k, real *alpha, real *a, integer *lda, real *b, integer *
-       ldb, real *beta, real *c__, integer *ldc, ftnlen transa_len, ftnlen 
-       transb_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3;
-
-    /* Local variables */
-    static integer i__, j, l, info;
-    static logical nota, notb;
-    static real temp;
-    static integer ncola;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa, nrowb;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SGEMM  performs one of the matrix-matrix operations */
-
-/*     C := alpha*op( A )*op( B ) + beta*C, */
-
-/*  where  op( X ) is one of */
-
-/*     op( X ) = X   or   op( X ) = X', */
-
-/*  alpha and beta are scalars, and A, B and C are matrices, with op( A ) */
-/*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  TRANSA - CHARACTER*1. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-
-/*              TRANSA = 'N' or 'n',  op( A ) = A. */
-
-/*              TRANSA = 'T' or 't',  op( A ) = A'. */
-
-/*              TRANSA = 'C' or 'c',  op( A ) = A'. */
-
-/*           Unchanged on exit. */
-
-/*  TRANSB - CHARACTER*1. */
-/*           On entry, TRANSB specifies the form of op( B ) to be used in */
-/*           the matrix multiplication as follows: */
-
-/*              TRANSB = 'N' or 'n',  op( B ) = B. */
-
-/*              TRANSB = 'T' or 't',  op( B ) = B'. */
-
-/*              TRANSB = 'C' or 'c',  op( B ) = B'. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies  the number  of rows  of the  matrix */
-/*           op( A )  and of the  matrix  C.  M  must  be at least  zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry,  N  specifies the number  of columns of the matrix */
-/*           op( B ) and the number of columns of the matrix C. N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry,  K  specifies  the number of columns of the matrix */
-/*           op( A ) and the number of rows of the matrix op( B ). K must */
-/*           be at least  zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise. */
-/*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by m  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  B      - REAL             array of DIMENSION ( LDB, kb ), where kb is */
-/*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise. */
-/*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  n by k  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then */
-/*           LDB must be at least  max( 1, k ), otherwise  LDB must be at */
-/*           least  max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - REAL            . */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  C      - REAL             array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n  matrix */
-/*           ( alpha*op( A )*op( B ) + beta*C ). */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not */
-/*     transposed and set  NROWA, NCOLA and  NROWB  as the number of rows */
-/*     and  columns of  A  and the  number of  rows  of  B  respectively. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    nota = lsame_(transa, "N", (ftnlen)1, (ftnlen)1);
-    notb = lsame_(transb, "N", (ftnlen)1, (ftnlen)1);
-    if (nota) {
-       nrowa = *m;
-       ncola = *k;
-    } else {
-       nrowa = *k;
-       ncola = *m;
-    }
-    if (notb) {
-       nrowb = *k;
-    } else {
-       nrowb = *n;
-    }
-
-/*     Test the input parameters. */
-
-    info = 0;
-    if (! nota && ! lsame_(transa, "C", (ftnlen)1, (ftnlen)1) && ! lsame_(
-           transa, "T", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! notb && ! lsame_(transb, "C", (ftnlen)1, (ftnlen)1) && ! 
-           lsame_(transb, "T", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < max(1,nrowa)) {
-       info = 8;
-    } else if (*ldb < max(1,nrowb)) {
-       info = 10;
-    } else if (*ldc < max(1,*m)) {
-       info = 13;
-    }
-    if (info != 0) {
-       xerbla_("SGEMM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) {
-       return 0;
-    }
-
-/*     And if  alpha.eq.zero. */
-
-    if (*alpha == 0.f) {
-       if (*beta == 0.f) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = 0.f;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (notb) {
-       if (nota) {
-
-/*           Form  C := alpha*A*B + beta*C. */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L50: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L60: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (b[l + j * b_dim1] != 0.f) {
-                       temp = *alpha * b[l + j * b_dim1];
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] += temp * a[i__ + l * 
-                                   a_dim1];
-/* L70: */
-                       }
-                   }
-/* L80: */
-               }
-/* L90: */
-           }
-       } else {
-
-/*           Form  C := alpha*A'*B + beta*C */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp += a[l + i__ * a_dim1] * b[l + j * b_dim1];
-/* L100: */
-                   }
-                   if (*beta == 0.f) {
-                       c__[i__ + j * c_dim1] = *alpha * temp;
-                   } else {
-                       c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
-                               i__ + j * c_dim1];
-                   }
-/* L110: */
-               }
-/* L120: */
-           }
-       }
-    } else {
-       if (nota) {
-
-/*           Form  C := alpha*A*B' + beta*C */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L130: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L140: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (b[j + l * b_dim1] != 0.f) {
-                       temp = *alpha * b[j + l * b_dim1];
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] += temp * a[i__ + l * 
-                                   a_dim1];
-/* L150: */
-                       }
-                   }
-/* L160: */
-               }
-/* L170: */
-           }
-       } else {
-
-/*           Form  C := alpha*A'*B' + beta*C */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp += a[l + i__ * a_dim1] * b[j + l * b_dim1];
-/* L180: */
-                   }
-                   if (*beta == 0.f) {
-                       c__[i__ + j * c_dim1] = *alpha * temp;
-                   } else {
-                       c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
-                               i__ + j * c_dim1];
-                   }
-/* L190: */
-               }
-/* L200: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of SGEMM . */
-
-} /* sgemm_ */
-
-/* Subroutine */ int sgemv_(char *trans, integer *m, integer *n, real *alpha, 
-       real *a, integer *lda, real *x, integer *incx, real *beta, real *y, 
-       integer *incy, ftnlen trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static real temp;
-    static integer lenx, leny;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SGEMV  performs one of the matrix-vector operations */
-
-/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are vectors and A is an */
-/*  m by n matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
-
-/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
-
-/*              TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-/*  X      - REAL             array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  Y      - REAL             array of DIMENSION at least */
-/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
-/*           Before entry with BETA non-zero, the incremented array Y */
-/*           must contain the vector y. On exit, Y is overwritten by the */
-/*           updated vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "T", (
-           ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (ftnlen)1)
-           ) {
-       info = 1;
-    } else if (*m < 0) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*lda < max(1,*m)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    } else if (*incy == 0) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("SGEMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) {
-       return 0;
-    }
-
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       lenx = *n;
-       leny = *m;
-    } else {
-       lenx = *m;
-       leny = *n;
-    }
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (lenx - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (leny - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-/*     First form  y := beta*y. */
-
-    if (*beta != 1.f) {
-       if (*incy == 1) {
-           if (*beta == 0.f) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.f) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.f) {
-       return 0;
-    }
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y := alpha*A*x + y. */
-
-       jx = kx;
-       if (*incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f) {
-                   temp = *alpha * x[jx];
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       y[i__] += temp * a[i__ + j * a_dim1];
-/* L50: */
-                   }
-               }
-               jx += *incx;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f) {
-                   temp = *alpha * x[jx];
-                   iy = ky;
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       y[iy] += temp * a[i__ + j * a_dim1];
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y := alpha*A'*x + y. */
-
-       jy = ky;
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp = 0.f;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp += a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-               }
-               y[jy] += *alpha * temp;
-               jy += *incy;
-/* L100: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp = 0.f;
-               ix = kx;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp += a[i__ + j * a_dim1] * x[ix];
-                   ix += *incx;
-/* L110: */
-               }
-               y[jy] += *alpha * temp;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of SGEMV . */
-
-} /* sgemv_ */
-
-/* Subroutine */ int sger_(integer *m, integer *n, real *alpha, real *x, 
-       integer *incx, real *y, integer *incy, real *a, integer *lda)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, ix, jy, kx, info;
-    static real temp;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SGER   performs the rank 1 operation */
-
-/*     A := alpha*x*y' + A, */
-
-/*  where alpha is a scalar, x is an m element vector, y is an n element */
-/*  vector and A is an m by n matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the m */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  Y      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. On exit, A is */
-/*           overwritten by the updated matrix. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-
-    /* Function Body */
-    info = 0;
-    if (*m < 0) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*m)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("SGER  ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || *alpha == 0.f) {
-       return 0;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (*incy > 0) {
-       jy = 1;
-    } else {
-       jy = 1 - (*n - 1) * *incy;
-    }
-    if (*incx == 1) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           if (y[jy] != 0.f) {
-               temp = *alpha * y[jy];
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   a[i__ + j * a_dim1] += x[i__] * temp;
-/* L10: */
-               }
-           }
-           jy += *incy;
-/* L20: */
-       }
-    } else {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*m - 1) * *incx;
-       }
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           if (y[jy] != 0.f) {
-               temp = *alpha * y[jy];
-               ix = kx;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   a[i__ + j * a_dim1] += x[ix] * temp;
-                   ix += *incx;
-/* L30: */
-               }
-           }
-           jy += *incy;
-/* L40: */
-       }
-    }
-
-    return 0;
-
-/*     End of SGER  . */
-
-} /* sger_ */
-
-doublereal snrm2_(integer *n, real *x, integer *incx)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-    real ret_val, r__1;
-
-    /* Builtin functions */
-    double sqrt(doublereal);
-
-    /* Local variables */
-    static integer ix;
-    static real ssq, norm, scale, absxi;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  SNRM2 returns the euclidean norm of a vector via the function */
-/*  name, so that */
-
-/*     SNRM2 := sqrt( x'*x ) */
-
-
-
-/*  -- This version written on 25-October-1982. */
-/*     Modified on 14-October-1993 to inline the call to SLASSQ. */
-/*     Sven Hammarling, Nag Ltd. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-    /* Parameter adjustments */
-    --x;
-
-    /* Function Body */
-    if (*n < 1 || *incx < 1) {
-       norm = 0.f;
-    } else if (*n == 1) {
-       norm = dabs(x[1]);
-    } else {
-       scale = 0.f;
-       ssq = 1.f;
-/*        The following loop is equivalent to this call to the LAPACK */
-/*        auxiliary routine: */
-/*        CALL SLASSQ( N, X, INCX, SCALE, SSQ ) */
-
-       i__1 = (*n - 1) * *incx + 1;
-       i__2 = *incx;
-       for (ix = 1; i__2 < 0 ? ix >= i__1 : ix <= i__1; ix += i__2) {
-           if (x[ix] != 0.f) {
-               absxi = (r__1 = x[ix], dabs(r__1));
-               if (scale < absxi) {
-/* Computing 2nd power */
-                   r__1 = scale / absxi;
-                   ssq = ssq * (r__1 * r__1) + 1.f;
-                   scale = absxi;
-               } else {
-/* Computing 2nd power */
-                   r__1 = absxi / scale;
-                   ssq += r__1 * r__1;
-               }
-           }
-/* L10: */
-       }
-       norm = scale * sqrt(ssq);
-    }
-
-    ret_val = norm;
-    return ret_val;
-
-/*     End of SNRM2. */
-
-} /* snrm2_ */
-
-/* Subroutine */ int srot_(integer *n, real *sx, integer *incx, real *sy, 
-       integer *incy, real *c__, real *s)
-{
-    /* System generated locals */
-    integer i__1;
-
-    /* Local variables */
-    static integer i__, ix, iy;
-    static real stemp;
-
-
-/*     applies a plane rotation. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --sy;
-    --sx;
-
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*       code for unequal increments or equal increments not equal */
-/*         to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       stemp = *c__ * sx[ix] + *s * sy[iy];
-       sy[iy] = *c__ * sy[iy] - *s * sx[ix];
-       sx[ix] = stemp;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*       code for both increments equal to 1 */
-
-L20:
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       stemp = *c__ * sx[i__] + *s * sy[i__];
-       sy[i__] = *c__ * sy[i__] - *s * sx[i__];
-       sx[i__] = stemp;
-/* L30: */
-    }
-    return 0;
-} /* srot_ */
-
-double r_sign(real *a, real *b)
- {
- double x;
- x = (*a >= 0 ? *a : - *a);
- return( *b >= 0 ? x : -x);
- }
-
-/* Subroutine */ int srotg_(real *sa, real *sb, real *c__, real *s)
-{
-    /* System generated locals */
-    real r__1, r__2;
-
-    /* Builtin functions */
-    double sqrt(doublereal), r_sign(real *, real *);
-
-    /* Local variables */
-    static real r__, z__, roe, scale;
-
-
-/*     construct givens plane rotation. */
-/*     jack dongarra, linpack, 3/11/78. */
-
-
-    roe = *sb;
-    if (dabs(*sa) > dabs(*sb)) {
-       roe = *sa;
-    }
-    scale = dabs(*sa) + dabs(*sb);
-    if (scale != 0.f) {
-       goto L10;
-    }
-    *c__ = 1.f;
-    *s = 0.f;
-    r__ = 0.f;
-    z__ = 0.f;
-    goto L20;
-L10:
-/* Computing 2nd power */
-    r__1 = *sa / scale;
-/* Computing 2nd power */
-    r__2 = *sb / scale;
-    r__ = scale * sqrt(r__1 * r__1 + r__2 * r__2);
-    r__ = r_sign(&c_b1543, &roe) * r__;
-    *c__ = *sa / r__;
-    *s = *sb / r__;
-    z__ = 1.f;
-    if (dabs(*sa) > dabs(*sb)) {
-       z__ = *s;
-    }
-    if (dabs(*sb) >= dabs(*sa) && *c__ != 0.f) {
-       z__ = 1.f / *c__;
-    }
-L20:
-    *sa = r__;
-    *sb = z__;
-    return 0;
-} /* srotg_ */
-
-/* Subroutine */ int srotm_(integer *n, real *sx, integer *incx, real *sy, 
-       integer *incy, real *sparam)
-{
-    /* Initialized data */
-
-    static real zero = 0.f;
-    static real two = 2.f;
-
-    /* System generated locals */
-    integer i__1, i__2;
-
-    /* Local variables */
-    static integer i__;
-    static real w, z__;
-    static integer kx, ky;
-    static real sh11, sh12, sh21, sh22, sflag;
-    static integer nsteps;
-
-
-/*     APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX */
-
-/*     (SX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF SX ARE IN */
-/*     (DX**T) */
-
-/*     SX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE */
-/*     LX = (-INCX)*N, AND SIMILARLY FOR SY USING USING LY AND INCY. */
-/*     WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
-
-/*     SFLAG=-1.E0     SFLAG=0.E0        SFLAG=1.E0     SFLAG=-2.E0 */
-
-/*       (SH11  SH12)    (1.E0  SH12)    (SH11  1.E0)    (1.E0  0.E0) */
-/*     H=(          )    (          )    (          )    (          ) */
-/*       (SH21  SH22),   (SH21  1.E0),   (-1.E0 SH22),   (0.E0  1.E0). */
-/*     SEE  SROTMG FOR A DESCRIPTION OF DATA STORAGE IN SPARAM. */
-
-    /* Parameter adjustments */
-    --sparam;
-    --sy;
-    --sx;
-
-    /* Function Body */
-
-    sflag = sparam[1];
-    if (*n <= 0 || sflag + two == zero) {
-       goto L140;
-    }
-    if (! (*incx == *incy && *incx > 0)) {
-       goto L70;
-    }
-
-    nsteps = *n * *incx;
-    if (sflag < 0.f) {
-       goto L50;
-    } else if (sflag == 0) {
-       goto L10;
-    } else {
-       goto L30;
-    }
-L10:
-    sh12 = sparam[4];
-    sh21 = sparam[3];
-    i__1 = nsteps;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       w = sx[i__];
-       z__ = sy[i__];
-       sx[i__] = w + z__ * sh12;
-       sy[i__] = w * sh21 + z__;
-/* L20: */
-    }
-    goto L140;
-L30:
-    sh11 = sparam[2];
-    sh22 = sparam[5];
-    i__2 = nsteps;
-    i__1 = *incx;
-    for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
-       w = sx[i__];
-       z__ = sy[i__];
-       sx[i__] = w * sh11 + z__;
-       sy[i__] = -w + sh22 * z__;
-/* L40: */
-    }
-    goto L140;
-L50:
-    sh11 = sparam[2];
-    sh12 = sparam[4];
-    sh21 = sparam[3];
-    sh22 = sparam[5];
-    i__1 = nsteps;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       w = sx[i__];
-       z__ = sy[i__];
-       sx[i__] = w * sh11 + z__ * sh12;
-       sy[i__] = w * sh21 + z__ * sh22;
-/* L60: */
-    }
-    goto L140;
-L70:
-    kx = 1;
-    ky = 1;
-    if (*incx < 0) {
-       kx = (1 - *n) * *incx + 1;
-    }
-    if (*incy < 0) {
-       ky = (1 - *n) * *incy + 1;
-    }
-
-    if (sflag < 0.f) {
-       goto L120;
-    } else if (sflag == 0) {
-       goto L80;
-    } else {
-       goto L100;
-    }
-L80:
-    sh12 = sparam[4];
-    sh21 = sparam[3];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       w = sx[kx];
-       z__ = sy[ky];
-       sx[kx] = w + z__ * sh12;
-       sy[ky] = w * sh21 + z__;
-       kx += *incx;
-       ky += *incy;
-/* L90: */
-    }
-    goto L140;
-L100:
-    sh11 = sparam[2];
-    sh22 = sparam[5];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       w = sx[kx];
-       z__ = sy[ky];
-       sx[kx] = w * sh11 + z__;
-       sy[ky] = -w + sh22 * z__;
-       kx += *incx;
-       ky += *incy;
-/* L110: */
-    }
-    goto L140;
-L120:
-    sh11 = sparam[2];
-    sh12 = sparam[4];
-    sh21 = sparam[3];
-    sh22 = sparam[5];
-    i__2 = *n;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       w = sx[kx];
-       z__ = sy[ky];
-       sx[kx] = w * sh11 + z__ * sh12;
-       sy[ky] = w * sh21 + z__ * sh22;
-       kx += *incx;
-       ky += *incy;
-/* L130: */
-    }
-L140:
-    return 0;
-} /* srotm_ */
-
-/* Subroutine */ int srotmg_(real *sd1, real *sd2, real *sx1, real *sy1, real 
-       *sparam)
-{
-    /* Initialized data */
-
-    static real zero = 0.f;
-    static real one = 1.f;
-    static real two = 2.f;
-    static real gam = 4096.f;
-    static real gamsq = 16777200.f;
-    static real rgamsq = 5.96046e-8f;
-
-    /* Format strings */
-    static char fmt_120[] = "";
-    static char fmt_150[] = "";
-    static char fmt_180[] = "";
-    static char fmt_210[] = "";
-
-    /* System generated locals */
-    real r__1;
-
-    /* Local variables */
-    static real su, sp1, sp2, sq2, sq1, sh11, sh21, sh12, sh22;
-    static integer igo;
-    static real sflag, stemp;
-
-    /* Assigned format variables */
-    static char *igo_fmt;
-
-
-/*     CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS */
-/*     THE SECOND COMPONENT OF THE 2-VECTOR  (SQRT(SD1)*SX1,SQRT(SD2)* */
-/*     SY2)**T. */
-/*     WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS.. */
-
-/*     SFLAG=-1.E0     SFLAG=0.E0        SFLAG=1.E0     SFLAG=-2.E0 */
-
-/*       (SH11  SH12)    (1.E0  SH12)    (SH11  1.E0)    (1.E0  0.E0) */
-/*     H=(          )    (          )    (          )    (          ) */
-/*       (SH21  SH22),   (SH21  1.E0),   (-1.E0 SH22),   (0.E0  1.E0). */
-/*     LOCATIONS 2-4 OF SPARAM CONTAIN SH11,SH21,SH12, AND SH22 */
-/*     RESPECTIVELY. (VALUES OF 1.E0, -1.E0, OR 0.E0 IMPLIED BY THE */
-/*     VALUE OF SPARAM(1) ARE NOT STORED IN SPARAM.) */
-
-/*     THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE */
-/*     INEXACT.  THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE */
-/*     OF SD1 AND SD2.  ALL ACTUAL SCALING OF DATA IS DONE USING GAM. */
-
-
-    /* Parameter adjustments */
-    --sparam;
-
-    /* Function Body */
-    if (! (*sd1 < zero)) {
-       goto L10;
-    }
-/*       GO ZERO-H-D-AND-SX1.. */
-    goto L60;
-L10:
-/*     CASE-SD1-NONNEGATIVE */
-    sp2 = *sd2 * *sy1;
-    if (! (sp2 == zero)) {
-       goto L20;
-    }
-    sflag = -two;
-    goto L260;
-/*     REGULAR-CASE.. */
-L20:
-    sp1 = *sd1 * *sx1;
-    sq2 = sp2 * *sy1;
-    sq1 = sp1 * *sx1;
-
-    if (! (dabs(sq1) > dabs(sq2))) {
-       goto L40;
-    }
-    sh21 = -(*sy1) / *sx1;
-    sh12 = sp2 / sp1;
-
-    su = one - sh12 * sh21;
-
-    if (! (su <= zero)) {
-       goto L30;
-    }
-/*         GO ZERO-H-D-AND-SX1.. */
-    goto L60;
-L30:
-    sflag = zero;
-    *sd1 /= su;
-    *sd2 /= su;
-    *sx1 *= su;
-/*         GO SCALE-CHECK.. */
-    goto L100;
-L40:
-    if (! (sq2 < zero)) {
-       goto L50;
-    }
-/*         GO ZERO-H-D-AND-SX1.. */
-    goto L60;
-L50:
-    sflag = one;
-    sh11 = sp1 / sp2;
-    sh22 = *sx1 / *sy1;
-    su = one + sh11 * sh22;
-    stemp = *sd2 / su;
-    *sd2 = *sd1 / su;
-    *sd1 = stemp;
-    *sx1 = *sy1 * su;
-/*         GO SCALE-CHECK */
-    goto L100;
-/*     PROCEDURE..ZERO-H-D-AND-SX1.. */
-L60:
-    sflag = -one;
-    sh11 = zero;
-    sh12 = zero;
-    sh21 = zero;
-    sh22 = zero;
-
-    *sd1 = zero;
-    *sd2 = zero;
-    *sx1 = zero;
-/*         RETURN.. */
-    goto L220;
-/*     PROCEDURE..FIX-H.. */
-L70:
-    if (! (sflag >= zero)) {
-       goto L90;
-    }
-
-    if (! (sflag == zero)) {
-       goto L80;
-    }
-    sh11 = one;
-    sh22 = one;
-    sflag = -one;
-    goto L90;
-L80:
-    sh21 = -one;
-    sh12 = one;
-    sflag = -one;
-L90:
-    switch (igo) {
-       case 0: goto L120;
-       case 1: goto L150;
-       case 2: goto L180;
-       case 3: goto L210;
-    }
-/*     PROCEDURE..SCALE-CHECK */
-L100:
-L110:
-    if (! (*sd1 <= rgamsq)) {
-       goto L130;
-    }
-    if (*sd1 == zero) {
-       goto L160;
-    }
-    igo = 0;
-    igo_fmt = fmt_120;
-/*              FIX-H.. */
-    goto L70;
-L120:
-/* Computing 2nd power */
-    r__1 = gam;
-    *sd1 *= r__1 * r__1;
-    *sx1 /= gam;
-    sh11 /= gam;
-    sh12 /= gam;
-    goto L110;
-L130:
-L140:
-    if (! (*sd1 >= gamsq)) {
-       goto L160;
-    }
-    igo = 1;
-    igo_fmt = fmt_150;
-/*              FIX-H.. */
-    goto L70;
-L150:
-/* Computing 2nd power */
-    r__1 = gam;
-    *sd1 /= r__1 * r__1;
-    *sx1 *= gam;
-    sh11 *= gam;
-    sh12 *= gam;
-    goto L140;
-L160:
-L170:
-    if (! (dabs(*sd2) <= rgamsq)) {
-       goto L190;
-    }
-    if (*sd2 == zero) {
-       goto L220;
-    }
-    igo = 2;
-    igo_fmt = fmt_180;
-/*              FIX-H.. */
-    goto L70;
-L180:
-/* Computing 2nd power */
-    r__1 = gam;
-    *sd2 *= r__1 * r__1;
-    sh21 /= gam;
-    sh22 /= gam;
-    goto L170;
-L190:
-L200:
-    if (! (dabs(*sd2) >= gamsq)) {
-       goto L220;
-    }
-    igo = 3;
-    igo_fmt = fmt_210;
-/*              FIX-H.. */
-    goto L70;
-L210:
-/* Computing 2nd power */
-    r__1 = gam;
-    *sd2 /= r__1 * r__1;
-    sh21 *= gam;
-    sh22 *= gam;
-    goto L200;
-L220:
-    if (sflag < 0.f) {
-       goto L250;
-    } else if (sflag == 0) {
-       goto L230;
-    } else {
-       goto L240;
-    }
-L230:
-    sparam[3] = sh21;
-    sparam[4] = sh12;
-    goto L260;
-L240:
-    sparam[2] = sh11;
-    sparam[5] = sh22;
-    goto L260;
-L250:
-    sparam[2] = sh11;
-    sparam[3] = sh21;
-    sparam[4] = sh12;
-    sparam[5] = sh22;
-L260:
-    sparam[1] = sflag;
-    return 0;
-} /* srotmg_ */
-
-/* Subroutine */ int ssbmv_(char *uplo, integer *n, integer *k, real *alpha, 
-       real *a, integer *lda, real *x, integer *incx, real *beta, real *y, 
-       integer *incy, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
-
-    /* Local variables */
-    static integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
-    static real temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SSBMV  performs the matrix-vector  operation */
-
-/*     y := alpha*A*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n symmetric band matrix, with k super-diagonals. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the band matrix A is being supplied as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  being supplied. */
-
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  being supplied. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry, K specifies the number of super-diagonals of the */
-/*           matrix A. K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the symmetric matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer the upper */
-/*           triangular part of a symmetric band matrix from conventional */
-/*           full matrix storage to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the symmetric matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer the lower */
-/*           triangular part of a symmetric band matrix from conventional */
-/*           full matrix storage to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-
-/*  X      - REAL             array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-
-/*  Y      - REAL             array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*k < 0) {
-       info = 3;
-    } else if (*lda < *k + 1) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    } else if (*incy == 0) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("SSBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0.f && *beta == 1.f) {
-       return 0;
-    }
-
-/*     Set up the start points in  X  and  Y. */
-
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of the array A */
-/*     are accessed sequentially with one pass through A. */
-
-/*     First form  y := beta*y. */
-
-    if (*beta != 1.f) {
-       if (*incy == 1) {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.f) {
-       return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when upper triangle of A is stored. */
-
-       kplus1 = *k + 1;
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.f;
-               l = kplus1 - j;
-/* Computing MAX */
-               i__2 = 1, i__3 = j - *k;
-               i__4 = j - 1;
-               for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                   y[i__] += temp1 * a[l + i__ + j * a_dim1];
-                   temp2 += a[l + i__ + j * a_dim1] * x[i__];
-/* L50: */
-               }
-               y[j] = y[j] + temp1 * a[kplus1 + j * a_dim1] + *alpha * temp2;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.f;
-               ix = kx;
-               iy = ky;
-               l = kplus1 - j;
-/* Computing MAX */
-               i__4 = 1, i__2 = j - *k;
-               i__3 = j - 1;
-               for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                   y[iy] += temp1 * a[l + i__ + j * a_dim1];
-                   temp2 += a[l + i__ + j * a_dim1] * x[ix];
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               y[jy] = y[jy] + temp1 * a[kplus1 + j * a_dim1] + *alpha * 
-                       temp2;
-               jx += *incx;
-               jy += *incy;
-               if (j > *k) {
-                   kx += *incx;
-                   ky += *incy;
-               }
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y  when lower triangle of A is stored. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.f;
-               y[j] += temp1 * a[j * a_dim1 + 1];
-               l = 1 - j;
-/* Computing MIN */
-               i__4 = *n, i__2 = j + *k;
-               i__3 = min(i__4,i__2);
-               for (i__ = j + 1; i__ <= i__3; ++i__) {
-                   y[i__] += temp1 * a[l + i__ + j * a_dim1];
-                   temp2 += a[l + i__ + j * a_dim1] * x[i__];
-/* L90: */
-               }
-               y[j] += *alpha * temp2;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.f;
-               y[jy] += temp1 * a[j * a_dim1 + 1];
-               l = 1 - j;
-               ix = jx;
-               iy = jy;
-/* Computing MIN */
-               i__4 = *n, i__2 = j + *k;
-               i__3 = min(i__4,i__2);
-               for (i__ = j + 1; i__ <= i__3; ++i__) {
-                   ix += *incx;
-                   iy += *incy;
-                   y[iy] += temp1 * a[l + i__ + j * a_dim1];
-                   temp2 += a[l + i__ + j * a_dim1] * x[ix];
-/* L110: */
-               }
-               y[jy] += *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of SSBMV . */
-
-} /* ssbmv_ */
-
-/* Subroutine */ int sscal_(integer *n, real *sa, real *sx, integer *incx)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-
-    /* Local variables */
-    static integer i__, m, mp1, nincx;
-
-
-/*     scales a vector by a constant. */
-/*     uses unrolled loops for increment equal to 1. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --sx;
-
-    /* Function Body */
-    if (*n <= 0 || *incx <= 0) {
-       return 0;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-
-/*        code for increment not equal to 1 */
-
-    nincx = *n * *incx;
-    i__1 = nincx;
-    i__2 = *incx;
-    for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
-       sx[i__] = *sa * sx[i__];
-/* L10: */
-    }
-    return 0;
-
-/*        code for increment equal to 1 */
-
-
-/*        clean-up loop */
-
-L20:
-    m = *n % 5;
-    if (m == 0) {
-       goto L40;
-    }
-    i__2 = m;
-    for (i__ = 1; i__ <= i__2; ++i__) {
-       sx[i__] = *sa * sx[i__];
-/* L30: */
-    }
-    if (*n < 5) {
-       return 0;
-    }
-L40:
-    mp1 = m + 1;
-    i__2 = *n;
-    for (i__ = mp1; i__ <= i__2; i__ += 5) {
-       sx[i__] = *sa * sx[i__];
-       sx[i__ + 1] = *sa * sx[i__ + 1];
-       sx[i__ + 2] = *sa * sx[i__ + 2];
-       sx[i__ + 3] = *sa * sx[i__ + 3];
-       sx[i__ + 4] = *sa * sx[i__ + 4];
-/* L50: */
-    }
-    return 0;
-} /* sscal_ */
-
-/* Subroutine */ int sspmv_(char *uplo, integer *n, real *alpha, real *ap, 
-       real *x, integer *incx, real *beta, real *y, integer *incy, ftnlen 
-       uplo_len)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    static real temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SSPMV  performs the matrix-vector operation */
-
-/*     y := alpha*A*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n symmetric matrix, supplied in packed form. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  AP     - REAL             array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. */
-/*           Unchanged on exit. */
-
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  Y      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --y;
-    --x;
-    --ap;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 6;
-    } else if (*incy == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("SSPMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0.f && *beta == 1.f) {
-       return 0;
-    }
-
-/*     Set up the start points in  X  and  Y. */
-
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-
-/*     First form  y := beta*y. */
-
-    if (*beta != 1.f) {
-       if (*incy == 1) {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.f) {
-       return 0;
-    }
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when AP contains the upper triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.f;
-               k = kk;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   y[i__] += temp1 * ap[k];
-                   temp2 += ap[k] * x[i__];
-                   ++k;
-/* L50: */
-               }
-               y[j] = y[j] + temp1 * ap[kk + j - 1] + *alpha * temp2;
-               kk += j;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.f;
-               ix = kx;
-               iy = ky;
-               i__2 = kk + j - 2;
-               for (k = kk; k <= i__2; ++k) {
-                   y[iy] += temp1 * ap[k];
-                   temp2 += ap[k] * x[ix];
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               y[jy] = y[jy] + temp1 * ap[kk + j - 1] + *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-               kk += j;
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y  when AP contains the lower triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.f;
-               y[j] += temp1 * ap[kk];
-               k = kk + 1;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   y[i__] += temp1 * ap[k];
-                   temp2 += ap[k] * x[i__];
-                   ++k;
-/* L90: */
-               }
-               y[j] += *alpha * temp2;
-               kk += *n - j + 1;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.f;
-               y[jy] += temp1 * ap[kk];
-               ix = jx;
-               iy = jy;
-               i__2 = kk + *n - j;
-               for (k = kk + 1; k <= i__2; ++k) {
-                   ix += *incx;
-                   iy += *incy;
-                   y[iy] += temp1 * ap[k];
-                   temp2 += ap[k] * x[ix];
-/* L110: */
-               }
-               y[jy] += *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-               kk += *n - j + 1;
-/* L120: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of SSPMV . */
-
-} /* sspmv_ */
-
-/* Subroutine */ int sspr_(char *uplo, integer *n, real *alpha, real *x, 
-       integer *incx, real *ap, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SSPR    performs the symmetric rank 1 operation */
-
-/*     A := alpha*x*x' + A, */
-
-/*  where alpha is a real scalar, x is an n element vector and A is an */
-/*  n by n symmetric matrix, supplied in packed form. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  AP     - REAL             array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the upper triangular part of the */
-/*           updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the lower triangular part of the */
-/*           updated matrix. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --ap;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    }
-    if (info != 0) {
-       xerbla_("SSPR  ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0.f) {
-       return 0;
-    }
-
-/*     Set the start point in X if the increment is not unity. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  A  when upper triangle is stored in AP. */
-
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.f) {
-                   temp = *alpha * x[j];
-                   k = kk;
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       ap[k] += x[i__] * temp;
-                       ++k;
-/* L10: */
-                   }
-               }
-               kk += j;
-/* L20: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f) {
-                   temp = *alpha * x[jx];
-                   ix = kx;
-                   i__2 = kk + j - 1;
-                   for (k = kk; k <= i__2; ++k) {
-                       ap[k] += x[ix] * temp;
-                       ix += *incx;
-/* L30: */
-                   }
-               }
-               jx += *incx;
-               kk += j;
-/* L40: */
-           }
-       }
-    } else {
-
-/*        Form  A  when lower triangle is stored in AP. */
-
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.f) {
-                   temp = *alpha * x[j];
-                   k = kk;
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       ap[k] += x[i__] * temp;
-                       ++k;
-/* L50: */
-                   }
-               }
-               kk = kk + *n - j + 1;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f) {
-                   temp = *alpha * x[jx];
-                   ix = jx;
-                   i__2 = kk + *n - j;
-                   for (k = kk; k <= i__2; ++k) {
-                       ap[k] += x[ix] * temp;
-                       ix += *incx;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               kk = kk + *n - j + 1;
-/* L80: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of SSPR  . */
-
-} /* sspr_ */
-
-/* Subroutine */ int sspr2_(char *uplo, integer *n, real *alpha, real *x, 
-       integer *incx, real *y, integer *incy, real *ap, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    static real temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SSPR2  performs the symmetric rank 2 operation */
-
-/*     A := alpha*x*y' + alpha*y*x' + A, */
-
-/*  where alpha is a scalar, x and y are n element vectors and A is an */
-/*  n by n symmetric matrix, supplied in packed form. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  Y      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-/*  AP     - REAL             array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the upper triangular part of the */
-/*           updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the symmetric matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the lower triangular part of the */
-/*           updated matrix. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --ap;
-    --y;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("SSPR2 ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0.f) {
-       return 0;
-    }
-
-/*     Set up the start points in X and Y if the increments are not both */
-/*     unity. */
-
-    if (*incx != 1 || *incy != 1) {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*n - 1) * *incx;
-       }
-       if (*incy > 0) {
-           ky = 1;
-       } else {
-           ky = 1 - (*n - 1) * *incy;
-       }
-       jx = kx;
-       jy = ky;
-    }
-
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  A  when upper triangle is stored in AP. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.f || y[j] != 0.f) {
-                   temp1 = *alpha * y[j];
-                   temp2 = *alpha * x[j];
-                   k = kk;
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       ap[k] = ap[k] + x[i__] * temp1 + y[i__] * temp2;
-                       ++k;
-/* L10: */
-                   }
-               }
-               kk += j;
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f || y[jy] != 0.f) {
-                   temp1 = *alpha * y[jy];
-                   temp2 = *alpha * x[jx];
-                   ix = kx;
-                   iy = ky;
-                   i__2 = kk + j - 1;
-                   for (k = kk; k <= i__2; ++k) {
-                       ap[k] = ap[k] + x[ix] * temp1 + y[iy] * temp2;
-                       ix += *incx;
-                       iy += *incy;
-/* L30: */
-                   }
-               }
-               jx += *incx;
-               jy += *incy;
-               kk += j;
-/* L40: */
-           }
-       }
-    } else {
-
-/*        Form  A  when lower triangle is stored in AP. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.f || y[j] != 0.f) {
-                   temp1 = *alpha * y[j];
-                   temp2 = *alpha * x[j];
-                   k = kk;
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       ap[k] = ap[k] + x[i__] * temp1 + y[i__] * temp2;
-                       ++k;
-/* L50: */
-                   }
-               }
-               kk = kk + *n - j + 1;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f || y[jy] != 0.f) {
-                   temp1 = *alpha * y[jy];
-                   temp2 = *alpha * x[jx];
-                   ix = jx;
-                   iy = jy;
-                   i__2 = kk + *n - j;
-                   for (k = kk; k <= i__2; ++k) {
-                       ap[k] = ap[k] + x[ix] * temp1 + y[iy] * temp2;
-                       ix += *incx;
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               jy += *incy;
-               kk = kk + *n - j + 1;
-/* L80: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of SSPR2 . */
-
-} /* sspr2_ */
-
-/* Subroutine */ int sswap_(integer *n, real *sx, integer *incx, real *sy, 
-       integer *incy)
-{
-    /* System generated locals */
-    integer i__1;
-
-    /* Local variables */
-    static integer i__, m, ix, iy, mp1;
-    static real stemp;
-
-
-/*     interchanges two vectors. */
-/*     uses unrolled loops for increments equal to 1. */
-/*     jack dongarra, linpack, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --sy;
-    --sx;
-
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*       code for unequal increments or equal increments not equal */
-/*         to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       stemp = sx[ix];
-       sx[ix] = sy[iy];
-       sy[iy] = stemp;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*       code for both increments equal to 1 */
-
-
-/*       clean-up loop */
-
-L20:
-    m = *n % 3;
-    if (m == 0) {
-       goto L40;
-    }
-    i__1 = m;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       stemp = sx[i__];
-       sx[i__] = sy[i__];
-       sy[i__] = stemp;
-/* L30: */
-    }
-    if (*n < 3) {
-       return 0;
-    }
-L40:
-    mp1 = m + 1;
-    i__1 = *n;
-    for (i__ = mp1; i__ <= i__1; i__ += 3) {
-       stemp = sx[i__];
-       sx[i__] = sy[i__];
-       sy[i__] = stemp;
-       stemp = sx[i__ + 1];
-       sx[i__ + 1] = sy[i__ + 1];
-       sy[i__ + 1] = stemp;
-       stemp = sx[i__ + 2];
-       sx[i__ + 2] = sy[i__ + 2];
-       sy[i__ + 2] = stemp;
-/* L50: */
-    }
-    return 0;
-} /* sswap_ */
-
-/* Subroutine */ int ssymm_(char *side, char *uplo, integer *m, integer *n, 
-       real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta,
-        real *c__, integer *ldc, ftnlen side_len, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3;
-
-    /* Local variables */
-    static integer i__, j, k, info;
-    static real temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SSYMM  performs one of the matrix-matrix operations */
-
-/*     C := alpha*A*B + beta*C, */
-
-/*  or */
-
-/*     C := alpha*B*A + beta*C, */
-
-/*  where alpha and beta are scalars,  A is a symmetric matrix and  B and */
-/*  C are  m by n matrices. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE  specifies whether  the  symmetric matrix  A */
-/*           appears on the  left or right  in the  operation as follows: */
-
-/*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C, */
-
-/*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C, */
-
-/*           Unchanged on exit. */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of  the  symmetric  matrix   A  is  to  be */
-/*           referenced as follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of the */
-/*                                  symmetric matrix is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of the */
-/*                                  symmetric matrix is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies the number of rows of the matrix  C. */
-/*           M  must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix C. */
-/*           N  must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is */
-/*           m  when  SIDE = 'L' or 'l'  and is  n otherwise. */
-/*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of */
-/*           the array  A  must contain the  symmetric matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading m by m upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  symmetric matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  m by m  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  symmetric */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of */
-/*           the array  A  must contain the  symmetric matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading n by n upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  symmetric matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  n by n  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  symmetric */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least  max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  B      - REAL             array of DIMENSION ( LDB, n ). */
-/*           Before entry, the leading  m by n part of the array  B  must */
-/*           contain the matrix B. */
-/*           Unchanged on exit. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - REAL            . */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  C      - REAL             array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n updated */
-/*           matrix. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Set NROWA as the number of rows of A. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-/*     Test the input parameters. */
-
-    info = 0;
-    if (! lsame_(side, "L", (ftnlen)1, (ftnlen)1) && ! lsame_(side, "R", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,*m)) {
-       info = 9;
-    } else if (*ldc < max(1,*m)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("SSYMM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || *alpha == 0.f && *beta == 1.f) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (*alpha == 0.f) {
-       if (*beta == 0.f) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = 0.f;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*B + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp1 = *alpha * b[i__ + j * b_dim1];
-                   temp2 = 0.f;
-                   i__3 = i__ - 1;
-                   for (k = 1; k <= i__3; ++k) {
-                       c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1];
-                       temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1];
-/* L50: */
-                   }
-                   if (*beta == 0.f) {
-                       c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] 
-                               + *alpha * temp2;
-                   } else {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] 
-                               + temp1 * a[i__ + i__ * a_dim1] + *alpha * 
-                               temp2;
-                   }
-/* L60: */
-               }
-/* L70: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               for (i__ = *m; i__ >= 1; --i__) {
-                   temp1 = *alpha * b[i__ + j * b_dim1];
-                   temp2 = 0.f;
-                   i__2 = *m;
-                   for (k = i__ + 1; k <= i__2; ++k) {
-                       c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1];
-                       temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1];
-/* L80: */
-                   }
-                   if (*beta == 0.f) {
-                       c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] 
-                               + *alpha * temp2;
-                   } else {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] 
-                               + temp1 * a[i__ + i__ * a_dim1] + *alpha * 
-                               temp2;
-                   }
-/* L90: */
-               }
-/* L100: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*B*A + beta*C. */
-
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           temp1 = *alpha * a[j + j * a_dim1];
-           if (*beta == 0.f) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = temp1 * b[i__ + j * b_dim1];
-/* L110: */
-               }
-           } else {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + 
-                           temp1 * b[i__ + j * b_dim1];
-/* L120: */
-               }
-           }
-           i__2 = j - 1;
-           for (k = 1; k <= i__2; ++k) {
-               if (upper) {
-                   temp1 = *alpha * a[k + j * a_dim1];
-               } else {
-                   temp1 = *alpha * a[j + k * a_dim1];
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1];
-/* L130: */
-               }
-/* L140: */
-           }
-           i__2 = *n;
-           for (k = j + 1; k <= i__2; ++k) {
-               if (upper) {
-                   temp1 = *alpha * a[j + k * a_dim1];
-               } else {
-                   temp1 = *alpha * a[k + j * a_dim1];
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1];
-/* L150: */
-               }
-/* L160: */
-           }
-/* L170: */
-       }
-    }
-
-    return 0;
-
-/*     End of SSYMM . */
-
-} /* ssymm_ */
-
-/* Subroutine */ int ssymv_(char *uplo, integer *n, real *alpha, real *a, 
-       integer *lda, real *x, integer *incx, real *beta, real *y, integer *
-       incy, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static real temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SSYMV  performs the matrix-vector  operation */
-
-/*     y := alpha*A*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n symmetric matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           lower triangular part of A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           upper triangular part of A is not referenced. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  Y      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*lda < max(1,*n)) {
-       info = 5;
-    } else if (*incx == 0) {
-       info = 7;
-    } else if (*incy == 0) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("SSYMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0.f && *beta == 1.f) {
-       return 0;
-    }
-
-/*     Set up the start points in  X  and  Y. */
-
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-
-/*     First form  y := beta*y. */
-
-    if (*beta != 1.f) {
-       if (*incy == 1) {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = 0.f;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[i__] = *beta * y[i__];
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = 0.f;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   y[iy] = *beta * y[iy];
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (*alpha == 0.f) {
-       return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when A is stored in upper triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.f;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   y[i__] += temp1 * a[i__ + j * a_dim1];
-                   temp2 += a[i__ + j * a_dim1] * x[i__];
-/* L50: */
-               }
-               y[j] = y[j] + temp1 * a[j + j * a_dim1] + *alpha * temp2;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.f;
-               ix = kx;
-               iy = ky;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   y[iy] += temp1 * a[i__ + j * a_dim1];
-                   temp2 += a[i__ + j * a_dim1] * x[ix];
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               y[jy] = y[jy] + temp1 * a[j + j * a_dim1] + *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y  when A is stored in lower triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[j];
-               temp2 = 0.f;
-               y[j] += temp1 * a[j + j * a_dim1];
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   y[i__] += temp1 * a[i__ + j * a_dim1];
-                   temp2 += a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-               }
-               y[j] += *alpha * temp2;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp1 = *alpha * x[jx];
-               temp2 = 0.f;
-               y[jy] += temp1 * a[j + j * a_dim1];
-               ix = jx;
-               iy = jy;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   ix += *incx;
-                   iy += *incy;
-                   y[iy] += temp1 * a[i__ + j * a_dim1];
-                   temp2 += a[i__ + j * a_dim1] * x[ix];
-/* L110: */
-               }
-               y[jy] += *alpha * temp2;
-               jx += *incx;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of SSYMV . */
-
-} /* ssymv_ */
-
-/* Subroutine */ int ssyr_(char *uplo, integer *n, real *alpha, real *x, 
-       integer *incx, real *a, integer *lda, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SSYR   performs the symmetric rank 1 operation */
-
-/*     A := alpha*x*x' + A, */
-
-/*  where alpha is a real scalar, x is an n element vector and A is an */
-/*  n by n symmetric matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           lower triangular part of A is not referenced. On exit, the */
-/*           upper triangular part of the array A is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           upper triangular part of A is not referenced. On exit, the */
-/*           lower triangular part of the array A is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*lda < max(1,*n)) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("SSYR  ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0.f) {
-       return 0;
-    }
-
-/*     Set the start point in X if the increment is not unity. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  A  when A is stored in upper triangle. */
-
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.f) {
-                   temp = *alpha * x[j];
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] += x[i__] * temp;
-/* L10: */
-                   }
-               }
-/* L20: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f) {
-                   temp = *alpha * x[jx];
-                   ix = kx;
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] += x[ix] * temp;
-                       ix += *incx;
-/* L30: */
-                   }
-               }
-               jx += *incx;
-/* L40: */
-           }
-       }
-    } else {
-
-/*        Form  A  when A is stored in lower triangle. */
-
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.f) {
-                   temp = *alpha * x[j];
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] += x[i__] * temp;
-/* L50: */
-                   }
-               }
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f) {
-                   temp = *alpha * x[jx];
-                   ix = jx;
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] += x[ix] * temp;
-                       ix += *incx;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-/* L80: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of SSYR  . */
-
-} /* ssyr_ */
-
-/* Subroutine */ int ssyr2_(char *uplo, integer *n, real *alpha, real *x, 
-       integer *incx, real *y, integer *incy, real *a, integer *lda, ftnlen 
-       uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static real temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SSYR2  performs the symmetric rank 2 operation */
-
-/*     A := alpha*x*y' + alpha*y*x' + A, */
-
-/*  where alpha is a scalar, x and y are n element vectors and A is an n */
-/*  by n symmetric matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  Y      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           lower triangular part of A is not referenced. On exit, the */
-/*           upper triangular part of the array A is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the symmetric matrix and the strictly */
-/*           upper triangular part of A is not referenced. On exit, the */
-/*           lower triangular part of the array A is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*n)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("SSYR2 ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0.f) {
-       return 0;
-    }
-
-/*     Set up the start points in X and Y if the increments are not both */
-/*     unity. */
-
-    if (*incx != 1 || *incy != 1) {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*n - 1) * *incx;
-       }
-       if (*incy > 0) {
-           ky = 1;
-       } else {
-           ky = 1 - (*n - 1) * *incy;
-       }
-       jx = kx;
-       jy = ky;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  A  when A is stored in the upper triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.f || y[j] != 0.f) {
-                   temp1 = *alpha * y[j];
-                   temp2 = *alpha * x[j];
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * 
-                               temp1 + y[i__] * temp2;
-/* L10: */
-                   }
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f || y[jy] != 0.f) {
-                   temp1 = *alpha * y[jy];
-                   temp2 = *alpha * x[jx];
-                   ix = kx;
-                   iy = ky;
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * 
-                               temp1 + y[iy] * temp2;
-                       ix += *incx;
-                       iy += *incy;
-/* L30: */
-                   }
-               }
-               jx += *incx;
-               jy += *incy;
-/* L40: */
-           }
-       }
-    } else {
-
-/*        Form  A  when A is stored in the lower triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[j] != 0.f || y[j] != 0.f) {
-                   temp1 = *alpha * y[j];
-                   temp2 = *alpha * x[j];
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[i__] * 
-                               temp1 + y[i__] * temp2;
-/* L50: */
-                   }
-               }
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (x[jx] != 0.f || y[jy] != 0.f) {
-                   temp1 = *alpha * y[jy];
-                   temp2 = *alpha * x[jx];
-                   ix = jx;
-                   iy = jy;
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       a[i__ + j * a_dim1] = a[i__ + j * a_dim1] + x[ix] * 
-                               temp1 + y[iy] * temp2;
-                       ix += *incx;
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               jy += *incy;
-/* L80: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of SSYR2 . */
-
-} /* ssyr2_ */
-
-/* Subroutine */ int ssyr2k_(char *uplo, char *trans, integer *n, integer *k, 
-       real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta,
-        real *c__, integer *ldc, ftnlen uplo_len, ftnlen trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3;
-
-    /* Local variables */
-    static integer i__, j, l, info;
-    static real temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SSYR2K  performs one of the symmetric rank 2k operations */
-
-/*     C := alpha*A*B' + alpha*B*A' + beta*C, */
-
-/*  or */
-
-/*     C := alpha*A'*B + alpha*B'*A + beta*C, */
-
-/*  where  alpha and beta  are scalars, C is an  n by n  symmetric matrix */
-/*  and  A and B  are  n by k  matrices  in the  first  case  and  k by n */
-/*  matrices in the second case. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   C := alpha*A*B' + alpha*B*A' + */
-/*                                        beta*C. */
-
-/*              TRANS = 'T' or 't'   C := alpha*A'*B + alpha*B'*A + */
-/*                                        beta*C. */
-
-/*              TRANS = 'C' or 'c'   C := alpha*A'*B + alpha*B'*A + */
-/*                                        beta*C. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns  of the  matrices  A and B,  and on  entry  with */
-/*           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number */
-/*           of rows of the matrices  A and B.  K must be at least  zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  B      - REAL             array of DIMENSION ( LDB, kb ), where kb is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  k by n  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDB must be at least  max( 1, n ), otherwise  LDB must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-
-/*  C      - REAL             array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldc < max(1,*n)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("SSYR2K", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (*alpha == 0.f) {
-       if (upper) {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L30: */
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*B' + alpha*B*A' + C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L90: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L100: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (a[j + l * a_dim1] != 0.f || b[j + l * b_dim1] != 0.f) 
-                           {
-                       temp1 = *alpha * b[j + l * b_dim1];
-                       temp2 = *alpha * a[j + l * a_dim1];
-                       i__3 = j;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[
-                                   i__ + l * a_dim1] * temp1 + b[i__ + l * 
-                                   b_dim1] * temp2;
-/* L110: */
-                       }
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L140: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L150: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (a[j + l * a_dim1] != 0.f || b[j + l * b_dim1] != 0.f) 
-                           {
-                       temp1 = *alpha * b[j + l * b_dim1];
-                       temp2 = *alpha * a[j + l * a_dim1];
-                       i__3 = *n;
-                       for (i__ = j; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] = c__[i__ + j * c_dim1] + a[
-                                   i__ + l * a_dim1] * temp1 + b[i__ + l * 
-                                   b_dim1] * temp2;
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*A'*B + alpha*B'*A + C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp1 = 0.f;
-                   temp2 = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1];
-                       temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1];
-/* L190: */
-                   }
-                   if (*beta == 0.f) {
-                       c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * 
-                               temp2;
-                   } else {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] 
-                               + *alpha * temp1 + *alpha * temp2;
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp1 = 0.f;
-                   temp2 = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp1 += a[l + i__ * a_dim1] * b[l + j * b_dim1];
-                       temp2 += b[l + i__ * b_dim1] * a[l + j * a_dim1];
-/* L220: */
-                   }
-                   if (*beta == 0.f) {
-                       c__[i__ + j * c_dim1] = *alpha * temp1 + *alpha * 
-                               temp2;
-                   } else {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] 
-                               + *alpha * temp1 + *alpha * temp2;
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of SSYR2K. */
-
-} /* ssyr2k_ */
-
-/* Subroutine */ int ssyrk_(char *uplo, char *trans, integer *n, integer *k, 
-       real *alpha, real *a, integer *lda, real *beta, real *c__, integer *
-       ldc, ftnlen uplo_len, ftnlen trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3;
-
-    /* Local variables */
-    static integer i__, j, l, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  SSYRK  performs one of the symmetric rank k operations */
-
-/*     C := alpha*A*A' + beta*C, */
-
-/*  or */
-
-/*     C := alpha*A'*A + beta*C, */
-
-/*  where  alpha and beta  are scalars, C is an  n by n  symmetric matrix */
-/*  and  A  is an  n by k  matrix in the first case and a  k by n  matrix */
-/*  in the second case. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C. */
-
-/*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C. */
-
-/*              TRANS = 'C' or 'c'   C := alpha*A'*A + beta*C. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns   of  the   matrix   A,   and  on   entry   with */
-/*           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number */
-/*           of rows of the matrix  A.  K must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - REAL            . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-
-/*  C      - REAL             array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldc < max(1,*n)) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("SSYRK ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || (*alpha == 0.f || *k == 0) && *beta == 1.f) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (*alpha == 0.f) {
-       if (upper) {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L30: */
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (*beta == 0.f) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*A' + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L90: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L100: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (a[j + l * a_dim1] != 0.f) {
-                       temp = *alpha * a[j + l * a_dim1];
-                       i__3 = j;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] += temp * a[i__ + l * 
-                                   a_dim1];
-/* L110: */
-                       }
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = 0.f;
-/* L140: */
-                   }
-               } else if (*beta != 1.f) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1];
-/* L150: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   if (a[j + l * a_dim1] != 0.f) {
-                       temp = *alpha * a[j + l * a_dim1];
-                       i__3 = *n;
-                       for (i__ = j; i__ <= i__3; ++i__) {
-                           c__[i__ + j * c_dim1] += temp * a[i__ + l * 
-                                   a_dim1];
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*A'*A + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp += a[l + i__ * a_dim1] * a[l + j * a_dim1];
-/* L190: */
-                   }
-                   if (*beta == 0.f) {
-                       c__[i__ + j * c_dim1] = *alpha * temp;
-                   } else {
-                       c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
-                               i__ + j * c_dim1];
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp = 0.f;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       temp += a[l + i__ * a_dim1] * a[l + j * a_dim1];
-/* L220: */
-                   }
-                   if (*beta == 0.f) {
-                       c__[i__ + j * c_dim1] = *alpha * temp;
-                   } else {
-                       c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[
-                               i__ + j * c_dim1];
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of SSYRK . */
-
-} /* ssyrk_ */
-
-/* Subroutine */ int stbmv_(char *uplo, char *trans, char *diag, integer *n, 
-       integer *k, real *a, integer *lda, real *x, integer *incx, ftnlen 
-       uplo_len, ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
-
-    /* Local variables */
-    static integer i__, j, l, ix, jx, kx, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  STBMV  performs one of the matrix-vector operations */
-
-/*     x := A*x,   or   x := A'*x, */
-
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   x := A*x. */
-
-/*              TRANS = 'T' or 't'   x := A'*x. */
-
-/*              TRANS = 'C' or 'c'   x := A'*x. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < *k + 1) {
-       info = 7;
-    } else if (*incx == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("STBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX   too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*         Form  x := A*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.f) {
-                       temp = x[j];
-                       l = kplus1 - j;
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__4 = j - 1;
-                       for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                           x[i__] += temp * a[l + i__ + j * a_dim1];
-/* L10: */
-                       }
-                       if (nounit) {
-                           x[j] *= a[kplus1 + j * a_dim1];
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.f) {
-                       temp = x[jx];
-                       ix = kx;
-                       l = kplus1 - j;
-/* Computing MAX */
-                       i__4 = 1, i__2 = j - *k;
-                       i__3 = j - 1;
-                       for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                           x[ix] += temp * a[l + i__ + j * a_dim1];
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           x[jx] *= a[kplus1 + j * a_dim1];
-                       }
-                   }
-                   jx += *incx;
-                   if (j > *k) {
-                       kx += *incx;
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.f) {
-                       temp = x[j];
-                       l = 1 - j;
-/* Computing MIN */
-                       i__1 = *n, i__3 = j + *k;
-                       i__4 = j + 1;
-                       for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
-                           x[i__] += temp * a[l + i__ + j * a_dim1];
-/* L50: */
-                       }
-                       if (nounit) {
-                           x[j] *= a[j * a_dim1 + 1];
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.f) {
-                       temp = x[jx];
-                       ix = kx;
-                       l = 1 - j;
-/* Computing MIN */
-                       i__4 = *n, i__1 = j + *k;
-                       i__3 = j + 1;
-                       for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
-                           x[ix] += temp * a[l + i__ + j * a_dim1];
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           x[jx] *= a[j * a_dim1 + 1];
-                       }
-                   }
-                   jx -= *incx;
-                   if (*n - j >= *k) {
-                       kx -= *incx;
-                   }
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := A'*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   l = kplus1 - j;
-                   if (nounit) {
-                       temp *= a[kplus1 + j * a_dim1];
-                   }
-/* Computing MAX */
-                   i__4 = 1, i__1 = j - *k;
-                   i__3 = max(i__4,i__1);
-                   for (i__ = j - 1; i__ >= i__3; --i__) {
-                       temp += a[l + i__ + j * a_dim1] * x[i__];
-/* L90: */
-                   }
-                   x[j] = temp;
-/* L100: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   kx -= *incx;
-                   ix = kx;
-                   l = kplus1 - j;
-                   if (nounit) {
-                       temp *= a[kplus1 + j * a_dim1];
-                   }
-/* Computing MAX */
-                   i__4 = 1, i__1 = j - *k;
-                   i__3 = max(i__4,i__1);
-                   for (i__ = j - 1; i__ >= i__3; --i__) {
-                       temp += a[l + i__ + j * a_dim1] * x[ix];
-                       ix -= *incx;
-/* L110: */
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-/* L120: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__3 = *n;
-               for (j = 1; j <= i__3; ++j) {
-                   temp = x[j];
-                   l = 1 - j;
-                   if (nounit) {
-                       temp *= a[j * a_dim1 + 1];
-                   }
-/* Computing MIN */
-                   i__1 = *n, i__2 = j + *k;
-                   i__4 = min(i__1,i__2);
-                   for (i__ = j + 1; i__ <= i__4; ++i__) {
-                       temp += a[l + i__ + j * a_dim1] * x[i__];
-/* L130: */
-                   }
-                   x[j] = temp;
-/* L140: */
-               }
-           } else {
-               jx = kx;
-               i__3 = *n;
-               for (j = 1; j <= i__3; ++j) {
-                   temp = x[jx];
-                   kx += *incx;
-                   ix = kx;
-                   l = 1 - j;
-                   if (nounit) {
-                       temp *= a[j * a_dim1 + 1];
-                   }
-/* Computing MIN */
-                   i__1 = *n, i__2 = j + *k;
-                   i__4 = min(i__1,i__2);
-                   for (i__ = j + 1; i__ <= i__4; ++i__) {
-                       temp += a[l + i__ + j * a_dim1] * x[ix];
-                       ix += *incx;
-/* L150: */
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-/* L160: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of STBMV . */
-
-} /* stbmv_ */
-
-/* Subroutine */ int stbsv_(char *uplo, char *trans, char *diag, integer *n, 
-       integer *k, real *a, integer *lda, real *x, integer *incx, ftnlen 
-       uplo_len, ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
-
-    /* Local variables */
-    static integer i__, j, l, ix, jx, kx, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  STBSV  solves one of the systems of equations */
-
-/*     A*x = b,   or   A'*x = b, */
-
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular band matrix, with ( k + 1 ) */
-/*  diagonals. */
-
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   A*x = b. */
-
-/*              TRANS = 'T' or 't'   A'*x = b. */
-
-/*              TRANS = 'C' or 'c'   A'*x = b. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < *k + 1) {
-       info = 7;
-    } else if (*incx == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("STBSV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed by sequentially with one pass through A. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x := inv( A )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.f) {
-                       l = kplus1 - j;
-                       if (nounit) {
-                           x[j] /= a[kplus1 + j * a_dim1];
-                       }
-                       temp = x[j];
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__1 = max(i__2,i__3);
-                       for (i__ = j - 1; i__ >= i__1; --i__) {
-                           x[i__] -= temp * a[l + i__ + j * a_dim1];
-/* L10: */
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   kx -= *incx;
-                   if (x[jx] != 0.f) {
-                       ix = kx;
-                       l = kplus1 - j;
-                       if (nounit) {
-                           x[jx] /= a[kplus1 + j * a_dim1];
-                       }
-                       temp = x[jx];
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__1 = max(i__2,i__3);
-                       for (i__ = j - 1; i__ >= i__1; --i__) {
-                           x[ix] -= temp * a[l + i__ + j * a_dim1];
-                           ix -= *incx;
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.f) {
-                       l = 1 - j;
-                       if (nounit) {
-                           x[j] /= a[j * a_dim1 + 1];
-                       }
-                       temp = x[j];
-/* Computing MIN */
-                       i__3 = *n, i__4 = j + *k;
-                       i__2 = min(i__3,i__4);
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           x[i__] -= temp * a[l + i__ + j * a_dim1];
-/* L50: */
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   kx += *incx;
-                   if (x[jx] != 0.f) {
-                       ix = kx;
-                       l = 1 - j;
-                       if (nounit) {
-                           x[jx] /= a[j * a_dim1 + 1];
-                       }
-                       temp = x[jx];
-/* Computing MIN */
-                       i__3 = *n, i__4 = j + *k;
-                       i__2 = min(i__3,i__4);
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           x[ix] -= temp * a[l + i__ + j * a_dim1];
-                           ix += *incx;
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := inv( A')*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   l = kplus1 - j;
-/* Computing MAX */
-                   i__2 = 1, i__3 = j - *k;
-                   i__4 = j - 1;
-                   for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                       temp -= a[l + i__ + j * a_dim1] * x[i__];
-/* L90: */
-                   }
-                   if (nounit) {
-                       temp /= a[kplus1 + j * a_dim1];
-                   }
-                   x[j] = temp;
-/* L100: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = kx;
-                   l = kplus1 - j;
-/* Computing MAX */
-                   i__4 = 1, i__2 = j - *k;
-                   i__3 = j - 1;
-                   for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                       temp -= a[l + i__ + j * a_dim1] * x[ix];
-                       ix += *incx;
-/* L110: */
-                   }
-                   if (nounit) {
-                       temp /= a[kplus1 + j * a_dim1];
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-                   if (j > *k) {
-                       kx += *incx;
-                   }
-/* L120: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   l = 1 - j;
-/* Computing MIN */
-                   i__1 = *n, i__3 = j + *k;
-                   i__4 = j + 1;
-                   for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
-                       temp -= a[l + i__ + j * a_dim1] * x[i__];
-/* L130: */
-                   }
-                   if (nounit) {
-                       temp /= a[j * a_dim1 + 1];
-                   }
-                   x[j] = temp;
-/* L140: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = kx;
-                   l = 1 - j;
-/* Computing MIN */
-                   i__4 = *n, i__1 = j + *k;
-                   i__3 = j + 1;
-                   for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
-                       temp -= a[l + i__ + j * a_dim1] * x[ix];
-                       ix -= *incx;
-/* L150: */
-                   }
-                   if (nounit) {
-                       temp /= a[j * a_dim1 + 1];
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-                   if (*n - j >= *k) {
-                       kx -= *incx;
-                   }
-/* L160: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of STBSV . */
-
-} /* stbsv_ */
-
-/* Subroutine */ int stpmv_(char *uplo, char *trans, char *diag, integer *n, 
-       real *ap, real *x, integer *incx, ftnlen uplo_len, ftnlen trans_len, 
-       ftnlen diag_len)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  STPMV  performs one of the matrix-vector operations */
-
-/*     x := A*x,   or   x := A'*x, */
-
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular matrix, supplied in packed form. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   x := A*x. */
-
-/*              TRANS = 'T' or 't'   x := A'*x. */
-
-/*              TRANS = 'C' or 'c'   x := A'*x. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  AP     - REAL             array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */
-/*           respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */
-/*           respectively, and so on. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --ap;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*incx == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("STPMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of AP are */
-/*     accessed sequentially with one pass through AP. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x:= A*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.f) {
-                       temp = x[j];
-                       k = kk;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           x[i__] += temp * ap[k];
-                           ++k;
-/* L10: */
-                       }
-                       if (nounit) {
-                           x[j] *= ap[kk + j - 1];
-                       }
-                   }
-                   kk += j;
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.f) {
-                       temp = x[jx];
-                       ix = kx;
-                       i__2 = kk + j - 2;
-                       for (k = kk; k <= i__2; ++k) {
-                           x[ix] += temp * ap[k];
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           x[jx] *= ap[kk + j - 1];
-                       }
-                   }
-                   jx += *incx;
-                   kk += j;
-/* L40: */
-               }
-           }
-       } else {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.f) {
-                       temp = x[j];
-                       k = kk;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           x[i__] += temp * ap[k];
-                           --k;
-/* L50: */
-                       }
-                       if (nounit) {
-                           x[j] *= ap[kk - *n + j];
-                       }
-                   }
-                   kk -= *n - j + 1;
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.f) {
-                       temp = x[jx];
-                       ix = kx;
-                       i__1 = kk - (*n - (j + 1));
-                       for (k = kk; k >= i__1; --k) {
-                           x[ix] += temp * ap[k];
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           x[jx] *= ap[kk - *n + j];
-                       }
-                   }
-                   jx -= *incx;
-                   kk -= *n - j + 1;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := A'*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   if (nounit) {
-                       temp *= ap[kk];
-                   }
-                   k = kk - 1;
-                   for (i__ = j - 1; i__ >= 1; --i__) {
-                       temp += ap[k] * x[i__];
-                       --k;
-/* L90: */
-                   }
-                   x[j] = temp;
-                   kk -= j;
-/* L100: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = jx;
-                   if (nounit) {
-                       temp *= ap[kk];
-                   }
-                   i__1 = kk - j + 1;
-                   for (k = kk - 1; k >= i__1; --k) {
-                       ix -= *incx;
-                       temp += ap[k] * x[ix];
-/* L110: */
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-                   kk -= j;
-/* L120: */
-               }
-           }
-       } else {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   if (nounit) {
-                       temp *= ap[kk];
-                   }
-                   k = kk + 1;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       temp += ap[k] * x[i__];
-                       ++k;
-/* L130: */
-                   }
-                   x[j] = temp;
-                   kk += *n - j + 1;
-/* L140: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = jx;
-                   if (nounit) {
-                       temp *= ap[kk];
-                   }
-                   i__2 = kk + *n - j;
-                   for (k = kk + 1; k <= i__2; ++k) {
-                       ix += *incx;
-                       temp += ap[k] * x[ix];
-/* L150: */
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-                   kk += *n - j + 1;
-/* L160: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of STPMV . */
-
-} /* stpmv_ */
-
-/* Subroutine */ int stpsv_(char *uplo, char *trans, char *diag, integer *n, 
-       real *ap, real *x, integer *incx, ftnlen uplo_len, ftnlen trans_len, 
-       ftnlen diag_len)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  STPSV  solves one of the systems of equations */
-
-/*     A*x = b,   or   A'*x = b, */
-
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular matrix, supplied in packed form. */
-
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   A*x = b. */
-
-/*              TRANS = 'T' or 't'   A'*x = b. */
-
-/*              TRANS = 'C' or 'c'   A'*x = b. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  AP     - REAL             array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */
-/*           respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */
-/*           respectively, and so on. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --ap;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*incx == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("STPSV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of AP are */
-/*     accessed sequentially with one pass through AP. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x := inv( A )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.f) {
-                       if (nounit) {
-                           x[j] /= ap[kk];
-                       }
-                       temp = x[j];
-                       k = kk - 1;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           x[i__] -= temp * ap[k];
-                           --k;
-/* L10: */
-                       }
-                   }
-                   kk -= j;
-/* L20: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.f) {
-                       if (nounit) {
-                           x[jx] /= ap[kk];
-                       }
-                       temp = x[jx];
-                       ix = jx;
-                       i__1 = kk - j + 1;
-                       for (k = kk - 1; k >= i__1; --k) {
-                           ix -= *incx;
-                           x[ix] -= temp * ap[k];
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-                   kk -= j;
-/* L40: */
-               }
-           }
-       } else {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.f) {
-                       if (nounit) {
-                           x[j] /= ap[kk];
-                       }
-                       temp = x[j];
-                       k = kk + 1;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           x[i__] -= temp * ap[k];
-                           ++k;
-/* L50: */
-                       }
-                   }
-                   kk += *n - j + 1;
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.f) {
-                       if (nounit) {
-                           x[jx] /= ap[kk];
-                       }
-                       temp = x[jx];
-                       ix = jx;
-                       i__2 = kk + *n - j;
-                       for (k = kk + 1; k <= i__2; ++k) {
-                           ix += *incx;
-                           x[ix] -= temp * ap[k];
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-                   kk += *n - j + 1;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := inv( A' )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   k = kk;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp -= ap[k] * x[i__];
-                       ++k;
-/* L90: */
-                   }
-                   if (nounit) {
-                       temp /= ap[kk + j - 1];
-                   }
-                   x[j] = temp;
-                   kk += j;
-/* L100: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = kx;
-                   i__2 = kk + j - 2;
-                   for (k = kk; k <= i__2; ++k) {
-                       temp -= ap[k] * x[ix];
-                       ix += *incx;
-/* L110: */
-                   }
-                   if (nounit) {
-                       temp /= ap[kk + j - 1];
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-                   kk += j;
-/* L120: */
-               }
-           }
-       } else {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   k = kk;
-                   i__1 = j + 1;
-                   for (i__ = *n; i__ >= i__1; --i__) {
-                       temp -= ap[k] * x[i__];
-                       --k;
-/* L130: */
-                   }
-                   if (nounit) {
-                       temp /= ap[kk - *n + j];
-                   }
-                   x[j] = temp;
-                   kk -= *n - j + 1;
-/* L140: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = kx;
-                   i__1 = kk - (*n - (j + 1));
-                   for (k = kk; k >= i__1; --k) {
-                       temp -= ap[k] * x[ix];
-                       ix -= *incx;
-/* L150: */
-                   }
-                   if (nounit) {
-                       temp /= ap[kk - *n + j];
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-                   kk -= *n - j + 1;
-/* L160: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of STPSV . */
-
-} /* stpsv_ */
-
-/* Subroutine */ int strmm_(char *side, char *uplo, char *transa, char *diag, 
-       integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, 
-       integer *ldb, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, 
-       ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
-
-    /* Local variables */
-    static integer i__, j, k, info;
-    static real temp;
-    static logical lside;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  STRMM  performs one of the matrix-matrix operations */
-
-/*     B := alpha*op( A )*B,   or   B := alpha*B*op( A ), */
-
-/*  where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or */
-/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
-
-/*     op( A ) = A   or   op( A ) = A'. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE specifies whether  op( A ) multiplies B from */
-/*           the left or right as follows: */
-
-/*              SIDE = 'L' or 'l'   B := alpha*op( A )*B. */
-
-/*              SIDE = 'R' or 'r'   B := alpha*B*op( A ). */
-
-/*           Unchanged on exit. */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix A is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANSA - CHARACTER*1. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-
-/*              TRANSA = 'N' or 'n'   op( A ) = A. */
-
-/*              TRANSA = 'T' or 't'   op( A ) = A'. */
-
-/*              TRANSA = 'C' or 'c'   op( A ) = A'. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit triangular */
-/*           as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of B. M must be at */
-/*           least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of B.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
-/*           zero then  A is not referenced and  B need not be set before */
-/*           entry. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, k ), where k is m */
-/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
-/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
-/*           upper triangular part of the array  A must contain the upper */
-/*           triangular matrix  and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
-/*           lower triangular part of the array  A must contain the lower */
-/*           triangular matrix  and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
-/*           A  are not referenced either,  but are assumed to be  unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
-/*           then LDA must be at least max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  B      - REAL             array of DIMENSION ( LDB, n ). */
-/*           Before entry,  the leading  m by n part of the array  B must */
-/*           contain the matrix  B,  and  on exit  is overwritten  by the */
-/*           transformed matrix. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-
-    /* Function Body */
-    lside = lsame_(side, "L", (ftnlen)1, (ftnlen)1);
-    if (lside) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! lside && ! lsame_(side, "R", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(transa, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa,
-            "T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 3;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 4;
-    } else if (*m < 0) {
-       info = 5;
-    } else if (*n < 0) {
-       info = 6;
-    } else if (*lda < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldb < max(1,*m)) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("STRMM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (*alpha == 0.f) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = *m;
-           for (i__ = 1; i__ <= i__2; ++i__) {
-               b[i__ + j * b_dim1] = 0.f;
-/* L10: */
-           }
-/* L20: */
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lside) {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*           Form  B := alpha*A*B. */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (k = 1; k <= i__2; ++k) {
-                       if (b[k + j * b_dim1] != 0.f) {
-                           temp = *alpha * b[k + j * b_dim1];
-                           i__3 = k - 1;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] += temp * a[i__ + k * 
-                                       a_dim1];
-/* L30: */
-                           }
-                           if (nounit) {
-                               temp *= a[k + k * a_dim1];
-                           }
-                           b[k + j * b_dim1] = temp;
-                       }
-/* L40: */
-                   }
-/* L50: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (k = *m; k >= 1; --k) {
-                       if (b[k + j * b_dim1] != 0.f) {
-                           temp = *alpha * b[k + j * b_dim1];
-                           b[k + j * b_dim1] = temp;
-                           if (nounit) {
-                               b[k + j * b_dim1] *= a[k + k * a_dim1];
-                           }
-                           i__2 = *m;
-                           for (i__ = k + 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] += temp * a[i__ + k * 
-                                       a_dim1];
-/* L60: */
-                           }
-                       }
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       } else {
-
-/*           Form  B := alpha*A'*B. */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (i__ = *m; i__ >= 1; --i__) {
-                       temp = b[i__ + j * b_dim1];
-                       if (nounit) {
-                           temp *= a[i__ + i__ * a_dim1];
-                       }
-                       i__2 = i__ - 1;
-                       for (k = 1; k <= i__2; ++k) {
-                           temp += a[k + i__ * a_dim1] * b[k + j * b_dim1];
-/* L90: */
-                       }
-                       b[i__ + j * b_dim1] = *alpha * temp;
-/* L100: */
-                   }
-/* L110: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp = b[i__ + j * b_dim1];
-                       if (nounit) {
-                           temp *= a[i__ + i__ * a_dim1];
-                       }
-                       i__3 = *m;
-                       for (k = i__ + 1; k <= i__3; ++k) {
-                           temp += a[k + i__ * a_dim1] * b[k + j * b_dim1];
-/* L120: */
-                       }
-                       b[i__ + j * b_dim1] = *alpha * temp;
-/* L130: */
-                   }
-/* L140: */
-               }
-           }
-       }
-    } else {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*           Form  B := alpha*B*A. */
-
-           if (upper) {
-               for (j = *n; j >= 1; --j) {
-                   temp = *alpha;
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   i__1 = *m;
-                   for (i__ = 1; i__ <= i__1; ++i__) {
-                       b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
-/* L150: */
-                   }
-                   i__1 = j - 1;
-                   for (k = 1; k <= i__1; ++k) {
-                       if (a[k + j * a_dim1] != 0.f) {
-                           temp = *alpha * a[k + j * a_dim1];
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] += temp * b[i__ + k * 
-                                       b_dim1];
-/* L160: */
-                           }
-                       }
-/* L170: */
-                   }
-/* L180: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = *alpha;
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
-/* L190: */
-                   }
-                   i__2 = *n;
-                   for (k = j + 1; k <= i__2; ++k) {
-                       if (a[k + j * a_dim1] != 0.f) {
-                           temp = *alpha * a[k + j * a_dim1];
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] += temp * b[i__ + k * 
-                                       b_dim1];
-/* L200: */
-                           }
-                       }
-/* L210: */
-                   }
-/* L220: */
-               }
-           }
-       } else {
-
-/*           Form  B := alpha*B*A'. */
-
-           if (upper) {
-               i__1 = *n;
-               for (k = 1; k <= i__1; ++k) {
-                   i__2 = k - 1;
-                   for (j = 1; j <= i__2; ++j) {
-                       if (a[j + k * a_dim1] != 0.f) {
-                           temp = *alpha * a[j + k * a_dim1];
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] += temp * b[i__ + k * 
-                                       b_dim1];
-/* L230: */
-                           }
-                       }
-/* L240: */
-                   }
-                   temp = *alpha;
-                   if (nounit) {
-                       temp *= a[k + k * a_dim1];
-                   }
-                   if (temp != 1.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
-/* L250: */
-                       }
-                   }
-/* L260: */
-               }
-           } else {
-               for (k = *n; k >= 1; --k) {
-                   i__1 = *n;
-                   for (j = k + 1; j <= i__1; ++j) {
-                       if (a[j + k * a_dim1] != 0.f) {
-                           temp = *alpha * a[j + k * a_dim1];
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] += temp * b[i__ + k * 
-                                       b_dim1];
-/* L270: */
-                           }
-                       }
-/* L280: */
-                   }
-                   temp = *alpha;
-                   if (nounit) {
-                       temp *= a[k + k * a_dim1];
-                   }
-                   if (temp != 1.f) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
-/* L290: */
-                       }
-                   }
-/* L300: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of STRMM . */
-
-} /* strmm_ */
-
-/* Subroutine */ int strmv_(char *uplo, char *trans, char *diag, integer *n, 
-       real *a, integer *lda, real *x, integer *incx, ftnlen uplo_len, 
-       ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  STRMV  performs one of the matrix-vector operations */
-
-/*     x := A*x,   or   x := A'*x, */
-
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   x := A*x. */
-
-/*              TRANS = 'T' or 't'   x := A'*x. */
-
-/*              TRANS = 'C' or 'c'   x := A'*x. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular matrix and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular matrix and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced either, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,*n)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    }
-    if (info != 0) {
-       xerbla_("STRMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x := A*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.f) {
-                       temp = x[j];
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           x[i__] += temp * a[i__ + j * a_dim1];
-/* L10: */
-                       }
-                       if (nounit) {
-                           x[j] *= a[j + j * a_dim1];
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.f) {
-                       temp = x[jx];
-                       ix = kx;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           x[ix] += temp * a[i__ + j * a_dim1];
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           x[jx] *= a[j + j * a_dim1];
-                       }
-                   }
-                   jx += *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.f) {
-                       temp = x[j];
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           x[i__] += temp * a[i__ + j * a_dim1];
-/* L50: */
-                       }
-                       if (nounit) {
-                           x[j] *= a[j + j * a_dim1];
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.f) {
-                       temp = x[jx];
-                       ix = kx;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           x[ix] += temp * a[i__ + j * a_dim1];
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           x[jx] *= a[j + j * a_dim1];
-                       }
-                   }
-                   jx -= *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := A'*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   for (i__ = j - 1; i__ >= 1; --i__) {
-                       temp += a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-                   }
-                   x[j] = temp;
-/* L100: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = jx;
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   for (i__ = j - 1; i__ >= 1; --i__) {
-                       ix -= *incx;
-                       temp += a[i__ + j * a_dim1] * x[ix];
-/* L110: */
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-/* L120: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       temp += a[i__ + j * a_dim1] * x[i__];
-/* L130: */
-                   }
-                   x[j] = temp;
-/* L140: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = jx;
-                   if (nounit) {
-                       temp *= a[j + j * a_dim1];
-                   }
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       ix += *incx;
-                       temp += a[i__ + j * a_dim1] * x[ix];
-/* L150: */
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-/* L160: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of STRMV . */
-
-} /* strmv_ */
-
-/* Subroutine */ int strsm_(char *side, char *uplo, char *transa, char *diag, 
-       integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, 
-       integer *ldb, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, 
-       ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
-
-    /* Local variables */
-    static integer i__, j, k, info;
-    static real temp;
-    static logical lside;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  STRSM  solves one of the matrix equations */
-
-/*     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B, */
-
-/*  where alpha is a scalar, X and B are m by n matrices, A is a unit, or */
-/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
-
-/*     op( A ) = A   or   op( A ) = A'. */
-
-/*  The matrix X is overwritten on B. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  SIDE   - CHARACTER*1. */
-/*           On entry, SIDE specifies whether op( A ) appears on the left */
-/*           or right of X as follows: */
-
-/*              SIDE = 'L' or 'l'   op( A )*X = alpha*B. */
-
-/*              SIDE = 'R' or 'r'   X*op( A ) = alpha*B. */
-
-/*           Unchanged on exit. */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix A is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANSA - CHARACTER*1. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-
-/*              TRANSA = 'N' or 'n'   op( A ) = A. */
-
-/*              TRANSA = 'T' or 't'   op( A ) = A'. */
-
-/*              TRANSA = 'C' or 'c'   op( A ) = A'. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit triangular */
-/*           as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of B. M must be at */
-/*           least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of B.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - REAL            . */
-/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
-/*           zero then  A is not referenced and  B need not be set before */
-/*           entry. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, k ), where k is m */
-/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
-/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
-/*           upper triangular part of the array  A must contain the upper */
-/*           triangular matrix  and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
-/*           lower triangular part of the array  A must contain the lower */
-/*           triangular matrix  and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
-/*           A  are not referenced either,  but are assumed to be  unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
-/*           then LDA must be at least max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  B      - REAL             array of DIMENSION ( LDB, n ). */
-/*           Before entry,  the leading  m by n part of the array  B must */
-/*           contain  the  right-hand  side  matrix  B,  and  on exit  is */
-/*           overwritten by the solution matrix  X. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-
-    /* Function Body */
-    lside = lsame_(side, "L", (ftnlen)1, (ftnlen)1);
-    if (lside) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! lside && ! lsame_(side, "R", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(transa, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa,
-            "T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 3;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 4;
-    } else if (*m < 0) {
-       info = 5;
-    } else if (*n < 0) {
-       info = 6;
-    } else if (*lda < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldb < max(1,*m)) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("STRSM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (*alpha == 0.f) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = *m;
-           for (i__ = 1; i__ <= i__2; ++i__) {
-               b[i__ + j * b_dim1] = 0.f;
-/* L10: */
-           }
-/* L20: */
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lside) {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*           Form  B := alpha*inv( A )*B. */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (*alpha != 1.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
-                                   ;
-/* L30: */
-                       }
-                   }
-                   for (k = *m; k >= 1; --k) {
-                       if (b[k + j * b_dim1] != 0.f) {
-                           if (nounit) {
-                               b[k + j * b_dim1] /= a[k + k * a_dim1];
-                           }
-                           i__2 = k - 1;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[
-                                       i__ + k * a_dim1];
-/* L40: */
-                           }
-                       }
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (*alpha != 1.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
-                                   ;
-/* L70: */
-                       }
-                   }
-                   i__2 = *m;
-                   for (k = 1; k <= i__2; ++k) {
-                       if (b[k + j * b_dim1] != 0.f) {
-                           if (nounit) {
-                               b[k + j * b_dim1] /= a[k + k * a_dim1];
-                           }
-                           i__3 = *m;
-                           for (i__ = k + 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[
-                                       i__ + k * a_dim1];
-/* L80: */
-                           }
-                       }
-/* L90: */
-                   }
-/* L100: */
-               }
-           }
-       } else {
-
-/*           Form  B := alpha*inv( A' )*B. */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp = *alpha * b[i__ + j * b_dim1];
-                       i__3 = i__ - 1;
-                       for (k = 1; k <= i__3; ++k) {
-                           temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1];
-/* L110: */
-                       }
-                       if (nounit) {
-                           temp /= a[i__ + i__ * a_dim1];
-                       }
-                       b[i__ + j * b_dim1] = temp;
-/* L120: */
-                   }
-/* L130: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (i__ = *m; i__ >= 1; --i__) {
-                       temp = *alpha * b[i__ + j * b_dim1];
-                       i__2 = *m;
-                       for (k = i__ + 1; k <= i__2; ++k) {
-                           temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1];
-/* L140: */
-                       }
-                       if (nounit) {
-                           temp /= a[i__ + i__ * a_dim1];
-                       }
-                       b[i__ + j * b_dim1] = temp;
-/* L150: */
-                   }
-/* L160: */
-               }
-           }
-       }
-    } else {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*           Form  B := alpha*B*inv( A ). */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (*alpha != 1.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
-                                   ;
-/* L170: */
-                       }
-                   }
-                   i__2 = j - 1;
-                   for (k = 1; k <= i__2; ++k) {
-                       if (a[k + j * a_dim1] != 0.f) {
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[
-                                       i__ + k * b_dim1];
-/* L180: */
-                           }
-                       }
-/* L190: */
-                   }
-                   if (nounit) {
-                       temp = 1.f / a[j + j * a_dim1];
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
-/* L200: */
-                       }
-                   }
-/* L210: */
-               }
-           } else {
-               for (j = *n; j >= 1; --j) {
-                   if (*alpha != 1.f) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1]
-                                   ;
-/* L220: */
-                       }
-                   }
-                   i__1 = *n;
-                   for (k = j + 1; k <= i__1; ++k) {
-                       if (a[k + j * a_dim1] != 0.f) {
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[
-                                       i__ + k * b_dim1];
-/* L230: */
-                           }
-                       }
-/* L240: */
-                   }
-                   if (nounit) {
-                       temp = 1.f / a[j + j * a_dim1];
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1];
-/* L250: */
-                       }
-                   }
-/* L260: */
-               }
-           }
-       } else {
-
-/*           Form  B := alpha*B*inv( A' ). */
-
-           if (upper) {
-               for (k = *n; k >= 1; --k) {
-                   if (nounit) {
-                       temp = 1.f / a[k + k * a_dim1];
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
-/* L270: */
-                       }
-                   }
-                   i__1 = k - 1;
-                   for (j = 1; j <= i__1; ++j) {
-                       if (a[j + k * a_dim1] != 0.f) {
-                           temp = a[j + k * a_dim1];
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               b[i__ + j * b_dim1] -= temp * b[i__ + k * 
-                                       b_dim1];
-/* L280: */
-                           }
-                       }
-/* L290: */
-                   }
-                   if (*alpha != 1.f) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1]
-                                   ;
-/* L300: */
-                       }
-                   }
-/* L310: */
-               }
-           } else {
-               i__1 = *n;
-               for (k = 1; k <= i__1; ++k) {
-                   if (nounit) {
-                       temp = 1.f / a[k + k * a_dim1];
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1];
-/* L320: */
-                       }
-                   }
-                   i__2 = *n;
-                   for (j = k + 1; j <= i__2; ++j) {
-                       if (a[j + k * a_dim1] != 0.f) {
-                           temp = a[j + k * a_dim1];
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               b[i__ + j * b_dim1] -= temp * b[i__ + k * 
-                                       b_dim1];
-/* L330: */
-                           }
-                       }
-/* L340: */
-                   }
-                   if (*alpha != 1.f) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1]
-                                   ;
-/* L350: */
-                       }
-                   }
-/* L360: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of STRSM . */
-
-} /* strsm_ */
-
-/* Subroutine */ int strsv_(char *uplo, char *trans, char *diag, integer *n, 
-       real *a, integer *lda, real *x, integer *incx, ftnlen uplo_len, 
-       ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2;
-
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static real temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  STRSV  solves one of the systems of equations */
-
-/*     A*x = b,   or   A'*x = b, */
-
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular matrix. */
-
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   A*x = b. */
-
-/*              TRANS = 'T' or 't'   A'*x = b. */
-
-/*              TRANS = 'C' or 'c'   A'*x = b. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  A      - REAL             array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular matrix and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular matrix and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced either, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  X      - REAL             array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,*n)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    }
-    if (info != 0) {
-       xerbla_("STRSV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x := inv( A )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   if (x[j] != 0.f) {
-                       if (nounit) {
-                           x[j] /= a[j + j * a_dim1];
-                       }
-                       temp = x[j];
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           x[i__] -= temp * a[i__ + j * a_dim1];
-/* L10: */
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   if (x[jx] != 0.f) {
-                       if (nounit) {
-                           x[jx] /= a[j + j * a_dim1];
-                       }
-                       temp = x[jx];
-                       ix = jx;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           ix -= *incx;
-                           x[ix] -= temp * a[i__ + j * a_dim1];
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[j] != 0.f) {
-                       if (nounit) {
-                           x[j] /= a[j + j * a_dim1];
-                       }
-                       temp = x[j];
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           x[i__] -= temp * a[i__ + j * a_dim1];
-/* L50: */
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (x[jx] != 0.f) {
-                       if (nounit) {
-                           x[jx] /= a[j + j * a_dim1];
-                       }
-                       temp = x[jx];
-                       ix = jx;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           ix += *incx;
-                           x[ix] -= temp * a[i__ + j * a_dim1];
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := inv( A' )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[j];
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp -= a[i__ + j * a_dim1] * x[i__];
-/* L90: */
-                   }
-                   if (nounit) {
-                       temp /= a[j + j * a_dim1];
-                   }
-                   x[j] = temp;
-/* L100: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp = x[jx];
-                   ix = kx;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       temp -= a[i__ + j * a_dim1] * x[ix];
-                       ix += *incx;
-/* L110: */
-                   }
-                   if (nounit) {
-                       temp /= a[j + j * a_dim1];
-                   }
-                   x[jx] = temp;
-                   jx += *incx;
-/* L120: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   temp = x[j];
-                   i__1 = j + 1;
-                   for (i__ = *n; i__ >= i__1; --i__) {
-                       temp -= a[i__ + j * a_dim1] * x[i__];
-/* L130: */
-                   }
-                   if (nounit) {
-                       temp /= a[j + j * a_dim1];
-                   }
-                   x[j] = temp;
-/* L140: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   temp = x[jx];
-                   ix = kx;
-                   i__1 = j + 1;
-                   for (i__ = *n; i__ >= i__1; --i__) {
-                       temp -= a[i__ + j * a_dim1] * x[ix];
-                       ix -= *incx;
-/* L150: */
-                   }
-                   if (nounit) {
-                       temp /= a[j + j * a_dim1];
-                   }
-                   x[jx] = temp;
-                   jx -= *incx;
-/* L160: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of STRSV . */
-
-} /* strsv_ */
-
-/* Subroutine */ int xerbla_(char *srname, integer *info, ftnlen srname_len)
-{
-    /* Format strings */
-    static char fmt_9999[] = "(\002 ** On entry to \002,a6,\002 parameter nu"
-           "mber \002,i2,\002 had \002,\002an illegal value\002)";
-
-    /* Builtin functions */
-    integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void);
-    /* Subroutine */ int s_stop(char *, ftnlen);
-
-    /* Fortran I/O blocks */
-    static cilist io___916 = { 0, 6, 0, fmt_9999, 0 };
-
-
-
-/*  -- LAPACK auxiliary routine (preliminary version) -- */
-/*     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */
-/*     Courant Institute, Argonne National Lab, and Rice University */
-/*     February 29, 1992 */
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  XERBLA  is an error handler for the LAPACK routines. */
-/*  It is called by an LAPACK routine if an input parameter has an */
-/*  invalid value.  A message is printed and execution stops. */
-
-/*  Installers may consider modifying the STOP statement in order to */
-/*  call system-specific exception-handling facilities. */
-
-/*  Arguments */
-/*  ========= */
-
-/*  SRNAME  (input) CHARACTER*6 */
-/*          The name of the routine which called XERBLA. */
-
-/*  INFO    (input) INTEGER */
-/*          The position of the invalid parameter in the parameter list */
-/*          of the calling routine. */
-
-
-    s_wsfe(&io___916);
-    do_fio(&c__1, srname, (ftnlen)6);
-    do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer));
-    e_wsfe();
-
-    s_stop("", (ftnlen)0);
-
-
-/*     End of XERBLA */
-
-    return 0;
-} /* xerbla_ */
-
-/* Subroutine */ int zaxpy_(integer *n, doublecomplex *za, doublecomplex *zx, 
-       integer *incx, doublecomplex *zy, integer *incy)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4;
-    doublecomplex z__1, z__2;
-
-    /* Local variables */
-    static integer i__, ix, iy;
-    extern doublereal dcabs1_(doublecomplex *);
-
-
-/*     constant times a vector plus a vector. */
-/*     jack dongarra, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-    /* Parameter adjustments */
-    --zy;
-    --zx;
-
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (dcabs1_(za) == 0.) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = iy;
-       i__3 = iy;
-       i__4 = ix;
-       z__2.r = za->r * zx[i__4].r - za->i * zx[i__4].i, z__2.i = za->r * zx[
-               i__4].i + za->i * zx[i__4].r;
-       z__1.r = zy[i__3].r + z__2.r, z__1.i = zy[i__3].i + z__2.i;
-       zy[i__2].r = z__1.r, zy[i__2].i = z__1.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*        code for both increments equal to 1 */
-
-L20:
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       i__3 = i__;
-       i__4 = i__;
-       z__2.r = za->r * zx[i__4].r - za->i * zx[i__4].i, z__2.i = za->r * zx[
-               i__4].i + za->i * zx[i__4].r;
-       z__1.r = zy[i__3].r + z__2.r, z__1.i = zy[i__3].i + z__2.i;
-       zy[i__2].r = z__1.r, zy[i__2].i = z__1.i;
-/* L30: */
-    }
-    return 0;
-} /* zaxpy_ */
-
-/* Subroutine */ int zcopy_(integer *n, doublecomplex *zx, integer *incx, 
-       doublecomplex *zy, integer *incy)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-
-    /* Local variables */
-    static integer i__, ix, iy;
-
-
-/*     copies a vector, x, to a vector, y. */
-/*     jack dongarra, linpack, 4/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --zy;
-    --zx;
-
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = iy;
-       i__3 = ix;
-       zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*        code for both increments equal to 1 */
-
-L20:
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       i__3 = i__;
-       zy[i__2].r = zx[i__3].r, zy[i__2].i = zx[i__3].i;
-/* L30: */
-    }
-    return 0;
-} /* zcopy_ */
-
-/* Double Complex */ VOID zdotc_(doublecomplex * ret_val, integer *n, 
-       doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy)
-{
-    /* System generated locals */
-    integer i__1, i__2;
-    doublecomplex z__1, z__2, z__3;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, ix, iy;
-    static doublecomplex ztemp;
-
-
-/*     forms the dot product of a vector. */
-/*     jack dongarra, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-    /* Parameter adjustments */
-    --zy;
-    --zx;
-
-    /* Function Body */
-    ztemp.r = 0., ztemp.i = 0.;
-     ret_val->r = 0.,  ret_val->i = 0.;
-    if (*n <= 0) {
-       return ;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       d_cnjg(&z__3, &zx[ix]);
-       i__2 = iy;
-       z__2.r = z__3.r * zy[i__2].r - z__3.i * zy[i__2].i, z__2.i = z__3.r * 
-               zy[i__2].i + z__3.i * zy[i__2].r;
-       z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i;
-       ztemp.r = z__1.r, ztemp.i = z__1.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-     ret_val->r = ztemp.r,  ret_val->i = ztemp.i;
-    return ;
-
-/*        code for both increments equal to 1 */
-
-L20:
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       d_cnjg(&z__3, &zx[i__]);
-       i__2 = i__;
-       z__2.r = z__3.r * zy[i__2].r - z__3.i * zy[i__2].i, z__2.i = z__3.r * 
-               zy[i__2].i + z__3.i * zy[i__2].r;
-       z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i;
-       ztemp.r = z__1.r, ztemp.i = z__1.i;
-/* L30: */
-    }
-     ret_val->r = ztemp.r,  ret_val->i = ztemp.i;
-    return ;
-} /* zdotc_ */
-
-/* Double Complex */ VOID zdotu_(doublecomplex * ret_val, integer *n, 
-       doublecomplex *zx, integer *incx, doublecomplex *zy, integer *incy)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    doublecomplex z__1, z__2;
-
-    /* Local variables */
-    static integer i__, ix, iy;
-    static doublecomplex ztemp;
-
-
-/*     forms the dot product of two vectors. */
-/*     jack dongarra, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-    /* Parameter adjustments */
-    --zy;
-    --zx;
-
-    /* Function Body */
-    ztemp.r = 0., ztemp.i = 0.;
-     ret_val->r = 0.,  ret_val->i = 0.;
-    if (*n <= 0) {
-       return ;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*        code for unequal increments or equal increments */
-/*          not equal to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       i__3 = iy;
-       z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, z__2.i = 
-               zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r;
-       z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i;
-       ztemp.r = z__1.r, ztemp.i = z__1.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-     ret_val->r = ztemp.r,  ret_val->i = ztemp.i;
-    return ;
-
-/*        code for both increments equal to 1 */
-
-L20:
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       i__3 = i__;
-       z__2.r = zx[i__2].r * zy[i__3].r - zx[i__2].i * zy[i__3].i, z__2.i = 
-               zx[i__2].r * zy[i__3].i + zx[i__2].i * zy[i__3].r;
-       z__1.r = ztemp.r + z__2.r, z__1.i = ztemp.i + z__2.i;
-       ztemp.r = z__1.r, ztemp.i = z__1.i;
-/* L30: */
-    }
-     ret_val->r = ztemp.r,  ret_val->i = ztemp.i;
-    return ;
-} /* zdotu_ */
-
-/* Subroutine */ int zdrot_(integer *n, doublecomplex *zx, integer *incx, 
-       doublecomplex *zy, integer *incy, doublereal *c__, doublereal *s)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4;
-    doublecomplex z__1, z__2, z__3;
-
-    /* Local variables */
-    static integer i__, ix, iy;
-    static doublecomplex ztemp;
-
-
-/*     applies a plane rotation, where the cos and sin (c and s) are */
-/*     double precision and the vectors zx and zy are double complex. */
-/*     jack dongarra, linpack, 3/11/78. */
-
-
-    /* Parameter adjustments */
-    --zy;
-    --zx;
-
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*       code for unequal increments or equal increments not equal */
-/*         to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       z__2.r = *c__ * zx[i__2].r, z__2.i = *c__ * zx[i__2].i;
-       i__3 = iy;
-       z__3.r = *s * zy[i__3].r, z__3.i = *s * zy[i__3].i;
-       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-       ztemp.r = z__1.r, ztemp.i = z__1.i;
-       i__2 = iy;
-       i__3 = iy;
-       z__2.r = *c__ * zy[i__3].r, z__2.i = *c__ * zy[i__3].i;
-       i__4 = ix;
-       z__3.r = *s * zx[i__4].r, z__3.i = *s * zx[i__4].i;
-       z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i;
-       zy[i__2].r = z__1.r, zy[i__2].i = z__1.i;
-       i__2 = ix;
-       zx[i__2].r = ztemp.r, zx[i__2].i = ztemp.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*       code for both increments equal to 1 */
-
-L20:
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       z__2.r = *c__ * zx[i__2].r, z__2.i = *c__ * zx[i__2].i;
-       i__3 = i__;
-       z__3.r = *s * zy[i__3].r, z__3.i = *s * zy[i__3].i;
-       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-       ztemp.r = z__1.r, ztemp.i = z__1.i;
-       i__2 = i__;
-       i__3 = i__;
-       z__2.r = *c__ * zy[i__3].r, z__2.i = *c__ * zy[i__3].i;
-       i__4 = i__;
-       z__3.r = *s * zx[i__4].r, z__3.i = *s * zx[i__4].i;
-       z__1.r = z__2.r - z__3.r, z__1.i = z__2.i - z__3.i;
-       zy[i__2].r = z__1.r, zy[i__2].i = z__1.i;
-       i__2 = i__;
-       zx[i__2].r = ztemp.r, zx[i__2].i = ztemp.i;
-/* L30: */
-    }
-    return 0;
-} /* zdrot_ */
-
-/* Subroutine */ int zdscal_(integer *n, doublereal *da, doublecomplex *zx, 
-       integer *incx)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    doublecomplex z__1, z__2;
-
-    /* Local variables */
-    static integer i__, ix;
-
-
-/*     scales a vector by a constant. */
-/*     jack dongarra, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --zx;
-
-    /* Function Body */
-    if (*n <= 0 || *incx <= 0) {
-       return 0;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-
-/*        code for increment not equal to 1 */
-
-    ix = 1;
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       z__2.r = *da, z__2.i = 0.;
-       i__3 = ix;
-       z__1.r = z__2.r * zx[i__3].r - z__2.i * zx[i__3].i, z__1.i = z__2.r * 
-               zx[i__3].i + z__2.i * zx[i__3].r;
-       zx[i__2].r = z__1.r, zx[i__2].i = z__1.i;
-       ix += *incx;
-/* L10: */
-    }
-    return 0;
-
-/*        code for increment equal to 1 */
-
-L20:
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       z__2.r = *da, z__2.i = 0.;
-       i__3 = i__;
-       z__1.r = z__2.r * zx[i__3].r - z__2.i * zx[i__3].i, z__1.i = z__2.r * 
-               zx[i__3].i + z__2.i * zx[i__3].r;
-       zx[i__2].r = z__1.r, zx[i__2].i = z__1.i;
-/* L30: */
-    }
-    return 0;
-} /* zdscal_ */
-
-/* Subroutine */ int zgbmv_(char *trans, integer *m, integer *n, integer *kl, 
-       integer *ku, doublecomplex *alpha, doublecomplex *a, integer *lda, 
-       doublecomplex *x, integer *incx, doublecomplex *beta, doublecomplex *
-       y, integer *incy, ftnlen trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
-    doublecomplex z__1, z__2, z__3;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, k, ix, iy, jx, jy, kx, ky, kup1, info;
-    static doublecomplex temp;
-    static integer lenx, leny;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZGBMV  performs one of the matrix-vector operations */
-
-/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or */
-
-/*     y := alpha*conjg( A' )*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are vectors and A is an */
-/*  m by n band matrix, with kl sub-diagonals and ku super-diagonals. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
-
-/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
-
-/*              TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  KL     - INTEGER. */
-/*           On entry, KL specifies the number of sub-diagonals of the */
-/*           matrix A. KL must satisfy  0 .le. KL. */
-/*           Unchanged on exit. */
-
-/*  KU     - INTEGER. */
-/*           On entry, KU specifies the number of super-diagonals of the */
-/*           matrix A. KU must satisfy  0 .le. KU. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading ( kl + ku + 1 ) by n part of the */
-/*           array A must contain the matrix of coefficients, supplied */
-/*           column by column, with the leading diagonal of the matrix in */
-/*           row ( ku + 1 ) of the array, the first super-diagonal */
-/*           starting at position 2 in row ku, the first sub-diagonal */
-/*           starting at position 1 in row ( ku + 2 ), and so on. */
-/*           Elements in the array A that do not correspond to elements */
-/*           in the band matrix (such as the top left ku by ku triangle) */
-/*           are not referenced. */
-/*           The following program segment will transfer a band matrix */
-/*           from conventional full matrix storage to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    K = KU + 1 - J */
-/*                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL ) */
-/*                       A( K + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( kl + ku + 1 ). */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX*16       array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX*16      . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX*16       array of DIMENSION at least */
-/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
-
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "T", (
-           ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (ftnlen)1)
-           ) {
-       info = 1;
-    } else if (*m < 0) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*kl < 0) {
-       info = 4;
-    } else if (*ku < 0) {
-       info = 5;
-    } else if (*lda < *kl + *ku + 1) {
-       info = 8;
-    } else if (*incx == 0) {
-       info = 10;
-    } else if (*incy == 0) {
-       info = 13;
-    }
-    if (info != 0) {
-       xerbla_("ZGBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 
-           1. && beta->i == 0.)) {
-       return 0;
-    }
-
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       lenx = *n;
-       leny = *m;
-    } else {
-       lenx = *m;
-       leny = *n;
-    }
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (lenx - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (leny - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the band part of A. */
-
-/*     First form  y := beta*y. */
-
-    if (beta->r != 1. || beta->i != 0.) {
-       if (*incy == 1) {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-    kup1 = *ku + 1;
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y := alpha*A*x + y. */
-
-       jx = kx;
-       if (*incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   i__2 = jx;
-                   z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   k = kup1 - j;
-/* Computing MAX */
-                   i__2 = 1, i__3 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__4 = min(i__5,i__6);
-                   for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                       i__2 = i__;
-                       i__3 = i__;
-                       i__5 = k + i__ + j * a_dim1;
-                       z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               z__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + 
-                               z__2.i;
-                       y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L50: */
-                   }
-               }
-               jx += *incx;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__4 = jx;
-               if (x[i__4].r != 0. || x[i__4].i != 0.) {
-                   i__4 = jx;
-                   z__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, 
-                           z__1.i = alpha->r * x[i__4].i + alpha->i * x[i__4]
-                           .r;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   iy = ky;
-                   k = kup1 - j;
-/* Computing MAX */
-                   i__4 = 1, i__2 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__3 = min(i__5,i__6);
-                   for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                       i__4 = iy;
-                       i__2 = iy;
-                       i__5 = k + i__ + j * a_dim1;
-                       z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               z__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + 
-                               z__2.i;
-                       y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-               if (j > *ku) {
-                   ky += *incy;
-               }
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y. */
-
-       jy = ky;
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp.r = 0., temp.i = 0.;
-               k = kup1 - j;
-               if (noconj) {
-/* Computing MAX */
-                   i__3 = 1, i__4 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__2 = min(i__5,i__6);
-                   for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) {
-                       i__3 = k + i__ + j * a_dim1;
-                       i__4 = i__;
-                       z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4]
-                               .i, z__2.i = a[i__3].r * x[i__4].i + a[i__3]
-                               .i * x[i__4].r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L90: */
-                   }
-               } else {
-/* Computing MAX */
-                   i__2 = 1, i__3 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__4 = min(i__5,i__6);
-                   for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                       d_cnjg(&z__3, &a[k + i__ + j * a_dim1]);
-                       i__2 = i__;
-                       z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, 
-                               z__2.i = z__3.r * x[i__2].i + z__3.i * x[i__2]
-                               .r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L100: */
-                   }
-               }
-               i__4 = jy;
-               i__2 = jy;
-               z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = 
-                       alpha->r * temp.i + alpha->i * temp.r;
-               z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
-               y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-               jy += *incy;
-/* L110: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp.r = 0., temp.i = 0.;
-               ix = kx;
-               k = kup1 - j;
-               if (noconj) {
-/* Computing MAX */
-                   i__4 = 1, i__2 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__3 = min(i__5,i__6);
-                   for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                       i__4 = k + i__ + j * a_dim1;
-                       i__2 = ix;
-                       z__2.r = a[i__4].r * x[i__2].r - a[i__4].i * x[i__2]
-                               .i, z__2.i = a[i__4].r * x[i__2].i + a[i__4]
-                               .i * x[i__2].r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       ix += *incx;
-/* L120: */
-                   }
-               } else {
-/* Computing MAX */
-                   i__3 = 1, i__4 = j - *ku;
-/* Computing MIN */
-                   i__5 = *m, i__6 = j + *kl;
-                   i__2 = min(i__5,i__6);
-                   for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) {
-                       d_cnjg(&z__3, &a[k + i__ + j * a_dim1]);
-                       i__3 = ix;
-                       z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                               z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3]
-                               .r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       ix += *incx;
-/* L130: */
-                   }
-               }
-               i__2 = jy;
-               i__3 = jy;
-               z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = 
-                       alpha->r * temp.i + alpha->i * temp.r;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               jy += *incy;
-               if (j > *ku) {
-                   kx += *incx;
-               }
-/* L140: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZGBMV . */
-
-} /* zgbmv_ */
-
-/* Subroutine */ int zgemm_(char *transa, char *transb, integer *m, integer *
-       n, integer *k, doublecomplex *alpha, doublecomplex *a, integer *lda, 
-       doublecomplex *b, integer *ldb, doublecomplex *beta, doublecomplex *
-       c__, integer *ldc, ftnlen transa_len, ftnlen transb_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6;
-    doublecomplex z__1, z__2, z__3, z__4;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, l, info;
-    static logical nota, notb;
-    static doublecomplex temp;
-    static logical conja, conjb;
-    static integer ncola;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa, nrowb;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZGEMM  performs one of the matrix-matrix operations */
-
-/*     C := alpha*op( A )*op( B ) + beta*C, */
-
-/*  where  op( X ) is one of */
-
-/*     op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ), */
-
-/*  alpha and beta are scalars, and A, B and C are matrices, with op( A ) */
-/*  an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  TRANSA - CHARACTER*1. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-
-/*              TRANSA = 'N' or 'n',  op( A ) = A. */
-
-/*              TRANSA = 'T' or 't',  op( A ) = A'. */
-
-/*              TRANSA = 'C' or 'c',  op( A ) = conjg( A' ). */
-
-/*           Unchanged on exit. */
-
-/*  TRANSB - CHARACTER*1. */
-/*           On entry, TRANSB specifies the form of op( B ) to be used in */
-/*           the matrix multiplication as follows: */
-
-/*              TRANSB = 'N' or 'n',  op( B ) = B. */
-
-/*              TRANSB = 'T' or 't',  op( B ) = B'. */
-
-/*              TRANSB = 'C' or 'c',  op( B ) = conjg( B' ). */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies  the number  of rows  of the  matrix */
-/*           op( A )  and of the  matrix  C.  M  must  be at least  zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry,  N  specifies the number  of columns of the matrix */
-/*           op( B ) and the number of columns of the matrix C. N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry,  K  specifies  the number of columns of the matrix */
-/*           op( A ) and the number of rows of the matrix op( B ). K must */
-/*           be at least  zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise. */
-/*           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by m  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. When  TRANSA = 'N' or 'n' then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  B      - COMPLEX*16       array of DIMENSION ( LDB, kb ), where kb is */
-/*           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise. */
-/*           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  n by k  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in the calling (sub) program. When  TRANSB = 'N' or 'n' then */
-/*           LDB must be at least  max( 1, k ), otherwise  LDB must be at */
-/*           least  max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX*16      . */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n  matrix */
-/*           ( alpha*op( A )*op( B ) + beta*C ). */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not */
-/*     conjugated or transposed, set  CONJA and CONJB  as true if  A  and */
-/*     B  respectively are to be  transposed but  not conjugated  and set */
-/*     NROWA, NCOLA and  NROWB  as the number of rows and  columns  of  A */
-/*     and the number of rows of  B  respectively. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    nota = lsame_(transa, "N", (ftnlen)1, (ftnlen)1);
-    notb = lsame_(transb, "N", (ftnlen)1, (ftnlen)1);
-    conja = lsame_(transa, "C", (ftnlen)1, (ftnlen)1);
-    conjb = lsame_(transb, "C", (ftnlen)1, (ftnlen)1);
-    if (nota) {
-       nrowa = *m;
-       ncola = *k;
-    } else {
-       nrowa = *k;
-       ncola = *m;
-    }
-    if (notb) {
-       nrowb = *k;
-    } else {
-       nrowb = *n;
-    }
-
-/*     Test the input parameters. */
-
-    info = 0;
-    if (! nota && ! conja && ! lsame_(transa, "T", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! notb && ! conjb && ! lsame_(transb, "T", (ftnlen)1, (ftnlen)
-           1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < max(1,nrowa)) {
-       info = 8;
-    } else if (*ldb < max(1,nrowb)) {
-       info = 10;
-    } else if (*ldc < max(1,*m)) {
-       info = 13;
-    }
-    if (info != 0) {
-       xerbla_("ZGEMM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) &&
-            (beta->r == 1. && beta->i == 0.)) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (alpha->r == 0. && alpha->i == 0.) {
-       if (beta->r == 0. && beta->i == 0.) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           z__1.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (notb) {
-       if (nota) {
-
-/*           Form  C := alpha*A*B + beta*C. */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0. && beta->i == 0.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L50: */
-                   }
-               } else if (beta->r != 1. || beta->i != 0.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L60: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = l + j * b_dim1;
-                   if (b[i__3].r != 0. || b[i__3].i != 0.) {
-                       i__3 = l + j * b_dim1;
-                       z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               z__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   z__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
-                                   .i + z__2.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L70: */
-                       }
-                   }
-/* L80: */
-               }
-/* L90: */
-           }
-       } else if (conja) {
-
-/*           Form  C := alpha*conjg( A' )*B + beta*C. */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       d_cnjg(&z__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * b_dim1;
-                       z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, 
-                               z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4]
-                               .r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L100: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L110: */
-               }
-/* L120: */
-           }
-       } else {
-
-/*           Form  C := alpha*A'*B + beta*C */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * b_dim1;
-                       z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
-                               .i, z__2.i = a[i__4].r * b[i__5].i + a[i__4]
-                               .i * b[i__5].r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L130: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L140: */
-               }
-/* L150: */
-           }
-       }
-    } else if (nota) {
-       if (conjb) {
-
-/*           Form  C := alpha*A*conjg( B' ) + beta*C. */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0. && beta->i == 0.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L160: */
-                   }
-               } else if (beta->r != 1. || beta->i != 0.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L170: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * b_dim1;
-                   if (b[i__3].r != 0. || b[i__3].i != 0.) {
-                       d_cnjg(&z__2, &b[j + l * b_dim1]);
-                       z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, 
-                               z__1.i = alpha->r * z__2.i + alpha->i * 
-                               z__2.r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   z__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
-                                   .i + z__2.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L180: */
-                       }
-                   }
-/* L190: */
-               }
-/* L200: */
-           }
-       } else {
-
-/*           Form  C := alpha*A*B'          + beta*C */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0. && beta->i == 0.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L210: */
-                   }
-               } else if (beta->r != 1. || beta->i != 0.) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L220: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * b_dim1;
-                   if (b[i__3].r != 0. || b[i__3].i != 0.) {
-                       i__3 = j + l * b_dim1;
-                       z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               z__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__3 = *m;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   z__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
-                                   .i + z__2.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L230: */
-                       }
-                   }
-/* L240: */
-               }
-/* L250: */
-           }
-       }
-    } else if (conja) {
-       if (conjb) {
-
-/*           Form  C := alpha*conjg( A' )*conjg( B' ) + beta*C. */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       d_cnjg(&z__3, &a[l + i__ * a_dim1]);
-                       d_cnjg(&z__4, &b[j + l * b_dim1]);
-                       z__2.r = z__3.r * z__4.r - z__3.i * z__4.i, z__2.i = 
-                               z__3.r * z__4.i + z__3.i * z__4.r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L260: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L270: */
-               }
-/* L280: */
-           }
-       } else {
-
-/*           Form  C := alpha*conjg( A' )*B' + beta*C */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       d_cnjg(&z__3, &a[l + i__ * a_dim1]);
-                       i__4 = j + l * b_dim1;
-                       z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, 
-                               z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4]
-                               .r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L290: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L300: */
-               }
-/* L310: */
-           }
-       }
-    } else {
-       if (conjb) {
-
-/*           Form  C := alpha*A'*conjg( B' ) + beta*C */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       d_cnjg(&z__3, &b[j + l * b_dim1]);
-                       z__2.r = a[i__4].r * z__3.r - a[i__4].i * z__3.i, 
-                               z__2.i = a[i__4].r * z__3.i + a[i__4].i * 
-                               z__3.r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L320: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L330: */
-               }
-/* L340: */
-           }
-       } else {
-
-/*           Form  C := alpha*A'*B' + beta*C */
-
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = j + l * b_dim1;
-                       z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
-                               .i, z__2.i = a[i__4].r * b[i__5].i + a[i__4]
-                               .i * b[i__5].r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L350: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L360: */
-               }
-/* L370: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZGEMM . */
-
-} /* zgemm_ */
-
-/* Subroutine */ int zgemv_(char *trans, integer *m, integer *n, 
-       doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *
-       x, integer *incx, doublecomplex *beta, doublecomplex *y, integer *
-       incy, ftnlen trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2, z__3;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static doublecomplex temp;
-    static integer lenx, leny;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZGEMV  performs one of the matrix-vector operations */
-
-/*     y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   or */
-
-/*     y := alpha*conjg( A' )*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are vectors and A is an */
-/*  m by n matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y. */
-
-/*              TRANS = 'T' or 't'   y := alpha*A'*x + beta*y. */
-
-/*              TRANS = 'C' or 'c'   y := alpha*conjg( A' )*x + beta*y. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX*16       array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX*16      . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX*16       array of DIMENSION at least */
-/*           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' */
-/*           and at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. */
-/*           Before entry with BETA non-zero, the incremented array Y */
-/*           must contain the vector y. On exit, Y is overwritten by the */
-/*           updated vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "T", (
-           ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (ftnlen)1)
-           ) {
-       info = 1;
-    } else if (*m < 0) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*lda < max(1,*m)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    } else if (*incy == 0) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("ZGEMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 
-           1. && beta->i == 0.)) {
-       return 0;
-    }
-
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-
-/*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set */
-/*     up the start points in  X  and  Y. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       lenx = *n;
-       leny = *m;
-    } else {
-       lenx = *m;
-       leny = *n;
-    }
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (lenx - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (leny - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-/*     First form  y := beta*y. */
-
-    if (beta->r != 1. || beta->i != 0.) {
-       if (*incy == 1) {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = leny;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y := alpha*A*x + y. */
-
-       jx = kx;
-       if (*incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   i__2 = jx;
-                   z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__;
-                       i__4 = i__;
-                       i__5 = i__ + j * a_dim1;
-                       z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               z__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + 
-                               z__2.i;
-                       y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-/* L50: */
-                   }
-               }
-               jx += *incx;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   i__2 = jx;
-                   z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__1.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   iy = ky;
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = iy;
-                       i__4 = iy;
-                       i__5 = i__ + j * a_dim1;
-                       z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               z__2.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + 
-                               z__2.i;
-                       y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                       iy += *incy;
-/* L70: */
-                   }
-               }
-               jx += *incx;
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y := alpha*A'*x + y  or  y := alpha*conjg( A' )*x + y. */
-
-       jy = ky;
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp.r = 0., temp.i = 0.;
-               if (noconj) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__;
-                       z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4]
-                               .i, z__2.i = a[i__3].r * x[i__4].i + a[i__3]
-                               .i * x[i__4].r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L90: */
-                   }
-               } else {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                       i__3 = i__;
-                       z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                               z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3]
-                               .r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L100: */
-                   }
-               }
-               i__2 = jy;
-               i__3 = jy;
-               z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = 
-                       alpha->r * temp.i + alpha->i * temp.r;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               jy += *incy;
-/* L110: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               temp.r = 0., temp.i = 0.;
-               ix = kx;
-               if (noconj) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = ix;
-                       z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[i__4]
-                               .i, z__2.i = a[i__3].r * x[i__4].i + a[i__3]
-                               .i * x[i__4].r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       ix += *incx;
-/* L120: */
-                   }
-               } else {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                       i__3 = ix;
-                       z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                               z__2.i = z__3.r * x[i__3].i + z__3.i * x[i__3]
-                               .r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       ix += *incx;
-/* L130: */
-                   }
-               }
-               i__2 = jy;
-               i__3 = jy;
-               z__2.r = alpha->r * temp.r - alpha->i * temp.i, z__2.i = 
-                       alpha->r * temp.i + alpha->i * temp.r;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               jy += *incy;
-/* L140: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZGEMV . */
-
-} /* zgemv_ */
-
-/* Subroutine */ int zgerc_(integer *m, integer *n, doublecomplex *alpha, 
-       doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, 
-       doublecomplex *a, integer *lda)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, ix, jy, kx, info;
-    static doublecomplex temp;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZGERC  performs the rank 1 operation */
-
-/*     A := alpha*x*conjg( y' ) + A, */
-
-/*  where alpha is a scalar, x is an m element vector, y is an n element */
-/*  vector and A is an m by n matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the m */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. On exit, A is */
-/*           overwritten by the updated matrix. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-
-    /* Function Body */
-    info = 0;
-    if (*m < 0) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*m)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("ZGERC ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (*incy > 0) {
-       jy = 1;
-    } else {
-       jy = 1 - (*n - 1) * *incy;
-    }
-    if (*incx == 1) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = jy;
-           if (y[i__2].r != 0. || y[i__2].i != 0.) {
-               d_cnjg(&z__2, &y[jy]);
-               z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                       alpha->r * z__2.i + alpha->i * z__2.r;
-               temp.r = z__1.r, temp.i = z__1.i;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * a_dim1;
-                   i__4 = i__ + j * a_dim1;
-                   i__5 = i__;
-                   z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i =
-                            x[i__5].r * temp.i + x[i__5].i * temp.r;
-                   z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i;
-                   a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-/* L10: */
-               }
-           }
-           jy += *incy;
-/* L20: */
-       }
-    } else {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*m - 1) * *incx;
-       }
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = jy;
-           if (y[i__2].r != 0. || y[i__2].i != 0.) {
-               d_cnjg(&z__2, &y[jy]);
-               z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                       alpha->r * z__2.i + alpha->i * z__2.r;
-               temp.r = z__1.r, temp.i = z__1.i;
-               ix = kx;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * a_dim1;
-                   i__4 = i__ + j * a_dim1;
-                   i__5 = ix;
-                   z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i =
-                            x[i__5].r * temp.i + x[i__5].i * temp.r;
-                   z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i;
-                   a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-                   ix += *incx;
-/* L30: */
-               }
-           }
-           jy += *incy;
-/* L40: */
-       }
-    }
-
-    return 0;
-
-/*     End of ZGERC . */
-
-} /* zgerc_ */
-
-/* Subroutine */ int zgeru_(integer *m, integer *n, doublecomplex *alpha, 
-       doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, 
-       doublecomplex *a, integer *lda)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2;
-
-    /* Local variables */
-    static integer i__, j, ix, jy, kx, info;
-    static doublecomplex temp;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZGERU  performs the rank 1 operation */
-
-/*     A := alpha*x*y' + A, */
-
-/*  where alpha is a scalar, x is an m element vector, y is an n element */
-/*  vector and A is an m by n matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of the matrix A. */
-/*           M must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( m - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the m */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry, the leading m by n part of the array A must */
-/*           contain the matrix of coefficients. On exit, A is */
-/*           overwritten by the updated matrix. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-
-    /* Function Body */
-    info = 0;
-    if (*m < 0) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*m)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("ZGERU ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (*incy > 0) {
-       jy = 1;
-    } else {
-       jy = 1 - (*n - 1) * *incy;
-    }
-    if (*incx == 1) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = jy;
-           if (y[i__2].r != 0. || y[i__2].i != 0.) {
-               i__2 = jy;
-               z__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, z__1.i =
-                        alpha->r * y[i__2].i + alpha->i * y[i__2].r;
-               temp.r = z__1.r, temp.i = z__1.i;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * a_dim1;
-                   i__4 = i__ + j * a_dim1;
-                   i__5 = i__;
-                   z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i =
-                            x[i__5].r * temp.i + x[i__5].i * temp.r;
-                   z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i;
-                   a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-/* L10: */
-               }
-           }
-           jy += *incy;
-/* L20: */
-       }
-    } else {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*m - 1) * *incx;
-       }
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = jy;
-           if (y[i__2].r != 0. || y[i__2].i != 0.) {
-               i__2 = jy;
-               z__1.r = alpha->r * y[i__2].r - alpha->i * y[i__2].i, z__1.i =
-                        alpha->r * y[i__2].i + alpha->i * y[i__2].r;
-               temp.r = z__1.r, temp.i = z__1.i;
-               ix = kx;
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * a_dim1;
-                   i__4 = i__ + j * a_dim1;
-                   i__5 = ix;
-                   z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, z__2.i =
-                            x[i__5].r * temp.i + x[i__5].i * temp.r;
-                   z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + z__2.i;
-                   a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-                   ix += *incx;
-/* L30: */
-               }
-           }
-           jy += *incy;
-/* L40: */
-       }
-    }
-
-    return 0;
-
-/*     End of ZGERU . */
-
-} /* zgeru_ */
-
-/* Subroutine */ int zhbmv_(char *uplo, integer *n, integer *k, doublecomplex 
-       *alpha, doublecomplex *a, integer *lda, doublecomplex *x, integer *
-       incx, doublecomplex *beta, doublecomplex *y, integer *incy, ftnlen 
-       uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3, z__4;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, l, ix, iy, jx, jy, kx, ky, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZHBMV  performs the matrix-vector  operation */
-
-/*     y := alpha*A*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n hermitian band matrix, with k super-diagonals. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the band matrix A is being supplied as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  being supplied. */
-
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  being supplied. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry, K specifies the number of super-diagonals of the */
-/*           matrix A. K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the hermitian matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer the upper */
-/*           triangular part of a hermitian band matrix from conventional */
-/*           full matrix storage to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the hermitian matrix, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer the lower */
-/*           triangular part of a hermitian band matrix from conventional */
-/*           full matrix storage to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set and are assumed to be zero. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX*16       array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the */
-/*           vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX*16      . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX*16       array of DIMENSION at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the */
-/*           vector y. On exit, Y is overwritten by the updated vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*k < 0) {
-       info = 3;
-    } else if (*lda < *k + 1) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    } else if (*incy == 0) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("ZHBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && 
-           beta->i == 0.)) {
-       return 0;
-    }
-
-/*     Set up the start points in  X  and  Y. */
-
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of the array A */
-/*     are accessed sequentially with one pass through A. */
-
-/*     First form  y := beta*y. */
-
-    if (beta->r != 1. || beta->i != 0.) {
-       if (*incy == 1) {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when upper triangle of A is stored. */
-
-       kplus1 = *k + 1;
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               l = kplus1 - j;
-/* Computing MAX */
-               i__2 = 1, i__3 = j - *k;
-               i__4 = j - 1;
-               for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   i__5 = l + i__ + j * a_dim1;
-                   z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-                   d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                   i__2 = i__;
-                   z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, z__2.i =
-                            z__3.r * x[i__2].i + z__3.i * x[i__2].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-/* L50: */
-               }
-               i__4 = j;
-               i__2 = j;
-               i__3 = kplus1 + j * a_dim1;
-               d__1 = a[i__3].r;
-               z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-               z__2.r = y[i__2].r + z__3.r, z__2.i = y[i__2].i + z__3.i;
-               z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-               y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__4 = jx;
-               z__1.r = alpha->r * x[i__4].r - alpha->i * x[i__4].i, z__1.i =
-                        alpha->r * x[i__4].i + alpha->i * x[i__4].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               ix = kx;
-               iy = ky;
-               l = kplus1 - j;
-/* Computing MAX */
-               i__4 = 1, i__2 = j - *k;
-               i__3 = j - 1;
-               for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                   i__4 = iy;
-                   i__2 = iy;
-                   i__5 = l + i__ + j * a_dim1;
-                   z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
-                   y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-                   d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                   i__4 = ix;
-                   z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i =
-                            z__3.r * x[i__4].i + z__3.i * x[i__4].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               i__3 = jy;
-               i__4 = jy;
-               i__2 = kplus1 + j * a_dim1;
-               d__1 = a[i__2].r;
-               z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-               z__2.r = y[i__4].r + z__3.r, z__2.i = y[i__4].i + z__3.i;
-               z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-               y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-               jx += *incx;
-               jy += *incy;
-               if (j > *k) {
-                   kx += *incx;
-                   ky += *incy;
-               }
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y  when lower triangle of A is stored. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__3 = j;
-               z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i =
-                        alpha->r * x[i__3].i + alpha->i * x[i__3].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               i__3 = j;
-               i__4 = j;
-               i__2 = j * a_dim1 + 1;
-               d__1 = a[i__2].r;
-               z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-               z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-               y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-               l = 1 - j;
-/* Computing MIN */
-               i__4 = *n, i__2 = j + *k;
-               i__3 = min(i__4,i__2);
-               for (i__ = j + 1; i__ <= i__3; ++i__) {
-                   i__4 = i__;
-                   i__2 = i__;
-                   i__5 = l + i__ + j * a_dim1;
-                   z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
-                   y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-                   d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                   i__4 = i__;
-                   z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i =
-                            z__3.r * x[i__4].i + z__3.i * x[i__4].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-/* L90: */
-               }
-               i__3 = j;
-               i__4 = j;
-               z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-               y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__3 = jx;
-               z__1.r = alpha->r * x[i__3].r - alpha->i * x[i__3].i, z__1.i =
-                        alpha->r * x[i__3].i + alpha->i * x[i__3].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               i__3 = jy;
-               i__4 = jy;
-               i__2 = j * a_dim1 + 1;
-               d__1 = a[i__2].r;
-               z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-               z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-               y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-               l = 1 - j;
-               ix = jx;
-               iy = jy;
-/* Computing MIN */
-               i__4 = *n, i__2 = j + *k;
-               i__3 = min(i__4,i__2);
-               for (i__ = j + 1; i__ <= i__3; ++i__) {
-                   ix += *incx;
-                   iy += *incy;
-                   i__4 = iy;
-                   i__2 = iy;
-                   i__5 = l + i__ + j * a_dim1;
-                   z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   z__1.r = y[i__2].r + z__2.r, z__1.i = y[i__2].i + z__2.i;
-                   y[i__4].r = z__1.r, y[i__4].i = z__1.i;
-                   d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                   i__4 = ix;
-                   z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, z__2.i =
-                            z__3.r * x[i__4].i + z__3.i * x[i__4].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-/* L110: */
-               }
-               i__3 = jy;
-               i__4 = jy;
-               z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-               y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-               jx += *incx;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZHBMV . */
-
-} /* zhbmv_ */
-
-/* Subroutine */ int zhemm_(char *side, char *uplo, integer *m, integer *n, 
-       doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *
-       b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer *
-       ldc, ftnlen side_len, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3, z__4, z__5;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, k, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZHEMM  performs one of the matrix-matrix operations */
-
-/*     C := alpha*A*B + beta*C, */
-
-/*  or */
-
-/*     C := alpha*B*A + beta*C, */
-
-/*  where alpha and beta are scalars, A is an hermitian matrix and  B and */
-/*  C are m by n matrices. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE  specifies whether  the  hermitian matrix  A */
-/*           appears on the  left or right  in the  operation as follows: */
-
-/*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C, */
-
-/*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C, */
-
-/*           Unchanged on exit. */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of  the  hermitian  matrix   A  is  to  be */
-/*           referenced as follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of the */
-/*                                  hermitian matrix is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of the */
-/*                                  hermitian matrix is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies the number of rows of the matrix  C. */
-/*           M  must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix C. */
-/*           N  must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is */
-/*           m  when  SIDE = 'L' or 'l'  and is n  otherwise. */
-/*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of */
-/*           the array  A  must contain the  hermitian matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading m by m upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  hermitian matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  m by m  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  hermitian */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of */
-/*           the array  A  must contain the  hermitian matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading n by n upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  hermitian matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  n by n  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  hermitian */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Note that the imaginary parts  of the diagonal elements need */
-/*           not be set, they are assumed to be zero. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  B      - COMPLEX*16       array of DIMENSION ( LDB, n ). */
-/*           Before entry, the leading  m by n part of the array  B  must */
-/*           contain the matrix B. */
-/*           Unchanged on exit. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX*16      . */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n updated */
-/*           matrix. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Set NROWA as the number of rows of A. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-/*     Test the input parameters. */
-
-    info = 0;
-    if (! lsame_(side, "L", (ftnlen)1, (ftnlen)1) && ! lsame_(side, "R", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,*m)) {
-       info = 9;
-    } else if (*ldc < max(1,*m)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("ZHEMM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 
-           1. && beta->i == 0.)) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (alpha->r == 0. && alpha->i == 0.) {
-       if (beta->r == 0. && beta->i == 0.) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           z__1.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*B + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * b_dim1;
-                   z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                           z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   temp2.r = 0., temp2.i = 0.;
-                   i__3 = i__ - 1;
-                   for (k = 1; k <= i__3; ++k) {
-                       i__4 = k + j * c_dim1;
-                       i__5 = k + j * c_dim1;
-                       i__6 = k + i__ * a_dim1;
-                       z__2.r = temp1.r * a[i__6].r - temp1.i * a[i__6].i, 
-                               z__2.i = temp1.r * a[i__6].i + temp1.i * a[
-                               i__6].r;
-                       z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + 
-                               z__2.i;
-                       c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-                       i__4 = k + j * b_dim1;
-                       d_cnjg(&z__3, &a[k + i__ * a_dim1]);
-                       z__2.r = b[i__4].r * z__3.r - b[i__4].i * z__3.i, 
-                               z__2.i = b[i__4].r * z__3.i + b[i__4].i * 
-                               z__3.r;
-                       z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-/* L50: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + i__ * a_dim1;
-                       d__1 = a[i__4].r;
-                       z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-                       z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       i__5 = i__ + i__ * a_dim1;
-                       d__1 = a[i__5].r;
-                       z__4.r = d__1 * temp1.r, z__4.i = d__1 * temp1.i;
-                       z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i;
-                       z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L60: */
-               }
-/* L70: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               for (i__ = *m; i__ >= 1; --i__) {
-                   i__2 = i__ + j * b_dim1;
-                   z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, 
-                           z__1.i = alpha->r * b[i__2].i + alpha->i * b[i__2]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   temp2.r = 0., temp2.i = 0.;
-                   i__2 = *m;
-                   for (k = i__ + 1; k <= i__2; ++k) {
-                       i__3 = k + j * c_dim1;
-                       i__4 = k + j * c_dim1;
-                       i__5 = k + i__ * a_dim1;
-                       z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                               z__2.i = temp1.r * a[i__5].i + temp1.i * a[
-                               i__5].r;
-                       z__1.r = c__[i__4].r + z__2.r, z__1.i = c__[i__4].i + 
-                               z__2.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                       i__3 = k + j * b_dim1;
-                       d_cnjg(&z__3, &a[k + i__ * a_dim1]);
-                       z__2.r = b[i__3].r * z__3.r - b[i__3].i * z__3.i, 
-                               z__2.i = b[i__3].r * z__3.i + b[i__3].i * 
-                               z__3.r;
-                       z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-/* L80: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__2 = i__ + j * c_dim1;
-                       i__3 = i__ + i__ * a_dim1;
-                       d__1 = a[i__3].r;
-                       z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-                       z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
-                   } else {
-                       i__2 = i__ + j * c_dim1;
-                       i__3 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__3].r - beta->i * c__[i__3]
-                               .i, z__3.i = beta->r * c__[i__3].i + beta->i *
-                                c__[i__3].r;
-                       i__4 = i__ + i__ * a_dim1;
-                       d__1 = a[i__4].r;
-                       z__4.r = d__1 * temp1.r, z__4.i = d__1 * temp1.i;
-                       z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i;
-                       z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i;
-                       c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
-                   }
-/* L90: */
-               }
-/* L100: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*B*A + beta*C. */
-
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = j + j * a_dim1;
-           d__1 = a[i__2].r;
-           z__1.r = d__1 * alpha->r, z__1.i = d__1 * alpha->i;
-           temp1.r = z__1.r, temp1.i = z__1.i;
-           if (beta->r == 0. && beta->i == 0.) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * b_dim1;
-                   z__1.r = temp1.r * b[i__4].r - temp1.i * b[i__4].i, 
-                           z__1.i = temp1.r * b[i__4].i + temp1.i * b[i__4]
-                           .r;
-                   c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L110: */
-               }
-           } else {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   z__2.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           z__2.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   i__5 = i__ + j * b_dim1;
-                   z__3.r = temp1.r * b[i__5].r - temp1.i * b[i__5].i, 
-                           z__3.i = temp1.r * b[i__5].i + temp1.i * b[i__5]
-                           .r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L120: */
-               }
-           }
-           i__2 = j - 1;
-           for (k = 1; k <= i__2; ++k) {
-               if (upper) {
-                   i__3 = k + j * a_dim1;
-                   z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-               } else {
-                   d_cnjg(&z__2, &a[j + k * a_dim1]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   i__4 = i__ + j * c_dim1;
-                   i__5 = i__ + j * c_dim1;
-                   i__6 = i__ + k * b_dim1;
-                   z__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, 
-                           z__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6]
-                           .r;
-                   z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + 
-                           z__2.i;
-                   c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L130: */
-               }
-/* L140: */
-           }
-           i__2 = *n;
-           for (k = j + 1; k <= i__2; ++k) {
-               if (upper) {
-                   d_cnjg(&z__2, &a[j + k * a_dim1]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-               } else {
-                   i__3 = k + j * a_dim1;
-                   z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   i__4 = i__ + j * c_dim1;
-                   i__5 = i__ + j * c_dim1;
-                   i__6 = i__ + k * b_dim1;
-                   z__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, 
-                           z__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6]
-                           .r;
-                   z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + 
-                           z__2.i;
-                   c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L150: */
-               }
-/* L160: */
-           }
-/* L170: */
-       }
-    }
-
-    return 0;
-
-/*     End of ZHEMM . */
-
-} /* zhemm_ */
-
-/* Subroutine */ int zhemv_(char *uplo, integer *n, doublecomplex *alpha, 
-       doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, 
-       doublecomplex *beta, doublecomplex *y, integer *incy, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3, z__4;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZHEMV  performs the matrix-vector  operation */
-
-/*     y := alpha*A*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n hermitian matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           lower triangular part of A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           upper triangular part of A is not referenced. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set and are assumed to be zero. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX*16      . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-    --y;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*lda < max(1,*n)) {
-       info = 5;
-    } else if (*incx == 0) {
-       info = 7;
-    } else if (*incy == 0) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("ZHEMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && 
-           beta->i == 0.)) {
-       return 0;
-    }
-
-/*     Set up the start points in  X  and  Y. */
-
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-
-/*     First form  y := beta*y. */
-
-    if (beta->r != 1. || beta->i != 0.) {
-       if (*incy == 1) {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when A is stored in upper triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__;
-                   i__4 = i__;
-                   i__5 = i__ + j * a_dim1;
-                   z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-                   y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                   d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                   i__3 = i__;
-                   z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-                            z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-/* L50: */
-               }
-               i__2 = j;
-               i__3 = j;
-               i__4 = j + j * a_dim1;
-               d__1 = a[i__4].r;
-               z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-               z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
-               z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               ix = kx;
-               iy = ky;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = iy;
-                   i__4 = iy;
-                   i__5 = i__ + j * a_dim1;
-                   z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-                   y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                   d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                   i__3 = ix;
-                   z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-                            z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               i__2 = jy;
-               i__3 = jy;
-               i__4 = j + j * a_dim1;
-               d__1 = a[i__4].r;
-               z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-               z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
-               z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               jx += *incx;
-               jy += *incy;
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y  when A is stored in lower triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               i__2 = j;
-               i__3 = j;
-               i__4 = j + j * a_dim1;
-               d__1 = a[i__4].r;
-               z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   i__3 = i__;
-                   i__4 = i__;
-                   i__5 = i__ + j * a_dim1;
-                   z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-                   y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                   d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                   i__3 = i__;
-                   z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-                            z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-/* L90: */
-               }
-               i__2 = j;
-               i__3 = j;
-               z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               i__2 = jy;
-               i__3 = jy;
-               i__4 = j + j * a_dim1;
-               d__1 = a[i__4].r;
-               z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               ix = jx;
-               iy = jy;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   ix += *incx;
-                   iy += *incy;
-                   i__3 = iy;
-                   i__4 = iy;
-                   i__5 = i__ + j * a_dim1;
-                   z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                           z__2.i = temp1.r * a[i__5].i + temp1.i * a[i__5]
-                           .r;
-                   z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-                   y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                   d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                   i__3 = ix;
-                   z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-                            z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-/* L110: */
-               }
-               i__2 = jy;
-               i__3 = jy;
-               z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               jx += *incx;
-               jy += *incy;
-/* L120: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZHEMV . */
-
-} /* zhemv_ */
-
-/* Subroutine */ int zher_(char *uplo, integer *n, doublereal *alpha, 
-       doublecomplex *x, integer *incx, doublecomplex *a, integer *lda, 
-       ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublereal d__1;
-    doublecomplex z__1, z__2;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZHER   performs the hermitian rank 1 operation */
-
-/*     A := alpha*x*conjg( x' ) + A, */
-
-/*  where alpha is a real scalar, x is an n element vector and A is an */
-/*  n by n hermitian matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           lower triangular part of A is not referenced. On exit, the */
-/*           upper triangular part of the array A is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           upper triangular part of A is not referenced. On exit, the */
-/*           lower triangular part of the array A is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set, they are assumed to be zero, and on exit they */
-/*           are set to zero. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*lda < max(1,*n)) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("ZHER  ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0.) {
-       return 0;
-    }
-
-/*     Set the start point in X if the increment is not unity. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  A  when A is stored in upper triangle. */
-
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   d_cnjg(&z__2, &x[j]);
-                   z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = i__;
-                       z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               z__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + 
-                               z__2.i;
-                       a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-/* L10: */
-                   }
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = j;
-                   z__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, z__1.i =
-                            x[i__4].r * temp.i + x[i__4].i * temp.r;
-                   d__1 = a[i__3].r + z__1.r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   d__1 = a[i__3].r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               }
-/* L20: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   d_cnjg(&z__2, &x[jx]);
-                   z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   ix = kx;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = ix;
-                       z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               z__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + 
-                               z__2.i;
-                       a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-                       ix += *incx;
-/* L30: */
-                   }
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = jx;
-                   z__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, z__1.i =
-                            x[i__4].r * temp.i + x[i__4].i * temp.r;
-                   d__1 = a[i__3].r + z__1.r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   d__1 = a[i__3].r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               }
-               jx += *incx;
-/* L40: */
-           }
-       }
-    } else {
-
-/*        Form  A  when A is stored in lower triangle. */
-
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   d_cnjg(&z__2, &x[j]);
-                   z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = j;
-                   z__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, z__1.i =
-                            temp.r * x[i__4].i + temp.i * x[i__4].r;
-                   d__1 = a[i__3].r + z__1.r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = i__;
-                       z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               z__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + 
-                               z__2.i;
-                       a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-/* L50: */
-                   }
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   d__1 = a[i__3].r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               }
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   d_cnjg(&z__2, &x[jx]);
-                   z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = jx;
-                   z__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, z__1.i =
-                            temp.r * x[i__4].i + temp.i * x[i__4].r;
-                   d__1 = a[i__3].r + z__1.r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-                   ix = jx;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       ix += *incx;
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = ix;
-                       z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               z__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       z__1.r = a[i__4].r + z__2.r, z__1.i = a[i__4].i + 
-                               z__2.i;
-                       a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-/* L70: */
-                   }
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   d__1 = a[i__3].r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               }
-               jx += *incx;
-/* L80: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZHER  . */
-
-} /* zher_ */
-
-/* Subroutine */ int zher2_(char *uplo, integer *n, doublecomplex *alpha, 
-       doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, 
-       doublecomplex *a, integer *lda, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3, z__4;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, ix, iy, jx, jy, kx, ky, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZHER2  performs the hermitian rank 2 operation */
-
-/*     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, */
-
-/*  where alpha is a scalar, x and y are n element vectors and A is an n */
-/*  by n hermitian matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the array A is to be referenced as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of A */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of A */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           lower triangular part of A is not referenced. On exit, the */
-/*           upper triangular part of the array A is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular part of the hermitian matrix and the strictly */
-/*           upper triangular part of A is not referenced. On exit, the */
-/*           lower triangular part of the array A is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set, they are assumed to be zero, and on exit they */
-/*           are set to zero. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --y;
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    } else if (*lda < max(1,*n)) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("ZHER2 ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-
-/*     Set up the start points in X and Y if the increments are not both */
-/*     unity. */
-
-    if (*incx != 1 || *incy != 1) {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*n - 1) * *incx;
-       }
-       if (*incy > 0) {
-           ky = 1;
-       } else {
-           ky = 1 - (*n - 1) * *incy;
-       }
-       jx = kx;
-       jy = ky;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through the triangular part */
-/*     of A. */
-
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  A  when A is stored in the upper triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               i__3 = j;
-               if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || 
-                       y[i__3].i != 0.)) {
-                   d_cnjg(&z__2, &y[j]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   i__2 = j;
-                   z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   d_cnjg(&z__1, &z__2);
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = i__;
-                       z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               z__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + 
-                               z__3.i;
-                       i__6 = i__;
-                       z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               z__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-                       a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-/* L10: */
-                   }
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = j;
-                   z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           z__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = j;
-                   z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           z__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   d__1 = a[i__3].r + z__1.r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   d__1 = a[i__3].r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               i__3 = jy;
-               if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || 
-                       y[i__3].i != 0.)) {
-                   d_cnjg(&z__2, &y[jy]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   i__2 = jx;
-                   z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   d_cnjg(&z__1, &z__2);
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   ix = kx;
-                   iy = ky;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = ix;
-                       z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               z__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + 
-                               z__3.i;
-                       i__6 = iy;
-                       z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               z__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-                       a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-                       ix += *incx;
-                       iy += *incy;
-/* L30: */
-                   }
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = jx;
-                   z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           z__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = jy;
-                   z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           z__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   d__1 = a[i__3].r + z__1.r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   d__1 = a[i__3].r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               }
-               jx += *incx;
-               jy += *incy;
-/* L40: */
-           }
-       }
-    } else {
-
-/*        Form  A  when A is stored in the lower triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               i__3 = j;
-               if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || 
-                       y[i__3].i != 0.)) {
-                   d_cnjg(&z__2, &y[j]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   i__2 = j;
-                   z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   d_cnjg(&z__1, &z__2);
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = j;
-                   z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           z__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = j;
-                   z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           z__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   d__1 = a[i__3].r + z__1.r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = i__;
-                       z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               z__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + 
-                               z__3.i;
-                       i__6 = i__;
-                       z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               z__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-                       a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-/* L50: */
-                   }
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   d__1 = a[i__3].r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               }
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               i__3 = jy;
-               if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || 
-                       y[i__3].i != 0.)) {
-                   d_cnjg(&z__2, &y[jy]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   i__2 = jx;
-                   z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   d_cnjg(&z__1, &z__2);
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   i__4 = jx;
-                   z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           z__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = jy;
-                   z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           z__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   d__1 = a[i__3].r + z__1.r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-                   ix = jx;
-                   iy = jy;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       ix += *incx;
-                       iy += *incy;
-                       i__3 = i__ + j * a_dim1;
-                       i__4 = i__ + j * a_dim1;
-                       i__5 = ix;
-                       z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               z__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       z__2.r = a[i__4].r + z__3.r, z__2.i = a[i__4].i + 
-                               z__3.i;
-                       i__6 = iy;
-                       z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               z__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-                       a[i__3].r = z__1.r, a[i__3].i = z__1.i;
-/* L70: */
-                   }
-               } else {
-                   i__2 = j + j * a_dim1;
-                   i__3 = j + j * a_dim1;
-                   d__1 = a[i__3].r;
-                   a[i__2].r = d__1, a[i__2].i = 0.;
-               }
-               jx += *incx;
-               jy += *incy;
-/* L80: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZHER2 . */
-
-} /* zher2_ */
-
-/* Subroutine */ int zher2k_(char *uplo, char *trans, integer *n, integer *k, 
-       doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *
-       b, integer *ldb, doublereal *beta, doublecomplex *c__, integer *ldc, 
-       ftnlen uplo_len, ftnlen trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6, i__7;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3, z__4, z__5, z__6;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, l, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZHER2K  performs one of the hermitian rank 2k operations */
-
-/*     C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + beta*C, */
-
-/*  or */
-
-/*     C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + beta*C, */
-
-/*  where  alpha and beta  are scalars with  beta  real,  C is an  n by n */
-/*  hermitian matrix and  A and B  are  n by k matrices in the first case */
-/*  and  k by n  matrices in the second case. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'    C := alpha*A*conjg( B' )          + */
-/*                                         conjg( alpha )*B*conjg( A' ) + */
-/*                                         beta*C. */
-
-/*              TRANS = 'C' or 'c'    C := alpha*conjg( A' )*B          + */
-/*                                         conjg( alpha )*conjg( B' )*A + */
-/*                                         beta*C. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns  of the  matrices  A and B,  and on  entry  with */
-/*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the */
-/*           matrices  A and B.  K must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX*16         . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  B      - COMPLEX*16       array of DIMENSION ( LDB, kb ), where kb is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  k by n  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDB must be at least  max( 1, n ), otherwise  LDB must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - DOUBLE PRECISION            . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-
-/*  C      - COMPLEX*16          array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  hermitian matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  hermitian matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set,  they are assumed to be zero,  and on exit they */
-/*           are set to zero. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-/*  -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. */
-/*     Ed Anderson, Cray Research Inc. */
-
-
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "C", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldc < max(1,*n)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("ZHER2K", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) && *beta == 
-           1.) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (alpha->r == 0. && alpha->i == 0.) {
-       if (upper) {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L30: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-/* L40: */
-               }
-           }
-       } else {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*conjg( B' ) + conjg( alpha )*B*conjg( A' ) + */
-/*                   C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L90: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L100: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   i__4 = j + l * b_dim1;
-                   if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != 
-                           0. || b[i__4].i != 0.)) {
-                       d_cnjg(&z__2, &b[j + l * b_dim1]);
-                       z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, 
-                               z__1.i = alpha->r * z__2.i + alpha->i * 
-                               z__2.r;
-                       temp1.r = z__1.r, temp1.i = z__1.i;
-                       i__3 = j + l * a_dim1;
-                       z__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               z__2.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       d_cnjg(&z__1, &z__2);
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-                       i__3 = j - 1;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__3.r = a[i__6].r * temp1.r - a[i__6].i * 
-                                   temp1.i, z__3.i = a[i__6].r * temp1.i + a[
-                                   i__6].i * temp1.r;
-                           z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5]
-                                   .i + z__3.i;
-                           i__7 = i__ + l * b_dim1;
-                           z__4.r = b[i__7].r * temp2.r - b[i__7].i * 
-                                   temp2.i, z__4.i = b[i__7].r * temp2.i + b[
-                                   i__7].i * temp2.r;
-                           z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + 
-                                   z__4.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L110: */
-                       }
-                       i__3 = j + j * c_dim1;
-                       i__4 = j + j * c_dim1;
-                       i__5 = j + l * a_dim1;
-                       z__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, 
-                               z__2.i = a[i__5].r * temp1.i + a[i__5].i * 
-                               temp1.r;
-                       i__6 = j + l * b_dim1;
-                       z__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, 
-                               z__3.i = b[i__6].r * temp2.i + b[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       d__1 = c__[i__4].r + z__1.r;
-                       c__[i__3].r = d__1, c__[i__3].i = 0.;
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L140: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L150: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   i__4 = j + l * b_dim1;
-                   if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != 
-                           0. || b[i__4].i != 0.)) {
-                       d_cnjg(&z__2, &b[j + l * b_dim1]);
-                       z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, 
-                               z__1.i = alpha->r * z__2.i + alpha->i * 
-                               z__2.r;
-                       temp1.r = z__1.r, temp1.i = z__1.i;
-                       i__3 = j + l * a_dim1;
-                       z__2.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               z__2.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       d_cnjg(&z__1, &z__2);
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-                       i__3 = *n;
-                       for (i__ = j + 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__3.r = a[i__6].r * temp1.r - a[i__6].i * 
-                                   temp1.i, z__3.i = a[i__6].r * temp1.i + a[
-                                   i__6].i * temp1.r;
-                           z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5]
-                                   .i + z__3.i;
-                           i__7 = i__ + l * b_dim1;
-                           z__4.r = b[i__7].r * temp2.r - b[i__7].i * 
-                                   temp2.i, z__4.i = b[i__7].r * temp2.i + b[
-                                   i__7].i * temp2.r;
-                           z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + 
-                                   z__4.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L160: */
-                       }
-                       i__3 = j + j * c_dim1;
-                       i__4 = j + j * c_dim1;
-                       i__5 = j + l * a_dim1;
-                       z__2.r = a[i__5].r * temp1.r - a[i__5].i * temp1.i, 
-                               z__2.i = a[i__5].r * temp1.i + a[i__5].i * 
-                               temp1.r;
-                       i__6 = j + l * b_dim1;
-                       z__3.r = b[i__6].r * temp2.r - b[i__6].i * temp2.i, 
-                               z__3.i = b[i__6].r * temp2.i + b[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       d__1 = c__[i__4].r + z__1.r;
-                       c__[i__3].r = d__1, c__[i__3].i = 0.;
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*conjg( A' )*B + conjg( alpha )*conjg( B' )*A + */
-/*                   C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp1.r = 0., temp1.i = 0.;
-                   temp2.r = 0., temp2.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       d_cnjg(&z__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * b_dim1;
-                       z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, 
-                               z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4]
-                               .r;
-                       z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i;
-                       temp1.r = z__1.r, temp1.i = z__1.i;
-                       d_cnjg(&z__3, &b[l + i__ * b_dim1]);
-                       i__4 = l + j * a_dim1;
-                       z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, 
-                               z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4]
-                               .r;
-                       z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-/* L190: */
-                   }
-                   if (i__ == j) {
-                       if (*beta == 0.) {
-                           i__3 = j + j * c_dim1;
-                           z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   z__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           d_cnjg(&z__4, alpha);
-                           z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, 
-                                   z__3.i = z__4.r * temp2.i + z__4.i * 
-                                   temp2.r;
-                           z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + 
-                                   z__3.i;
-                           d__1 = z__1.r;
-                           c__[i__3].r = d__1, c__[i__3].i = 0.;
-                       } else {
-                           i__3 = j + j * c_dim1;
-                           i__4 = j + j * c_dim1;
-                           z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   z__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           d_cnjg(&z__4, alpha);
-                           z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, 
-                                   z__3.i = z__4.r * temp2.i + z__4.i * 
-                                   temp2.r;
-                           z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + 
-                                   z__3.i;
-                           d__1 = *beta * c__[i__4].r + z__1.r;
-                           c__[i__3].r = d__1, c__[i__3].i = 0.;
-                       }
-                   } else {
-                       if (*beta == 0.) {
-                           i__3 = i__ + j * c_dim1;
-                           z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   z__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           d_cnjg(&z__4, alpha);
-                           z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, 
-                                   z__3.i = z__4.r * temp2.i + z__4.i * 
-                                   temp2.r;
-                           z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + 
-                                   z__3.i;
-                           c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                       } else {
-                           i__3 = i__ + j * c_dim1;
-                           i__4 = i__ + j * c_dim1;
-                           z__3.r = *beta * c__[i__4].r, z__3.i = *beta * 
-                                   c__[i__4].i;
-                           z__4.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   z__4.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + 
-                                   z__4.i;
-                           d_cnjg(&z__6, alpha);
-                           z__5.r = z__6.r * temp2.r - z__6.i * temp2.i, 
-                                   z__5.i = z__6.r * temp2.i + z__6.i * 
-                                   temp2.r;
-                           z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + 
-                                   z__5.i;
-                           c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                       }
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp1.r = 0., temp1.i = 0.;
-                   temp2.r = 0., temp2.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       d_cnjg(&z__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * b_dim1;
-                       z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4].i, 
-                               z__2.i = z__3.r * b[i__4].i + z__3.i * b[i__4]
-                               .r;
-                       z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i;
-                       temp1.r = z__1.r, temp1.i = z__1.i;
-                       d_cnjg(&z__3, &b[l + i__ * b_dim1]);
-                       i__4 = l + j * a_dim1;
-                       z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, 
-                               z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4]
-                               .r;
-                       z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-/* L220: */
-                   }
-                   if (i__ == j) {
-                       if (*beta == 0.) {
-                           i__3 = j + j * c_dim1;
-                           z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   z__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           d_cnjg(&z__4, alpha);
-                           z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, 
-                                   z__3.i = z__4.r * temp2.i + z__4.i * 
-                                   temp2.r;
-                           z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + 
-                                   z__3.i;
-                           d__1 = z__1.r;
-                           c__[i__3].r = d__1, c__[i__3].i = 0.;
-                       } else {
-                           i__3 = j + j * c_dim1;
-                           i__4 = j + j * c_dim1;
-                           z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   z__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           d_cnjg(&z__4, alpha);
-                           z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, 
-                                   z__3.i = z__4.r * temp2.i + z__4.i * 
-                                   temp2.r;
-                           z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + 
-                                   z__3.i;
-                           d__1 = *beta * c__[i__4].r + z__1.r;
-                           c__[i__3].r = d__1, c__[i__3].i = 0.;
-                       }
-                   } else {
-                       if (*beta == 0.) {
-                           i__3 = i__ + j * c_dim1;
-                           z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   z__2.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           d_cnjg(&z__4, alpha);
-                           z__3.r = z__4.r * temp2.r - z__4.i * temp2.i, 
-                                   z__3.i = z__4.r * temp2.i + z__4.i * 
-                                   temp2.r;
-                           z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + 
-                                   z__3.i;
-                           c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                       } else {
-                           i__3 = i__ + j * c_dim1;
-                           i__4 = i__ + j * c_dim1;
-                           z__3.r = *beta * c__[i__4].r, z__3.i = *beta * 
-                                   c__[i__4].i;
-                           z__4.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                                   z__4.i = alpha->r * temp1.i + alpha->i * 
-                                   temp1.r;
-                           z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + 
-                                   z__4.i;
-                           d_cnjg(&z__6, alpha);
-                           z__5.r = z__6.r * temp2.r - z__6.i * temp2.i, 
-                                   z__5.i = z__6.r * temp2.i + z__6.i * 
-                                   temp2.r;
-                           z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + 
-                                   z__5.i;
-                           c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                       }
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZHER2K. */
-
-} /* zher2k_ */
-
-/* Subroutine */ int zherk_(char *uplo, char *trans, integer *n, integer *k, 
-       doublereal *alpha, doublecomplex *a, integer *lda, doublereal *beta, 
-       doublecomplex *c__, integer *ldc, ftnlen uplo_len, ftnlen trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, 
-           i__6;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, l, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static doublereal rtemp;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZHERK  performs one of the hermitian rank k operations */
-
-/*     C := alpha*A*conjg( A' ) + beta*C, */
-
-/*  or */
-
-/*     C := alpha*conjg( A' )*A + beta*C, */
-
-/*  where  alpha and beta  are  real scalars,  C is an  n by n  hermitian */
-/*  matrix and  A  is an  n by k  matrix in the  first case and a  k by n */
-/*  matrix in the second case. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   C := alpha*A*conjg( A' ) + beta*C. */
-
-/*              TRANS = 'C' or 'c'   C := alpha*conjg( A' )*A + beta*C. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns   of  the   matrix   A,   and  on   entry   with */
-/*           TRANS = 'C' or 'c',  K  specifies  the number of rows of the */
-/*           matrix A.  K must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION            . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - DOUBLE PRECISION. */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-
-/*  C      - COMPLEX*16          array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  hermitian matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  hermitian matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set,  they are assumed to be zero,  and on exit they */
-/*           are set to zero. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-/*  -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1. */
-/*     Ed Anderson, Cray Research Inc. */
-
-
-/*     .. External Functions .. */
-/*     .. */
-/*     .. External Subroutines .. */
-/*     .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Local Scalars .. */
-/*     .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "C", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldc < max(1,*n)) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("ZHERK ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || (*alpha == 0. || *k == 0) && *beta == 1.) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (*alpha == 0.) {
-       if (upper) {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L30: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-/* L40: */
-               }
-           }
-       } else {
-           if (*beta == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*conjg( A' ) + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L90: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L100: */
-                   }
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   if (a[i__3].r != 0. || a[i__3].i != 0.) {
-                       d_cnjg(&z__2, &a[j + l * a_dim1]);
-                       z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__3 = j - 1;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   z__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
-                                   .i + z__2.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L110: */
-                       }
-                       i__3 = j + j * c_dim1;
-                       i__4 = j + j * c_dim1;
-                       i__5 = i__ + l * a_dim1;
-                       z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               z__1.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       d__1 = c__[i__4].r + z__1.r;
-                       c__[i__3].r = d__1, c__[i__3].i = 0.;
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (*beta == 0.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L140: */
-                   }
-               } else if (*beta != 1.) {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = *beta * c__[i__4].r, z__1.i = *beta * c__[
-                               i__4].i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L150: */
-                   }
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   if (a[i__3].r != 0. || a[i__3].i != 0.) {
-                       d_cnjg(&z__2, &a[j + l * a_dim1]);
-                       z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__3 = j + j * c_dim1;
-                       i__4 = j + j * c_dim1;
-                       i__5 = j + l * a_dim1;
-                       z__1.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                               z__1.i = temp.r * a[i__5].i + temp.i * a[i__5]
-                               .r;
-                       d__1 = c__[i__4].r + z__1.r;
-                       c__[i__3].r = d__1, c__[i__3].i = 0.;
-                       i__3 = *n;
-                       for (i__ = j + 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   z__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
-                                   .i + z__2.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*conjg( A' )*A + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       d_cnjg(&z__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * a_dim1;
-                       z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, 
-                               z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4]
-                               .r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L190: */
-                   }
-                   if (*beta == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[
-                               i__4].i;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L200: */
-               }
-               rtemp = 0.;
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   d_cnjg(&z__3, &a[l + j * a_dim1]);
-                   i__3 = l + j * a_dim1;
-                   z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, z__2.i =
-                            z__3.r * a[i__3].i + z__3.i * a[i__3].r;
-                   z__1.r = rtemp + z__2.r, z__1.i = z__2.i;
-                   rtemp = z__1.r;
-/* L210: */
-               }
-               if (*beta == 0.) {
-                   i__2 = j + j * c_dim1;
-                   d__1 = *alpha * rtemp;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *alpha * rtemp + *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               }
-/* L220: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               rtemp = 0.;
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   d_cnjg(&z__3, &a[l + j * a_dim1]);
-                   i__3 = l + j * a_dim1;
-                   z__2.r = z__3.r * a[i__3].r - z__3.i * a[i__3].i, z__2.i =
-                            z__3.r * a[i__3].i + z__3.i * a[i__3].r;
-                   z__1.r = rtemp + z__2.r, z__1.i = z__2.i;
-                   rtemp = z__1.r;
-/* L230: */
-               }
-               if (*beta == 0.) {
-                   i__2 = j + j * c_dim1;
-                   d__1 = *alpha * rtemp;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               } else {
-                   i__2 = j + j * c_dim1;
-                   i__3 = j + j * c_dim1;
-                   d__1 = *alpha * rtemp + *beta * c__[i__3].r;
-                   c__[i__2].r = d__1, c__[i__2].i = 0.;
-               }
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       d_cnjg(&z__3, &a[l + i__ * a_dim1]);
-                       i__4 = l + j * a_dim1;
-                       z__2.r = z__3.r * a[i__4].r - z__3.i * a[i__4].i, 
-                               z__2.i = z__3.r * a[i__4].i + z__3.i * a[i__4]
-                               .r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L240: */
-                   }
-                   if (*beta == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = *alpha * temp.r, z__1.i = *alpha * temp.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = *alpha * temp.r, z__2.i = *alpha * temp.i;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = *beta * c__[i__4].r, z__3.i = *beta * c__[
-                               i__4].i;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L250: */
-               }
-/* L260: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZHERK . */
-
-} /* zherk_ */
-
-/* Subroutine */ int zhpmv_(char *uplo, integer *n, doublecomplex *alpha, 
-       doublecomplex *ap, doublecomplex *x, integer *incx, doublecomplex *
-       beta, doublecomplex *y, integer *incy, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3, z__4;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZHPMV  performs the matrix-vector operation */
-
-/*     y := alpha*A*x + beta*y, */
-
-/*  where alpha and beta are scalars, x and y are n element vectors and */
-/*  A is an n by n hermitian matrix, supplied in packed form. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  AP     - COMPLEX*16       array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set and are assumed to be zero. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX*16      . */
-/*           On entry, BETA specifies the scalar beta. When BETA is */
-/*           supplied as zero then Y need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. On exit, Y is overwritten by the updated */
-/*           vector y. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --y;
-    --x;
-    --ap;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 6;
-    } else if (*incy == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("ZHPMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 1. && 
-           beta->i == 0.)) {
-       return 0;
-    }
-
-/*     Set up the start points in  X  and  Y. */
-
-    if (*incx > 0) {
-       kx = 1;
-    } else {
-       kx = 1 - (*n - 1) * *incx;
-    }
-    if (*incy > 0) {
-       ky = 1;
-    } else {
-       ky = 1 - (*n - 1) * *incy;
-    }
-
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-
-/*     First form  y := beta*y. */
-
-    if (beta->r != 1. || beta->i != 0.) {
-       if (*incy == 1) {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-/* L10: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = i__;
-                   i__3 = i__;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-/* L20: */
-               }
-           }
-       } else {
-           iy = ky;
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   y[i__2].r = 0., y[i__2].i = 0.;
-                   iy += *incy;
-/* L30: */
-               }
-           } else {
-               i__1 = *n;
-               for (i__ = 1; i__ <= i__1; ++i__) {
-                   i__2 = iy;
-                   i__3 = iy;
-                   z__1.r = beta->r * y[i__3].r - beta->i * y[i__3].i, 
-                           z__1.i = beta->r * y[i__3].i + beta->i * y[i__3]
-                           .r;
-                   y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-                   iy += *incy;
-/* L40: */
-               }
-           }
-       }
-    }
-    if (alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  y  when AP contains the upper triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               k = kk;
-               i__2 = j - 1;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__;
-                   i__4 = i__;
-                   i__5 = k;
-                   z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-                           z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-                           .r;
-                   z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-                   y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                   d_cnjg(&z__3, &ap[k]);
-                   i__3 = i__;
-                   z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-                            z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   ++k;
-/* L50: */
-               }
-               i__2 = j;
-               i__3 = j;
-               i__4 = kk + j - 1;
-               d__1 = ap[i__4].r;
-               z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-               z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
-               z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               kk += j;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               ix = kx;
-               iy = ky;
-               i__2 = kk + j - 2;
-               for (k = kk; k <= i__2; ++k) {
-                   i__3 = iy;
-                   i__4 = iy;
-                   i__5 = k;
-                   z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-                           z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-                           .r;
-                   z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-                   y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                   d_cnjg(&z__3, &ap[k]);
-                   i__3 = ix;
-                   z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-                            z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   ix += *incx;
-                   iy += *incy;
-/* L70: */
-               }
-               i__2 = jy;
-               i__3 = jy;
-               i__4 = kk + j - 1;
-               d__1 = ap[i__4].r;
-               z__3.r = d__1 * temp1.r, z__3.i = d__1 * temp1.i;
-               z__2.r = y[i__3].r + z__3.r, z__2.i = y[i__3].i + z__3.i;
-               z__4.r = alpha->r * temp2.r - alpha->i * temp2.i, z__4.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               jx += *incx;
-               jy += *incy;
-               kk += j;
-/* L80: */
-           }
-       }
-    } else {
-
-/*        Form  y  when AP contains the lower triangle. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               i__2 = j;
-               i__3 = j;
-               i__4 = kk;
-               d__1 = ap[i__4].r;
-               z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               k = kk + 1;
-               i__2 = *n;
-               for (i__ = j + 1; i__ <= i__2; ++i__) {
-                   i__3 = i__;
-                   i__4 = i__;
-                   i__5 = k;
-                   z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-                           z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-                           .r;
-                   z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-                   y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                   d_cnjg(&z__3, &ap[k]);
-                   i__3 = i__;
-                   z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-                            z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   ++k;
-/* L90: */
-               }
-               i__2 = j;
-               i__3 = j;
-               z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               kk += *n - j + 1;
-/* L100: */
-           }
-       } else {
-           jx = kx;
-           jy = ky;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               z__1.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, z__1.i =
-                        alpha->r * x[i__2].i + alpha->i * x[i__2].r;
-               temp1.r = z__1.r, temp1.i = z__1.i;
-               temp2.r = 0., temp2.i = 0.;
-               i__2 = jy;
-               i__3 = jy;
-               i__4 = kk;
-               d__1 = ap[i__4].r;
-               z__2.r = d__1 * temp1.r, z__2.i = d__1 * temp1.i;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               ix = jx;
-               iy = jy;
-               i__2 = kk + *n - j;
-               for (k = kk + 1; k <= i__2; ++k) {
-                   ix += *incx;
-                   iy += *incy;
-                   i__3 = iy;
-                   i__4 = iy;
-                   i__5 = k;
-                   z__2.r = temp1.r * ap[i__5].r - temp1.i * ap[i__5].i, 
-                           z__2.i = temp1.r * ap[i__5].i + temp1.i * ap[i__5]
-                           .r;
-                   z__1.r = y[i__4].r + z__2.r, z__1.i = y[i__4].i + z__2.i;
-                   y[i__3].r = z__1.r, y[i__3].i = z__1.i;
-                   d_cnjg(&z__3, &ap[k]);
-                   i__3 = ix;
-                   z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, z__2.i =
-                            z__3.r * x[i__3].i + z__3.i * x[i__3].r;
-                   z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-/* L110: */
-               }
-               i__2 = jy;
-               i__3 = jy;
-               z__2.r = alpha->r * temp2.r - alpha->i * temp2.i, z__2.i = 
-                       alpha->r * temp2.i + alpha->i * temp2.r;
-               z__1.r = y[i__3].r + z__2.r, z__1.i = y[i__3].i + z__2.i;
-               y[i__2].r = z__1.r, y[i__2].i = z__1.i;
-               jx += *incx;
-               jy += *incy;
-               kk += *n - j + 1;
-/* L120: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZHPMV . */
-
-} /* zhpmv_ */
-
-/* Subroutine */ int zhpr_(char *uplo, integer *n, doublereal *alpha, 
-       doublecomplex *x, integer *incx, doublecomplex *ap, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    doublereal d__1;
-    doublecomplex z__1, z__2;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZHPR    performs the hermitian rank 1 operation */
-
-/*     A := alpha*x*conjg( x' ) + A, */
-
-/*  where alpha is a real scalar, x is an n element vector and A is an */
-/*  n by n hermitian matrix, supplied in packed form. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - DOUBLE PRECISION. */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  AP     - COMPLEX*16       array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the upper triangular part of the */
-/*           updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the lower triangular part of the */
-/*           updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set, they are assumed to be zero, and on exit they */
-/*           are set to zero. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --ap;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    }
-    if (info != 0) {
-       xerbla_("ZHPR  ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || *alpha == 0.) {
-       return 0;
-    }
-
-/*     Set the start point in X if the increment is not unity. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  A  when upper triangle is stored in AP. */
-
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   d_cnjg(&z__2, &x[j]);
-                   z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   k = kk;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = i__;
-                       z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               z__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       z__1.r = ap[i__4].r + z__2.r, z__1.i = ap[i__4].i + 
-                               z__2.i;
-                       ap[i__3].r = z__1.r, ap[i__3].i = z__1.i;
-                       ++k;
-/* L10: */
-                   }
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   i__4 = j;
-                   z__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, z__1.i =
-                            x[i__4].r * temp.i + x[i__4].i * temp.r;
-                   d__1 = ap[i__3].r + z__1.r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               } else {
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   d__1 = ap[i__3].r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               }
-               kk += j;
-/* L20: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   d_cnjg(&z__2, &x[jx]);
-                   z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   ix = kx;
-                   i__2 = kk + j - 2;
-                   for (k = kk; k <= i__2; ++k) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = ix;
-                       z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               z__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       z__1.r = ap[i__4].r + z__2.r, z__1.i = ap[i__4].i + 
-                               z__2.i;
-                       ap[i__3].r = z__1.r, ap[i__3].i = z__1.i;
-                       ix += *incx;
-/* L30: */
-                   }
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   i__4 = jx;
-                   z__1.r = x[i__4].r * temp.r - x[i__4].i * temp.i, z__1.i =
-                            x[i__4].r * temp.i + x[i__4].i * temp.r;
-                   d__1 = ap[i__3].r + z__1.r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               } else {
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   d__1 = ap[i__3].r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               }
-               jx += *incx;
-               kk += j;
-/* L40: */
-           }
-       }
-    } else {
-
-/*        Form  A  when lower triangle is stored in AP. */
-
-       if (*incx == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   d_cnjg(&z__2, &x[j]);
-                   z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   i__2 = kk;
-                   i__3 = kk;
-                   i__4 = j;
-                   z__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, z__1.i =
-                            temp.r * x[i__4].i + temp.i * x[i__4].r;
-                   d__1 = ap[i__3].r + z__1.r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-                   k = kk + 1;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = i__;
-                       z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               z__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       z__1.r = ap[i__4].r + z__2.r, z__1.i = ap[i__4].i + 
-                               z__2.i;
-                       ap[i__3].r = z__1.r, ap[i__3].i = z__1.i;
-                       ++k;
-/* L50: */
-                   }
-               } else {
-                   i__2 = kk;
-                   i__3 = kk;
-                   d__1 = ap[i__3].r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               }
-               kk = kk + *n - j + 1;
-/* L60: */
-           }
-       } else {
-           jx = kx;
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                   d_cnjg(&z__2, &x[jx]);
-                   z__1.r = *alpha * z__2.r, z__1.i = *alpha * z__2.i;
-                   temp.r = z__1.r, temp.i = z__1.i;
-                   i__2 = kk;
-                   i__3 = kk;
-                   i__4 = jx;
-                   z__1.r = temp.r * x[i__4].r - temp.i * x[i__4].i, z__1.i =
-                            temp.r * x[i__4].i + temp.i * x[i__4].r;
-                   d__1 = ap[i__3].r + z__1.r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-                   ix = jx;
-                   i__2 = kk + *n - j;
-                   for (k = kk + 1; k <= i__2; ++k) {
-                       ix += *incx;
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = ix;
-                       z__2.r = x[i__5].r * temp.r - x[i__5].i * temp.i, 
-                               z__2.i = x[i__5].r * temp.i + x[i__5].i * 
-                               temp.r;
-                       z__1.r = ap[i__4].r + z__2.r, z__1.i = ap[i__4].i + 
-                               z__2.i;
-                       ap[i__3].r = z__1.r, ap[i__3].i = z__1.i;
-/* L70: */
-                   }
-               } else {
-                   i__2 = kk;
-                   i__3 = kk;
-                   d__1 = ap[i__3].r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               }
-               jx += *incx;
-               kk = kk + *n - j + 1;
-/* L80: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZHPR  . */
-
-} /* zhpr_ */
-
-/* Subroutine */ int zhpr2_(char *uplo, integer *n, doublecomplex *alpha, 
-       doublecomplex *x, integer *incx, doublecomplex *y, integer *incy, 
-       doublecomplex *ap, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5, i__6;
-    doublereal d__1;
-    doublecomplex z__1, z__2, z__3, z__4;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, iy, jx, jy, kx, ky, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZHPR2  performs the hermitian rank 2 operation */
-
-/*     A := alpha*x*conjg( y' ) + conjg( alpha )*y*conjg( x' ) + A, */
-
-/*  where alpha is a scalar, x and y are n element vectors and A is an */
-/*  n by n hermitian matrix, supplied in packed form. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the upper or lower */
-/*           triangular part of the matrix A is supplied in the packed */
-/*           array AP as follows: */
-
-/*              UPLO = 'U' or 'u'   The upper triangular part of A is */
-/*                                  supplied in AP. */
-
-/*              UPLO = 'L' or 'l'   The lower triangular part of A is */
-/*                                  supplied in AP. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. */
-/*           Unchanged on exit. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-/*  Y      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCY ) ). */
-/*           Before entry, the incremented array Y must contain the n */
-/*           element vector y. */
-/*           Unchanged on exit. */
-
-/*  INCY   - INTEGER. */
-/*           On entry, INCY specifies the increment for the elements of */
-/*           Y. INCY must not be zero. */
-/*           Unchanged on exit. */
-
-/*  AP     - COMPLEX*16       array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 ) */
-/*           and a( 2, 2 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the upper triangular part of the */
-/*           updated matrix. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular part of the hermitian matrix */
-/*           packed sequentially, column by column, so that AP( 1 ) */
-/*           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 ) */
-/*           and a( 3, 1 ) respectively, and so on. On exit, the array */
-/*           AP is overwritten by the lower triangular part of the */
-/*           updated matrix. */
-/*           Note that the imaginary parts of the diagonal elements need */
-/*           not be set, they are assumed to be zero, and on exit they */
-/*           are set to zero. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --ap;
-    --y;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (*n < 0) {
-       info = 2;
-    } else if (*incx == 0) {
-       info = 5;
-    } else if (*incy == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("ZHPR2 ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || alpha->r == 0. && alpha->i == 0.) {
-       return 0;
-    }
-
-/*     Set up the start points in X and Y if the increments are not both */
-/*     unity. */
-
-    if (*incx != 1 || *incy != 1) {
-       if (*incx > 0) {
-           kx = 1;
-       } else {
-           kx = 1 - (*n - 1) * *incx;
-       }
-       if (*incy > 0) {
-           ky = 1;
-       } else {
-           ky = 1 - (*n - 1) * *incy;
-       }
-       jx = kx;
-       jy = ky;
-    }
-
-/*     Start the operations. In this version the elements of the array AP */
-/*     are accessed sequentially with one pass through AP. */
-
-    kk = 1;
-    if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  A  when upper triangle is stored in AP. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               i__3 = j;
-               if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || 
-                       y[i__3].i != 0.)) {
-                   d_cnjg(&z__2, &y[j]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   i__2 = j;
-                   z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   d_cnjg(&z__1, &z__2);
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   k = kk;
-                   i__2 = j - 1;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = i__;
-                       z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               z__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       z__2.r = ap[i__4].r + z__3.r, z__2.i = ap[i__4].i + 
-                               z__3.i;
-                       i__6 = i__;
-                       z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               z__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-                       ap[i__3].r = z__1.r, ap[i__3].i = z__1.i;
-                       ++k;
-/* L10: */
-                   }
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   i__4 = j;
-                   z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           z__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = j;
-                   z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           z__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   d__1 = ap[i__3].r + z__1.r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               } else {
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   d__1 = ap[i__3].r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               }
-               kk += j;
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               i__3 = jy;
-               if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || 
-                       y[i__3].i != 0.)) {
-                   d_cnjg(&z__2, &y[jy]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   i__2 = jx;
-                   z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   d_cnjg(&z__1, &z__2);
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   ix = kx;
-                   iy = ky;
-                   i__2 = kk + j - 2;
-                   for (k = kk; k <= i__2; ++k) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = ix;
-                       z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               z__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       z__2.r = ap[i__4].r + z__3.r, z__2.i = ap[i__4].i + 
-                               z__3.i;
-                       i__6 = iy;
-                       z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               z__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-                       ap[i__3].r = z__1.r, ap[i__3].i = z__1.i;
-                       ix += *incx;
-                       iy += *incy;
-/* L30: */
-                   }
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   i__4 = jx;
-                   z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           z__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = jy;
-                   z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           z__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   d__1 = ap[i__3].r + z__1.r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               } else {
-                   i__2 = kk + j - 1;
-                   i__3 = kk + j - 1;
-                   d__1 = ap[i__3].r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               }
-               jx += *incx;
-               jy += *incy;
-               kk += j;
-/* L40: */
-           }
-       }
-    } else {
-
-/*        Form  A  when lower triangle is stored in AP. */
-
-       if (*incx == 1 && *incy == 1) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               i__3 = j;
-               if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || 
-                       y[i__3].i != 0.)) {
-                   d_cnjg(&z__2, &y[j]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   i__2 = j;
-                   z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   d_cnjg(&z__1, &z__2);
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   i__2 = kk;
-                   i__3 = kk;
-                   i__4 = j;
-                   z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           z__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = j;
-                   z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           z__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   d__1 = ap[i__3].r + z__1.r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-                   k = kk + 1;
-                   i__2 = *n;
-                   for (i__ = j + 1; i__ <= i__2; ++i__) {
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = i__;
-                       z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               z__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       z__2.r = ap[i__4].r + z__3.r, z__2.i = ap[i__4].i + 
-                               z__3.i;
-                       i__6 = i__;
-                       z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               z__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-                       ap[i__3].r = z__1.r, ap[i__3].i = z__1.i;
-                       ++k;
-/* L50: */
-                   }
-               } else {
-                   i__2 = kk;
-                   i__3 = kk;
-                   d__1 = ap[i__3].r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               }
-               kk = kk + *n - j + 1;
-/* L60: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = jx;
-               i__3 = jy;
-               if (x[i__2].r != 0. || x[i__2].i != 0. || (y[i__3].r != 0. || 
-                       y[i__3].i != 0.)) {
-                   d_cnjg(&z__2, &y[jy]);
-                   z__1.r = alpha->r * z__2.r - alpha->i * z__2.i, z__1.i = 
-                           alpha->r * z__2.i + alpha->i * z__2.r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   i__2 = jx;
-                   z__2.r = alpha->r * x[i__2].r - alpha->i * x[i__2].i, 
-                           z__2.i = alpha->r * x[i__2].i + alpha->i * x[i__2]
-                           .r;
-                   d_cnjg(&z__1, &z__2);
-                   temp2.r = z__1.r, temp2.i = z__1.i;
-                   i__2 = kk;
-                   i__3 = kk;
-                   i__4 = jx;
-                   z__2.r = x[i__4].r * temp1.r - x[i__4].i * temp1.i, 
-                           z__2.i = x[i__4].r * temp1.i + x[i__4].i * 
-                           temp1.r;
-                   i__5 = jy;
-                   z__3.r = y[i__5].r * temp2.r - y[i__5].i * temp2.i, 
-                           z__3.i = y[i__5].r * temp2.i + y[i__5].i * 
-                           temp2.r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   d__1 = ap[i__3].r + z__1.r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-                   ix = jx;
-                   iy = jy;
-                   i__2 = kk + *n - j;
-                   for (k = kk + 1; k <= i__2; ++k) {
-                       ix += *incx;
-                       iy += *incy;
-                       i__3 = k;
-                       i__4 = k;
-                       i__5 = ix;
-                       z__3.r = x[i__5].r * temp1.r - x[i__5].i * temp1.i, 
-                               z__3.i = x[i__5].r * temp1.i + x[i__5].i * 
-                               temp1.r;
-                       z__2.r = ap[i__4].r + z__3.r, z__2.i = ap[i__4].i + 
-                               z__3.i;
-                       i__6 = iy;
-                       z__4.r = y[i__6].r * temp2.r - y[i__6].i * temp2.i, 
-                               z__4.i = y[i__6].r * temp2.i + y[i__6].i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + z__4.i;
-                       ap[i__3].r = z__1.r, ap[i__3].i = z__1.i;
-/* L70: */
-                   }
-               } else {
-                   i__2 = kk;
-                   i__3 = kk;
-                   d__1 = ap[i__3].r;
-                   ap[i__2].r = d__1, ap[i__2].i = 0.;
-               }
-               jx += *incx;
-               jy += *incy;
-               kk = kk + *n - j + 1;
-/* L80: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZHPR2 . */
-
-} /* zhpr2_ */
-
-/* Subroutine */ int zrotg_(doublecomplex *ca, doublecomplex *cb, doublereal *
-       c__, doublecomplex *s)
-{
-    /* System generated locals */
-    doublereal d__1, d__2;
-    doublecomplex z__1, z__2, z__3, z__4;
-
-    /* Builtin functions */
-    double z_abs(doublecomplex *);
-    void z_div(doublecomplex *, doublecomplex *, doublecomplex *);
-    double sqrt(doublereal);
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static doublereal norm;
-    static doublecomplex alpha;
-    static doublereal scale;
-
-    if (z_abs(ca) != 0.) {
-       goto L10;
-    }
-    *c__ = 0.;
-    s->r = 1., s->i = 0.;
-    ca->r = cb->r, ca->i = cb->i;
-    goto L20;
-L10:
-    scale = z_abs(ca) + z_abs(cb);
-    z__2.r = scale, z__2.i = 0.;
-    z_div(&z__1, ca, &z__2);
-/* Computing 2nd power */
-    d__1 = z_abs(&z__1);
-    z__4.r = scale, z__4.i = 0.;
-    z_div(&z__3, cb, &z__4);
-/* Computing 2nd power */
-    d__2 = z_abs(&z__3);
-    norm = scale * sqrt(d__1 * d__1 + d__2 * d__2);
-    d__1 = z_abs(ca);
-    z__1.r = ca->r / d__1, z__1.i = ca->i / d__1;
-    alpha.r = z__1.r, alpha.i = z__1.i;
-    *c__ = z_abs(ca) / norm;
-    d_cnjg(&z__3, cb);
-    z__2.r = alpha.r * z__3.r - alpha.i * z__3.i, z__2.i = alpha.r * z__3.i + 
-           alpha.i * z__3.r;
-    z__1.r = z__2.r / norm, z__1.i = z__2.i / norm;
-    s->r = z__1.r, s->i = z__1.i;
-    z__1.r = norm * alpha.r, z__1.i = norm * alpha.i;
-    ca->r = z__1.r, ca->i = z__1.i;
-L20:
-    return 0;
-} /* zrotg_ */
-
-/* Subroutine */ int zscal_(integer *n, doublecomplex *za, doublecomplex *zx, 
-       integer *incx)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-    doublecomplex z__1;
-
-    /* Local variables */
-    static integer i__, ix;
-
-
-/*     scales a vector by a constant. */
-/*     jack dongarra, 3/11/78. */
-/*     modified 3/93 to return if incx .le. 0. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --zx;
-
-    /* Function Body */
-    if (*n <= 0 || *incx <= 0) {
-       return 0;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-
-/*        code for increment not equal to 1 */
-
-    ix = 1;
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       i__3 = ix;
-       z__1.r = za->r * zx[i__3].r - za->i * zx[i__3].i, z__1.i = za->r * zx[
-               i__3].i + za->i * zx[i__3].r;
-       zx[i__2].r = z__1.r, zx[i__2].i = z__1.i;
-       ix += *incx;
-/* L10: */
-    }
-    return 0;
-
-/*        code for increment equal to 1 */
-
-L20:
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       i__3 = i__;
-       z__1.r = za->r * zx[i__3].r - za->i * zx[i__3].i, z__1.i = za->r * zx[
-               i__3].i + za->i * zx[i__3].r;
-       zx[i__2].r = z__1.r, zx[i__2].i = z__1.i;
-/* L30: */
-    }
-    return 0;
-} /* zscal_ */
-
-/* Subroutine */ int zswap_(integer *n, doublecomplex *zx, integer *incx, 
-       doublecomplex *zy, integer *incy)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3;
-
-    /* Local variables */
-    static integer i__, ix, iy;
-    static doublecomplex ztemp;
-
-
-/*     interchanges two vectors. */
-/*     jack dongarra, 3/11/78. */
-/*     modified 12/3/93, array(1) declarations changed to array(*) */
-
-
-    /* Parameter adjustments */
-    --zy;
-    --zx;
-
-    /* Function Body */
-    if (*n <= 0) {
-       return 0;
-    }
-    if (*incx == 1 && *incy == 1) {
-       goto L20;
-    }
-
-/*       code for unequal increments or equal increments not equal */
-/*         to 1 */
-
-    ix = 1;
-    iy = 1;
-    if (*incx < 0) {
-       ix = (-(*n) + 1) * *incx + 1;
-    }
-    if (*incy < 0) {
-       iy = (-(*n) + 1) * *incy + 1;
-    }
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = ix;
-       ztemp.r = zx[i__2].r, ztemp.i = zx[i__2].i;
-       i__2 = ix;
-       i__3 = iy;
-       zx[i__2].r = zy[i__3].r, zx[i__2].i = zy[i__3].i;
-       i__2 = iy;
-       zy[i__2].r = ztemp.r, zy[i__2].i = ztemp.i;
-       ix += *incx;
-       iy += *incy;
-/* L10: */
-    }
-    return 0;
-
-/*       code for both increments equal to 1 */
-L20:
-    i__1 = *n;
-    for (i__ = 1; i__ <= i__1; ++i__) {
-       i__2 = i__;
-       ztemp.r = zx[i__2].r, ztemp.i = zx[i__2].i;
-       i__2 = i__;
-       i__3 = i__;
-       zx[i__2].r = zy[i__3].r, zx[i__2].i = zy[i__3].i;
-       i__2 = i__;
-       zy[i__2].r = ztemp.r, zy[i__2].i = ztemp.i;
-/* L30: */
-    }
-    return 0;
-} /* zswap_ */
-
-/* Subroutine */ int zsymm_(char *side, char *uplo, integer *m, integer *n, 
-       doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *
-       b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer *
-       ldc, ftnlen side_len, ftnlen uplo_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6;
-    doublecomplex z__1, z__2, z__3, z__4, z__5;
-
-    /* Local variables */
-    static integer i__, j, k, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZSYMM  performs one of the matrix-matrix operations */
-
-/*     C := alpha*A*B + beta*C, */
-
-/*  or */
-
-/*     C := alpha*B*A + beta*C, */
-
-/*  where  alpha and beta are scalars, A is a symmetric matrix and  B and */
-/*  C are m by n matrices. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE  specifies whether  the  symmetric matrix  A */
-/*           appears on the  left or right  in the  operation as follows: */
-
-/*              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C, */
-
-/*              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C, */
-
-/*           Unchanged on exit. */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of  the  symmetric  matrix   A  is  to  be */
-/*           referenced as follows: */
-
-/*              UPLO = 'U' or 'u'   Only the upper triangular part of the */
-/*                                  symmetric matrix is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the lower triangular part of the */
-/*                                  symmetric matrix is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry,  M  specifies the number of rows of the matrix  C. */
-/*           M  must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of the matrix C. */
-/*           N  must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is */
-/*           m  when  SIDE = 'L' or 'l'  and is n  otherwise. */
-/*           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of */
-/*           the array  A  must contain the  symmetric matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading m by m upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  symmetric matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  m by m  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  symmetric */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of */
-/*           the array  A  must contain the  symmetric matrix,  such that */
-/*           when  UPLO = 'U' or 'u', the leading n by n upper triangular */
-/*           part of the array  A  must contain the upper triangular part */
-/*           of the  symmetric matrix and the  strictly  lower triangular */
-/*           part of  A  is not referenced,  and when  UPLO = 'L' or 'l', */
-/*           the leading  n by n  lower triangular part  of the  array  A */
-/*           must  contain  the  lower triangular part  of the  symmetric */
-/*           matrix and the  strictly upper triangular part of  A  is not */
-/*           referenced. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then */
-/*           LDA must be at least  max( 1, m ), otherwise  LDA must be at */
-/*           least max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  B      - COMPLEX*16       array of DIMENSION ( LDB, n ). */
-/*           Before entry, the leading  m by n part of the array  B  must */
-/*           contain the matrix B. */
-/*           Unchanged on exit. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX*16      . */
-/*           On entry,  BETA  specifies the scalar  beta.  When  BETA  is */
-/*           supplied as zero then C need not be set on input. */
-/*           Unchanged on exit. */
-
-/*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ). */
-/*           Before entry, the leading  m by n  part of the array  C must */
-/*           contain the matrix  C,  except when  beta  is zero, in which */
-/*           case C need not be set on entry. */
-/*           On exit, the array  C  is overwritten by the  m by n updated */
-/*           matrix. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Set NROWA as the number of rows of A. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-/*     Test the input parameters. */
-
-    info = 0;
-    if (! lsame_(side, "L", (ftnlen)1, (ftnlen)1) && ! lsame_(side, "R", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*m < 0) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,*m)) {
-       info = 9;
-    } else if (*ldc < max(1,*m)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("ZSYMM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*m == 0 || *n == 0 || alpha->r == 0. && alpha->i == 0. && (beta->r == 
-           1. && beta->i == 0.)) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (alpha->r == 0. && alpha->i == 0.) {
-       if (beta->r == 0. && beta->i == 0.) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L10: */
-               }
-/* L20: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           z__1.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L30: */
-               }
-/* L40: */
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(side, "L", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*B + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * b_dim1;
-                   z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                           z__1.i = alpha->r * b[i__3].i + alpha->i * b[i__3]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   temp2.r = 0., temp2.i = 0.;
-                   i__3 = i__ - 1;
-                   for (k = 1; k <= i__3; ++k) {
-                       i__4 = k + j * c_dim1;
-                       i__5 = k + j * c_dim1;
-                       i__6 = k + i__ * a_dim1;
-                       z__2.r = temp1.r * a[i__6].r - temp1.i * a[i__6].i, 
-                               z__2.i = temp1.r * a[i__6].i + temp1.i * a[
-                               i__6].r;
-                       z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + 
-                               z__2.i;
-                       c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-                       i__4 = k + j * b_dim1;
-                       i__5 = k + i__ * a_dim1;
-                       z__2.r = b[i__4].r * a[i__5].r - b[i__4].i * a[i__5]
-                               .i, z__2.i = b[i__4].r * a[i__5].i + b[i__4]
-                               .i * a[i__5].r;
-                       z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-/* L50: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + i__ * a_dim1;
-                       z__2.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, 
-                               z__2.i = temp1.r * a[i__4].i + temp1.i * a[
-                               i__4].r;
-                       z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       i__5 = i__ + i__ * a_dim1;
-                       z__4.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                               z__4.i = temp1.r * a[i__5].i + temp1.i * a[
-                               i__5].r;
-                       z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i;
-                       z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L60: */
-               }
-/* L70: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               for (i__ = *m; i__ >= 1; --i__) {
-                   i__2 = i__ + j * b_dim1;
-                   z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, 
-                           z__1.i = alpha->r * b[i__2].i + alpha->i * b[i__2]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-                   temp2.r = 0., temp2.i = 0.;
-                   i__2 = *m;
-                   for (k = i__ + 1; k <= i__2; ++k) {
-                       i__3 = k + j * c_dim1;
-                       i__4 = k + j * c_dim1;
-                       i__5 = k + i__ * a_dim1;
-                       z__2.r = temp1.r * a[i__5].r - temp1.i * a[i__5].i, 
-                               z__2.i = temp1.r * a[i__5].i + temp1.i * a[
-                               i__5].r;
-                       z__1.r = c__[i__4].r + z__2.r, z__1.i = c__[i__4].i + 
-                               z__2.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                       i__3 = k + j * b_dim1;
-                       i__4 = k + i__ * a_dim1;
-                       z__2.r = b[i__3].r * a[i__4].r - b[i__3].i * a[i__4]
-                               .i, z__2.i = b[i__3].r * a[i__4].i + b[i__3]
-                               .i * a[i__4].r;
-                       z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-/* L80: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__2 = i__ + j * c_dim1;
-                       i__3 = i__ + i__ * a_dim1;
-                       z__2.r = temp1.r * a[i__3].r - temp1.i * a[i__3].i, 
-                               z__2.i = temp1.r * a[i__3].i + temp1.i * a[
-                               i__3].r;
-                       z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
-                   } else {
-                       i__2 = i__ + j * c_dim1;
-                       i__3 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__3].r - beta->i * c__[i__3]
-                               .i, z__3.i = beta->r * c__[i__3].i + beta->i *
-                                c__[i__3].r;
-                       i__4 = i__ + i__ * a_dim1;
-                       z__4.r = temp1.r * a[i__4].r - temp1.i * a[i__4].i, 
-                               z__4.i = temp1.r * a[i__4].i + temp1.i * a[
-                               i__4].r;
-                       z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i;
-                       z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i;
-                       c__[i__2].r = z__1.r, c__[i__2].i = z__1.i;
-                   }
-/* L90: */
-               }
-/* L100: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*B*A + beta*C. */
-
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = j + j * a_dim1;
-           z__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2].i, z__1.i = 
-                   alpha->r * a[i__2].i + alpha->i * a[i__2].r;
-           temp1.r = z__1.r, temp1.i = z__1.i;
-           if (beta->r == 0. && beta->i == 0.) {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * b_dim1;
-                   z__1.r = temp1.r * b[i__4].r - temp1.i * b[i__4].i, 
-                           z__1.i = temp1.r * b[i__4].i + temp1.i * b[i__4]
-                           .r;
-                   c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L110: */
-               }
-           } else {
-               i__2 = *m;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   i__3 = i__ + j * c_dim1;
-                   i__4 = i__ + j * c_dim1;
-                   z__2.r = beta->r * c__[i__4].r - beta->i * c__[i__4].i, 
-                           z__2.i = beta->r * c__[i__4].i + beta->i * c__[
-                           i__4].r;
-                   i__5 = i__ + j * b_dim1;
-                   z__3.r = temp1.r * b[i__5].r - temp1.i * b[i__5].i, 
-                           z__3.i = temp1.r * b[i__5].i + temp1.i * b[i__5]
-                           .r;
-                   z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                   c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L120: */
-               }
-           }
-           i__2 = j - 1;
-           for (k = 1; k <= i__2; ++k) {
-               if (upper) {
-                   i__3 = k + j * a_dim1;
-                   z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-               } else {
-                   i__3 = j + k * a_dim1;
-                   z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   i__4 = i__ + j * c_dim1;
-                   i__5 = i__ + j * c_dim1;
-                   i__6 = i__ + k * b_dim1;
-                   z__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, 
-                           z__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6]
-                           .r;
-                   z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + 
-                           z__2.i;
-                   c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L130: */
-               }
-/* L140: */
-           }
-           i__2 = *n;
-           for (k = j + 1; k <= i__2; ++k) {
-               if (upper) {
-                   i__3 = j + k * a_dim1;
-                   z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-               } else {
-                   i__3 = k + j * a_dim1;
-                   z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                           z__1.i = alpha->r * a[i__3].i + alpha->i * a[i__3]
-                           .r;
-                   temp1.r = z__1.r, temp1.i = z__1.i;
-               }
-               i__3 = *m;
-               for (i__ = 1; i__ <= i__3; ++i__) {
-                   i__4 = i__ + j * c_dim1;
-                   i__5 = i__ + j * c_dim1;
-                   i__6 = i__ + k * b_dim1;
-                   z__2.r = temp1.r * b[i__6].r - temp1.i * b[i__6].i, 
-                           z__2.i = temp1.r * b[i__6].i + temp1.i * b[i__6]
-                           .r;
-                   z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5].i + 
-                           z__2.i;
-                   c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L150: */
-               }
-/* L160: */
-           }
-/* L170: */
-       }
-    }
-
-    return 0;
-
-/*     End of ZSYMM . */
-
-} /* zsymm_ */
-
-/* Subroutine */ int zsyr2k_(char *uplo, char *trans, integer *n, integer *k, 
-       doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *
-       b, integer *ldb, doublecomplex *beta, doublecomplex *c__, integer *
-       ldc, ftnlen uplo_len, ftnlen trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, 
-           i__3, i__4, i__5, i__6, i__7;
-    doublecomplex z__1, z__2, z__3, z__4, z__5;
-
-    /* Local variables */
-    static integer i__, j, l, info;
-    static doublecomplex temp1, temp2;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZSYR2K  performs one of the symmetric rank 2k operations */
-
-/*     C := alpha*A*B' + alpha*B*A' + beta*C, */
-
-/*  or */
-
-/*     C := alpha*A'*B + alpha*B'*A + beta*C, */
-
-/*  where  alpha and beta  are scalars,  C is an  n by n symmetric matrix */
-/*  and  A and B  are  n by k  matrices  in the  first  case  and  k by n */
-/*  matrices in the second case. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'    C := alpha*A*B' + alpha*B*A' + */
-/*                                         beta*C. */
-
-/*              TRANS = 'T' or 't'    C := alpha*A'*B + alpha*B'*A + */
-/*                                         beta*C. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns  of the  matrices  A and B,  and on  entry  with */
-/*           TRANS = 'T' or 't',  K  specifies  the number of rows of the */
-/*           matrices  A and B.  K must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  B      - COMPLEX*16       array of DIMENSION ( LDB, kb ), where kb is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  B  must contain the matrix  B,  otherwise */
-/*           the leading  k by n  part of the array  B  must contain  the */
-/*           matrix B. */
-/*           Unchanged on exit. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDB must be at least  max( 1, n ), otherwise  LDB must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX*16      . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-
-/*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldb < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldc < max(1,*n)) {
-       info = 12;
-    }
-    if (info != 0) {
-       xerbla_("ZSYR2K", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) && (beta->r 
-           == 1. && beta->i == 0.)) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (alpha->r == 0. && alpha->i == 0.) {
-       if (upper) {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L30: */
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*B' + alpha*B*A' + C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0. && beta->i == 0.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L90: */
-                   }
-               } else if (beta->r != 1. || beta->i != 0.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L100: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   i__4 = j + l * b_dim1;
-                   if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != 
-                           0. || b[i__4].i != 0.)) {
-                       i__3 = j + l * b_dim1;
-                       z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               z__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp1.r = z__1.r, temp1.i = z__1.i;
-                       i__3 = j + l * a_dim1;
-                       z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               z__1.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-                       i__3 = j;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__3.r = a[i__6].r * temp1.r - a[i__6].i * 
-                                   temp1.i, z__3.i = a[i__6].r * temp1.i + a[
-                                   i__6].i * temp1.r;
-                           z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5]
-                                   .i + z__3.i;
-                           i__7 = i__ + l * b_dim1;
-                           z__4.r = b[i__7].r * temp2.r - b[i__7].i * 
-                                   temp2.i, z__4.i = b[i__7].r * temp2.i + b[
-                                   i__7].i * temp2.r;
-                           z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + 
-                                   z__4.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L110: */
-                       }
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0. && beta->i == 0.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L140: */
-                   }
-               } else if (beta->r != 1. || beta->i != 0.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L150: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   i__4 = j + l * b_dim1;
-                   if (a[i__3].r != 0. || a[i__3].i != 0. || (b[i__4].r != 
-                           0. || b[i__4].i != 0.)) {
-                       i__3 = j + l * b_dim1;
-                       z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               z__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp1.r = z__1.r, temp1.i = z__1.i;
-                       i__3 = j + l * a_dim1;
-                       z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               z__1.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-                       i__3 = *n;
-                       for (i__ = j; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__3.r = a[i__6].r * temp1.r - a[i__6].i * 
-                                   temp1.i, z__3.i = a[i__6].r * temp1.i + a[
-                                   i__6].i * temp1.r;
-                           z__2.r = c__[i__5].r + z__3.r, z__2.i = c__[i__5]
-                                   .i + z__3.i;
-                           i__7 = i__ + l * b_dim1;
-                           z__4.r = b[i__7].r * temp2.r - b[i__7].i * 
-                                   temp2.i, z__4.i = b[i__7].r * temp2.i + b[
-                                   i__7].i * temp2.r;
-                           z__1.r = z__2.r + z__4.r, z__1.i = z__2.i + 
-                                   z__4.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*A'*B + alpha*B'*A + C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp1.r = 0., temp1.i = 0.;
-                   temp2.r = 0., temp2.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * b_dim1;
-                       z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
-                               .i, z__2.i = a[i__4].r * b[i__5].i + a[i__4]
-                               .i * b[i__5].r;
-                       z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i;
-                       temp1.r = z__1.r, temp1.i = z__1.i;
-                       i__4 = l + i__ * b_dim1;
-                       i__5 = l + j * a_dim1;
-                       z__2.r = b[i__4].r * a[i__5].r - b[i__4].i * a[i__5]
-                               .i, z__2.i = b[i__4].r * a[i__5].i + b[i__4]
-                               .i * a[i__5].r;
-                       z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-/* L190: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                               z__2.i = alpha->r * temp1.i + alpha->i * 
-                               temp1.r;
-                       z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__4.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                               z__4.i = alpha->r * temp1.i + alpha->i * 
-                               temp1.r;
-                       z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i;
-                       z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp1.r = 0., temp1.i = 0.;
-                   temp2.r = 0., temp2.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * b_dim1;
-                       z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * b[i__5]
-                               .i, z__2.i = a[i__4].r * b[i__5].i + a[i__4]
-                               .i * b[i__5].r;
-                       z__1.r = temp1.r + z__2.r, z__1.i = temp1.i + z__2.i;
-                       temp1.r = z__1.r, temp1.i = z__1.i;
-                       i__4 = l + i__ * b_dim1;
-                       i__5 = l + j * a_dim1;
-                       z__2.r = b[i__4].r * a[i__5].r - b[i__4].i * a[i__5]
-                               .i, z__2.i = b[i__4].r * a[i__5].i + b[i__4]
-                               .i * a[i__5].r;
-                       z__1.r = temp2.r + z__2.r, z__1.i = temp2.i + z__2.i;
-                       temp2.r = z__1.r, temp2.i = z__1.i;
-/* L220: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                               z__2.i = alpha->r * temp1.i + alpha->i * 
-                               temp1.r;
-                       z__3.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__3.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__4.r = alpha->r * temp1.r - alpha->i * temp1.i, 
-                               z__4.i = alpha->r * temp1.i + alpha->i * 
-                               temp1.r;
-                       z__2.r = z__3.r + z__4.r, z__2.i = z__3.i + z__4.i;
-                       z__5.r = alpha->r * temp2.r - alpha->i * temp2.i, 
-                               z__5.i = alpha->r * temp2.i + alpha->i * 
-                               temp2.r;
-                       z__1.r = z__2.r + z__5.r, z__1.i = z__2.i + z__5.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZSYR2K. */
-
-} /* zsyr2k_ */
-
-/* Subroutine */ int zsyrk_(char *uplo, char *trans, integer *n, integer *k, 
-       doublecomplex *alpha, doublecomplex *a, integer *lda, doublecomplex *
-       beta, doublecomplex *c__, integer *ldc, ftnlen uplo_len, ftnlen 
-       trans_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, 
-           i__6;
-    doublecomplex z__1, z__2, z__3;
-
-    /* Local variables */
-    static integer i__, j, l, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZSYRK  performs one of the symmetric rank k operations */
-
-/*     C := alpha*A*A' + beta*C, */
-
-/*  or */
-
-/*     C := alpha*A'*A + beta*C, */
-
-/*  where  alpha and beta  are scalars,  C is an  n by n symmetric matrix */
-/*  and  A  is an  n by k  matrix in the first case and a  k by n  matrix */
-/*  in the second case. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On  entry,   UPLO  specifies  whether  the  upper  or  lower */
-/*           triangular  part  of the  array  C  is to be  referenced  as */
-/*           follows: */
-
-/*              UPLO = 'U' or 'u'   Only the  upper triangular part of  C */
-/*                                  is to be referenced. */
-
-/*              UPLO = 'L' or 'l'   Only the  lower triangular part of  C */
-/*                                  is to be referenced. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry,  TRANS  specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   C := alpha*A*A' + beta*C. */
-
-/*              TRANS = 'T' or 't'   C := alpha*A'*A + beta*C. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry,  N specifies the order of the matrix C.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with  TRANS = 'N' or 'n',  K  specifies  the number */
-/*           of  columns   of  the   matrix   A,   and  on   entry   with */
-/*           TRANS = 'T' or 't',  K  specifies  the number of rows of the */
-/*           matrix A.  K must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry, ALPHA specifies the scalar alpha. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, ka ), where ka is */
-/*           k  when  TRANS = 'N' or 'n',  and is  n  otherwise. */
-/*           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k */
-/*           part of the array  A  must contain the matrix  A,  otherwise */
-/*           the leading  k by n  part of the array  A  must contain  the */
-/*           matrix A. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n' */
-/*           then  LDA must be at least  max( 1, n ), otherwise  LDA must */
-/*           be at least  max( 1, k ). */
-/*           Unchanged on exit. */
-
-/*  BETA   - COMPLEX*16      . */
-/*           On entry, BETA specifies the scalar beta. */
-/*           Unchanged on exit. */
-
-/*  C      - COMPLEX*16       array of DIMENSION ( LDC, n ). */
-/*           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n */
-/*           upper triangular part of the array C must contain the upper */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           lower triangular part of C is not referenced.  On exit, the */
-/*           upper triangular part of the array  C is overwritten by the */
-/*           upper triangular part of the updated matrix. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n */
-/*           lower triangular part of the array C must contain the lower */
-/*           triangular part  of the  symmetric matrix  and the strictly */
-/*           upper triangular part of C is not referenced.  On exit, the */
-/*           lower triangular part of the array  C is overwritten by the */
-/*           lower triangular part of the updated matrix. */
-
-/*  LDC    - INTEGER. */
-/*           On entry, LDC specifies the first dimension of C as declared */
-/*           in  the  calling  (sub)  program.   LDC  must  be  at  least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    c_dim1 = *ldc;
-    c_offset = 1 + c_dim1 * 1;
-    c__ -= c_offset;
-
-    /* Function Body */
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-       nrowa = *n;
-    } else {
-       nrowa = *k;
-    }
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (*n < 0) {
-       info = 3;
-    } else if (*k < 0) {
-       info = 4;
-    } else if (*lda < max(1,nrowa)) {
-       info = 7;
-    } else if (*ldc < max(1,*n)) {
-       info = 10;
-    }
-    if (info != 0) {
-       xerbla_("ZSYRK ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0 || (alpha->r == 0. && alpha->i == 0. || *k == 0) && (beta->r 
-           == 1. && beta->i == 0.)) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (alpha->r == 0. && alpha->i == 0.) {
-       if (upper) {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L10: */
-                   }
-/* L20: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L30: */
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (beta->r == 0. && beta->i == 0.) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  C := alpha*A*A' + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0. && beta->i == 0.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L90: */
-                   }
-               } else if (beta->r != 1. || beta->i != 0.) {
-                   i__2 = j;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L100: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   if (a[i__3].r != 0. || a[i__3].i != 0.) {
-                       i__3 = j + l * a_dim1;
-                       z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               z__1.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__3 = j;
-                       for (i__ = 1; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   z__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
-                                   .i + z__2.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L110: */
-                       }
-                   }
-/* L120: */
-               }
-/* L130: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               if (beta->r == 0. && beta->i == 0.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       c__[i__3].r = 0., c__[i__3].i = 0.;
-/* L140: */
-                   }
-               } else if (beta->r != 1. || beta->i != 0.) {
-                   i__2 = *n;
-                   for (i__ = j; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * c_dim1;
-                       i__4 = i__ + j * c_dim1;
-                       z__1.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__1.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-/* L150: */
-                   }
-               }
-               i__2 = *k;
-               for (l = 1; l <= i__2; ++l) {
-                   i__3 = j + l * a_dim1;
-                   if (a[i__3].r != 0. || a[i__3].i != 0.) {
-                       i__3 = j + l * a_dim1;
-                       z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3].i, 
-                               z__1.i = alpha->r * a[i__3].i + alpha->i * a[
-                               i__3].r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__3 = *n;
-                       for (i__ = j; i__ <= i__3; ++i__) {
-                           i__4 = i__ + j * c_dim1;
-                           i__5 = i__ + j * c_dim1;
-                           i__6 = i__ + l * a_dim1;
-                           z__2.r = temp.r * a[i__6].r - temp.i * a[i__6].i, 
-                                   z__2.i = temp.r * a[i__6].i + temp.i * a[
-                                   i__6].r;
-                           z__1.r = c__[i__5].r + z__2.r, z__1.i = c__[i__5]
-                                   .i + z__2.i;
-                           c__[i__4].r = z__1.r, c__[i__4].i = z__1.i;
-/* L160: */
-                       }
-                   }
-/* L170: */
-               }
-/* L180: */
-           }
-       }
-    } else {
-
-/*        Form  C := alpha*A'*A + beta*C. */
-
-       if (upper) {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = j;
-               for (i__ = 1; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * a_dim1;
-                       z__2.r = a[i__4].r * a[i__5].r - a[i__4].i * a[i__5]
-                               .i, z__2.i = a[i__4].r * a[i__5].i + a[i__4]
-                               .i * a[i__5].r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L190: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L200: */
-               }
-/* L210: */
-           }
-       } else {
-           i__1 = *n;
-           for (j = 1; j <= i__1; ++j) {
-               i__2 = *n;
-               for (i__ = j; i__ <= i__2; ++i__) {
-                   temp.r = 0., temp.i = 0.;
-                   i__3 = *k;
-                   for (l = 1; l <= i__3; ++l) {
-                       i__4 = l + i__ * a_dim1;
-                       i__5 = l + j * a_dim1;
-                       z__2.r = a[i__4].r * a[i__5].r - a[i__4].i * a[i__5]
-                               .i, z__2.i = a[i__4].r * a[i__5].i + a[i__4]
-                               .i * a[i__5].r;
-                       z__1.r = temp.r + z__2.r, z__1.i = temp.i + z__2.i;
-                       temp.r = z__1.r, temp.i = z__1.i;
-/* L220: */
-                   }
-                   if (beta->r == 0. && beta->i == 0.) {
-                       i__3 = i__ + j * c_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   } else {
-                       i__3 = i__ + j * c_dim1;
-                       z__2.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__2.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       i__4 = i__ + j * c_dim1;
-                       z__3.r = beta->r * c__[i__4].r - beta->i * c__[i__4]
-                               .i, z__3.i = beta->r * c__[i__4].i + beta->i *
-                                c__[i__4].r;
-                       z__1.r = z__2.r + z__3.r, z__1.i = z__2.i + z__3.i;
-                       c__[i__3].r = z__1.r, c__[i__3].i = z__1.i;
-                   }
-/* L230: */
-               }
-/* L240: */
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZSYRK . */
-
-} /* zsyrk_ */
-
-/* Subroutine */ int ztbmv_(char *uplo, char *trans, char *diag, integer *n, 
-       integer *k, doublecomplex *a, integer *lda, doublecomplex *x, integer 
-       *incx, ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2, z__3;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, l, ix, jx, kx, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZTBMV  performs one of the matrix-vector operations */
-
-/*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
-
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular band matrix, with ( k + 1 ) diagonals. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   x := A*x. */
-
-/*              TRANS = 'T' or 't'   x := A'*x. */
-
-/*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < *k + 1) {
-       info = 7;
-    } else if (*incx == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("ZTBMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX   too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*         Form  x := A*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       l = kplus1 - j;
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__4 = j - 1;
-                       for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                           i__2 = i__;
-                           i__3 = i__;
-                           i__5 = l + i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   z__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + 
-                                   z__2.i;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-/* L10: */
-                       }
-                       if (nounit) {
-                           i__4 = j;
-                           i__2 = j;
-                           i__3 = kplus1 + j * a_dim1;
-                           z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
-                                   i__3].i, z__1.i = x[i__2].r * a[i__3].i + 
-                                   x[i__2].i * a[i__3].r;
-                           x[i__4].r = z__1.r, x[i__4].i = z__1.i;
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__4 = jx;
-                   if (x[i__4].r != 0. || x[i__4].i != 0.) {
-                       i__4 = jx;
-                       temp.r = x[i__4].r, temp.i = x[i__4].i;
-                       ix = kx;
-                       l = kplus1 - j;
-/* Computing MAX */
-                       i__4 = 1, i__2 = j - *k;
-                       i__3 = j - 1;
-                       for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                           i__4 = ix;
-                           i__2 = ix;
-                           i__5 = l + i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   z__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           z__1.r = x[i__2].r + z__2.r, z__1.i = x[i__2].i + 
-                                   z__2.i;
-                           x[i__4].r = z__1.r, x[i__4].i = z__1.i;
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           i__3 = jx;
-                           i__4 = jx;
-                           i__2 = kplus1 + j * a_dim1;
-                           z__1.r = x[i__4].r * a[i__2].r - x[i__4].i * a[
-                                   i__2].i, z__1.i = x[i__4].r * a[i__2].i + 
-                                   x[i__4].i * a[i__2].r;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-                       }
-                   }
-                   jx += *incx;
-                   if (j > *k) {
-                       kx += *incx;
-                   }
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       l = 1 - j;
-/* Computing MIN */
-                       i__1 = *n, i__3 = j + *k;
-                       i__4 = j + 1;
-                       for (i__ = min(i__1,i__3); i__ >= i__4; --i__) {
-                           i__1 = i__;
-                           i__3 = i__;
-                           i__2 = l + i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   z__2.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + 
-                                   z__2.i;
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-/* L50: */
-                       }
-                       if (nounit) {
-                           i__4 = j;
-                           i__1 = j;
-                           i__3 = j * a_dim1 + 1;
-                           z__1.r = x[i__1].r * a[i__3].r - x[i__1].i * a[
-                                   i__3].i, z__1.i = x[i__1].r * a[i__3].i + 
-                                   x[i__1].i * a[i__3].r;
-                           x[i__4].r = z__1.r, x[i__4].i = z__1.i;
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__4 = jx;
-                   if (x[i__4].r != 0. || x[i__4].i != 0.) {
-                       i__4 = jx;
-                       temp.r = x[i__4].r, temp.i = x[i__4].i;
-                       ix = kx;
-                       l = 1 - j;
-/* Computing MIN */
-                       i__4 = *n, i__1 = j + *k;
-                       i__3 = j + 1;
-                       for (i__ = min(i__4,i__1); i__ >= i__3; --i__) {
-                           i__4 = ix;
-                           i__1 = ix;
-                           i__2 = l + i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   z__2.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           z__1.r = x[i__1].r + z__2.r, z__1.i = x[i__1].i + 
-                                   z__2.i;
-                           x[i__4].r = z__1.r, x[i__4].i = z__1.i;
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           i__3 = jx;
-                           i__4 = jx;
-                           i__1 = j * a_dim1 + 1;
-                           z__1.r = x[i__4].r * a[i__1].r - x[i__4].i * a[
-                                   i__1].i, z__1.i = x[i__4].r * a[i__1].i + 
-                                   x[i__4].i * a[i__1].r;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-                       }
-                   }
-                   jx -= *incx;
-                   if (*n - j >= *k) {
-                       kx -= *incx;
-                   }
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := A'*x  or  x := conjg( A' )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__3 = j;
-                   temp.r = x[i__3].r, temp.i = x[i__3].i;
-                   l = kplus1 - j;
-                   if (noconj) {
-                       if (nounit) {
-                           i__3 = kplus1 + j * a_dim1;
-                           z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-                                   z__1.i = temp.r * a[i__3].i + temp.i * a[
-                                   i__3].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-/* Computing MAX */
-                       i__4 = 1, i__1 = j - *k;
-                       i__3 = max(i__4,i__1);
-                       for (i__ = j - 1; i__ >= i__3; --i__) {
-                           i__4 = l + i__ + j * a_dim1;
-                           i__1 = i__;
-                           z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[
-                                   i__1].i, z__2.i = a[i__4].r * x[i__1].i + 
-                                   a[i__4].i * x[i__1].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L90: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[kplus1 + j * a_dim1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-/* Computing MAX */
-                       i__4 = 1, i__1 = j - *k;
-                       i__3 = max(i__4,i__1);
-                       for (i__ = j - 1; i__ >= i__3; --i__) {
-                           d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                           i__4 = i__;
-                           z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, 
-                                   z__2.i = z__3.r * x[i__4].i + z__3.i * x[
-                                   i__4].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L100: */
-                       }
-                   }
-                   i__3 = j;
-                   x[i__3].r = temp.r, x[i__3].i = temp.i;
-/* L110: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__3 = jx;
-                   temp.r = x[i__3].r, temp.i = x[i__3].i;
-                   kx -= *incx;
-                   ix = kx;
-                   l = kplus1 - j;
-                   if (noconj) {
-                       if (nounit) {
-                           i__3 = kplus1 + j * a_dim1;
-                           z__1.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-                                   z__1.i = temp.r * a[i__3].i + temp.i * a[
-                                   i__3].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-/* Computing MAX */
-                       i__4 = 1, i__1 = j - *k;
-                       i__3 = max(i__4,i__1);
-                       for (i__ = j - 1; i__ >= i__3; --i__) {
-                           i__4 = l + i__ + j * a_dim1;
-                           i__1 = ix;
-                           z__2.r = a[i__4].r * x[i__1].r - a[i__4].i * x[
-                                   i__1].i, z__2.i = a[i__4].r * x[i__1].i + 
-                                   a[i__4].i * x[i__1].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix -= *incx;
-/* L120: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[kplus1 + j * a_dim1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-/* Computing MAX */
-                       i__4 = 1, i__1 = j - *k;
-                       i__3 = max(i__4,i__1);
-                       for (i__ = j - 1; i__ >= i__3; --i__) {
-                           d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                           i__4 = ix;
-                           z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, 
-                                   z__2.i = z__3.r * x[i__4].i + z__3.i * x[
-                                   i__4].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix -= *incx;
-/* L130: */
-                       }
-                   }
-                   i__3 = jx;
-                   x[i__3].r = temp.r, x[i__3].i = temp.i;
-                   jx -= *incx;
-/* L140: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__3 = *n;
-               for (j = 1; j <= i__3; ++j) {
-                   i__4 = j;
-                   temp.r = x[i__4].r, temp.i = x[i__4].i;
-                   l = 1 - j;
-                   if (noconj) {
-                       if (nounit) {
-                           i__4 = j * a_dim1 + 1;
-                           z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   z__1.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-/* Computing MIN */
-                       i__1 = *n, i__2 = j + *k;
-                       i__4 = min(i__1,i__2);
-                       for (i__ = j + 1; i__ <= i__4; ++i__) {
-                           i__1 = l + i__ + j * a_dim1;
-                           i__2 = i__;
-                           z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-                                   i__2].i, z__2.i = a[i__1].r * x[i__2].i + 
-                                   a[i__1].i * x[i__2].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L150: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j * a_dim1 + 1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-/* Computing MIN */
-                       i__1 = *n, i__2 = j + *k;
-                       i__4 = min(i__1,i__2);
-                       for (i__ = j + 1; i__ <= i__4; ++i__) {
-                           d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                           i__1 = i__;
-                           z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, 
-                                   z__2.i = z__3.r * x[i__1].i + z__3.i * x[
-                                   i__1].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L160: */
-                       }
-                   }
-                   i__4 = j;
-                   x[i__4].r = temp.r, x[i__4].i = temp.i;
-/* L170: */
-               }
-           } else {
-               jx = kx;
-               i__3 = *n;
-               for (j = 1; j <= i__3; ++j) {
-                   i__4 = jx;
-                   temp.r = x[i__4].r, temp.i = x[i__4].i;
-                   kx += *incx;
-                   ix = kx;
-                   l = 1 - j;
-                   if (noconj) {
-                       if (nounit) {
-                           i__4 = j * a_dim1 + 1;
-                           z__1.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   z__1.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-/* Computing MIN */
-                       i__1 = *n, i__2 = j + *k;
-                       i__4 = min(i__1,i__2);
-                       for (i__ = j + 1; i__ <= i__4; ++i__) {
-                           i__1 = l + i__ + j * a_dim1;
-                           i__2 = ix;
-                           z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-                                   i__2].i, z__2.i = a[i__1].r * x[i__2].i + 
-                                   a[i__1].i * x[i__2].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix += *incx;
-/* L180: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j * a_dim1 + 1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-/* Computing MIN */
-                       i__1 = *n, i__2 = j + *k;
-                       i__4 = min(i__1,i__2);
-                       for (i__ = j + 1; i__ <= i__4; ++i__) {
-                           d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                           i__1 = ix;
-                           z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, 
-                                   z__2.i = z__3.r * x[i__1].i + z__3.i * x[
-                                   i__1].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix += *incx;
-/* L190: */
-                       }
-                   }
-                   i__4 = jx;
-                   x[i__4].r = temp.r, x[i__4].i = temp.i;
-                   jx += *incx;
-/* L200: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZTBMV . */
-
-} /* ztbmv_ */
-
-/* Subroutine */ int ztbsv_(char *uplo, char *trans, char *diag, integer *n, 
-       integer *k, doublecomplex *a, integer *lda, doublecomplex *x, integer 
-       *incx, ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2, z__3;
-
-    /* Builtin functions */
-    void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg(
-           doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, l, ix, jx, kx, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer kplus1;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZTBSV  solves one of the systems of equations */
-
-/*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b, */
-
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular band matrix, with ( k + 1 ) */
-/*  diagonals. */
-
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   A*x = b. */
-
-/*              TRANS = 'T' or 't'   A'*x = b. */
-
-/*              TRANS = 'C' or 'c'   conjg( A' )*x = b. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  K      - INTEGER. */
-/*           On entry with UPLO = 'U' or 'u', K specifies the number of */
-/*           super-diagonals of the matrix A. */
-/*           On entry with UPLO = 'L' or 'l', K specifies the number of */
-/*           sub-diagonals of the matrix A. */
-/*           K must satisfy  0 .le. K. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the upper triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row */
-/*           ( k + 1 ) of the array, the first super-diagonal starting at */
-/*           position 2 in row k, and so on. The top left k by k triangle */
-/*           of the array A is not referenced. */
-/*           The following program segment will transfer an upper */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = K + 1 - J */
-/*                    DO 10, I = MAX( 1, J - K ), J */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 ) */
-/*           by n part of the array A must contain the lower triangular */
-/*           band part of the matrix of coefficients, supplied column by */
-/*           column, with the leading diagonal of the matrix in row 1 of */
-/*           the array, the first sub-diagonal starting at position 1 in */
-/*           row 2, and so on. The bottom right k by k triangle of the */
-/*           array A is not referenced. */
-/*           The following program segment will transfer a lower */
-/*           triangular band matrix from conventional full matrix storage */
-/*           to band storage: */
-
-/*                 DO 20, J = 1, N */
-/*                    M = 1 - J */
-/*                    DO 10, I = J, MIN( N, J + K ) */
-/*                       A( M + I, J ) = matrix( I, J ) */
-/*              10    CONTINUE */
-/*              20 CONTINUE */
-
-/*           Note that when DIAG = 'U' or 'u' the elements of the array A */
-/*           corresponding to the diagonal elements of the matrix are not */
-/*           referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           ( k + 1 ). */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*k < 0) {
-       info = 5;
-    } else if (*lda < *k + 1) {
-       info = 7;
-    } else if (*incx == 0) {
-       info = 9;
-    }
-    if (info != 0) {
-       xerbla_("ZTBSV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed by sequentially with one pass through A. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x := inv( A )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       l = kplus1 - j;
-                       if (nounit) {
-                           i__1 = j;
-                           z_div(&z__1, &x[j], &a[kplus1 + j * a_dim1]);
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__1 = max(i__2,i__3);
-                       for (i__ = j - 1; i__ >= i__1; --i__) {
-                           i__2 = i__;
-                           i__3 = i__;
-                           i__4 = l + i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   z__2.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           z__1.r = x[i__3].r - z__2.r, z__1.i = x[i__3].i - 
-                                   z__2.i;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-/* L10: */
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   kx -= *incx;
-                   i__1 = jx;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       ix = kx;
-                       l = kplus1 - j;
-                       if (nounit) {
-                           i__1 = jx;
-                           z_div(&z__1, &x[jx], &a[kplus1 + j * a_dim1]);
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__1 = max(i__2,i__3);
-                       for (i__ = j - 1; i__ >= i__1; --i__) {
-                           i__2 = ix;
-                           i__3 = ix;
-                           i__4 = l + i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   z__2.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           z__1.r = x[i__3].r - z__2.r, z__1.i = x[i__3].i - 
-                                   z__2.i;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                           ix -= *incx;
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       l = 1 - j;
-                       if (nounit) {
-                           i__2 = j;
-                           z_div(&z__1, &x[j], &a[j * a_dim1 + 1]);
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-/* Computing MIN */
-                       i__3 = *n, i__4 = j + *k;
-                       i__2 = min(i__3,i__4);
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = l + i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   z__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-/* L50: */
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   kx += *incx;
-                   i__2 = jx;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       ix = kx;
-                       l = 1 - j;
-                       if (nounit) {
-                           i__2 = jx;
-                           z_div(&z__1, &x[jx], &a[j * a_dim1 + 1]);
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-/* Computing MIN */
-                       i__3 = *n, i__4 = j + *k;
-                       i__2 = min(i__3,i__4);
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = l + i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   z__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-                           ix += *incx;
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := inv( A' )*x  or  x := inv( conjg( A') )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kplus1 = *k + 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   l = kplus1 - j;
-                   if (noconj) {
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__4 = j - 1;
-                       for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                           i__2 = l + i__ + j * a_dim1;
-                           i__3 = i__;
-                           z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[
-                                   i__3].i, z__2.i = a[i__2].r * x[i__3].i + 
-                                   a[i__2].i * x[i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L90: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &a[kplus1 + j * a_dim1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-/* Computing MAX */
-                       i__4 = 1, i__2 = j - *k;
-                       i__3 = j - 1;
-                       for (i__ = max(i__4,i__2); i__ <= i__3; ++i__) {
-                           d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                           i__4 = i__;
-                           z__2.r = z__3.r * x[i__4].r - z__3.i * x[i__4].i, 
-                                   z__2.i = z__3.r * x[i__4].i + z__3.i * x[
-                                   i__4].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L100: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[kplus1 + j * a_dim1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__3 = j;
-                   x[i__3].r = temp.r, x[i__3].i = temp.i;
-/* L110: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__3 = jx;
-                   temp.r = x[i__3].r, temp.i = x[i__3].i;
-                   ix = kx;
-                   l = kplus1 - j;
-                   if (noconj) {
-/* Computing MAX */
-                       i__3 = 1, i__4 = j - *k;
-                       i__2 = j - 1;
-                       for (i__ = max(i__3,i__4); i__ <= i__2; ++i__) {
-                           i__3 = l + i__ + j * a_dim1;
-                           i__4 = ix;
-                           z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, z__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix += *incx;
-/* L120: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &a[kplus1 + j * a_dim1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-/* Computing MAX */
-                       i__2 = 1, i__3 = j - *k;
-                       i__4 = j - 1;
-                       for (i__ = max(i__2,i__3); i__ <= i__4; ++i__) {
-                           d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                           i__2 = ix;
-                           z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, 
-                                   z__2.i = z__3.r * x[i__2].i + z__3.i * x[
-                                   i__2].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix += *incx;
-/* L130: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[kplus1 + j * a_dim1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__4 = jx;
-                   x[i__4].r = temp.r, x[i__4].i = temp.i;
-                   jx += *incx;
-                   if (j > *k) {
-                       kx += *incx;
-                   }
-/* L140: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   l = 1 - j;
-                   if (noconj) {
-/* Computing MIN */
-                       i__1 = *n, i__4 = j + *k;
-                       i__2 = j + 1;
-                       for (i__ = min(i__1,i__4); i__ >= i__2; --i__) {
-                           i__1 = l + i__ + j * a_dim1;
-                           i__4 = i__;
-                           z__2.r = a[i__1].r * x[i__4].r - a[i__1].i * x[
-                                   i__4].i, z__2.i = a[i__1].r * x[i__4].i + 
-                                   a[i__1].i * x[i__4].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L150: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &a[j * a_dim1 + 1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-/* Computing MIN */
-                       i__2 = *n, i__1 = j + *k;
-                       i__4 = j + 1;
-                       for (i__ = min(i__2,i__1); i__ >= i__4; --i__) {
-                           d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                           i__2 = i__;
-                           z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, 
-                                   z__2.i = z__3.r * x[i__2].i + z__3.i * x[
-                                   i__2].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L160: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j * a_dim1 + 1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__4 = j;
-                   x[i__4].r = temp.r, x[i__4].i = temp.i;
-/* L170: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__4 = jx;
-                   temp.r = x[i__4].r, temp.i = x[i__4].i;
-                   ix = kx;
-                   l = 1 - j;
-                   if (noconj) {
-/* Computing MIN */
-                       i__4 = *n, i__2 = j + *k;
-                       i__1 = j + 1;
-                       for (i__ = min(i__4,i__2); i__ >= i__1; --i__) {
-                           i__4 = l + i__ + j * a_dim1;
-                           i__2 = ix;
-                           z__2.r = a[i__4].r * x[i__2].r - a[i__4].i * x[
-                                   i__2].i, z__2.i = a[i__4].r * x[i__2].i + 
-                                   a[i__4].i * x[i__2].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix -= *incx;
-/* L180: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &a[j * a_dim1 + 1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-/* Computing MIN */
-                       i__1 = *n, i__4 = j + *k;
-                       i__2 = j + 1;
-                       for (i__ = min(i__1,i__4); i__ >= i__2; --i__) {
-                           d_cnjg(&z__3, &a[l + i__ + j * a_dim1]);
-                           i__1 = ix;
-                           z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, 
-                                   z__2.i = z__3.r * x[i__1].i + z__3.i * x[
-                                   i__1].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix -= *incx;
-/* L190: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j * a_dim1 + 1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx -= *incx;
-                   if (*n - j >= *k) {
-                       kx -= *incx;
-                   }
-/* L200: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZTBSV . */
-
-} /* ztbsv_ */
-
-/* Subroutine */ int ztpmv_(char *uplo, char *trans, char *diag, integer *n, 
-       doublecomplex *ap, doublecomplex *x, integer *incx, ftnlen uplo_len, 
-       ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2, z__3;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZTPMV  performs one of the matrix-vector operations */
-
-/*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
-
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular matrix, supplied in packed form. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   x := A*x. */
-
-/*              TRANS = 'T' or 't'   x := A'*x. */
-
-/*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  AP     - COMPLEX*16       array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */
-/*           respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */
-/*           respectively, and so on. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --ap;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*incx == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("ZTPMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of AP are */
-/*     accessed sequentially with one pass through AP. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x:= A*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       k = kk;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = k;
-                           z__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5]
-                                   .i, z__2.i = temp.r * ap[i__5].i + temp.i 
-                                   * ap[i__5].r;
-                           z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-                           ++k;
-/* L10: */
-                       }
-                       if (nounit) {
-                           i__2 = j;
-                           i__3 = j;
-                           i__4 = kk + j - 1;
-                           z__1.r = x[i__3].r * ap[i__4].r - x[i__3].i * ap[
-                                   i__4].i, z__1.i = x[i__3].r * ap[i__4].i 
-                                   + x[i__3].i * ap[i__4].r;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                   }
-                   kk += j;
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       ix = kx;
-                       i__2 = kk + j - 2;
-                       for (k = kk; k <= i__2; ++k) {
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = k;
-                           z__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5]
-                                   .i, z__2.i = temp.r * ap[i__5].i + temp.i 
-                                   * ap[i__5].r;
-                           z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           i__2 = jx;
-                           i__3 = jx;
-                           i__4 = kk + j - 1;
-                           z__1.r = x[i__3].r * ap[i__4].r - x[i__3].i * ap[
-                                   i__4].i, z__1.i = x[i__3].r * ap[i__4].i 
-                                   + x[i__3].i * ap[i__4].r;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                   }
-                   jx += *incx;
-                   kk += j;
-/* L40: */
-               }
-           }
-       } else {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       k = kk;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = i__;
-                           i__3 = i__;
-                           i__4 = k;
-                           z__2.r = temp.r * ap[i__4].r - temp.i * ap[i__4]
-                                   .i, z__2.i = temp.r * ap[i__4].i + temp.i 
-                                   * ap[i__4].r;
-                           z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + 
-                                   z__2.i;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                           --k;
-/* L50: */
-                       }
-                       if (nounit) {
-                           i__1 = j;
-                           i__2 = j;
-                           i__3 = kk - *n + j;
-                           z__1.r = x[i__2].r * ap[i__3].r - x[i__2].i * ap[
-                                   i__3].i, z__1.i = x[i__2].r * ap[i__3].i 
-                                   + x[i__2].i * ap[i__3].r;
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                   }
-                   kk -= *n - j + 1;
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       ix = kx;
-                       i__1 = kk - (*n - (j + 1));
-                       for (k = kk; k >= i__1; --k) {
-                           i__2 = ix;
-                           i__3 = ix;
-                           i__4 = k;
-                           z__2.r = temp.r * ap[i__4].r - temp.i * ap[i__4]
-                                   .i, z__2.i = temp.r * ap[i__4].i + temp.i 
-                                   * ap[i__4].r;
-                           z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + 
-                                   z__2.i;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           i__1 = jx;
-                           i__2 = jx;
-                           i__3 = kk - *n + j;
-                           z__1.r = x[i__2].r * ap[i__3].r - x[i__2].i * ap[
-                                   i__3].i, z__1.i = x[i__2].r * ap[i__3].i 
-                                   + x[i__2].i * ap[i__3].r;
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                   }
-                   jx -= *incx;
-                   kk -= *n - j + 1;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := A'*x  or  x := conjg( A' )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   k = kk - 1;
-                   if (noconj) {
-                       if (nounit) {
-                           i__1 = kk;
-                           z__1.r = temp.r * ap[i__1].r - temp.i * ap[i__1]
-                                   .i, z__1.i = temp.r * ap[i__1].i + temp.i 
-                                   * ap[i__1].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           i__1 = k;
-                           i__2 = i__;
-                           z__2.r = ap[i__1].r * x[i__2].r - ap[i__1].i * x[
-                                   i__2].i, z__2.i = ap[i__1].r * x[i__2].i 
-                                   + ap[i__1].i * x[i__2].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           --k;
-/* L90: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &ap[kk]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           d_cnjg(&z__3, &ap[k]);
-                           i__1 = i__;
-                           z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, 
-                                   z__2.i = z__3.r * x[i__1].i + z__3.i * x[
-                                   i__1].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           --k;
-/* L100: */
-                       }
-                   }
-                   i__1 = j;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   kk -= j;
-/* L110: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   ix = jx;
-                   if (noconj) {
-                       if (nounit) {
-                           i__1 = kk;
-                           z__1.r = temp.r * ap[i__1].r - temp.i * ap[i__1]
-                                   .i, z__1.i = temp.r * ap[i__1].i + temp.i 
-                                   * ap[i__1].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__1 = kk - j + 1;
-                       for (k = kk - 1; k >= i__1; --k) {
-                           ix -= *incx;
-                           i__2 = k;
-                           i__3 = ix;
-                           z__2.r = ap[i__2].r * x[i__3].r - ap[i__2].i * x[
-                                   i__3].i, z__2.i = ap[i__2].r * x[i__3].i 
-                                   + ap[i__2].i * x[i__3].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L120: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &ap[kk]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__1 = kk - j + 1;
-                       for (k = kk - 1; k >= i__1; --k) {
-                           ix -= *incx;
-                           d_cnjg(&z__3, &ap[k]);
-                           i__2 = ix;
-                           z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, 
-                                   z__2.i = z__3.r * x[i__2].i + z__3.i * x[
-                                   i__2].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L130: */
-                       }
-                   }
-                   i__1 = jx;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   jx -= *incx;
-                   kk -= j;
-/* L140: */
-               }
-           }
-       } else {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   k = kk + 1;
-                   if (noconj) {
-                       if (nounit) {
-                           i__2 = kk;
-                           z__1.r = temp.r * ap[i__2].r - temp.i * ap[i__2]
-                                   .i, z__1.i = temp.r * ap[i__2].i + temp.i 
-                                   * ap[i__2].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = k;
-                           i__4 = i__;
-                           z__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[
-                                   i__4].i, z__2.i = ap[i__3].r * x[i__4].i 
-                                   + ap[i__3].i * x[i__4].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ++k;
-/* L150: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &ap[kk]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           d_cnjg(&z__3, &ap[k]);
-                           i__3 = i__;
-                           z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                                   z__2.i = z__3.r * x[i__3].i + z__3.i * x[
-                                   i__3].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ++k;
-/* L160: */
-                       }
-                   }
-                   i__2 = j;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   kk += *n - j + 1;
-/* L170: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   ix = jx;
-                   if (noconj) {
-                       if (nounit) {
-                           i__2 = kk;
-                           z__1.r = temp.r * ap[i__2].r - temp.i * ap[i__2]
-                                   .i, z__1.i = temp.r * ap[i__2].i + temp.i 
-                                   * ap[i__2].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = kk + *n - j;
-                       for (k = kk + 1; k <= i__2; ++k) {
-                           ix += *incx;
-                           i__3 = k;
-                           i__4 = ix;
-                           z__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[
-                                   i__4].i, z__2.i = ap[i__3].r * x[i__4].i 
-                                   + ap[i__3].i * x[i__4].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L180: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &ap[kk]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = kk + *n - j;
-                       for (k = kk + 1; k <= i__2; ++k) {
-                           ix += *incx;
-                           d_cnjg(&z__3, &ap[k]);
-                           i__3 = ix;
-                           z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                                   z__2.i = z__3.r * x[i__3].i + z__3.i * x[
-                                   i__3].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L190: */
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx += *incx;
-                   kk += *n - j + 1;
-/* L200: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZTPMV . */
-
-} /* ztpmv_ */
-
-/* Subroutine */ int ztpsv_(char *uplo, char *trans, char *diag, integer *n, 
-       doublecomplex *ap, doublecomplex *x, integer *incx, ftnlen uplo_len, 
-       ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2, z__3;
-
-    /* Builtin functions */
-    void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg(
-           doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, k, kk, ix, jx, kx, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZTPSV  solves one of the systems of equations */
-
-/*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b, */
-
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular matrix, supplied in packed form. */
-
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   A*x = b. */
-
-/*              TRANS = 'T' or 't'   A'*x = b. */
-
-/*              TRANS = 'C' or 'c'   conjg( A' )*x = b. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  AP     - COMPLEX*16       array of DIMENSION at least */
-/*           ( ( n*( n + 1 ) )/2 ). */
-/*           Before entry with  UPLO = 'U' or 'u', the array AP must */
-/*           contain the upper triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 ) */
-/*           respectively, and so on. */
-/*           Before entry with UPLO = 'L' or 'l', the array AP must */
-/*           contain the lower triangular matrix packed sequentially, */
-/*           column by column, so that AP( 1 ) contains a( 1, 1 ), */
-/*           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 ) */
-/*           respectively, and so on. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    --x;
-    --ap;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*incx == 0) {
-       info = 7;
-    }
-    if (info != 0) {
-       xerbla_("ZTPSV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of AP are */
-/*     accessed sequentially with one pass through AP. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x := inv( A )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       if (nounit) {
-                           i__1 = j;
-                           z_div(&z__1, &x[j], &ap[kk]);
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       k = kk - 1;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           i__1 = i__;
-                           i__2 = i__;
-                           i__3 = k;
-                           z__2.r = temp.r * ap[i__3].r - temp.i * ap[i__3]
-                                   .i, z__2.i = temp.r * ap[i__3].i + temp.i 
-                                   * ap[i__3].r;
-                           z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - 
-                                   z__2.i;
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                           --k;
-/* L10: */
-                       }
-                   }
-                   kk -= j;
-/* L20: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       if (nounit) {
-                           i__1 = jx;
-                           z_div(&z__1, &x[jx], &ap[kk]);
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       ix = jx;
-                       i__1 = kk - j + 1;
-                       for (k = kk - 1; k >= i__1; --k) {
-                           ix -= *incx;
-                           i__2 = ix;
-                           i__3 = ix;
-                           i__4 = k;
-                           z__2.r = temp.r * ap[i__4].r - temp.i * ap[i__4]
-                                   .i, z__2.i = temp.r * ap[i__4].i + temp.i 
-                                   * ap[i__4].r;
-                           z__1.r = x[i__3].r - z__2.r, z__1.i = x[i__3].i - 
-                                   z__2.i;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-                   kk -= j;
-/* L40: */
-               }
-           }
-       } else {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       if (nounit) {
-                           i__2 = j;
-                           z_div(&z__1, &x[j], &ap[kk]);
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       k = kk + 1;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = k;
-                           z__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5]
-                                   .i, z__2.i = temp.r * ap[i__5].i + temp.i 
-                                   * ap[i__5].r;
-                           z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-                           ++k;
-/* L50: */
-                       }
-                   }
-                   kk += *n - j + 1;
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       if (nounit) {
-                           i__2 = jx;
-                           z_div(&z__1, &x[jx], &ap[kk]);
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       ix = jx;
-                       i__2 = kk + *n - j;
-                       for (k = kk + 1; k <= i__2; ++k) {
-                           ix += *incx;
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = k;
-                           z__2.r = temp.r * ap[i__5].r - temp.i * ap[i__5]
-                                   .i, z__2.i = temp.r * ap[i__5].i + temp.i 
-                                   * ap[i__5].r;
-                           z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-                   kk += *n - j + 1;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           kk = 1;
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   k = kk;
-                   if (noconj) {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = k;
-                           i__4 = i__;
-                           z__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[
-                                   i__4].i, z__2.i = ap[i__3].r * x[i__4].i 
-                                   + ap[i__3].i * x[i__4].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ++k;
-/* L90: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &ap[kk + j - 1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           d_cnjg(&z__3, &ap[k]);
-                           i__3 = i__;
-                           z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                                   z__2.i = z__3.r * x[i__3].i + z__3.i * x[
-                                   i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ++k;
-/* L100: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &ap[kk + j - 1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__2 = j;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   kk += j;
-/* L110: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   ix = kx;
-                   if (noconj) {
-                       i__2 = kk + j - 2;
-                       for (k = kk; k <= i__2; ++k) {
-                           i__3 = k;
-                           i__4 = ix;
-                           z__2.r = ap[i__3].r * x[i__4].r - ap[i__3].i * x[
-                                   i__4].i, z__2.i = ap[i__3].r * x[i__4].i 
-                                   + ap[i__3].i * x[i__4].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix += *incx;
-/* L120: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &ap[kk + j - 1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-                       i__2 = kk + j - 2;
-                       for (k = kk; k <= i__2; ++k) {
-                           d_cnjg(&z__3, &ap[k]);
-                           i__3 = ix;
-                           z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                                   z__2.i = z__3.r * x[i__3].i + z__3.i * x[
-                                   i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix += *incx;
-/* L130: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &ap[kk + j - 1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx += *incx;
-                   kk += j;
-/* L140: */
-               }
-           }
-       } else {
-           kk = *n * (*n + 1) / 2;
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   k = kk;
-                   if (noconj) {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = k;
-                           i__3 = i__;
-                           z__2.r = ap[i__2].r * x[i__3].r - ap[i__2].i * x[
-                                   i__3].i, z__2.i = ap[i__2].r * x[i__3].i 
-                                   + ap[i__2].i * x[i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           --k;
-/* L150: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &ap[kk - *n + j]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           d_cnjg(&z__3, &ap[k]);
-                           i__2 = i__;
-                           z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, 
-                                   z__2.i = z__3.r * x[i__2].i + z__3.i * x[
-                                   i__2].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           --k;
-/* L160: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &ap[kk - *n + j]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__1 = j;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   kk -= *n - j + 1;
-/* L170: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   ix = kx;
-                   if (noconj) {
-                       i__1 = kk - (*n - (j + 1));
-                       for (k = kk; k >= i__1; --k) {
-                           i__2 = k;
-                           i__3 = ix;
-                           z__2.r = ap[i__2].r * x[i__3].r - ap[i__2].i * x[
-                                   i__3].i, z__2.i = ap[i__2].r * x[i__3].i 
-                                   + ap[i__2].i * x[i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix -= *incx;
-/* L180: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &ap[kk - *n + j]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-                       i__1 = kk - (*n - (j + 1));
-                       for (k = kk; k >= i__1; --k) {
-                           d_cnjg(&z__3, &ap[k]);
-                           i__2 = ix;
-                           z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, 
-                                   z__2.i = z__3.r * x[i__2].i + z__3.i * x[
-                                   i__2].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix -= *incx;
-/* L190: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &ap[kk - *n + j]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__1 = jx;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   jx -= *incx;
-                   kk -= *n - j + 1;
-/* L200: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZTPSV . */
-
-} /* ztpsv_ */
-
-/* Subroutine */ int ztrmm_(char *side, char *uplo, char *transa, char *diag, 
-       integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, 
-       integer *lda, doublecomplex *b, integer *ldb, ftnlen side_len, ftnlen 
-       uplo_len, ftnlen transa_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, 
-           i__6;
-    doublecomplex z__1, z__2, z__3;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, k, info;
-    static doublecomplex temp;
-    static logical lside;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZTRMM  performs one of the matrix-matrix operations */
-
-/*     B := alpha*op( A )*B,   or   B := alpha*B*op( A ) */
-
-/*  where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or */
-/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
-
-/*     op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ). */
-
-/*  Parameters */
-/*  ========== */
-
-/*  SIDE   - CHARACTER*1. */
-/*           On entry,  SIDE specifies whether  op( A ) multiplies B from */
-/*           the left or right as follows: */
-
-/*              SIDE = 'L' or 'l'   B := alpha*op( A )*B. */
-
-/*              SIDE = 'R' or 'r'   B := alpha*B*op( A ). */
-
-/*           Unchanged on exit. */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix A is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANSA - CHARACTER*1. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-
-/*              TRANSA = 'N' or 'n'   op( A ) = A. */
-
-/*              TRANSA = 'T' or 't'   op( A ) = A'. */
-
-/*              TRANSA = 'C' or 'c'   op( A ) = conjg( A' ). */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit triangular */
-/*           as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of B. M must be at */
-/*           least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of B.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
-/*           zero then  A is not referenced and  B need not be set before */
-/*           entry. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, k ), where k is m */
-/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
-/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
-/*           upper triangular part of the array  A must contain the upper */
-/*           triangular matrix  and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
-/*           lower triangular part of the array  A must contain the lower */
-/*           triangular matrix  and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
-/*           A  are not referenced either,  but are assumed to be  unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
-/*           then LDA must be at least max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  B      - COMPLEX*16       array of DIMENSION ( LDB, n ). */
-/*           Before entry,  the leading  m by n part of the array  B must */
-/*           contain the matrix  B,  and  on exit  is overwritten  by the */
-/*           transformed matrix. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-
-    /* Function Body */
-    lside = lsame_(side, "L", (ftnlen)1, (ftnlen)1);
-    if (lside) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    noconj = lsame_(transa, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! lside && ! lsame_(side, "R", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(transa, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa,
-            "T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 3;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 4;
-    } else if (*m < 0) {
-       info = 5;
-    } else if (*n < 0) {
-       info = 6;
-    } else if (*lda < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldb < max(1,*m)) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("ZTRMM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (alpha->r == 0. && alpha->i == 0.) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = *m;
-           for (i__ = 1; i__ <= i__2; ++i__) {
-               i__3 = i__ + j * b_dim1;
-               b[i__3].r = 0., b[i__3].i = 0.;
-/* L10: */
-           }
-/* L20: */
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lside) {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*           Form  B := alpha*A*B. */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (k = 1; k <= i__2; ++k) {
-                       i__3 = k + j * b_dim1;
-                       if (b[i__3].r != 0. || b[i__3].i != 0.) {
-                           i__3 = k + j * b_dim1;
-                           z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
-                                   .i, z__1.i = alpha->r * b[i__3].i + 
-                                   alpha->i * b[i__3].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           i__3 = k - 1;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = i__ + k * a_dim1;
-                               z__2.r = temp.r * a[i__6].r - temp.i * a[i__6]
-                                       .i, z__2.i = temp.r * a[i__6].i + 
-                                       temp.i * a[i__6].r;
-                               z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5]
-                                       .i + z__2.i;
-                               b[i__4].r = z__1.r, b[i__4].i = z__1.i;
-/* L30: */
-                           }
-                           if (nounit) {
-                               i__3 = k + k * a_dim1;
-                               z__1.r = temp.r * a[i__3].r - temp.i * a[i__3]
-                                       .i, z__1.i = temp.r * a[i__3].i + 
-                                       temp.i * a[i__3].r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__3 = k + j * b_dim1;
-                           b[i__3].r = temp.r, b[i__3].i = temp.i;
-                       }
-/* L40: */
-                   }
-/* L50: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (k = *m; k >= 1; --k) {
-                       i__2 = k + j * b_dim1;
-                       if (b[i__2].r != 0. || b[i__2].i != 0.) {
-                           i__2 = k + j * b_dim1;
-                           z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2]
-                                   .i, z__1.i = alpha->r * b[i__2].i + 
-                                   alpha->i * b[i__2].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           i__2 = k + j * b_dim1;
-                           b[i__2].r = temp.r, b[i__2].i = temp.i;
-                           if (nounit) {
-                               i__2 = k + j * b_dim1;
-                               i__3 = k + j * b_dim1;
-                               i__4 = k + k * a_dim1;
-                               z__1.r = b[i__3].r * a[i__4].r - b[i__3].i * 
-                                       a[i__4].i, z__1.i = b[i__3].r * a[
-                                       i__4].i + b[i__3].i * a[i__4].r;
-                               b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-                           }
-                           i__2 = *m;
-                           for (i__ = k + 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + k * a_dim1;
-                               z__2.r = temp.r * a[i__5].r - temp.i * a[i__5]
-                                       .i, z__2.i = temp.r * a[i__5].i + 
-                                       temp.i * a[i__5].r;
-                               z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4]
-                                       .i + z__2.i;
-                               b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L60: */
-                           }
-                       }
-/* L70: */
-                   }
-/* L80: */
-               }
-           }
-       } else {
-
-/*           Form  B := alpha*A'*B   or   B := alpha*conjg( A' )*B. */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (i__ = *m; i__ >= 1; --i__) {
-                       i__2 = i__ + j * b_dim1;
-                       temp.r = b[i__2].r, temp.i = b[i__2].i;
-                       if (noconj) {
-                           if (nounit) {
-                               i__2 = i__ + i__ * a_dim1;
-                               z__1.r = temp.r * a[i__2].r - temp.i * a[i__2]
-                                       .i, z__1.i = temp.r * a[i__2].i + 
-                                       temp.i * a[i__2].r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__2 = i__ - 1;
-                           for (k = 1; k <= i__2; ++k) {
-                               i__3 = k + i__ * a_dim1;
-                               i__4 = k + j * b_dim1;
-                               z__2.r = a[i__3].r * b[i__4].r - a[i__3].i * 
-                                       b[i__4].i, z__2.i = a[i__3].r * b[
-                                       i__4].i + a[i__3].i * b[i__4].r;
-                               z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                       z__2.i;
-                               temp.r = z__1.r, temp.i = z__1.i;
-/* L90: */
-                           }
-                       } else {
-                           if (nounit) {
-                               d_cnjg(&z__2, &a[i__ + i__ * a_dim1]);
-                               z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                       z__1.i = temp.r * z__2.i + temp.i * 
-                                       z__2.r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__2 = i__ - 1;
-                           for (k = 1; k <= i__2; ++k) {
-                               d_cnjg(&z__3, &a[k + i__ * a_dim1]);
-                               i__3 = k + j * b_dim1;
-                               z__2.r = z__3.r * b[i__3].r - z__3.i * b[i__3]
-                                       .i, z__2.i = z__3.r * b[i__3].i + 
-                                       z__3.i * b[i__3].r;
-                               z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                       z__2.i;
-                               temp.r = z__1.r, temp.i = z__1.i;
-/* L100: */
-                           }
-                       }
-                       i__2 = i__ + j * b_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-/* L110: */
-                   }
-/* L120: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * b_dim1;
-                       temp.r = b[i__3].r, temp.i = b[i__3].i;
-                       if (noconj) {
-                           if (nounit) {
-                               i__3 = i__ + i__ * a_dim1;
-                               z__1.r = temp.r * a[i__3].r - temp.i * a[i__3]
-                                       .i, z__1.i = temp.r * a[i__3].i + 
-                                       temp.i * a[i__3].r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__3 = *m;
-                           for (k = i__ + 1; k <= i__3; ++k) {
-                               i__4 = k + i__ * a_dim1;
-                               i__5 = k + j * b_dim1;
-                               z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * 
-                                       b[i__5].i, z__2.i = a[i__4].r * b[
-                                       i__5].i + a[i__4].i * b[i__5].r;
-                               z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                       z__2.i;
-                               temp.r = z__1.r, temp.i = z__1.i;
-/* L130: */
-                           }
-                       } else {
-                           if (nounit) {
-                               d_cnjg(&z__2, &a[i__ + i__ * a_dim1]);
-                               z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                       z__1.i = temp.r * z__2.i + temp.i * 
-                                       z__2.r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__3 = *m;
-                           for (k = i__ + 1; k <= i__3; ++k) {
-                               d_cnjg(&z__3, &a[k + i__ * a_dim1]);
-                               i__4 = k + j * b_dim1;
-                               z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4]
-                                       .i, z__2.i = z__3.r * b[i__4].i + 
-                                       z__3.i * b[i__4].r;
-                               z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                       z__2.i;
-                               temp.r = z__1.r, temp.i = z__1.i;
-/* L140: */
-                           }
-                       }
-                       i__3 = i__ + j * b_dim1;
-                       z__1.r = alpha->r * temp.r - alpha->i * temp.i, 
-                               z__1.i = alpha->r * temp.i + alpha->i * 
-                               temp.r;
-                       b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L150: */
-                   }
-/* L160: */
-               }
-           }
-       }
-    } else {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*           Form  B := alpha*B*A. */
-
-           if (upper) {
-               for (j = *n; j >= 1; --j) {
-                   temp.r = alpha->r, temp.i = alpha->i;
-                   if (nounit) {
-                       i__1 = j + j * a_dim1;
-                       z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, 
-                               z__1.i = temp.r * a[i__1].i + temp.i * a[i__1]
-                               .r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                   }
-                   i__1 = *m;
-                   for (i__ = 1; i__ <= i__1; ++i__) {
-                       i__2 = i__ + j * b_dim1;
-                       i__3 = i__ + j * b_dim1;
-                       z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, 
-                               z__1.i = temp.r * b[i__3].i + temp.i * b[i__3]
-                               .r;
-                       b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-/* L170: */
-                   }
-                   i__1 = j - 1;
-                   for (k = 1; k <= i__1; ++k) {
-                       i__2 = k + j * a_dim1;
-                       if (a[i__2].r != 0. || a[i__2].i != 0.) {
-                           i__2 = k + j * a_dim1;
-                           z__1.r = alpha->r * a[i__2].r - alpha->i * a[i__2]
-                                   .i, z__1.i = alpha->r * a[i__2].i + 
-                                   alpha->i * a[i__2].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + k * b_dim1;
-                               z__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
-                                       .i, z__2.i = temp.r * b[i__5].i + 
-                                       temp.i * b[i__5].r;
-                               z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4]
-                                       .i + z__2.i;
-                               b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L180: */
-                           }
-                       }
-/* L190: */
-                   }
-/* L200: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   temp.r = alpha->r, temp.i = alpha->i;
-                   if (nounit) {
-                       i__2 = j + j * a_dim1;
-                       z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                               z__1.i = temp.r * a[i__2].i + temp.i * a[i__2]
-                               .r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                   }
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * b_dim1;
-                       i__4 = i__ + j * b_dim1;
-                       z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, 
-                               z__1.i = temp.r * b[i__4].i + temp.i * b[i__4]
-                               .r;
-                       b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L210: */
-                   }
-                   i__2 = *n;
-                   for (k = j + 1; k <= i__2; ++k) {
-                       i__3 = k + j * a_dim1;
-                       if (a[i__3].r != 0. || a[i__3].i != 0.) {
-                           i__3 = k + j * a_dim1;
-                           z__1.r = alpha->r * a[i__3].r - alpha->i * a[i__3]
-                                   .i, z__1.i = alpha->r * a[i__3].i + 
-                                   alpha->i * a[i__3].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = i__ + k * b_dim1;
-                               z__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
-                                       .i, z__2.i = temp.r * b[i__6].i + 
-                                       temp.i * b[i__6].r;
-                               z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5]
-                                       .i + z__2.i;
-                               b[i__4].r = z__1.r, b[i__4].i = z__1.i;
-/* L220: */
-                           }
-                       }
-/* L230: */
-                   }
-/* L240: */
-               }
-           }
-       } else {
-
-/*           Form  B := alpha*B*A'   or   B := alpha*B*conjg( A' ). */
-
-           if (upper) {
-               i__1 = *n;
-               for (k = 1; k <= i__1; ++k) {
-                   i__2 = k - 1;
-                   for (j = 1; j <= i__2; ++j) {
-                       i__3 = j + k * a_dim1;
-                       if (a[i__3].r != 0. || a[i__3].i != 0.) {
-                           if (noconj) {
-                               i__3 = j + k * a_dim1;
-                               z__1.r = alpha->r * a[i__3].r - alpha->i * a[
-                                       i__3].i, z__1.i = alpha->r * a[i__3]
-                                       .i + alpha->i * a[i__3].r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           } else {
-                               d_cnjg(&z__2, &a[j + k * a_dim1]);
-                               z__1.r = alpha->r * z__2.r - alpha->i * 
-                                       z__2.i, z__1.i = alpha->r * z__2.i + 
-                                       alpha->i * z__2.r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = i__ + k * b_dim1;
-                               z__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
-                                       .i, z__2.i = temp.r * b[i__6].i + 
-                                       temp.i * b[i__6].r;
-                               z__1.r = b[i__5].r + z__2.r, z__1.i = b[i__5]
-                                       .i + z__2.i;
-                               b[i__4].r = z__1.r, b[i__4].i = z__1.i;
-/* L250: */
-                           }
-                       }
-/* L260: */
-                   }
-                   temp.r = alpha->r, temp.i = alpha->i;
-                   if (nounit) {
-                       if (noconj) {
-                           i__2 = k + k * a_dim1;
-                           z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   z__1.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       } else {
-                           d_cnjg(&z__2, &a[k + k * a_dim1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   if (temp.r != 1. || temp.i != 0.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + k * b_dim1;
-                           i__4 = i__ + k * b_dim1;
-                           z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, 
-                                   z__1.i = temp.r * b[i__4].i + temp.i * b[
-                                   i__4].r;
-                           b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L270: */
-                       }
-                   }
-/* L280: */
-               }
-           } else {
-               for (k = *n; k >= 1; --k) {
-                   i__1 = *n;
-                   for (j = k + 1; j <= i__1; ++j) {
-                       i__2 = j + k * a_dim1;
-                       if (a[i__2].r != 0. || a[i__2].i != 0.) {
-                           if (noconj) {
-                               i__2 = j + k * a_dim1;
-                               z__1.r = alpha->r * a[i__2].r - alpha->i * a[
-                                       i__2].i, z__1.i = alpha->r * a[i__2]
-                                       .i + alpha->i * a[i__2].r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           } else {
-                               d_cnjg(&z__2, &a[j + k * a_dim1]);
-                               z__1.r = alpha->r * z__2.r - alpha->i * 
-                                       z__2.i, z__1.i = alpha->r * z__2.i + 
-                                       alpha->i * z__2.r;
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + k * b_dim1;
-                               z__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
-                                       .i, z__2.i = temp.r * b[i__5].i + 
-                                       temp.i * b[i__5].r;
-                               z__1.r = b[i__4].r + z__2.r, z__1.i = b[i__4]
-                                       .i + z__2.i;
-                               b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L290: */
-                           }
-                       }
-/* L300: */
-                   }
-                   temp.r = alpha->r, temp.i = alpha->i;
-                   if (nounit) {
-                       if (noconj) {
-                           i__1 = k + k * a_dim1;
-                           z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, 
-                                   z__1.i = temp.r * a[i__1].i + temp.i * a[
-                                   i__1].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       } else {
-                           d_cnjg(&z__2, &a[k + k * a_dim1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   if (temp.r != 1. || temp.i != 0.) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + k * b_dim1;
-                           i__3 = i__ + k * b_dim1;
-                           z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, 
-                                   z__1.i = temp.r * b[i__3].i + temp.i * b[
-                                   i__3].r;
-                           b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-/* L310: */
-                       }
-                   }
-/* L320: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZTRMM . */
-
-} /* ztrmm_ */
-
-/* Subroutine */ int ztrmv_(char *uplo, char *trans, char *diag, integer *n, 
-       doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, 
-       ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2, z__3;
-
-    /* Builtin functions */
-    void d_cnjg(doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZTRMV  performs one of the matrix-vector operations */
-
-/*     x := A*x,   or   x := A'*x,   or   x := conjg( A' )*x, */
-
-/*  where x is an n element vector and  A is an n by n unit, or non-unit, */
-/*  upper or lower triangular matrix. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the operation to be performed as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   x := A*x. */
-
-/*              TRANS = 'T' or 't'   x := A'*x. */
-
-/*              TRANS = 'C' or 'c'   x := conjg( A' )*x. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular matrix and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular matrix and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced either, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element vector x. On exit, X is overwritten with the */
-/*           tranformed vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,*n)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    }
-    if (info != 0) {
-       xerbla_("ZTRMV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x := A*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   z__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-/* L10: */
-                       }
-                       if (nounit) {
-                           i__2 = j;
-                           i__3 = j;
-                           i__4 = j + j * a_dim1;
-                           z__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[
-                                   i__4].i, z__1.i = x[i__3].r * a[i__4].i + 
-                                   x[i__3].i * a[i__4].r;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       ix = kx;
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   z__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           z__1.r = x[i__4].r + z__2.r, z__1.i = x[i__4].i + 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-                           ix += *incx;
-/* L30: */
-                       }
-                       if (nounit) {
-                           i__2 = jx;
-                           i__3 = jx;
-                           i__4 = j + j * a_dim1;
-                           z__1.r = x[i__3].r * a[i__4].r - x[i__3].i * a[
-                                   i__4].i, z__1.i = x[i__3].r * a[i__4].i + 
-                                   x[i__3].i * a[i__4].r;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                   }
-                   jx += *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = i__;
-                           i__3 = i__;
-                           i__4 = i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   z__2.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + 
-                                   z__2.i;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-/* L50: */
-                       }
-                       if (nounit) {
-                           i__1 = j;
-                           i__2 = j;
-                           i__3 = j + j * a_dim1;
-                           z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
-                                   i__3].i, z__1.i = x[i__2].r * a[i__3].i + 
-                                   x[i__2].i * a[i__3].r;
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       ix = kx;
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = ix;
-                           i__3 = ix;
-                           i__4 = i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__4].r - temp.i * a[i__4].i, 
-                                   z__2.i = temp.r * a[i__4].i + temp.i * a[
-                                   i__4].r;
-                           z__1.r = x[i__3].r + z__2.r, z__1.i = x[i__3].i + 
-                                   z__2.i;
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                           ix -= *incx;
-/* L70: */
-                       }
-                       if (nounit) {
-                           i__1 = jx;
-                           i__2 = jx;
-                           i__3 = j + j * a_dim1;
-                           z__1.r = x[i__2].r * a[i__3].r - x[i__2].i * a[
-                                   i__3].i, z__1.i = x[i__2].r * a[i__3].i + 
-                                   x[i__2].i * a[i__3].r;
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                   }
-                   jx -= *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := A'*x  or  x := conjg( A' )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   if (noconj) {
-                       if (nounit) {
-                           i__1 = j + j * a_dim1;
-                           z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, 
-                                   z__1.i = temp.r * a[i__1].i + temp.i * a[
-                                   i__1].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           i__1 = i__ + j * a_dim1;
-                           i__2 = i__;
-                           z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-                                   i__2].i, z__2.i = a[i__1].r * x[i__2].i + 
-                                   a[i__1].i * x[i__2].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L90: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j + j * a_dim1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                           i__1 = i__;
-                           z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, 
-                                   z__2.i = z__3.r * x[i__1].i + z__3.i * x[
-                                   i__1].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L100: */
-                       }
-                   }
-                   i__1 = j;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-/* L110: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   ix = jx;
-                   if (noconj) {
-                       if (nounit) {
-                           i__1 = j + j * a_dim1;
-                           z__1.r = temp.r * a[i__1].r - temp.i * a[i__1].i, 
-                                   z__1.i = temp.r * a[i__1].i + temp.i * a[
-                                   i__1].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           ix -= *incx;
-                           i__1 = i__ + j * a_dim1;
-                           i__2 = ix;
-                           z__2.r = a[i__1].r * x[i__2].r - a[i__1].i * x[
-                                   i__2].i, z__2.i = a[i__1].r * x[i__2].i + 
-                                   a[i__1].i * x[i__2].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L120: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j + j * a_dim1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           ix -= *incx;
-                           d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                           i__1 = ix;
-                           z__2.r = z__3.r * x[i__1].r - z__3.i * x[i__1].i, 
-                                   z__2.i = z__3.r * x[i__1].i + z__3.i * x[
-                                   i__1].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L130: */
-                       }
-                   }
-                   i__1 = jx;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   jx -= *incx;
-/* L140: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   if (noconj) {
-                       if (nounit) {
-                           i__2 = j + j * a_dim1;
-                           z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   z__1.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * a_dim1;
-                           i__4 = i__;
-                           z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, z__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L150: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j + j * a_dim1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                           i__3 = i__;
-                           z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                                   z__2.i = z__3.r * x[i__3].i + z__3.i * x[
-                                   i__3].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L160: */
-                       }
-                   }
-                   i__2 = j;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-/* L170: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   ix = jx;
-                   if (noconj) {
-                       if (nounit) {
-                           i__2 = j + j * a_dim1;
-                           z__1.r = temp.r * a[i__2].r - temp.i * a[i__2].i, 
-                                   z__1.i = temp.r * a[i__2].i + temp.i * a[
-                                   i__2].r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           ix += *incx;
-                           i__3 = i__ + j * a_dim1;
-                           i__4 = ix;
-                           z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, z__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L180: */
-                       }
-                   } else {
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j + j * a_dim1]);
-                           z__1.r = temp.r * z__2.r - temp.i * z__2.i, 
-                                   z__1.i = temp.r * z__2.i + temp.i * 
-                                   z__2.r;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           ix += *incx;
-                           d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                           i__3 = ix;
-                           z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                                   z__2.i = z__3.r * x[i__3].i + z__3.i * x[
-                                   i__3].r;
-                           z__1.r = temp.r + z__2.r, z__1.i = temp.i + 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L190: */
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx += *incx;
-/* L200: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZTRMV . */
-
-} /* ztrmv_ */
-
-/* Subroutine */ int ztrsm_(char *side, char *uplo, char *transa, char *diag, 
-       integer *m, integer *n, doublecomplex *alpha, doublecomplex *a, 
-       integer *lda, doublecomplex *b, integer *ldb, ftnlen side_len, ftnlen 
-       uplo_len, ftnlen transa_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3, i__4, i__5, 
-           i__6, i__7;
-    doublecomplex z__1, z__2, z__3;
-
-    /* Builtin functions */
-    void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg(
-           doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, k, info;
-    static doublecomplex temp;
-    static logical lside;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    static integer nrowa;
-    static logical upper;
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZTRSM  solves one of the matrix equations */
-
-/*     op( A )*X = alpha*B,   or   X*op( A ) = alpha*B, */
-
-/*  where alpha is a scalar, X and B are m by n matrices, A is a unit, or */
-/*  non-unit,  upper or lower triangular matrix  and  op( A )  is one  of */
-
-/*     op( A ) = A   or   op( A ) = A'   or   op( A ) = conjg( A' ). */
-
-/*  The matrix X is overwritten on B. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  SIDE   - CHARACTER*1. */
-/*           On entry, SIDE specifies whether op( A ) appears on the left */
-/*           or right of X as follows: */
-
-/*              SIDE = 'L' or 'l'   op( A )*X = alpha*B. */
-
-/*              SIDE = 'R' or 'r'   X*op( A ) = alpha*B. */
-
-/*           Unchanged on exit. */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix A is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANSA - CHARACTER*1. */
-/*           On entry, TRANSA specifies the form of op( A ) to be used in */
-/*           the matrix multiplication as follows: */
-
-/*              TRANSA = 'N' or 'n'   op( A ) = A. */
-
-/*              TRANSA = 'T' or 't'   op( A ) = A'. */
-
-/*              TRANSA = 'C' or 'c'   op( A ) = conjg( A' ). */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit triangular */
-/*           as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  M      - INTEGER. */
-/*           On entry, M specifies the number of rows of B. M must be at */
-/*           least zero. */
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the number of columns of B.  N must be */
-/*           at least zero. */
-/*           Unchanged on exit. */
-
-/*  ALPHA  - COMPLEX*16      . */
-/*           On entry,  ALPHA specifies the scalar  alpha. When  alpha is */
-/*           zero then  A is not referenced and  B need not be set before */
-/*           entry. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, k ), where k is m */
-/*           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'. */
-/*           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k */
-/*           upper triangular part of the array  A must contain the upper */
-/*           triangular matrix  and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k */
-/*           lower triangular part of the array  A must contain the lower */
-/*           triangular matrix  and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u',  the diagonal elements of */
-/*           A  are not referenced either,  but are assumed to be  unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then */
-/*           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r' */
-/*           then LDA must be at least max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  B      - COMPLEX*16       array of DIMENSION ( LDB, n ). */
-/*           Before entry,  the leading  m by n part of the array  B must */
-/*           contain  the  right-hand  side  matrix  B,  and  on exit  is */
-/*           overwritten by the solution matrix  X. */
-
-/*  LDB    - INTEGER. */
-/*           On entry, LDB specifies the first dimension of B as declared */
-/*           in  the  calling  (sub)  program.   LDB  must  be  at  least */
-/*           max( 1, m ). */
-/*           Unchanged on exit. */
-
-
-/*  Level 3 Blas routine. */
-
-/*  -- Written on 8-February-1989. */
-/*     Jack Dongarra, Argonne National Laboratory. */
-/*     Iain Duff, AERE Harwell. */
-/*     Jeremy Du Croz, Numerical Algorithms Group Ltd. */
-/*     Sven Hammarling, Numerical Algorithms Group Ltd. */
-
-
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. Local Scalars .. */
-/*     .. Parameters .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    b_dim1 = *ldb;
-    b_offset = 1 + b_dim1 * 1;
-    b -= b_offset;
-
-    /* Function Body */
-    lside = lsame_(side, "L", (ftnlen)1, (ftnlen)1);
-    if (lside) {
-       nrowa = *m;
-    } else {
-       nrowa = *n;
-    }
-    noconj = lsame_(transa, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-    upper = lsame_(uplo, "U", (ftnlen)1, (ftnlen)1);
-
-    info = 0;
-    if (! lside && ! lsame_(side, "R", (ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! upper && ! lsame_(uplo, "L", (ftnlen)1, (ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(transa, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(transa,
-            "T", (ftnlen)1, (ftnlen)1) && ! lsame_(transa, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 3;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 4;
-    } else if (*m < 0) {
-       info = 5;
-    } else if (*n < 0) {
-       info = 6;
-    } else if (*lda < max(1,nrowa)) {
-       info = 9;
-    } else if (*ldb < max(1,*m)) {
-       info = 11;
-    }
-    if (info != 0) {
-       xerbla_("ZTRSM ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-/*     And when  alpha.eq.zero. */
-
-    if (alpha->r == 0. && alpha->i == 0.) {
-       i__1 = *n;
-       for (j = 1; j <= i__1; ++j) {
-           i__2 = *m;
-           for (i__ = 1; i__ <= i__2; ++i__) {
-               i__3 = i__ + j * b_dim1;
-               b[i__3].r = 0., b[i__3].i = 0.;
-/* L10: */
-           }
-/* L20: */
-       }
-       return 0;
-    }
-
-/*     Start the operations. */
-
-    if (lside) {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*           Form  B := alpha*inv( A )*B. */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (alpha->r != 1. || alpha->i != 0.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * b_dim1;
-                           i__4 = i__ + j * b_dim1;
-                           z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
-                                   .i, z__1.i = alpha->r * b[i__4].i + 
-                                   alpha->i * b[i__4].r;
-                           b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L30: */
-                       }
-                   }
-                   for (k = *m; k >= 1; --k) {
-                       i__2 = k + j * b_dim1;
-                       if (b[i__2].r != 0. || b[i__2].i != 0.) {
-                           if (nounit) {
-                               i__2 = k + j * b_dim1;
-                               z_div(&z__1, &b[k + j * b_dim1], &a[k + k * 
-                                       a_dim1]);
-                               b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-                           }
-                           i__2 = k - 1;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = k + j * b_dim1;
-                               i__6 = i__ + k * a_dim1;
-                               z__2.r = b[i__5].r * a[i__6].r - b[i__5].i * 
-                                       a[i__6].i, z__2.i = b[i__5].r * a[
-                                       i__6].i + b[i__5].i * a[i__6].r;
-                               z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4]
-                                       .i - z__2.i;
-                               b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L40: */
-                           }
-                       }
-/* L50: */
-                   }
-/* L60: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (alpha->r != 1. || alpha->i != 0.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * b_dim1;
-                           i__4 = i__ + j * b_dim1;
-                           z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
-                                   .i, z__1.i = alpha->r * b[i__4].i + 
-                                   alpha->i * b[i__4].r;
-                           b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L70: */
-                       }
-                   }
-                   i__2 = *m;
-                   for (k = 1; k <= i__2; ++k) {
-                       i__3 = k + j * b_dim1;
-                       if (b[i__3].r != 0. || b[i__3].i != 0.) {
-                           if (nounit) {
-                               i__3 = k + j * b_dim1;
-                               z_div(&z__1, &b[k + j * b_dim1], &a[k + k * 
-                                       a_dim1]);
-                               b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-                           }
-                           i__3 = *m;
-                           for (i__ = k + 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = k + j * b_dim1;
-                               i__7 = i__ + k * a_dim1;
-                               z__2.r = b[i__6].r * a[i__7].r - b[i__6].i * 
-                                       a[i__7].i, z__2.i = b[i__6].r * a[
-                                       i__7].i + b[i__6].i * a[i__7].r;
-                               z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5]
-                                       .i - z__2.i;
-                               b[i__4].r = z__1.r, b[i__4].i = z__1.i;
-/* L80: */
-                           }
-                       }
-/* L90: */
-                   }
-/* L100: */
-               }
-           }
-       } else {
-
-/*           Form  B := alpha*inv( A' )*B */
-/*           or    B := alpha*inv( conjg( A' ) )*B. */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = *m;
-                   for (i__ = 1; i__ <= i__2; ++i__) {
-                       i__3 = i__ + j * b_dim1;
-                       z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3].i, 
-                               z__1.i = alpha->r * b[i__3].i + alpha->i * b[
-                               i__3].r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       if (noconj) {
-                           i__3 = i__ - 1;
-                           for (k = 1; k <= i__3; ++k) {
-                               i__4 = k + i__ * a_dim1;
-                               i__5 = k + j * b_dim1;
-                               z__2.r = a[i__4].r * b[i__5].r - a[i__4].i * 
-                                       b[i__5].i, z__2.i = a[i__4].r * b[
-                                       i__5].i + a[i__4].i * b[i__5].r;
-                               z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                       z__2.i;
-                               temp.r = z__1.r, temp.i = z__1.i;
-/* L110: */
-                           }
-                           if (nounit) {
-                               z_div(&z__1, &temp, &a[i__ + i__ * a_dim1]);
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                       } else {
-                           i__3 = i__ - 1;
-                           for (k = 1; k <= i__3; ++k) {
-                               d_cnjg(&z__3, &a[k + i__ * a_dim1]);
-                               i__4 = k + j * b_dim1;
-                               z__2.r = z__3.r * b[i__4].r - z__3.i * b[i__4]
-                                       .i, z__2.i = z__3.r * b[i__4].i + 
-                                       z__3.i * b[i__4].r;
-                               z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                       z__2.i;
-                               temp.r = z__1.r, temp.i = z__1.i;
-/* L120: */
-                           }
-                           if (nounit) {
-                               d_cnjg(&z__2, &a[i__ + i__ * a_dim1]);
-                               z_div(&z__1, &temp, &z__2);
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                       }
-                       i__3 = i__ + j * b_dim1;
-                       b[i__3].r = temp.r, b[i__3].i = temp.i;
-/* L130: */
-                   }
-/* L140: */
-               }
-           } else {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   for (i__ = *m; i__ >= 1; --i__) {
-                       i__2 = i__ + j * b_dim1;
-                       z__1.r = alpha->r * b[i__2].r - alpha->i * b[i__2].i, 
-                               z__1.i = alpha->r * b[i__2].i + alpha->i * b[
-                               i__2].r;
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       if (noconj) {
-                           i__2 = *m;
-                           for (k = i__ + 1; k <= i__2; ++k) {
-                               i__3 = k + i__ * a_dim1;
-                               i__4 = k + j * b_dim1;
-                               z__2.r = a[i__3].r * b[i__4].r - a[i__3].i * 
-                                       b[i__4].i, z__2.i = a[i__3].r * b[
-                                       i__4].i + a[i__3].i * b[i__4].r;
-                               z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                       z__2.i;
-                               temp.r = z__1.r, temp.i = z__1.i;
-/* L150: */
-                           }
-                           if (nounit) {
-                               z_div(&z__1, &temp, &a[i__ + i__ * a_dim1]);
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                       } else {
-                           i__2 = *m;
-                           for (k = i__ + 1; k <= i__2; ++k) {
-                               d_cnjg(&z__3, &a[k + i__ * a_dim1]);
-                               i__3 = k + j * b_dim1;
-                               z__2.r = z__3.r * b[i__3].r - z__3.i * b[i__3]
-                                       .i, z__2.i = z__3.r * b[i__3].i + 
-                                       z__3.i * b[i__3].r;
-                               z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                       z__2.i;
-                               temp.r = z__1.r, temp.i = z__1.i;
-/* L160: */
-                           }
-                           if (nounit) {
-                               d_cnjg(&z__2, &a[i__ + i__ * a_dim1]);
-                               z_div(&z__1, &temp, &z__2);
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                       }
-                       i__2 = i__ + j * b_dim1;
-                       b[i__2].r = temp.r, b[i__2].i = temp.i;
-/* L170: */
-                   }
-/* L180: */
-               }
-           }
-       }
-    } else {
-       if (lsame_(transa, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*           Form  B := alpha*B*inv( A ). */
-
-           if (upper) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   if (alpha->r != 1. || alpha->i != 0.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * b_dim1;
-                           i__4 = i__ + j * b_dim1;
-                           z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
-                                   .i, z__1.i = alpha->r * b[i__4].i + 
-                                   alpha->i * b[i__4].r;
-                           b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L190: */
-                       }
-                   }
-                   i__2 = j - 1;
-                   for (k = 1; k <= i__2; ++k) {
-                       i__3 = k + j * a_dim1;
-                       if (a[i__3].r != 0. || a[i__3].i != 0.) {
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = k + j * a_dim1;
-                               i__7 = i__ + k * b_dim1;
-                               z__2.r = a[i__6].r * b[i__7].r - a[i__6].i * 
-                                       b[i__7].i, z__2.i = a[i__6].r * b[
-                                       i__7].i + a[i__6].i * b[i__7].r;
-                               z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5]
-                                       .i - z__2.i;
-                               b[i__4].r = z__1.r, b[i__4].i = z__1.i;
-/* L200: */
-                           }
-                       }
-/* L210: */
-                   }
-                   if (nounit) {
-                       z_div(&z__1, &c_b2094, &a[j + j * a_dim1]);
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * b_dim1;
-                           i__4 = i__ + j * b_dim1;
-                           z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, 
-                                   z__1.i = temp.r * b[i__4].i + temp.i * b[
-                                   i__4].r;
-                           b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L220: */
-                       }
-                   }
-/* L230: */
-               }
-           } else {
-               for (j = *n; j >= 1; --j) {
-                   if (alpha->r != 1. || alpha->i != 0.) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + j * b_dim1;
-                           i__3 = i__ + j * b_dim1;
-                           z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
-                                   .i, z__1.i = alpha->r * b[i__3].i + 
-                                   alpha->i * b[i__3].r;
-                           b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-/* L240: */
-                       }
-                   }
-                   i__1 = *n;
-                   for (k = j + 1; k <= i__1; ++k) {
-                       i__2 = k + j * a_dim1;
-                       if (a[i__2].r != 0. || a[i__2].i != 0.) {
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = k + j * a_dim1;
-                               i__6 = i__ + k * b_dim1;
-                               z__2.r = a[i__5].r * b[i__6].r - a[i__5].i * 
-                                       b[i__6].i, z__2.i = a[i__5].r * b[
-                                       i__6].i + a[i__5].i * b[i__6].r;
-                               z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4]
-                                       .i - z__2.i;
-                               b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L250: */
-                           }
-                       }
-/* L260: */
-                   }
-                   if (nounit) {
-                       z_div(&z__1, &c_b2094, &a[j + j * a_dim1]);
-                       temp.r = z__1.r, temp.i = z__1.i;
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + j * b_dim1;
-                           i__3 = i__ + j * b_dim1;
-                           z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, 
-                                   z__1.i = temp.r * b[i__3].i + temp.i * b[
-                                   i__3].r;
-                           b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-/* L270: */
-                       }
-                   }
-/* L280: */
-               }
-           }
-       } else {
-
-/*           Form  B := alpha*B*inv( A' ) */
-/*           or    B := alpha*B*inv( conjg( A' ) ). */
-
-           if (upper) {
-               for (k = *n; k >= 1; --k) {
-                   if (nounit) {
-                       if (noconj) {
-                           z_div(&z__1, &c_b2094, &a[k + k * a_dim1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       } else {
-                           d_cnjg(&z__2, &a[k + k * a_dim1]);
-                           z_div(&z__1, &c_b2094, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + k * b_dim1;
-                           i__3 = i__ + k * b_dim1;
-                           z__1.r = temp.r * b[i__3].r - temp.i * b[i__3].i, 
-                                   z__1.i = temp.r * b[i__3].i + temp.i * b[
-                                   i__3].r;
-                           b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-/* L290: */
-                       }
-                   }
-                   i__1 = k - 1;
-                   for (j = 1; j <= i__1; ++j) {
-                       i__2 = j + k * a_dim1;
-                       if (a[i__2].r != 0. || a[i__2].i != 0.) {
-                           if (noconj) {
-                               i__2 = j + k * a_dim1;
-                               temp.r = a[i__2].r, temp.i = a[i__2].i;
-                           } else {
-                               d_cnjg(&z__1, &a[j + k * a_dim1]);
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__2 = *m;
-                           for (i__ = 1; i__ <= i__2; ++i__) {
-                               i__3 = i__ + j * b_dim1;
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + k * b_dim1;
-                               z__2.r = temp.r * b[i__5].r - temp.i * b[i__5]
-                                       .i, z__2.i = temp.r * b[i__5].i + 
-                                       temp.i * b[i__5].r;
-                               z__1.r = b[i__4].r - z__2.r, z__1.i = b[i__4]
-                                       .i - z__2.i;
-                               b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L300: */
-                           }
-                       }
-/* L310: */
-                   }
-                   if (alpha->r != 1. || alpha->i != 0.) {
-                       i__1 = *m;
-                       for (i__ = 1; i__ <= i__1; ++i__) {
-                           i__2 = i__ + k * b_dim1;
-                           i__3 = i__ + k * b_dim1;
-                           z__1.r = alpha->r * b[i__3].r - alpha->i * b[i__3]
-                                   .i, z__1.i = alpha->r * b[i__3].i + 
-                                   alpha->i * b[i__3].r;
-                           b[i__2].r = z__1.r, b[i__2].i = z__1.i;
-/* L320: */
-                       }
-                   }
-/* L330: */
-               }
-           } else {
-               i__1 = *n;
-               for (k = 1; k <= i__1; ++k) {
-                   if (nounit) {
-                       if (noconj) {
-                           z_div(&z__1, &c_b2094, &a[k + k * a_dim1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       } else {
-                           d_cnjg(&z__2, &a[k + k * a_dim1]);
-                           z_div(&z__1, &c_b2094, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + k * b_dim1;
-                           i__4 = i__ + k * b_dim1;
-                           z__1.r = temp.r * b[i__4].r - temp.i * b[i__4].i, 
-                                   z__1.i = temp.r * b[i__4].i + temp.i * b[
-                                   i__4].r;
-                           b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L340: */
-                       }
-                   }
-                   i__2 = *n;
-                   for (j = k + 1; j <= i__2; ++j) {
-                       i__3 = j + k * a_dim1;
-                       if (a[i__3].r != 0. || a[i__3].i != 0.) {
-                           if (noconj) {
-                               i__3 = j + k * a_dim1;
-                               temp.r = a[i__3].r, temp.i = a[i__3].i;
-                           } else {
-                               d_cnjg(&z__1, &a[j + k * a_dim1]);
-                               temp.r = z__1.r, temp.i = z__1.i;
-                           }
-                           i__3 = *m;
-                           for (i__ = 1; i__ <= i__3; ++i__) {
-                               i__4 = i__ + j * b_dim1;
-                               i__5 = i__ + j * b_dim1;
-                               i__6 = i__ + k * b_dim1;
-                               z__2.r = temp.r * b[i__6].r - temp.i * b[i__6]
-                                       .i, z__2.i = temp.r * b[i__6].i + 
-                                       temp.i * b[i__6].r;
-                               z__1.r = b[i__5].r - z__2.r, z__1.i = b[i__5]
-                                       .i - z__2.i;
-                               b[i__4].r = z__1.r, b[i__4].i = z__1.i;
-/* L350: */
-                           }
-                       }
-/* L360: */
-                   }
-                   if (alpha->r != 1. || alpha->i != 0.) {
-                       i__2 = *m;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + k * b_dim1;
-                           i__4 = i__ + k * b_dim1;
-                           z__1.r = alpha->r * b[i__4].r - alpha->i * b[i__4]
-                                   .i, z__1.i = alpha->r * b[i__4].i + 
-                                   alpha->i * b[i__4].r;
-                           b[i__3].r = z__1.r, b[i__3].i = z__1.i;
-/* L370: */
-                       }
-                   }
-/* L380: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZTRSM . */
-
-} /* ztrsm_ */
-
-/* Subroutine */ int ztrsv_(char *uplo, char *trans, char *diag, integer *n, 
-       doublecomplex *a, integer *lda, doublecomplex *x, integer *incx, 
-       ftnlen uplo_len, ftnlen trans_len, ftnlen diag_len)
-{
-    /* System generated locals */
-    integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5;
-    doublecomplex z__1, z__2, z__3;
-
-    /* Builtin functions */
-    void z_div(doublecomplex *, doublecomplex *, doublecomplex *), d_cnjg(
-           doublecomplex *, doublecomplex *);
-
-    /* Local variables */
-    static integer i__, j, ix, jx, kx, info;
-    static doublecomplex temp;
-    extern logical lsame_(char *, char *, ftnlen, ftnlen);
-    extern /* Subroutine */ int xerbla_(char *, integer *, ftnlen);
-    static logical noconj, nounit;
-
-/*     .. Scalar Arguments .. */
-/*     .. Array Arguments .. */
-/*     .. */
-
-/*  Purpose */
-/*  ======= */
-
-/*  ZTRSV  solves one of the systems of equations */
-
-/*     A*x = b,   or   A'*x = b,   or   conjg( A' )*x = b, */
-
-/*  where b and x are n element vectors and A is an n by n unit, or */
-/*  non-unit, upper or lower triangular matrix. */
-
-/*  No test for singularity or near-singularity is included in this */
-/*  routine. Such tests must be performed before calling this routine. */
-
-/*  Parameters */
-/*  ========== */
-
-/*  UPLO   - CHARACTER*1. */
-/*           On entry, UPLO specifies whether the matrix is an upper or */
-/*           lower triangular matrix as follows: */
-
-/*              UPLO = 'U' or 'u'   A is an upper triangular matrix. */
-
-/*              UPLO = 'L' or 'l'   A is a lower triangular matrix. */
-
-/*           Unchanged on exit. */
-
-/*  TRANS  - CHARACTER*1. */
-/*           On entry, TRANS specifies the equations to be solved as */
-/*           follows: */
-
-/*              TRANS = 'N' or 'n'   A*x = b. */
-
-/*              TRANS = 'T' or 't'   A'*x = b. */
-
-/*              TRANS = 'C' or 'c'   conjg( A' )*x = b. */
-
-/*           Unchanged on exit. */
-
-/*  DIAG   - CHARACTER*1. */
-/*           On entry, DIAG specifies whether or not A is unit */
-/*           triangular as follows: */
-
-/*              DIAG = 'U' or 'u'   A is assumed to be unit triangular. */
-
-/*              DIAG = 'N' or 'n'   A is not assumed to be unit */
-/*                                  triangular. */
-
-/*           Unchanged on exit. */
-
-/*  N      - INTEGER. */
-/*           On entry, N specifies the order of the matrix A. */
-/*           N must be at least zero. */
-/*           Unchanged on exit. */
-
-/*  A      - COMPLEX*16       array of DIMENSION ( LDA, n ). */
-/*           Before entry with  UPLO = 'U' or 'u', the leading n by n */
-/*           upper triangular part of the array A must contain the upper */
-/*           triangular matrix and the strictly lower triangular part of */
-/*           A is not referenced. */
-/*           Before entry with UPLO = 'L' or 'l', the leading n by n */
-/*           lower triangular part of the array A must contain the lower */
-/*           triangular matrix and the strictly upper triangular part of */
-/*           A is not referenced. */
-/*           Note that when  DIAG = 'U' or 'u', the diagonal elements of */
-/*           A are not referenced either, but are assumed to be unity. */
-/*           Unchanged on exit. */
-
-/*  LDA    - INTEGER. */
-/*           On entry, LDA specifies the first dimension of A as declared */
-/*           in the calling (sub) program. LDA must be at least */
-/*           max( 1, n ). */
-/*           Unchanged on exit. */
-
-/*  X      - COMPLEX*16       array of dimension at least */
-/*           ( 1 + ( n - 1 )*abs( INCX ) ). */
-/*           Before entry, the incremented array X must contain the n */
-/*           element right-hand side vector b. On exit, X is overwritten */
-/*           with the solution vector x. */
-
-/*  INCX   - INTEGER. */
-/*           On entry, INCX specifies the increment for the elements of */
-/*           X. INCX must not be zero. */
-/*           Unchanged on exit. */
-
-
-/*  Level 2 Blas routine. */
-
-/*  -- Written on 22-October-1986. */
-/*     Jack Dongarra, Argonne National Lab. */
-/*     Jeremy Du Croz, Nag Central Office. */
-/*     Sven Hammarling, Nag Central Office. */
-/*     Richard Hanson, Sandia National Labs. */
-
-
-/*     .. Parameters .. */
-/*     .. Local Scalars .. */
-/*     .. External Functions .. */
-/*     .. External Subroutines .. */
-/*     .. Intrinsic Functions .. */
-/*     .. */
-/*     .. Executable Statements .. */
-
-/*     Test the input parameters. */
-
-    /* Parameter adjustments */
-    a_dim1 = *lda;
-    a_offset = 1 + a_dim1 * 1;
-    a -= a_offset;
-    --x;
-
-    /* Function Body */
-    info = 0;
-    if (! lsame_(uplo, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(uplo, "L", (
-           ftnlen)1, (ftnlen)1)) {
-       info = 1;
-    } else if (! lsame_(trans, "N", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, 
-           "T", (ftnlen)1, (ftnlen)1) && ! lsame_(trans, "C", (ftnlen)1, (
-           ftnlen)1)) {
-       info = 2;
-    } else if (! lsame_(diag, "U", (ftnlen)1, (ftnlen)1) && ! lsame_(diag, 
-           "N", (ftnlen)1, (ftnlen)1)) {
-       info = 3;
-    } else if (*n < 0) {
-       info = 4;
-    } else if (*lda < max(1,*n)) {
-       info = 6;
-    } else if (*incx == 0) {
-       info = 8;
-    }
-    if (info != 0) {
-       xerbla_("ZTRSV ", &info, (ftnlen)6);
-       return 0;
-    }
-
-/*     Quick return if possible. */
-
-    if (*n == 0) {
-       return 0;
-    }
-
-    noconj = lsame_(trans, "T", (ftnlen)1, (ftnlen)1);
-    nounit = lsame_(diag, "N", (ftnlen)1, (ftnlen)1);
-
-/*     Set up the start point in X if the increment is not unity. This */
-/*     will be  ( N - 1 )*INCX  too small for descending loops. */
-
-    if (*incx <= 0) {
-       kx = 1 - (*n - 1) * *incx;
-    } else if (*incx != 1) {
-       kx = 1;
-    }
-
-/*     Start the operations. In this version the elements of A are */
-/*     accessed sequentially with one pass through A. */
-
-    if (lsame_(trans, "N", (ftnlen)1, (ftnlen)1)) {
-
-/*        Form  x := inv( A )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       if (nounit) {
-                           i__1 = j;
-                           z_div(&z__1, &x[j], &a[j + j * a_dim1]);
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                       i__1 = j;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           i__1 = i__;
-                           i__2 = i__;
-                           i__3 = i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-                                   z__2.i = temp.r * a[i__3].i + temp.i * a[
-                                   i__3].r;
-                           z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - 
-                                   z__2.i;
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-/* L10: */
-                       }
-                   }
-/* L20: */
-               }
-           } else {
-               jx = kx + (*n - 1) * *incx;
-               for (j = *n; j >= 1; --j) {
-                   i__1 = jx;
-                   if (x[i__1].r != 0. || x[i__1].i != 0.) {
-                       if (nounit) {
-                           i__1 = jx;
-                           z_div(&z__1, &x[jx], &a[j + j * a_dim1]);
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-                       }
-                       i__1 = jx;
-                       temp.r = x[i__1].r, temp.i = x[i__1].i;
-                       ix = jx;
-                       for (i__ = j - 1; i__ >= 1; --i__) {
-                           ix -= *incx;
-                           i__1 = ix;
-                           i__2 = ix;
-                           i__3 = i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__3].r - temp.i * a[i__3].i, 
-                                   z__2.i = temp.r * a[i__3].i + temp.i * a[
-                                   i__3].r;
-                           z__1.r = x[i__2].r - z__2.r, z__1.i = x[i__2].i - 
-                                   z__2.i;
-                           x[i__1].r = z__1.r, x[i__1].i = z__1.i;
-/* L30: */
-                       }
-                   }
-                   jx -= *incx;
-/* L40: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       if (nounit) {
-                           i__2 = j;
-                           z_div(&z__1, &x[j], &a[j + j * a_dim1]);
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                       i__2 = j;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           i__3 = i__;
-                           i__4 = i__;
-                           i__5 = i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   z__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-/* L50: */
-                       }
-                   }
-/* L60: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = jx;
-                   if (x[i__2].r != 0. || x[i__2].i != 0.) {
-                       if (nounit) {
-                           i__2 = jx;
-                           z_div(&z__1, &x[jx], &a[j + j * a_dim1]);
-                           x[i__2].r = z__1.r, x[i__2].i = z__1.i;
-                       }
-                       i__2 = jx;
-                       temp.r = x[i__2].r, temp.i = x[i__2].i;
-                       ix = jx;
-                       i__2 = *n;
-                       for (i__ = j + 1; i__ <= i__2; ++i__) {
-                           ix += *incx;
-                           i__3 = ix;
-                           i__4 = ix;
-                           i__5 = i__ + j * a_dim1;
-                           z__2.r = temp.r * a[i__5].r - temp.i * a[i__5].i, 
-                                   z__2.i = temp.r * a[i__5].i + temp.i * a[
-                                   i__5].r;
-                           z__1.r = x[i__4].r - z__2.r, z__1.i = x[i__4].i - 
-                                   z__2.i;
-                           x[i__3].r = z__1.r, x[i__3].i = z__1.i;
-/* L70: */
-                       }
-                   }
-                   jx += *incx;
-/* L80: */
-               }
-           }
-       }
-    } else {
-
-/*        Form  x := inv( A' )*x  or  x := inv( conjg( A' ) )*x. */
-
-       if (lsame_(uplo, "U", (ftnlen)1, (ftnlen)1)) {
-           if (*incx == 1) {
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   i__2 = j;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   if (noconj) {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * a_dim1;
-                           i__4 = i__;
-                           z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, z__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L90: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &a[j + j * a_dim1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                           i__3 = i__;
-                           z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                                   z__2.i = z__3.r * x[i__3].i + z__3.i * x[
-                                   i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L100: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j + j * a_dim1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__2 = j;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-/* L110: */
-               }
-           } else {
-               jx = kx;
-               i__1 = *n;
-               for (j = 1; j <= i__1; ++j) {
-                   ix = kx;
-                   i__2 = jx;
-                   temp.r = x[i__2].r, temp.i = x[i__2].i;
-                   if (noconj) {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           i__3 = i__ + j * a_dim1;
-                           i__4 = ix;
-                           z__2.r = a[i__3].r * x[i__4].r - a[i__3].i * x[
-                                   i__4].i, z__2.i = a[i__3].r * x[i__4].i + 
-                                   a[i__3].i * x[i__4].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix += *incx;
-/* L120: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &a[j + j * a_dim1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-                       i__2 = j - 1;
-                       for (i__ = 1; i__ <= i__2; ++i__) {
-                           d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                           i__3 = ix;
-                           z__2.r = z__3.r * x[i__3].r - z__3.i * x[i__3].i, 
-                                   z__2.i = z__3.r * x[i__3].i + z__3.i * x[
-                                   i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix += *incx;
-/* L130: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j + j * a_dim1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__2 = jx;
-                   x[i__2].r = temp.r, x[i__2].i = temp.i;
-                   jx += *incx;
-/* L140: */
-               }
-           }
-       } else {
-           if (*incx == 1) {
-               for (j = *n; j >= 1; --j) {
-                   i__1 = j;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   if (noconj) {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = i__ + j * a_dim1;
-                           i__3 = i__;
-                           z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[
-                                   i__3].i, z__2.i = a[i__2].r * x[i__3].i + 
-                                   a[i__2].i * x[i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L150: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &a[j + j * a_dim1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                           i__2 = i__;
-                           z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, 
-                                   z__2.i = z__3.r * x[i__2].i + z__3.i * x[
-                                   i__2].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-/* L160: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j + j * a_dim1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__1 = j;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-/* L170: */
-               }
-           } else {
-               kx += (*n - 1) * *incx;
-               jx = kx;
-               for (j = *n; j >= 1; --j) {
-                   ix = kx;
-                   i__1 = jx;
-                   temp.r = x[i__1].r, temp.i = x[i__1].i;
-                   if (noconj) {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           i__2 = i__ + j * a_dim1;
-                           i__3 = ix;
-                           z__2.r = a[i__2].r * x[i__3].r - a[i__2].i * x[
-                                   i__3].i, z__2.i = a[i__2].r * x[i__3].i + 
-                                   a[i__2].i * x[i__3].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix -= *incx;
-/* L180: */
-                       }
-                       if (nounit) {
-                           z_div(&z__1, &temp, &a[j + j * a_dim1]);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   } else {
-                       i__1 = j + 1;
-                       for (i__ = *n; i__ >= i__1; --i__) {
-                           d_cnjg(&z__3, &a[i__ + j * a_dim1]);
-                           i__2 = ix;
-                           z__2.r = z__3.r * x[i__2].r - z__3.i * x[i__2].i, 
-                                   z__2.i = z__3.r * x[i__2].i + z__3.i * x[
-                                   i__2].r;
-                           z__1.r = temp.r - z__2.r, z__1.i = temp.i - 
-                                   z__2.i;
-                           temp.r = z__1.r, temp.i = z__1.i;
-                           ix -= *incx;
-/* L190: */
-                       }
-                       if (nounit) {
-                           d_cnjg(&z__2, &a[j + j * a_dim1]);
-                           z_div(&z__1, &temp, &z__2);
-                           temp.r = z__1.r, temp.i = z__1.i;
-                       }
-                   }
-                   i__1 = jx;
-                   x[i__1].r = temp.r, x[i__1].i = temp.i;
-                   jx -= *incx;
-/* L200: */
-               }
-           }
-       }
-    }
-
-    return 0;
-
-/*     End of ZTRSV . */
-
-} /* ztrsv_ */
-
diff --git a/superlu/BLAS/License.txt b/superlu/BLAS/License.txt
deleted file mode 100644
index b7ca013b..00000000
--- a/superlu/BLAS/License.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-  The reference BLAS is a freely-available software package. It is available 
from netlib via anonymous ftp
-  and the World Wide Web. Thus, it can be included in commercial software 
packages (and has been). We only
-  ask that proper credit be given to the authors.
-
-  Like all software, it is copyrighted. It is not trademarked, but we do ask 
the following:
-
-  If you modify the source for these routines we ask that you change the name 
of the routine and comment
-  the changes made to the original.
-
-  We will gladly answer any questions regarding the software. If a 
modification is done, however, it is the
-  responsibility of the person who modified the routine to provide support.
-
-  see https://www.openhub.net/licenses/blas
-
diff --git a/superlu/BLAS/caxpy.f b/superlu/BLAS/caxpy.f
deleted file mode 100644
index 7ee77747..00000000
--- a/superlu/BLAS/caxpy.f
+++ /dev/null
@@ -1,102 +0,0 @@
-*> \brief \b CAXPY
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CAXPY(N,CA,CX,INCX,CY,INCY)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX CA
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*),CY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    CAXPY constant times a vector plus a vector.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CAXPY(N,CA,CX,INCX,CY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX CA
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*),CY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER I,IX,IY
-*     ..
-*     .. External Functions ..
-      REAL SCABS1
-      EXTERNAL SCABS1
-*     ..
-      IF (N.LE.0) RETURN
-      IF (SCABS1(CA).EQ.0.0E+0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-         DO I = 1,N
-            CY(I) = CY(I) + CA*CX(I)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            CY(IY) = CY(IY) + CA*CX(IX)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-*
-      RETURN
-      END
diff --git a/superlu/BLAS/ccopy.f b/superlu/BLAS/ccopy.f
deleted file mode 100644
index eeb5f299..00000000
--- a/superlu/BLAS/ccopy.f
+++ /dev/null
@@ -1,94 +0,0 @@
-*> \brief \b CCOPY
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CCOPY(N,CX,INCX,CY,INCY)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*),CY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    CCOPY copies a vector x to a vector y.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CCOPY(N,CX,INCX,CY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*),CY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER I,IX,IY
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-         DO I = 1,N
-            CY(I) = CX(I)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            CY(IY) = CX(IX)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/cdotc.f b/superlu/BLAS/cdotc.f
deleted file mode 100644
index cd341698..00000000
--- a/superlu/BLAS/cdotc.f
+++ /dev/null
@@ -1,103 +0,0 @@
-*> \brief \b CDOTC
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       COMPLEX FUNCTION CDOTC(N,CX,INCX,CY,INCY)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*),CY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CDOTC forms the dot product of two complex vectors
-*>      CDOTC = X^H * Y
-*>
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack,  3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      COMPLEX FUNCTION CDOTC(N,CX,INCX,CY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*),CY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      COMPLEX CTEMP
-      INTEGER I,IX,IY
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG
-*     ..
-      CTEMP = (0.0,0.0)
-      CDOTC = (0.0,0.0)
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-         DO I = 1,N
-            CTEMP = CTEMP + CONJG(CX(I))*CY(I)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            CTEMP = CTEMP + CONJG(CX(IX))*CY(IY)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      CDOTC = CTEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/cdotu.f b/superlu/BLAS/cdotu.f
deleted file mode 100644
index 1e127bc0..00000000
--- a/superlu/BLAS/cdotu.f
+++ /dev/null
@@ -1,100 +0,0 @@
-*> \brief \b CDOTU
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       COMPLEX FUNCTION CDOTU(N,CX,INCX,CY,INCY)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*),CY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CDOTU forms the dot product of two complex vectors
-*>      CDOTU = X^T * Y
-*>
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      COMPLEX FUNCTION CDOTU(N,CX,INCX,CY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*),CY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      COMPLEX CTEMP
-      INTEGER I,IX,IY
-*     ..
-      CTEMP = (0.0,0.0)
-      CDOTU = (0.0,0.0)
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-         DO I = 1,N
-            CTEMP = CTEMP + CX(I)*CY(I)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            CTEMP = CTEMP + CX(IX)*CY(IY)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      CDOTU = CTEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/cgbmv.f b/superlu/BLAS/cgbmv.f
deleted file mode 100644
index de12852a..00000000
--- a/superlu/BLAS/cgbmv.f
+++ /dev/null
@@ -1,390 +0,0 @@
-*> \brief \b CGBMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX ALPHA,BETA
-*       INTEGER INCX,INCY,KL,KU,LDA,M,N
-*       CHARACTER TRANS
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CGBMV  performs one of the matrix-vector operations
-*>
-*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,   or
-*>
-*>    y := alpha*A**H*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are vectors and A is an
-*> m by n band matrix, with kl sub-diagonals and ku super-diagonals.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
-*>
-*>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
-*>
-*>              TRANS = 'C' or 'c'   y := alpha*A**H*x + beta*y.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] KL
-*> \verbatim
-*>          KL is INTEGER
-*>           On entry, KL specifies the number of sub-diagonals of the
-*>           matrix A. KL must satisfy  0 .le. KL.
-*> \endverbatim
-*>
-*> \param[in] KU
-*> \verbatim
-*>          KU is INTEGER
-*>           On entry, KU specifies the number of super-diagonals of the
-*>           matrix A. KU must satisfy  0 .le. KU.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading ( kl + ku + 1 ) by n part of the
-*>           array A must contain the matrix of coefficients, supplied
-*>           column by column, with the leading diagonal of the matrix in
-*>           row ( ku + 1 ) of the array, the first super-diagonal
-*>           starting at position 2 in row ku, the first sub-diagonal
-*>           starting at position 1 in row ( ku + 2 ), and so on.
-*>           Elements in the array A that do not correspond to elements
-*>           in the band matrix (such as the top left ku by ku triangle)
-*>           are not referenced.
-*>           The following program segment will transfer a band matrix
-*>           from conventional full matrix storage to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    K = KU + 1 - J
-*>                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL )
-*>                       A( K + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( kl + ku + 1 ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX array of DIMENSION at least
-*>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
-*>           Before entry, the incremented array Y must contain the
-*>           vector y. On exit, Y is overwritten by the updated vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX ALPHA,BETA
-      INTEGER INCX,INCY,KL,KU,LDA,M,N
-      CHARACTER TRANS
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX ONE
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP
-      INTEGER I,INFO,IX,IY,J,JX,JY,K,KUP1,KX,KY,LENX,LENY
-      LOGICAL NOCONJ
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,MAX,MIN
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +    .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 1
-      ELSE IF (M.LT.0) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (KL.LT.0) THEN
-          INFO = 4
-      ELSE IF (KU.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (KL+KU+1)) THEN
-          INFO = 8
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 10
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 13
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CGBMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-      NOCONJ = LSAME(TRANS,'T')
-*
-*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
-*     up the start points in  X  and  Y.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          LENX = N
-          LENY = M
-      ELSE
-          LENX = M
-          LENY = N
-      END IF
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (LENX-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (LENY-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the band part of A.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,LENY
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,LENY
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,LENY
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,LENY
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      KUP1 = KU + 1
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  y := alpha*A*x + y.
-*
-          JX = KX
-          IF (INCY.EQ.1) THEN
-              DO 60 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  K = KUP1 - J
-                  DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
-                      Y(I) = Y(I) + TEMP*A(K+I,J)
-   50             CONTINUE
-                  JX = JX + INCX
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  IY = KY
-                  K = KUP1 - J
-                  DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
-                      Y(IY) = Y(IY) + TEMP*A(K+I,J)
-                      IY = IY + INCY
-   70             CONTINUE
-                  JX = JX + INCX
-                  IF (J.GT.KU) KY = KY + INCY
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y := alpha*A**T*x + y  or  y := alpha*A**H*x + y.
-*
-          JY = KY
-          IF (INCX.EQ.1) THEN
-              DO 110 J = 1,N
-                  TEMP = ZERO
-                  K = KUP1 - J
-                  IF (NOCONJ) THEN
-                      DO 90 I = MAX(1,J-KU),MIN(M,J+KL)
-                          TEMP = TEMP + A(K+I,J)*X(I)
-   90                 CONTINUE
-                  ELSE
-                      DO 100 I = MAX(1,J-KU),MIN(M,J+KL)
-                          TEMP = TEMP + CONJG(A(K+I,J))*X(I)
-  100                 CONTINUE
-                  END IF
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  110         CONTINUE
-          ELSE
-              DO 140 J = 1,N
-                  TEMP = ZERO
-                  IX = KX
-                  K = KUP1 - J
-                  IF (NOCONJ) THEN
-                      DO 120 I = MAX(1,J-KU),MIN(M,J+KL)
-                          TEMP = TEMP + A(K+I,J)*X(IX)
-                          IX = IX + INCX
-  120                 CONTINUE
-                  ELSE
-                      DO 130 I = MAX(1,J-KU),MIN(M,J+KL)
-                          TEMP = TEMP + CONJG(A(K+I,J))*X(IX)
-                          IX = IX + INCX
-  130                 CONTINUE
-                  END IF
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-                  IF (J.GT.KU) KX = KX + INCX
-  140         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CGBMV .
-*
-      END
diff --git a/superlu/BLAS/cgemm.f b/superlu/BLAS/cgemm.f
deleted file mode 100644
index 018ffad6..00000000
--- a/superlu/BLAS/cgemm.f
+++ /dev/null
@@ -1,483 +0,0 @@
-*> \brief \b CGEMM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX ALPHA,BETA
-*       INTEGER K,LDA,LDB,LDC,M,N
-*       CHARACTER TRANSA,TRANSB
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CGEMM  performs one of the matrix-matrix operations
-*>
-*>    C := alpha*op( A )*op( B ) + beta*C,
-*>
-*> where  op( X ) is one of
-*>
-*>    op( X ) = X   or   op( X ) = X**T   or   op( X ) = X**H,
-*>
-*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
-*> an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSA = 'N' or 'n',  op( A ) = A.
-*>
-*>              TRANSA = 'T' or 't',  op( A ) = A**T.
-*>
-*>              TRANSA = 'C' or 'c',  op( A ) = A**H.
-*> \endverbatim
-*>
-*> \param[in] TRANSB
-*> \verbatim
-*>          TRANSB is CHARACTER*1
-*>           On entry, TRANSB specifies the form of op( B ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSB = 'N' or 'n',  op( B ) = B.
-*>
-*>              TRANSB = 'T' or 't',  op( B ) = B**T.
-*>
-*>              TRANSB = 'C' or 'c',  op( B ) = B**H.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies  the number  of rows  of the  matrix
-*>           op( A )  and of the  matrix  C.  M  must  be at least  zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N  specifies the number  of columns of the matrix
-*>           op( B ) and the number of columns of the matrix C. N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry,  K  specifies  the number of columns of the matrix
-*>           op( A ) and the number of rows of the matrix op( B ). K must
-*>           be at least  zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
-*>           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by m  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX array of DIMENSION ( LDB, kb ), where kb is
-*>           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
-*>           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  n by k  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
-*>           LDB must be at least  max( 1, k ), otherwise  LDB must be at
-*>           least  max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n  matrix
-*>           ( alpha*op( A )*op( B ) + beta*C ).
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX ALPHA,BETA
-      INTEGER K,LDA,LDB,LDC,M,N
-      CHARACTER TRANSA,TRANSB
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,MAX
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP
-      INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB
-      LOGICAL CONJA,CONJB,NOTA,NOTB
-*     ..
-*     .. Parameters ..
-      COMPLEX ONE
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*
-*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
-*     conjugated or transposed, set  CONJA and CONJB  as true if  A  and
-*     B  respectively are to be  transposed but  not conjugated  and set
-*     NROWA, NCOLA and  NROWB  as the number of rows and  columns  of  A
-*     and the number of rows of  B  respectively.
-*
-      NOTA = LSAME(TRANSA,'N')
-      NOTB = LSAME(TRANSB,'N')
-      CONJA = LSAME(TRANSA,'C')
-      CONJB = LSAME(TRANSB,'C')
-      IF (NOTA) THEN
-          NROWA = M
-          NCOLA = K
-      ELSE
-          NROWA = K
-          NCOLA = M
-      END IF
-      IF (NOTB) THEN
-          NROWB = K
-      ELSE
-          NROWB = N
-      END IF
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF ((.NOT.NOTA) .AND. (.NOT.CONJA) .AND.
-     +    (.NOT.LSAME(TRANSA,'T'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.NOTB) .AND. (.NOT.CONJB) .AND.
-     +         (.NOT.LSAME(TRANSB,'T'))) THEN
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 8
-      ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
-          INFO = 10
-      ELSE IF (LDC.LT.MAX(1,M)) THEN
-          INFO = 13
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CGEMM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (NOTB) THEN
-          IF (NOTA) THEN
-*
-*           Form  C := alpha*A*B + beta*C.
-*
-              DO 90 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 50 I = 1,M
-                          C(I,J) = ZERO
-   50                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 60 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-   60                 CONTINUE
-                  END IF
-                  DO 80 L = 1,K
-                      TEMP = ALPHA*B(L,J)
-                      DO 70 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-   70                 CONTINUE
-   80             CONTINUE
-   90         CONTINUE
-          ELSE IF (CONJA) THEN
-*
-*           Form  C := alpha*A**H*B + beta*C.
-*
-              DO 120 J = 1,N
-                  DO 110 I = 1,M
-                      TEMP = ZERO
-                      DO 100 L = 1,K
-                          TEMP = TEMP + CONJG(A(L,I))*B(L,J)
-  100                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  110             CONTINUE
-  120         CONTINUE
-          ELSE
-*
-*           Form  C := alpha*A**T*B + beta*C
-*
-              DO 150 J = 1,N
-                  DO 140 I = 1,M
-                      TEMP = ZERO
-                      DO 130 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(L,J)
-  130                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  140             CONTINUE
-  150         CONTINUE
-          END IF
-      ELSE IF (NOTA) THEN
-          IF (CONJB) THEN
-*
-*           Form  C := alpha*A*B**H + beta*C.
-*
-              DO 200 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 160 I = 1,M
-                          C(I,J) = ZERO
-  160                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 170 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-  170                 CONTINUE
-                  END IF
-                  DO 190 L = 1,K
-                      TEMP = ALPHA*CONJG(B(J,L))
-                      DO 180 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-  180                 CONTINUE
-  190             CONTINUE
-  200         CONTINUE
-          ELSE
-*
-*           Form  C := alpha*A*B**T + beta*C
-*
-              DO 250 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 210 I = 1,M
-                          C(I,J) = ZERO
-  210                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 220 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-  220                 CONTINUE
-                  END IF
-                  DO 240 L = 1,K
-                      TEMP = ALPHA*B(J,L)
-                      DO 230 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-  230                 CONTINUE
-  240             CONTINUE
-  250         CONTINUE
-          END IF
-      ELSE IF (CONJA) THEN
-          IF (CONJB) THEN
-*
-*           Form  C := alpha*A**H*B**H + beta*C.
-*
-              DO 280 J = 1,N
-                  DO 270 I = 1,M
-                      TEMP = ZERO
-                      DO 260 L = 1,K
-                          TEMP = TEMP + CONJG(A(L,I))*CONJG(B(J,L))
-  260                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  270             CONTINUE
-  280         CONTINUE
-          ELSE
-*
-*           Form  C := alpha*A**H*B**T + beta*C
-*
-              DO 310 J = 1,N
-                  DO 300 I = 1,M
-                      TEMP = ZERO
-                      DO 290 L = 1,K
-                          TEMP = TEMP + CONJG(A(L,I))*B(J,L)
-  290                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  300             CONTINUE
-  310         CONTINUE
-          END IF
-      ELSE
-          IF (CONJB) THEN
-*
-*           Form  C := alpha*A**T*B**H + beta*C
-*
-              DO 340 J = 1,N
-                  DO 330 I = 1,M
-                      TEMP = ZERO
-                      DO 320 L = 1,K
-                          TEMP = TEMP + A(L,I)*CONJG(B(J,L))
-  320                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  330             CONTINUE
-  340         CONTINUE
-          ELSE
-*
-*           Form  C := alpha*A**T*B**T + beta*C
-*
-              DO 370 J = 1,N
-                  DO 360 I = 1,M
-                      TEMP = ZERO
-                      DO 350 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(J,L)
-  350                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  360             CONTINUE
-  370         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CGEMM .
-*
-      END
diff --git a/superlu/BLAS/cgemv.f b/superlu/BLAS/cgemv.f
deleted file mode 100644
index aeb94090..00000000
--- a/superlu/BLAS/cgemv.f
+++ /dev/null
@@ -1,350 +0,0 @@
-*> \brief \b CGEMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX ALPHA,BETA
-*       INTEGER INCX,INCY,LDA,M,N
-*       CHARACTER TRANS
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CGEMV performs one of the matrix-vector operations
-*>
-*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,   or
-*>
-*>    y := alpha*A**H*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are vectors and A is an
-*> m by n matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
-*>
-*>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
-*>
-*>              TRANS = 'C' or 'c'   y := alpha*A**H*x + beta*y.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX array of DIMENSION at least
-*>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
-*>           Before entry with BETA non-zero, the incremented array Y
-*>           must contain the vector y. On exit, Y is overwritten by the
-*>           updated vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX ALPHA,BETA
-      INTEGER INCX,INCY,LDA,M,N
-      CHARACTER TRANS
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX ONE
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP
-      INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY
-      LOGICAL NOCONJ
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +    .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 1
-      ELSE IF (M.LT.0) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (LDA.LT.MAX(1,M)) THEN
-          INFO = 6
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 8
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CGEMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-      NOCONJ = LSAME(TRANS,'T')
-*
-*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
-*     up the start points in  X  and  Y.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          LENX = N
-          LENY = M
-      ELSE
-          LENX = M
-          LENY = N
-      END IF
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (LENX-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (LENY-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,LENY
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,LENY
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,LENY
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,LENY
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  y := alpha*A*x + y.
-*
-          JX = KX
-          IF (INCY.EQ.1) THEN
-              DO 60 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  DO 50 I = 1,M
-                      Y(I) = Y(I) + TEMP*A(I,J)
-   50             CONTINUE
-                  JX = JX + INCX
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  IY = KY
-                  DO 70 I = 1,M
-                      Y(IY) = Y(IY) + TEMP*A(I,J)
-                      IY = IY + INCY
-   70             CONTINUE
-                  JX = JX + INCX
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y := alpha*A**T*x + y  or  y := alpha*A**H*x + y.
-*
-          JY = KY
-          IF (INCX.EQ.1) THEN
-              DO 110 J = 1,N
-                  TEMP = ZERO
-                  IF (NOCONJ) THEN
-                      DO 90 I = 1,M
-                          TEMP = TEMP + A(I,J)*X(I)
-   90                 CONTINUE
-                  ELSE
-                      DO 100 I = 1,M
-                          TEMP = TEMP + CONJG(A(I,J))*X(I)
-  100                 CONTINUE
-                  END IF
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  110         CONTINUE
-          ELSE
-              DO 140 J = 1,N
-                  TEMP = ZERO
-                  IX = KX
-                  IF (NOCONJ) THEN
-                      DO 120 I = 1,M
-                          TEMP = TEMP + A(I,J)*X(IX)
-                          IX = IX + INCX
-  120                 CONTINUE
-                  ELSE
-                      DO 130 I = 1,M
-                          TEMP = TEMP + CONJG(A(I,J))*X(IX)
-                          IX = IX + INCX
-  130                 CONTINUE
-                  END IF
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  140         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CGEMV .
-*
-      END
diff --git a/superlu/BLAS/cgerc.f b/superlu/BLAS/cgerc.f
deleted file mode 100644
index e730edfd..00000000
--- a/superlu/BLAS/cgerc.f
+++ /dev/null
@@ -1,227 +0,0 @@
-*> \brief \b CGERC
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CGERC(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX ALPHA
-*       INTEGER INCX,INCY,LDA,M,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CGERC  performs the rank 1 operation
-*>
-*>    A := alpha*x*y**H + A,
-*>
-*> where alpha is a scalar, x is an m element vector, y is an n element
-*> vector and A is an m by n matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the m
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients. On exit, A is
-*>           overwritten by the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CGERC(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX ALPHA
-      INTEGER INCX,INCY,LDA,M,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP
-      INTEGER I,INFO,IX,J,JY,KX
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (M.LT.0) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 7
-      ELSE IF (LDA.LT.MAX(1,M)) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CGERC ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (INCY.GT.0) THEN
-          JY = 1
-      ELSE
-          JY = 1 - (N-1)*INCY
-      END IF
-      IF (INCX.EQ.1) THEN
-          DO 20 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*CONJG(Y(JY))
-                  DO 10 I = 1,M
-                      A(I,J) = A(I,J) + X(I)*TEMP
-   10             CONTINUE
-              END IF
-              JY = JY + INCY
-   20     CONTINUE
-      ELSE
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (M-1)*INCX
-          END IF
-          DO 40 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*CONJG(Y(JY))
-                  IX = KX
-                  DO 30 I = 1,M
-                      A(I,J) = A(I,J) + X(IX)*TEMP
-                      IX = IX + INCX
-   30             CONTINUE
-              END IF
-              JY = JY + INCY
-   40     CONTINUE
-      END IF
-*
-      RETURN
-*
-*     End of CGERC .
-*
-      END
diff --git a/superlu/BLAS/cgeru.f b/superlu/BLAS/cgeru.f
deleted file mode 100644
index bc7540fa..00000000
--- a/superlu/BLAS/cgeru.f
+++ /dev/null
@@ -1,227 +0,0 @@
-*> \brief \b CGERU
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CGERU(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX ALPHA
-*       INTEGER INCX,INCY,LDA,M,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CGERU  performs the rank 1 operation
-*>
-*>    A := alpha*x*y**T + A,
-*>
-*> where alpha is a scalar, x is an m element vector, y is an n element
-*> vector and A is an m by n matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the m
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients. On exit, A is
-*>           overwritten by the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CGERU(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX ALPHA
-      INTEGER INCX,INCY,LDA,M,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP
-      INTEGER I,INFO,IX,J,JY,KX
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (M.LT.0) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 7
-      ELSE IF (LDA.LT.MAX(1,M)) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CGERU ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (INCY.GT.0) THEN
-          JY = 1
-      ELSE
-          JY = 1 - (N-1)*INCY
-      END IF
-      IF (INCX.EQ.1) THEN
-          DO 20 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  DO 10 I = 1,M
-                      A(I,J) = A(I,J) + X(I)*TEMP
-   10             CONTINUE
-              END IF
-              JY = JY + INCY
-   20     CONTINUE
-      ELSE
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (M-1)*INCX
-          END IF
-          DO 40 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  IX = KX
-                  DO 30 I = 1,M
-                      A(I,J) = A(I,J) + X(IX)*TEMP
-                      IX = IX + INCX
-   30             CONTINUE
-              END IF
-              JY = JY + INCY
-   40     CONTINUE
-      END IF
-*
-      RETURN
-*
-*     End of CGERU .
-*
-      END
diff --git a/superlu/BLAS/chbmv.f b/superlu/BLAS/chbmv.f
deleted file mode 100644
index 435c8dd2..00000000
--- a/superlu/BLAS/chbmv.f
+++ /dev/null
@@ -1,380 +0,0 @@
-*> \brief \b CHBMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CHBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX ALPHA,BETA
-*       INTEGER INCX,INCY,K,LDA,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CHBMV  performs the matrix-vector  operation
-*>
-*>    y := alpha*A*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n hermitian band matrix, with k super-diagonals.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the band matrix A is being supplied as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  being supplied.
-*>
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  being supplied.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry, K specifies the number of super-diagonals of the
-*>           matrix A. K must satisfy  0 .le. K.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the hermitian matrix, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer the upper
-*>           triangular part of a hermitian band matrix from conventional
-*>           full matrix storage to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the hermitian matrix, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer the lower
-*>           triangular part of a hermitian band matrix from conventional
-*>           full matrix storage to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set and are assumed to be zero.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the
-*>           vector y. On exit, Y is overwritten by the updated vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CHBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX ALPHA,BETA
-      INTEGER INCX,INCY,K,LDA,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX ONE
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,KPLUS1,KX,KY,L
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,MAX,MIN,REAL
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (K.LT.0) THEN
-          INFO = 3
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 6
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 8
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CHBMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     Set up the start points in  X  and  Y.
-*
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of the array A
-*     are accessed sequentially with one pass through A.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  y  when upper triangle of A is stored.
-*
-          KPLUS1 = K + 1
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  L = KPLUS1 - J
-                  DO 50 I = MAX(1,J-K),J - 1
-                      Y(I) = Y(I) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + CONJG(A(L+I,J))*X(I)
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*REAL(A(KPLUS1,J)) + ALPHA*TEMP2
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  L = KPLUS1 - J
-                  DO 70 I = MAX(1,J-K),J - 1
-                      Y(IY) = Y(IY) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + CONJG(A(L+I,J))*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*REAL(A(KPLUS1,J)) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  IF (J.GT.K) THEN
-                      KX = KX + INCX
-                      KY = KY + INCY
-                  END IF
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y  when lower triangle of A is stored.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*REAL(A(1,J))
-                  L = 1 - J
-                  DO 90 I = J + 1,MIN(N,J+K)
-                      Y(I) = Y(I) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + CONJG(A(L+I,J))*X(I)
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*REAL(A(1,J))
-                  L = 1 - J
-                  IX = JX
-                  IY = JY
-                  DO 110 I = J + 1,MIN(N,J+K)
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + CONJG(A(L+I,J))*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CHBMV .
-*
-      END
diff --git a/superlu/BLAS/chemm.f b/superlu/BLAS/chemm.f
deleted file mode 100644
index 834b209a..00000000
--- a/superlu/BLAS/chemm.f
+++ /dev/null
@@ -1,371 +0,0 @@
-*> \brief \b CHEMM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CHEMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX ALPHA,BETA
-*       INTEGER LDA,LDB,LDC,M,N
-*       CHARACTER SIDE,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CHEMM  performs one of the matrix-matrix operations
-*>
-*>    C := alpha*A*B + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*B*A + beta*C,
-*>
-*> where alpha and beta are scalars, A is an hermitian matrix and  B and
-*> C are m by n matrices.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE  specifies whether  the  hermitian matrix  A
-*>           appears on the  left or right  in the  operation as follows:
-*>
-*>              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
-*>
-*>              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of  the  hermitian  matrix   A  is  to  be
-*>           referenced as follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of the
-*>                                  hermitian matrix is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of the
-*>                                  hermitian matrix is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies the number of rows of the matrix  C.
-*>           M  must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix C.
-*>           N  must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is
-*>           m  when  SIDE = 'L' or 'l'  and is n  otherwise.
-*>           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
-*>           the array  A  must contain the  hermitian matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading m by m upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  hermitian matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  m by m  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  hermitian
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*>           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
-*>           the array  A  must contain the  hermitian matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading n by n upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  hermitian matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  n by n  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  hermitian
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*>           Note that the imaginary parts  of the diagonal elements need
-*>           not be set, they are assumed to be zero.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX array of DIMENSION ( LDB, n ).
-*>           Before entry, the leading  m by n part of the array  B  must
-*>           contain the matrix B.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n updated
-*>           matrix.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CHEMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX ALPHA,BETA
-      INTEGER LDA,LDB,LDC,M,N
-      CHARACTER SIDE,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,MAX,REAL
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP1,TEMP2
-      INTEGER I,INFO,J,K,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      COMPLEX ONE
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*
-*     Set NROWA as the number of rows of A.
-*
-      IF (LSAME(SIDE,'L')) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF ((.NOT.LSAME(SIDE,'L')) .AND. (.NOT.LSAME(SIDE,'R'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDB.LT.MAX(1,M)) THEN
-          INFO = 9
-      ELSE IF (LDC.LT.MAX(1,M)) THEN
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CHEMM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(SIDE,'L')) THEN
-*
-*        Form  C := alpha*A*B + beta*C.
-*
-          IF (UPPER) THEN
-              DO 70 J = 1,N
-                  DO 60 I = 1,M
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 50 K = 1,I - 1
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*CONJG(A(K,I))
-   50                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*REAL(A(I,I)) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*REAL(A(I,I)) +
-     +                             ALPHA*TEMP2
-                      END IF
-   60             CONTINUE
-   70         CONTINUE
-          ELSE
-              DO 100 J = 1,N
-                  DO 90 I = M,1,-1
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 80 K = I + 1,M
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*CONJG(A(K,I))
-   80                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*REAL(A(I,I)) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*REAL(A(I,I)) +
-     +                             ALPHA*TEMP2
-                      END IF
-   90             CONTINUE
-  100         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*B*A + beta*C.
-*
-          DO 170 J = 1,N
-              TEMP1 = ALPHA*REAL(A(J,J))
-              IF (BETA.EQ.ZERO) THEN
-                  DO 110 I = 1,M
-                      C(I,J) = TEMP1*B(I,J)
-  110             CONTINUE
-              ELSE
-                  DO 120 I = 1,M
-                      C(I,J) = BETA*C(I,J) + TEMP1*B(I,J)
-  120             CONTINUE
-              END IF
-              DO 140 K = 1,J - 1
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(K,J)
-                  ELSE
-                      TEMP1 = ALPHA*CONJG(A(J,K))
-                  END IF
-                  DO 130 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  130             CONTINUE
-  140         CONTINUE
-              DO 160 K = J + 1,N
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*CONJG(A(J,K))
-                  ELSE
-                      TEMP1 = ALPHA*A(K,J)
-                  END IF
-                  DO 150 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  150             CONTINUE
-  160         CONTINUE
-  170     CONTINUE
-      END IF
-*
-      RETURN
-*
-*     End of CHEMM .
-*
-      END
diff --git a/superlu/BLAS/chemv.f b/superlu/BLAS/chemv.f
deleted file mode 100644
index 21509297..00000000
--- a/superlu/BLAS/chemv.f
+++ /dev/null
@@ -1,337 +0,0 @@
-*> \brief \b CHEMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CHEMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX ALPHA,BETA
-*       INTEGER INCX,INCY,LDA,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CHEMV  performs the matrix-vector  operation
-*>
-*>    y := alpha*A*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n hermitian matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the hermitian matrix and the strictly
-*>           lower triangular part of A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the hermitian matrix and the strictly
-*>           upper triangular part of A is not referenced.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set and are assumed to be zero.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y. On exit, Y is overwritten by the updated
-*>           vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CHEMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX ALPHA,BETA
-      INTEGER INCX,INCY,LDA,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX ONE
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,MAX,REAL
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 5
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 7
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CHEMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     Set up the start points in  X  and  Y.
-*
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  y  when A is stored in upper triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  DO 50 I = 1,J - 1
-                      Y(I) = Y(I) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + CONJG(A(I,J))*X(I)
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*REAL(A(J,J)) + ALPHA*TEMP2
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  DO 70 I = 1,J - 1
-                      Y(IY) = Y(IY) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + CONJG(A(I,J))*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*REAL(A(J,J)) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y  when A is stored in lower triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*REAL(A(J,J))
-                  DO 90 I = J + 1,N
-                      Y(I) = Y(I) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + CONJG(A(I,J))*X(I)
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*REAL(A(J,J))
-                  IX = JX
-                  IY = JY
-                  DO 110 I = J + 1,N
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + CONJG(A(I,J))*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CHEMV .
-*
-      END
diff --git a/superlu/BLAS/cher.f b/superlu/BLAS/cher.f
deleted file mode 100644
index 78a4e0b7..00000000
--- a/superlu/BLAS/cher.f
+++ /dev/null
@@ -1,278 +0,0 @@
-*> \brief \b CHER
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CHER(UPLO,N,ALPHA,X,INCX,A,LDA)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       INTEGER INCX,LDA,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CHER   performs the hermitian rank 1 operation
-*>
-*>    A := alpha*x*x**H + A,
-*>
-*> where alpha is a real scalar, x is an n element vector and A is an
-*> n by n hermitian matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the hermitian matrix and the strictly
-*>           lower triangular part of A is not referenced. On exit, the
-*>           upper triangular part of the array A is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the hermitian matrix and the strictly
-*>           upper triangular part of A is not referenced. On exit, the
-*>           lower triangular part of the array A is overwritten by the
-*>           lower triangular part of the updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set, they are assumed to be zero, and on exit they
-*>           are set to zero.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CHER(UPLO,N,ALPHA,X,INCX,A,LDA)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA
-      INTEGER INCX,LDA,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP
-      INTEGER I,INFO,IX,J,JX,KX
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,MAX,REAL
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CHER  ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.REAL(ZERO))) RETURN
-*
-*     Set the start point in X if the increment is not unity.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-*
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  A  when A is stored in upper triangle.
-*
-          IF (INCX.EQ.1) THEN
-              DO 20 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*CONJG(X(J))
-                      DO 10 I = 1,J - 1
-                          A(I,J) = A(I,J) + X(I)*TEMP
-   10                 CONTINUE
-                      A(J,J) = REAL(A(J,J)) + REAL(X(J)*TEMP)
-                  ELSE
-                      A(J,J) = REAL(A(J,J))
-                  END IF
-   20         CONTINUE
-          ELSE
-              JX = KX
-              DO 40 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*CONJG(X(JX))
-                      IX = KX
-                      DO 30 I = 1,J - 1
-                          A(I,J) = A(I,J) + X(IX)*TEMP
-                          IX = IX + INCX
-   30                 CONTINUE
-                      A(J,J) = REAL(A(J,J)) + REAL(X(JX)*TEMP)
-                  ELSE
-                      A(J,J) = REAL(A(J,J))
-                  END IF
-                  JX = JX + INCX
-   40         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  A  when A is stored in lower triangle.
-*
-          IF (INCX.EQ.1) THEN
-              DO 60 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*CONJG(X(J))
-                      A(J,J) = REAL(A(J,J)) + REAL(TEMP*X(J))
-                      DO 50 I = J + 1,N
-                          A(I,J) = A(I,J) + X(I)*TEMP
-   50                 CONTINUE
-                  ELSE
-                      A(J,J) = REAL(A(J,J))
-                  END IF
-   60         CONTINUE
-          ELSE
-              JX = KX
-              DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*CONJG(X(JX))
-                      A(J,J) = REAL(A(J,J)) + REAL(TEMP*X(JX))
-                      IX = JX
-                      DO 70 I = J + 1,N
-                          IX = IX + INCX
-                          A(I,J) = A(I,J) + X(IX)*TEMP
-   70                 CONTINUE
-                  ELSE
-                      A(J,J) = REAL(A(J,J))
-                  END IF
-                  JX = JX + INCX
-   80         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CHER  .
-*
-      END
diff --git a/superlu/BLAS/cher2.f b/superlu/BLAS/cher2.f
deleted file mode 100644
index fd65f970..00000000
--- a/superlu/BLAS/cher2.f
+++ /dev/null
@@ -1,317 +0,0 @@
-*> \brief \b CHER2
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CHER2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX ALPHA
-*       INTEGER INCX,INCY,LDA,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CHER2  performs the hermitian rank 2 operation
-*>
-*>    A := alpha*x*y**H + conjg( alpha )*y*x**H + A,
-*>
-*> where alpha is a scalar, x and y are n element vectors and A is an n
-*> by n hermitian matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the hermitian matrix and the strictly
-*>           lower triangular part of A is not referenced. On exit, the
-*>           upper triangular part of the array A is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the hermitian matrix and the strictly
-*>           upper triangular part of A is not referenced. On exit, the
-*>           lower triangular part of the array A is overwritten by the
-*>           lower triangular part of the updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set, they are assumed to be zero, and on exit they
-*>           are set to zero.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CHER2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX ALPHA
-      INTEGER INCX,INCY,LDA,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,MAX,REAL
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 7
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CHER2 ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Set up the start points in X and Y if the increments are not both
-*     unity.
-*
-      IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (N-1)*INCX
-          END IF
-          IF (INCY.GT.0) THEN
-              KY = 1
-          ELSE
-              KY = 1 - (N-1)*INCY
-          END IF
-          JX = KX
-          JY = KY
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-*
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  A  when A is stored in the upper triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 20 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*CONJG(Y(J))
-                      TEMP2 = CONJG(ALPHA*X(J))
-                      DO 10 I = 1,J - 1
-                          A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2
-   10                 CONTINUE
-                      A(J,J) = REAL(A(J,J)) +
-     +                         REAL(X(J)*TEMP1+Y(J)*TEMP2)
-                  ELSE
-                      A(J,J) = REAL(A(J,J))
-                  END IF
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*CONJG(Y(JY))
-                      TEMP2 = CONJG(ALPHA*X(JX))
-                      IX = KX
-                      IY = KY
-                      DO 30 I = 1,J - 1
-                          A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   30                 CONTINUE
-                      A(J,J) = REAL(A(J,J)) +
-     +                         REAL(X(JX)*TEMP1+Y(JY)*TEMP2)
-                  ELSE
-                      A(J,J) = REAL(A(J,J))
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-   40         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  A  when A is stored in the lower triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*CONJG(Y(J))
-                      TEMP2 = CONJG(ALPHA*X(J))
-                      A(J,J) = REAL(A(J,J)) +
-     +                         REAL(X(J)*TEMP1+Y(J)*TEMP2)
-                      DO 50 I = J + 1,N
-                          A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2
-   50                 CONTINUE
-                  ELSE
-                      A(J,J) = REAL(A(J,J))
-                  END IF
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*CONJG(Y(JY))
-                      TEMP2 = CONJG(ALPHA*X(JX))
-                      A(J,J) = REAL(A(J,J)) +
-     +                         REAL(X(JX)*TEMP1+Y(JY)*TEMP2)
-                      IX = JX
-                      IY = JY
-                      DO 70 I = J + 1,N
-                          IX = IX + INCX
-                          IY = IY + INCY
-                          A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2
-   70                 CONTINUE
-                  ELSE
-                      A(J,J) = REAL(A(J,J))
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-   80         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CHER2 .
-*
-      END
diff --git a/superlu/BLAS/cher2k.f b/superlu/BLAS/cher2k.f
deleted file mode 100644
index ace3c5d2..00000000
--- a/superlu/BLAS/cher2k.f
+++ /dev/null
@@ -1,442 +0,0 @@
-*> \brief \b CHER2K
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CHER2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX ALPHA
-*       REAL BETA
-*       INTEGER K,LDA,LDB,LDC,N
-*       CHARACTER TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CHER2K  performs one of the hermitian rank 2k operations
-*>
-*>    C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C,
-*>
-*> where  alpha and beta  are scalars with  beta  real,  C is an  n by n
-*> hermitian matrix and  A and B  are  n by k matrices in the first case
-*> and  k by n  matrices in the second case.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'    C := alpha*A*B**H          +
-*>                                         conjg( alpha )*B*A**H +
-*>                                         beta*C.
-*>
-*>              TRANS = 'C' or 'c'    C := alpha*A**H*B          +
-*>                                         conjg( alpha )*B**H*A +
-*>                                         beta*C.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns  of the  matrices  A and B,  and on  entry  with
-*>           TRANS = 'C' or 'c',  K  specifies  the number of rows of the
-*>           matrices  A and B.  K must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX array of DIMENSION ( LDB, kb ), where kb is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  k by n  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDB must be at least  max( 1, n ), otherwise  LDB must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  hermitian matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  hermitian matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set,  they are assumed to be zero,  and on exit they
-*>           are set to zero.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*>
-*>  -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1.
-*>     Ed Anderson, Cray Research Inc.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CHER2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX ALPHA
-      REAL BETA
-      INTEGER K,LDA,LDB,LDC,N
-      CHARACTER TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,MAX,REAL
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP1,TEMP2
-      INTEGER I,INFO,J,L,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      REAL ONE
-      PARAMETER (ONE=1.0E+0)
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*
-*     Test the input parameters.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND.
-     +         (.NOT.LSAME(TRANS,'C'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDB.LT.MAX(1,NROWA)) THEN
-          INFO = 9
-      ELSE IF (LDC.LT.MAX(1,N)) THEN
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CHER2K',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (UPPER) THEN
-              IF (BETA.EQ.REAL(ZERO)) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J - 1
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-                      C(J,J) = BETA*REAL(C(J,J))
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.REAL(ZERO)) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      C(J,J) = BETA*REAL(C(J,J))
-                      DO 70 I = J + 1,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  C := alpha*A*B**H + conjg( alpha )*B*A**H +
-*                   C.
-*
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.REAL(ZERO)) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J - 1
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                      C(J,J) = BETA*REAL(C(J,J))
-                  ELSE
-                      C(J,J) = REAL(C(J,J))
-                  END IF
-                  DO 120 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*CONJG(B(J,L))
-                          TEMP2 = CONJG(ALPHA*A(J,L))
-                          DO 110 I = 1,J - 1
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  110                     CONTINUE
-                          C(J,J) = REAL(C(J,J)) +
-     +                             REAL(A(J,L)*TEMP1+B(J,L)*TEMP2)
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.REAL(ZERO)) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J + 1,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                      C(J,J) = BETA*REAL(C(J,J))
-                  ELSE
-                      C(J,J) = REAL(C(J,J))
-                  END IF
-                  DO 170 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*CONJG(B(J,L))
-                          TEMP2 = CONJG(ALPHA*A(J,L))
-                          DO 160 I = J + 1,N
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  160                     CONTINUE
-                          C(J,J) = REAL(C(J,J)) +
-     +                             REAL(A(J,L)*TEMP1+B(J,L)*TEMP2)
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*A**H*B + conjg( alpha )*B**H*A +
-*                   C.
-*
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 190 L = 1,K
-                          TEMP1 = TEMP1 + CONJG(A(L,I))*B(L,J)
-                          TEMP2 = TEMP2 + CONJG(B(L,I))*A(L,J)
-  190                 CONTINUE
-                      IF (I.EQ.J) THEN
-                          IF (BETA.EQ.REAL(ZERO)) THEN
-                              C(J,J) = REAL(ALPHA*TEMP1+
-     +                                 CONJG(ALPHA)*TEMP2)
-                          ELSE
-                              C(J,J) = BETA*REAL(C(J,J)) +
-     +                                 REAL(ALPHA*TEMP1+
-     +                                 CONJG(ALPHA)*TEMP2)
-                          END IF
-                      ELSE
-                          IF (BETA.EQ.REAL(ZERO)) THEN
-                              C(I,J) = ALPHA*TEMP1 + CONJG(ALPHA)*TEMP2
-                          ELSE
-                              C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                                 CONJG(ALPHA)*TEMP2
-                          END IF
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 220 L = 1,K
-                          TEMP1 = TEMP1 + CONJG(A(L,I))*B(L,J)
-                          TEMP2 = TEMP2 + CONJG(B(L,I))*A(L,J)
-  220                 CONTINUE
-                      IF (I.EQ.J) THEN
-                          IF (BETA.EQ.REAL(ZERO)) THEN
-                              C(J,J) = REAL(ALPHA*TEMP1+
-     +                                 CONJG(ALPHA)*TEMP2)
-                          ELSE
-                              C(J,J) = BETA*REAL(C(J,J)) +
-     +                                 REAL(ALPHA*TEMP1+
-     +                                 CONJG(ALPHA)*TEMP2)
-                          END IF
-                      ELSE
-                          IF (BETA.EQ.REAL(ZERO)) THEN
-                              C(I,J) = ALPHA*TEMP1 + CONJG(ALPHA)*TEMP2
-                          ELSE
-                              C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                                 CONJG(ALPHA)*TEMP2
-                          END IF
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CHER2K.
-*
-      END
diff --git a/superlu/BLAS/cherk.f b/superlu/BLAS/cherk.f
deleted file mode 100644
index 1c47e57b..00000000
--- a/superlu/BLAS/cherk.f
+++ /dev/null
@@ -1,396 +0,0 @@
-*> \brief \b CHERK
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CHERK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA,BETA
-*       INTEGER K,LDA,LDC,N
-*       CHARACTER TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CHERK  performs one of the hermitian rank k operations
-*>
-*>    C := alpha*A*A**H + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*A**H*A + beta*C,
-*>
-*> where  alpha and beta  are  real scalars,  C is an  n by n  hermitian
-*> matrix and  A  is an  n by k  matrix in the  first case and a  k by n
-*> matrix in the second case.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   C := alpha*A*A**H + beta*C.
-*>
-*>              TRANS = 'C' or 'c'   C := alpha*A**H*A + beta*C.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns   of  the   matrix   A,   and  on   entry   with
-*>           TRANS = 'C' or 'c',  K  specifies  the number of rows of the
-*>           matrix A.  K must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  hermitian matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  hermitian matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set,  they are assumed to be zero,  and on exit they
-*>           are set to zero.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*>
-*>  -- Modified 8-Nov-93 to set C(J,J) to REAL( C(J,J) ) when BETA = 1.
-*>     Ed Anderson, Cray Research Inc.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CHERK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA,BETA
-      INTEGER K,LDA,LDC,N
-      CHARACTER TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CMPLX,CONJG,MAX,REAL
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP
-      REAL RTEMP
-      INTEGER I,INFO,J,L,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      REAL ONE,ZERO
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*
-*     Test the input parameters.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND.
-     +         (.NOT.LSAME(TRANS,'C'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDC.LT.MAX(1,N)) THEN
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CHERK ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J - 1
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-                      C(J,J) = BETA*REAL(C(J,J))
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      C(J,J) = BETA*REAL(C(J,J))
-                      DO 70 I = J + 1,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  C := alpha*A*A**H + beta*C.
-*
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J - 1
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                      C(J,J) = BETA*REAL(C(J,J))
-                  ELSE
-                      C(J,J) = REAL(C(J,J))
-                  END IF
-                  DO 120 L = 1,K
-                      IF (A(J,L).NE.CMPLX(ZERO)) THEN
-                          TEMP = ALPHA*CONJG(A(J,L))
-                          DO 110 I = 1,J - 1
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  110                     CONTINUE
-                          C(J,J) = REAL(C(J,J)) + REAL(TEMP*A(I,L))
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      C(J,J) = BETA*REAL(C(J,J))
-                      DO 150 I = J + 1,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  ELSE
-                      C(J,J) = REAL(C(J,J))
-                  END IF
-                  DO 170 L = 1,K
-                      IF (A(J,L).NE.CMPLX(ZERO)) THEN
-                          TEMP = ALPHA*CONJG(A(J,L))
-                          C(J,J) = REAL(C(J,J)) + REAL(TEMP*A(J,L))
-                          DO 160 I = J + 1,N
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*A**H*A + beta*C.
-*
-          IF (UPPER) THEN
-              DO 220 J = 1,N
-                  DO 200 I = 1,J - 1
-                      TEMP = ZERO
-                      DO 190 L = 1,K
-                          TEMP = TEMP + CONJG(A(L,I))*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  200             CONTINUE
-                  RTEMP = ZERO
-                  DO 210 L = 1,K
-                      RTEMP = RTEMP + CONJG(A(L,J))*A(L,J)
-  210             CONTINUE
-                  IF (BETA.EQ.ZERO) THEN
-                      C(J,J) = ALPHA*RTEMP
-                  ELSE
-                      C(J,J) = ALPHA*RTEMP + BETA*REAL(C(J,J))
-                  END IF
-  220         CONTINUE
-          ELSE
-              DO 260 J = 1,N
-                  RTEMP = ZERO
-                  DO 230 L = 1,K
-                      RTEMP = RTEMP + CONJG(A(L,J))*A(L,J)
-  230             CONTINUE
-                  IF (BETA.EQ.ZERO) THEN
-                      C(J,J) = ALPHA*RTEMP
-                  ELSE
-                      C(J,J) = ALPHA*RTEMP + BETA*REAL(C(J,J))
-                  END IF
-                  DO 250 I = J + 1,N
-                      TEMP = ZERO
-                      DO 240 L = 1,K
-                          TEMP = TEMP + CONJG(A(L,I))*A(L,J)
-  240                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  250             CONTINUE
-  260         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CHERK .
-*
-      END
diff --git a/superlu/BLAS/chpmv.f b/superlu/BLAS/chpmv.f
deleted file mode 100644
index b182bfb9..00000000
--- a/superlu/BLAS/chpmv.f
+++ /dev/null
@@ -1,338 +0,0 @@
-*> \brief \b CHPMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CHPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX ALPHA,BETA
-*       INTEGER INCX,INCY,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX AP(*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CHPMV  performs the matrix-vector operation
-*>
-*>    y := alpha*A*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n hermitian matrix, supplied in packed form.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] AP
-*> \verbatim
-*>          AP is COMPLEX array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set and are assumed to be zero.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y. On exit, Y is overwritten by the updated
-*>           vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CHPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX ALPHA,BETA
-      INTEGER INCX,INCY,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX AP(*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX ONE
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,REAL
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 6
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CHPMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     Set up the start points in  X  and  Y.
-*
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  y  when AP contains the upper triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  K = KK
-                  DO 50 I = 1,J - 1
-                      Y(I) = Y(I) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + CONJG(AP(K))*X(I)
-                      K = K + 1
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*REAL(AP(KK+J-1)) + ALPHA*TEMP2
-                  KK = KK + J
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  DO 70 K = KK,KK + J - 2
-                      Y(IY) = Y(IY) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + CONJG(AP(K))*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*REAL(AP(KK+J-1)) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + J
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y  when AP contains the lower triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*REAL(AP(KK))
-                  K = KK + 1
-                  DO 90 I = J + 1,N
-                      Y(I) = Y(I) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + CONJG(AP(K))*X(I)
-                      K = K + 1
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-                  KK = KK + (N-J+1)
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*REAL(AP(KK))
-                  IX = JX
-                  IY = JY
-                  DO 110 K = KK + 1,KK + N - J
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + CONJG(AP(K))*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + (N-J+1)
-  120         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CHPMV .
-*
-      END
diff --git a/superlu/BLAS/chpr.f b/superlu/BLAS/chpr.f
deleted file mode 100644
index 6212c043..00000000
--- a/superlu/BLAS/chpr.f
+++ /dev/null
@@ -1,279 +0,0 @@
-*> \brief \b CHPR
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CHPR(UPLO,N,ALPHA,X,INCX,AP)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       INTEGER INCX,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX AP(*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CHPR    performs the hermitian rank 1 operation
-*>
-*>    A := alpha*x*x**H + A,
-*>
-*> where alpha is a real scalar, x is an n element vector and A is an
-*> n by n hermitian matrix, supplied in packed form.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] AP
-*> \verbatim
-*>          AP is COMPLEX array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the upper triangular part of the
-*>           updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the lower triangular part of the
-*>           updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set, they are assumed to be zero, and on exit they
-*>           are set to zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CHPR(UPLO,N,ALPHA,X,INCX,AP)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA
-      INTEGER INCX,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX AP(*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP
-      INTEGER I,INFO,IX,J,JX,K,KK,KX
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,REAL
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CHPR  ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.REAL(ZERO))) RETURN
-*
-*     Set the start point in X if the increment is not unity.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-*
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  A  when upper triangle is stored in AP.
-*
-          IF (INCX.EQ.1) THEN
-              DO 20 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*CONJG(X(J))
-                      K = KK
-                      DO 10 I = 1,J - 1
-                          AP(K) = AP(K) + X(I)*TEMP
-                          K = K + 1
-   10                 CONTINUE
-                      AP(KK+J-1) = REAL(AP(KK+J-1)) + REAL(X(J)*TEMP)
-                  ELSE
-                      AP(KK+J-1) = REAL(AP(KK+J-1))
-                  END IF
-                  KK = KK + J
-   20         CONTINUE
-          ELSE
-              JX = KX
-              DO 40 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*CONJG(X(JX))
-                      IX = KX
-                      DO 30 K = KK,KK + J - 2
-                          AP(K) = AP(K) + X(IX)*TEMP
-                          IX = IX + INCX
-   30                 CONTINUE
-                      AP(KK+J-1) = REAL(AP(KK+J-1)) + REAL(X(JX)*TEMP)
-                  ELSE
-                      AP(KK+J-1) = REAL(AP(KK+J-1))
-                  END IF
-                  JX = JX + INCX
-                  KK = KK + J
-   40         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  A  when lower triangle is stored in AP.
-*
-          IF (INCX.EQ.1) THEN
-              DO 60 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*CONJG(X(J))
-                      AP(KK) = REAL(AP(KK)) + REAL(TEMP*X(J))
-                      K = KK + 1
-                      DO 50 I = J + 1,N
-                          AP(K) = AP(K) + X(I)*TEMP
-                          K = K + 1
-   50                 CONTINUE
-                  ELSE
-                      AP(KK) = REAL(AP(KK))
-                  END IF
-                  KK = KK + N - J + 1
-   60         CONTINUE
-          ELSE
-              JX = KX
-              DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*CONJG(X(JX))
-                      AP(KK) = REAL(AP(KK)) + REAL(TEMP*X(JX))
-                      IX = JX
-                      DO 70 K = KK + 1,KK + N - J
-                          IX = IX + INCX
-                          AP(K) = AP(K) + X(IX)*TEMP
-   70                 CONTINUE
-                  ELSE
-                      AP(KK) = REAL(AP(KK))
-                  END IF
-                  JX = JX + INCX
-                  KK = KK + N - J + 1
-   80         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CHPR  .
-*
-      END
diff --git a/superlu/BLAS/chpr2.f b/superlu/BLAS/chpr2.f
deleted file mode 100644
index 3ca388a4..00000000
--- a/superlu/BLAS/chpr2.f
+++ /dev/null
@@ -1,318 +0,0 @@
-*> \brief \b CHPR2
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CHPR2(UPLO,N,ALPHA,X,INCX,Y,INCY,AP)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX ALPHA
-*       INTEGER INCX,INCY,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX AP(*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CHPR2  performs the hermitian rank 2 operation
-*>
-*>    A := alpha*x*y**H + conjg( alpha )*y*x**H + A,
-*>
-*> where alpha is a scalar, x and y are n element vectors and A is an
-*> n by n hermitian matrix, supplied in packed form.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] AP
-*> \verbatim
-*>          AP is COMPLEX array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the upper triangular part of the
-*>           updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the lower triangular part of the
-*>           updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set, they are assumed to be zero, and on exit they
-*>           are set to zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CHPR2(UPLO,N,ALPHA,X,INCX,Y,INCY,AP)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX ALPHA
-      INTEGER INCX,INCY,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX AP(*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,REAL
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CHPR2 ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Set up the start points in X and Y if the increments are not both
-*     unity.
-*
-      IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (N-1)*INCX
-          END IF
-          IF (INCY.GT.0) THEN
-              KY = 1
-          ELSE
-              KY = 1 - (N-1)*INCY
-          END IF
-          JX = KX
-          JY = KY
-      END IF
-*
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-*
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  A  when upper triangle is stored in AP.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 20 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*CONJG(Y(J))
-                      TEMP2 = CONJG(ALPHA*X(J))
-                      K = KK
-                      DO 10 I = 1,J - 1
-                          AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2
-                          K = K + 1
-   10                 CONTINUE
-                      AP(KK+J-1) = REAL(AP(KK+J-1)) +
-     +                             REAL(X(J)*TEMP1+Y(J)*TEMP2)
-                  ELSE
-                      AP(KK+J-1) = REAL(AP(KK+J-1))
-                  END IF
-                  KK = KK + J
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*CONJG(Y(JY))
-                      TEMP2 = CONJG(ALPHA*X(JX))
-                      IX = KX
-                      IY = KY
-                      DO 30 K = KK,KK + J - 2
-                          AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   30                 CONTINUE
-                      AP(KK+J-1) = REAL(AP(KK+J-1)) +
-     +                             REAL(X(JX)*TEMP1+Y(JY)*TEMP2)
-                  ELSE
-                      AP(KK+J-1) = REAL(AP(KK+J-1))
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + J
-   40         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  A  when lower triangle is stored in AP.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*CONJG(Y(J))
-                      TEMP2 = CONJG(ALPHA*X(J))
-                      AP(KK) = REAL(AP(KK)) +
-     +                         REAL(X(J)*TEMP1+Y(J)*TEMP2)
-                      K = KK + 1
-                      DO 50 I = J + 1,N
-                          AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2
-                          K = K + 1
-   50                 CONTINUE
-                  ELSE
-                      AP(KK) = REAL(AP(KK))
-                  END IF
-                  KK = KK + N - J + 1
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*CONJG(Y(JY))
-                      TEMP2 = CONJG(ALPHA*X(JX))
-                      AP(KK) = REAL(AP(KK)) +
-     +                         REAL(X(JX)*TEMP1+Y(JY)*TEMP2)
-                      IX = JX
-                      IY = JY
-                      DO 70 K = KK + 1,KK + N - J
-                          IX = IX + INCX
-                          IY = IY + INCY
-                          AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2
-   70                 CONTINUE
-                  ELSE
-                      AP(KK) = REAL(AP(KK))
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + N - J + 1
-   80         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CHPR2 .
-*
-      END
diff --git a/superlu/BLAS/crotg.f b/superlu/BLAS/crotg.f
deleted file mode 100644
index 1cdb662e..00000000
--- a/superlu/BLAS/crotg.f
+++ /dev/null
@@ -1,74 +0,0 @@
-*> \brief \b CROTG
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CROTG(CA,CB,C,S)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX CA,CB,S
-*       REAL C
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CROTG determines a complex Givens rotation.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level1
-*
-*  =====================================================================
-      SUBROUTINE CROTG(CA,CB,C,S)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX CA,CB,S
-      REAL C
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      COMPLEX ALPHA
-      REAL NORM,SCALE
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CABS,CONJG,SQRT
-*     ..
-      IF (CABS(CA).EQ.0.) THEN
-         C = 0.
-         S = (1.,0.)
-         CA = CB
-      ELSE
-         SCALE = CABS(CA) + CABS(CB)
-         NORM = SCALE*SQRT((CABS(CA/SCALE))**2+ (CABS(CB/SCALE))**2)
-         ALPHA = CA/CABS(CA)
-         C = CABS(CA)/NORM
-         S = ALPHA*CONJG(CB)/NORM
-         CA = ALPHA*NORM
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/cscal.f b/superlu/BLAS/cscal.f
deleted file mode 100644
index 1405a977..00000000
--- a/superlu/BLAS/cscal.f
+++ /dev/null
@@ -1,91 +0,0 @@
-*> \brief \b CSCAL
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CSCAL(N,CA,CX,INCX)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX CA
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    CSCAL scales a vector by a constant.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack,  3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CSCAL(N,CA,CX,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX CA
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER I,NINCX
-*     ..
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*
-*        code for increment equal to 1
-*
-         DO I = 1,N
-            CX(I) = CA*CX(I)
-         END DO
-      ELSE
-*
-*        code for increment not equal to 1
-*
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            CX(I) = CA*CX(I)
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/csrot.f b/superlu/BLAS/csrot.f
deleted file mode 100644
index aa8564e7..00000000
--- a/superlu/BLAS/csrot.f
+++ /dev/null
@@ -1,153 +0,0 @@
-*> \brief \b CSROT
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CSROT( N, CX, INCX, CY, INCY, C, S )
-*
-*       .. Scalar Arguments ..
-*       INTEGER           INCX, INCY, N
-*       REAL              C, S
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX           CX( * ), CY( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CSROT applies a plane rotation, where the cos and sin (c and s) are real
-*> and the vectors cx and cy are complex.
-*> jack dongarra, linpack, 3/11/78.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the vectors cx and cy.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in,out] CX
-*> \verbatim
-*>          CX is COMPLEX array, dimension at least
-*>           ( 1 + ( N - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array CX must contain the n
-*>           element vector cx. On exit, CX is overwritten by the updated
-*>           vector cx.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           CX. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] CY
-*> \verbatim
-*>          CY is COMPLEX array, dimension at least
-*>           ( 1 + ( N - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array CY must contain the n
-*>           element vector cy. On exit, CY is overwritten by the updated
-*>           vector cy.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           CY. INCY must not be zero.
-*> \endverbatim
-*>
-*> \param[in] C
-*> \verbatim
-*>          C is REAL
-*>           On entry, C specifies the cosine, cos.
-*> \endverbatim
-*>
-*> \param[in] S
-*> \verbatim
-*>          S is REAL
-*>           On entry, S specifies the sine, sin.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level1
-*
-*  =====================================================================
-      SUBROUTINE CSROT( N, CX, INCX, CY, INCY, C, S )
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER           INCX, INCY, N
-      REAL              C, S
-*     ..
-*     .. Array Arguments ..
-      COMPLEX           CX( * ), CY( * )
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER           I, IX, IY
-      COMPLEX           CTEMP
-*     ..
-*     .. Executable Statements ..
-*
-      IF( N.LE.0 )
-     $   RETURN
-      IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN
-*
-*        code for both increments equal to 1
-*
-         DO I = 1, N
-            CTEMP = C*CX( I ) + S*CY( I )
-            CY( I ) = C*CY( I ) - S*CX( I )
-            CX( I ) = CTEMP
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments not equal
-*          to 1
-*
-         IX = 1
-         IY = 1
-         IF( INCX.LT.0 )
-     $      IX = ( -N+1 )*INCX + 1
-         IF( INCY.LT.0 )
-     $      IY = ( -N+1 )*INCY + 1
-         DO I = 1, N
-            CTEMP = C*CX( IX ) + S*CY( IY )
-            CY( IY ) = C*CY( IY ) - S*CX( IX )
-            CX( IX ) = CTEMP
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/csscal.f b/superlu/BLAS/csscal.f
deleted file mode 100644
index dc02654f..00000000
--- a/superlu/BLAS/csscal.f
+++ /dev/null
@@ -1,94 +0,0 @@
-*> \brief \b CSSCAL
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CSSCAL(N,SA,CX,INCX)
-*
-*       .. Scalar Arguments ..
-*       REAL SA
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    CSSCAL scales a complex vector by a real constant.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CSSCAL(N,SA,CX,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL SA
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER I,NINCX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC AIMAG,CMPLX,REAL
-*     ..
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*
-*        code for increment equal to 1
-*
-         DO I = 1,N
-            CX(I) = CMPLX(SA*REAL(CX(I)),SA*AIMAG(CX(I)))
-         END DO
-      ELSE
-*
-*        code for increment not equal to 1
-*
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            CX(I) = CMPLX(SA*REAL(CX(I)),SA*AIMAG(CX(I)))
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/cswap.f b/superlu/BLAS/cswap.f
deleted file mode 100644
index 369a294e..00000000
--- a/superlu/BLAS/cswap.f
+++ /dev/null
@@ -1,98 +0,0 @@
-*> \brief \b CSWAP
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CSWAP(N,CX,INCX,CY,INCY)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*),CY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>   CSWAP interchanges two vectors.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CSWAP(N,CX,INCX,CY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*),CY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      COMPLEX CTEMP
-      INTEGER I,IX,IY
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*       code for both increments equal to 1
-         DO I = 1,N
-            CTEMP = CX(I)
-            CX(I) = CY(I)
-            CY(I) = CTEMP
-         END DO
-      ELSE
-*
-*       code for unequal increments or equal increments not equal
-*         to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            CTEMP = CX(IX)
-            CX(IX) = CY(IY)
-            CY(IY) = CTEMP
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/csymm.f b/superlu/BLAS/csymm.f
deleted file mode 100644
index 906a5720..00000000
--- a/superlu/BLAS/csymm.f
+++ /dev/null
@@ -1,369 +0,0 @@
-*> \brief \b CSYMM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CSYMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX ALPHA,BETA
-*       INTEGER LDA,LDB,LDC,M,N
-*       CHARACTER SIDE,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CSYMM  performs one of the matrix-matrix operations
-*>
-*>    C := alpha*A*B + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*B*A + beta*C,
-*>
-*> where  alpha and beta are scalars, A is a symmetric matrix and  B and
-*> C are m by n matrices.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE  specifies whether  the  symmetric matrix  A
-*>           appears on the  left or right  in the  operation as follows:
-*>
-*>              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
-*>
-*>              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of  the  symmetric  matrix   A  is  to  be
-*>           referenced as follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of the
-*>                                  symmetric matrix is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of the
-*>                                  symmetric matrix is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies the number of rows of the matrix  C.
-*>           M  must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix C.
-*>           N  must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is
-*>           m  when  SIDE = 'L' or 'l'  and is n  otherwise.
-*>           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
-*>           the array  A  must contain the  symmetric matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading m by m upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  symmetric matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  m by m  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  symmetric
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*>           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
-*>           the array  A  must contain the  symmetric matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading n by n upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  symmetric matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  n by n  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  symmetric
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX array of DIMENSION ( LDB, n ).
-*>           Before entry, the leading  m by n part of the array  B  must
-*>           contain the matrix B.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n updated
-*>           matrix.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CSYMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX ALPHA,BETA
-      INTEGER LDA,LDB,LDC,M,N
-      CHARACTER SIDE,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP1,TEMP2
-      INTEGER I,INFO,J,K,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      COMPLEX ONE
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*
-*     Set NROWA as the number of rows of A.
-*
-      IF (LSAME(SIDE,'L')) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF ((.NOT.LSAME(SIDE,'L')) .AND. (.NOT.LSAME(SIDE,'R'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDB.LT.MAX(1,M)) THEN
-          INFO = 9
-      ELSE IF (LDC.LT.MAX(1,M)) THEN
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CSYMM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(SIDE,'L')) THEN
-*
-*        Form  C := alpha*A*B + beta*C.
-*
-          IF (UPPER) THEN
-              DO 70 J = 1,N
-                  DO 60 I = 1,M
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 50 K = 1,I - 1
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*A(K,I)
-   50                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) +
-     +                             ALPHA*TEMP2
-                      END IF
-   60             CONTINUE
-   70         CONTINUE
-          ELSE
-              DO 100 J = 1,N
-                  DO 90 I = M,1,-1
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 80 K = I + 1,M
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*A(K,I)
-   80                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) +
-     +                             ALPHA*TEMP2
-                      END IF
-   90             CONTINUE
-  100         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*B*A + beta*C.
-*
-          DO 170 J = 1,N
-              TEMP1 = ALPHA*A(J,J)
-              IF (BETA.EQ.ZERO) THEN
-                  DO 110 I = 1,M
-                      C(I,J) = TEMP1*B(I,J)
-  110             CONTINUE
-              ELSE
-                  DO 120 I = 1,M
-                      C(I,J) = BETA*C(I,J) + TEMP1*B(I,J)
-  120             CONTINUE
-              END IF
-              DO 140 K = 1,J - 1
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(K,J)
-                  ELSE
-                      TEMP1 = ALPHA*A(J,K)
-                  END IF
-                  DO 130 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  130             CONTINUE
-  140         CONTINUE
-              DO 160 K = J + 1,N
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(J,K)
-                  ELSE
-                      TEMP1 = ALPHA*A(K,J)
-                  END IF
-                  DO 150 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  150             CONTINUE
-  160         CONTINUE
-  170     CONTINUE
-      END IF
-*
-      RETURN
-*
-*     End of CSYMM .
-*
-      END
diff --git a/superlu/BLAS/csyr2k.f b/superlu/BLAS/csyr2k.f
deleted file mode 100644
index 1fdeadc0..00000000
--- a/superlu/BLAS/csyr2k.f
+++ /dev/null
@@ -1,396 +0,0 @@
-*> \brief \b CSYR2K
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX ALPHA,BETA
-*       INTEGER K,LDA,LDB,LDC,N
-*       CHARACTER TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CSYR2K  performs one of the symmetric rank 2k operations
-*>
-*>    C := alpha*A*B**T + alpha*B*A**T + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*A**T*B + alpha*B**T*A + beta*C,
-*>
-*> where  alpha and beta  are scalars,  C is an  n by n symmetric matrix
-*> and  A and B  are  n by k  matrices  in the  first  case  and  k by n
-*> matrices in the second case.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'    C := alpha*A*B**T + alpha*B*A**T +
-*>                                         beta*C.
-*>
-*>              TRANS = 'T' or 't'    C := alpha*A**T*B + alpha*B**T*A +
-*>                                         beta*C.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns  of the  matrices  A and B,  and on  entry  with
-*>           TRANS = 'T' or 't',  K  specifies  the number of rows of the
-*>           matrices  A and B.  K must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX array of DIMENSION ( LDB, kb ), where kb is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  k by n  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDB must be at least  max( 1, n ), otherwise  LDB must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX ALPHA,BETA
-      INTEGER K,LDA,LDB,LDC,N
-      CHARACTER TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP1,TEMP2
-      INTEGER I,INFO,J,L,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      COMPLEX ONE
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*
-*     Test the input parameters.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND.
-     +         (.NOT.LSAME(TRANS,'T'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDB.LT.MAX(1,NROWA)) THEN
-          INFO = 9
-      ELSE IF (LDC.LT.MAX(1,N)) THEN
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CSYR2K',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 I = J,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  C := alpha*A*B**T + alpha*B*A**T + C.
-*
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                  END IF
-                  DO 120 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*B(J,L)
-                          TEMP2 = ALPHA*A(J,L)
-                          DO 110 I = 1,J
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  110                     CONTINUE
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  END IF
-                  DO 170 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*B(J,L)
-                          TEMP2 = ALPHA*A(J,L)
-                          DO 160 I = J,N
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*A**T*B + alpha*B**T*A + C.
-*
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 190 L = 1,K
-                          TEMP1 = TEMP1 + A(L,I)*B(L,J)
-                          TEMP2 = TEMP2 + B(L,I)*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                             ALPHA*TEMP2
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 220 L = 1,K
-                          TEMP1 = TEMP1 + A(L,I)*B(L,J)
-                          TEMP2 = TEMP2 + B(L,I)*A(L,J)
-  220                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                             ALPHA*TEMP2
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CSYR2K.
-*
-      END
diff --git a/superlu/BLAS/csyrk.f b/superlu/BLAS/csyrk.f
deleted file mode 100644
index c4494c5a..00000000
--- a/superlu/BLAS/csyrk.f
+++ /dev/null
@@ -1,363 +0,0 @@
-*> \brief \b CSYRK
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX ALPHA,BETA
-*       INTEGER K,LDA,LDC,N
-*       CHARACTER TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CSYRK  performs one of the symmetric rank k operations
-*>
-*>    C := alpha*A*A**T + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*A**T*A + beta*C,
-*>
-*> where  alpha and beta  are scalars,  C is an  n by n symmetric matrix
-*> and  A  is an  n by k  matrix in the first case and a  k by n  matrix
-*> in the second case.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   C := alpha*A*A**T + beta*C.
-*>
-*>              TRANS = 'T' or 't'   C := alpha*A**T*A + beta*C.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns   of  the   matrix   A,   and  on   entry   with
-*>           TRANS = 'T' or 't',  K  specifies  the number of rows of the
-*>           matrix A.  K must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX ALPHA,BETA
-      INTEGER K,LDA,LDC,N
-      CHARACTER TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP
-      INTEGER I,INFO,J,L,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      COMPLEX ONE
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*
-*     Test the input parameters.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND.
-     +         (.NOT.LSAME(TRANS,'T'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDC.LT.MAX(1,N)) THEN
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CSYRK ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 I = J,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  C := alpha*A*A**T + beta*C.
-*
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                  END IF
-                  DO 120 L = 1,K
-                      IF (A(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*A(J,L)
-                          DO 110 I = 1,J
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  110                     CONTINUE
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  END IF
-                  DO 170 L = 1,K
-                      IF (A(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*A(J,L)
-                          DO 160 I = J,N
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*A**T*A + beta*C.
-*
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP = ZERO
-                      DO 190 L = 1,K
-                          TEMP = TEMP + A(L,I)*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP = ZERO
-                      DO 220 L = 1,K
-                          TEMP = TEMP + A(L,I)*A(L,J)
-  220                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CSYRK .
-*
-      END
diff --git a/superlu/BLAS/ctbmv.f b/superlu/BLAS/ctbmv.f
deleted file mode 100644
index 1513c1a3..00000000
--- a/superlu/BLAS/ctbmv.f
+++ /dev/null
@@ -1,429 +0,0 @@
-*> \brief \b CTBMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CTBMV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,K,LDA,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CTBMV  performs one of the matrix-vector operations
-*>
-*>    x := A*x,   or   x := A**T*x,   or   x := A**H*x,
-*>
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular band matrix, with ( k + 1 ) diagonals.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>
-*>              TRANS = 'C' or 'c'   x := A**H*x.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with UPLO = 'U' or 'u', K specifies the number of
-*>           super-diagonals of the matrix A.
-*>           On entry with UPLO = 'L' or 'l', K specifies the number of
-*>           sub-diagonals of the matrix A.
-*>           K must satisfy  0 .le. K.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer an upper
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer a lower
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Note that when DIAG = 'U' or 'u' the elements of the array A
-*>           corresponding to the diagonal elements of the matrix are not
-*>           referenced, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CTBMV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,K,LDA,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP
-      INTEGER I,INFO,IX,J,JX,KPLUS1,KX,L
-      LOGICAL NOCONJ,NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,MAX,MIN
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 7
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CTBMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOCONJ = LSAME(TRANS,'T')
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX   too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*         Form  x := A*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          L = KPLUS1 - J
-                          DO 10 I = MAX(1,J-K),J - 1
-                              X(I) = X(I) + TEMP*A(L+I,J)
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(KPLUS1,J)
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          L = KPLUS1 - J
-                          DO 30 I = MAX(1,J-K),J - 1
-                              X(IX) = X(IX) + TEMP*A(L+I,J)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(KPLUS1,J)
-                      END IF
-                      JX = JX + INCX
-                      IF (J.GT.K) KX = KX + INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          L = 1 - J
-                          DO 50 I = MIN(N,J+K),J + 1,-1
-                              X(I) = X(I) + TEMP*A(L+I,J)
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(1,J)
-                      END IF
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          L = 1 - J
-                          DO 70 I = MIN(N,J+K),J + 1,-1
-                              X(IX) = X(IX) + TEMP*A(L+I,J)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(1,J)
-                      END IF
-                      JX = JX - INCX
-                      IF ((N-J).GE.K) KX = KX - INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := A**T*x  or  x := A**H*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = N,1,-1
-                      TEMP = X(J)
-                      L = KPLUS1 - J
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
-                          DO 90 I = J - 1,MAX(1,J-K),-1
-                              TEMP = TEMP + A(L+I,J)*X(I)
-   90                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(A(KPLUS1,J))
-                          DO 100 I = J - 1,MAX(1,J-K),-1
-                              TEMP = TEMP + CONJG(A(L+I,J))*X(I)
-  100                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-  110             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 140 J = N,1,-1
-                      TEMP = X(JX)
-                      KX = KX - INCX
-                      IX = KX
-                      L = KPLUS1 - J
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
-                          DO 120 I = J - 1,MAX(1,J-K),-1
-                              TEMP = TEMP + A(L+I,J)*X(IX)
-                              IX = IX - INCX
-  120                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(A(KPLUS1,J))
-                          DO 130 I = J - 1,MAX(1,J-K),-1
-                              TEMP = TEMP + CONJG(A(L+I,J))*X(IX)
-                              IX = IX - INCX
-  130                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  140             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = 1,N
-                      TEMP = X(J)
-                      L = 1 - J
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(1,J)
-                          DO 150 I = J + 1,MIN(N,J+K)
-                              TEMP = TEMP + A(L+I,J)*X(I)
-  150                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(A(1,J))
-                          DO 160 I = J + 1,MIN(N,J+K)
-                              TEMP = TEMP + CONJG(A(L+I,J))*X(I)
-  160                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-  170             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 200 J = 1,N
-                      TEMP = X(JX)
-                      KX = KX + INCX
-                      IX = KX
-                      L = 1 - J
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(1,J)
-                          DO 180 I = J + 1,MIN(N,J+K)
-                              TEMP = TEMP + A(L+I,J)*X(IX)
-                              IX = IX + INCX
-  180                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(A(1,J))
-                          DO 190 I = J + 1,MIN(N,J+K)
-                              TEMP = TEMP + CONJG(A(L+I,J))*X(IX)
-                              IX = IX + INCX
-  190                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CTBMV .
-*
-      END
diff --git a/superlu/BLAS/ctbsv.f b/superlu/BLAS/ctbsv.f
deleted file mode 100644
index f4cc3306..00000000
--- a/superlu/BLAS/ctbsv.f
+++ /dev/null
@@ -1,432 +0,0 @@
-*> \brief \b CTBSV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CTBSV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,K,LDA,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CTBSV  solves one of the systems of equations
-*>
-*>    A*x = b,   or   A**T*x = b,   or   A**H*x = b,
-*>
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular band matrix, with ( k + 1 )
-*> diagonals.
-*>
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>
-*>              TRANS = 'C' or 'c'   A**H*x = b.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with UPLO = 'U' or 'u', K specifies the number of
-*>           super-diagonals of the matrix A.
-*>           On entry with UPLO = 'L' or 'l', K specifies the number of
-*>           sub-diagonals of the matrix A.
-*>           K must satisfy  0 .le. K.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer an upper
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer a lower
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Note that when DIAG = 'U' or 'u' the elements of the array A
-*>           corresponding to the diagonal elements of the matrix are not
-*>           referenced, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CTBSV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,K,LDA,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP
-      INTEGER I,INFO,IX,J,JX,KPLUS1,KX,L
-      LOGICAL NOCONJ,NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,MAX,MIN
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 7
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CTBSV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOCONJ = LSAME(TRANS,'T')
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed by sequentially with one pass through A.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x := inv( A )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          L = KPLUS1 - J
-                          IF (NOUNIT) X(J) = X(J)/A(KPLUS1,J)
-                          TEMP = X(J)
-                          DO 10 I = J - 1,MAX(1,J-K),-1
-                              X(I) = X(I) - TEMP*A(L+I,J)
-   10                     CONTINUE
-                      END IF
-   20             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 40 J = N,1,-1
-                      KX = KX - INCX
-                      IF (X(JX).NE.ZERO) THEN
-                          IX = KX
-                          L = KPLUS1 - J
-                          IF (NOUNIT) X(JX) = X(JX)/A(KPLUS1,J)
-                          TEMP = X(JX)
-                          DO 30 I = J - 1,MAX(1,J-K),-1
-                              X(IX) = X(IX) - TEMP*A(L+I,J)
-                              IX = IX - INCX
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          L = 1 - J
-                          IF (NOUNIT) X(J) = X(J)/A(1,J)
-                          TEMP = X(J)
-                          DO 50 I = J + 1,MIN(N,J+K)
-                              X(I) = X(I) - TEMP*A(L+I,J)
-   50                     CONTINUE
-                      END IF
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      KX = KX + INCX
-                      IF (X(JX).NE.ZERO) THEN
-                          IX = KX
-                          L = 1 - J
-                          IF (NOUNIT) X(JX) = X(JX)/A(1,J)
-                          TEMP = X(JX)
-                          DO 70 I = J + 1,MIN(N,J+K)
-                              X(IX) = X(IX) - TEMP*A(L+I,J)
-                              IX = IX + INCX
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := inv( A**T )*x  or  x := inv( A**H )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = 1,N
-                      TEMP = X(J)
-                      L = KPLUS1 - J
-                      IF (NOCONJ) THEN
-                          DO 90 I = MAX(1,J-K),J - 1
-                              TEMP = TEMP - A(L+I,J)*X(I)
-   90                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J)
-                      ELSE
-                          DO 100 I = MAX(1,J-K),J - 1
-                              TEMP = TEMP - CONJG(A(L+I,J))*X(I)
-  100                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(A(KPLUS1,J))
-                      END IF
-                      X(J) = TEMP
-  110             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 140 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      L = KPLUS1 - J
-                      IF (NOCONJ) THEN
-                          DO 120 I = MAX(1,J-K),J - 1
-                              TEMP = TEMP - A(L+I,J)*X(IX)
-                              IX = IX + INCX
-  120                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J)
-                      ELSE
-                          DO 130 I = MAX(1,J-K),J - 1
-                              TEMP = TEMP - CONJG(A(L+I,J))*X(IX)
-                              IX = IX + INCX
-  130                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(A(KPLUS1,J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      IF (J.GT.K) KX = KX + INCX
-  140             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = N,1,-1
-                      TEMP = X(J)
-                      L = 1 - J
-                      IF (NOCONJ) THEN
-                          DO 150 I = MIN(N,J+K),J + 1,-1
-                              TEMP = TEMP - A(L+I,J)*X(I)
-  150                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(1,J)
-                      ELSE
-                          DO 160 I = MIN(N,J+K),J + 1,-1
-                              TEMP = TEMP - CONJG(A(L+I,J))*X(I)
-  160                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(A(1,J))
-                      END IF
-                      X(J) = TEMP
-  170             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 200 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      L = 1 - J
-                      IF (NOCONJ) THEN
-                          DO 180 I = MIN(N,J+K),J + 1,-1
-                              TEMP = TEMP - A(L+I,J)*X(IX)
-                              IX = IX - INCX
-  180                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(1,J)
-                      ELSE
-                          DO 190 I = MIN(N,J+K),J + 1,-1
-                              TEMP = TEMP - CONJG(A(L+I,J))*X(IX)
-                              IX = IX - INCX
-  190                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(A(1,J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      IF ((N-J).GE.K) KX = KX - INCX
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CTBSV .
-*
-      END
diff --git a/superlu/BLAS/ctpmv.f b/superlu/BLAS/ctpmv.f
deleted file mode 100644
index 4582acc9..00000000
--- a/superlu/BLAS/ctpmv.f
+++ /dev/null
@@ -1,388 +0,0 @@
-*> \brief \b CTPMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CTPMV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX AP(*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CTPMV  performs one of the matrix-vector operations
-*>
-*>    x := A*x,   or   x := A**T*x,   or   x := A**H*x,
-*>
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular matrix, supplied in packed form.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>
-*>              TRANS = 'C' or 'c'   x := A**H*x.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] AP
-*> \verbatim
-*>          AP is COMPLEX array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
-*>           respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
-*>           respectively, and so on.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CTPMV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX AP(*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP
-      INTEGER I,INFO,IX,J,JX,K,KK,KX
-      LOGICAL NOCONJ,NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CTPMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOCONJ = LSAME(TRANS,'T')
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of AP are
-*     accessed sequentially with one pass through AP.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x:= A*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          K = KK
-                          DO 10 I = 1,J - 1
-                              X(I) = X(I) + TEMP*AP(K)
-                              K = K + 1
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*AP(KK+J-1)
-                      END IF
-                      KK = KK + J
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 30 K = KK,KK + J - 2
-                              X(IX) = X(IX) + TEMP*AP(K)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*AP(KK+J-1)
-                      END IF
-                      JX = JX + INCX
-                      KK = KK + J
-   40             CONTINUE
-              END IF
-          ELSE
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          K = KK
-                          DO 50 I = N,J + 1,-1
-                              X(I) = X(I) + TEMP*AP(K)
-                              K = K - 1
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*AP(KK-N+J)
-                      END IF
-                      KK = KK - (N-J+1)
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 70 K = KK,KK - (N- (J+1)),-1
-                              X(IX) = X(IX) + TEMP*AP(K)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*AP(KK-N+J)
-                      END IF
-                      JX = JX - INCX
-                      KK = KK - (N-J+1)
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := A**T*x  or  x := A**H*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = N,1,-1
-                      TEMP = X(J)
-                      K = KK - 1
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*AP(KK)
-                          DO 90 I = J - 1,1,-1
-                              TEMP = TEMP + AP(K)*X(I)
-                              K = K - 1
-   90                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(AP(KK))
-                          DO 100 I = J - 1,1,-1
-                              TEMP = TEMP + CONJG(AP(K))*X(I)
-                              K = K - 1
-  100                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-                      KK = KK - J
-  110             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 140 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*AP(KK)
-                          DO 120 K = KK - 1,KK - J + 1,-1
-                              IX = IX - INCX
-                              TEMP = TEMP + AP(K)*X(IX)
-  120                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(AP(KK))
-                          DO 130 K = KK - 1,KK - J + 1,-1
-                              IX = IX - INCX
-                              TEMP = TEMP + CONJG(AP(K))*X(IX)
-  130                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      KK = KK - J
-  140             CONTINUE
-              END IF
-          ELSE
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = 1,N
-                      TEMP = X(J)
-                      K = KK + 1
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*AP(KK)
-                          DO 150 I = J + 1,N
-                              TEMP = TEMP + AP(K)*X(I)
-                              K = K + 1
-  150                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(AP(KK))
-                          DO 160 I = J + 1,N
-                              TEMP = TEMP + CONJG(AP(K))*X(I)
-                              K = K + 1
-  160                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-                      KK = KK + (N-J+1)
-  170             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 200 J = 1,N
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*AP(KK)
-                          DO 180 K = KK + 1,KK + N - J
-                              IX = IX + INCX
-                              TEMP = TEMP + AP(K)*X(IX)
-  180                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(AP(KK))
-                          DO 190 K = KK + 1,KK + N - J
-                              IX = IX + INCX
-                              TEMP = TEMP + CONJG(AP(K))*X(IX)
-  190                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      KK = KK + (N-J+1)
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CTPMV .
-*
-      END
diff --git a/superlu/BLAS/ctpsv.f b/superlu/BLAS/ctpsv.f
deleted file mode 100644
index 2fcd19ba..00000000
--- a/superlu/BLAS/ctpsv.f
+++ /dev/null
@@ -1,390 +0,0 @@
-*> \brief \b CTPSV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CTPSV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX AP(*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CTPSV  solves one of the systems of equations
-*>
-*>    A*x = b,   or   A**T*x = b,   or   A**H*x = b,
-*>
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular matrix, supplied in packed form.
-*>
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>
-*>              TRANS = 'C' or 'c'   A**H*x = b.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] AP
-*> \verbatim
-*>          AP is COMPLEX array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
-*>           respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
-*>           respectively, and so on.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CTPSV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX AP(*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP
-      INTEGER I,INFO,IX,J,JX,K,KK,KX
-      LOGICAL NOCONJ,NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CTPSV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOCONJ = LSAME(TRANS,'T')
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of AP are
-*     accessed sequentially with one pass through AP.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x := inv( A )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/AP(KK)
-                          TEMP = X(J)
-                          K = KK - 1
-                          DO 10 I = J - 1,1,-1
-                              X(I) = X(I) - TEMP*AP(K)
-                              K = K - 1
-   10                     CONTINUE
-                      END IF
-                      KK = KK - J
-   20             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 40 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/AP(KK)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 30 K = KK - 1,KK - J + 1,-1
-                              IX = IX - INCX
-                              X(IX) = X(IX) - TEMP*AP(K)
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-                      KK = KK - J
-   40             CONTINUE
-              END IF
-          ELSE
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/AP(KK)
-                          TEMP = X(J)
-                          K = KK + 1
-                          DO 50 I = J + 1,N
-                              X(I) = X(I) - TEMP*AP(K)
-                              K = K + 1
-   50                     CONTINUE
-                      END IF
-                      KK = KK + (N-J+1)
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/AP(KK)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 70 K = KK + 1,KK + N - J
-                              IX = IX + INCX
-                              X(IX) = X(IX) - TEMP*AP(K)
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-                      KK = KK + (N-J+1)
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := inv( A**T )*x  or  x := inv( A**H )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = 1,N
-                      TEMP = X(J)
-                      K = KK
-                      IF (NOCONJ) THEN
-                          DO 90 I = 1,J - 1
-                              TEMP = TEMP - AP(K)*X(I)
-                              K = K + 1
-   90                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/AP(KK+J-1)
-                      ELSE
-                          DO 100 I = 1,J - 1
-                              TEMP = TEMP - CONJG(AP(K))*X(I)
-                              K = K + 1
-  100                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(AP(KK+J-1))
-                      END IF
-                      X(J) = TEMP
-                      KK = KK + J
-  110             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 140 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      IF (NOCONJ) THEN
-                          DO 120 K = KK,KK + J - 2
-                              TEMP = TEMP - AP(K)*X(IX)
-                              IX = IX + INCX
-  120                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/AP(KK+J-1)
-                      ELSE
-                          DO 130 K = KK,KK + J - 2
-                              TEMP = TEMP - CONJG(AP(K))*X(IX)
-                              IX = IX + INCX
-  130                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(AP(KK+J-1))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      KK = KK + J
-  140             CONTINUE
-              END IF
-          ELSE
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = N,1,-1
-                      TEMP = X(J)
-                      K = KK
-                      IF (NOCONJ) THEN
-                          DO 150 I = N,J + 1,-1
-                              TEMP = TEMP - AP(K)*X(I)
-                              K = K - 1
-  150                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/AP(KK-N+J)
-                      ELSE
-                          DO 160 I = N,J + 1,-1
-                              TEMP = TEMP - CONJG(AP(K))*X(I)
-                              K = K - 1
-  160                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(AP(KK-N+J))
-                      END IF
-                      X(J) = TEMP
-                      KK = KK - (N-J+1)
-  170             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 200 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      IF (NOCONJ) THEN
-                          DO 180 K = KK,KK - (N- (J+1)),-1
-                              TEMP = TEMP - AP(K)*X(IX)
-                              IX = IX - INCX
-  180                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/AP(KK-N+J)
-                      ELSE
-                          DO 190 K = KK,KK - (N- (J+1)),-1
-                              TEMP = TEMP - CONJG(AP(K))*X(IX)
-                              IX = IX - INCX
-  190                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(AP(KK-N+J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      KK = KK - (N-J+1)
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CTPSV .
-*
-      END
diff --git a/superlu/BLAS/ctrmm.f b/superlu/BLAS/ctrmm.f
deleted file mode 100644
index a23fb27c..00000000
--- a/superlu/BLAS/ctrmm.f
+++ /dev/null
@@ -1,452 +0,0 @@
-*> \brief \b CTRMM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX ALPHA
-*       INTEGER LDA,LDB,M,N
-*       CHARACTER DIAG,SIDE,TRANSA,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),B(LDB,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CTRMM  performs one of the matrix-matrix operations
-*>
-*>    B := alpha*op( A )*B,   or   B := alpha*B*op( A )
-*>
-*> where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
-*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
-*>
-*>    op( A ) = A   or   op( A ) = A**T   or   op( A ) = A**H.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE specifies whether  op( A ) multiplies B from
-*>           the left or right as follows:
-*>
-*>              SIDE = 'L' or 'l'   B := alpha*op( A )*B.
-*>
-*>              SIDE = 'R' or 'r'   B := alpha*B*op( A ).
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix A is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSA = 'N' or 'n'   op( A ) = A.
-*>
-*>              TRANSA = 'T' or 't'   op( A ) = A**T.
-*>
-*>              TRANSA = 'C' or 'c'   op( A ) = A**H.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit triangular
-*>           as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of B. M must be at
-*>           least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of B.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
-*>           zero then  A is not referenced and  B need not be set before
-*>           entry.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, k ), where k is m
-*>           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
-*>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
-*>           upper triangular part of the array  A must contain the upper
-*>           triangular matrix  and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
-*>           lower triangular part of the array  A must contain the lower
-*>           triangular matrix  and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
-*>           A  are not referenced either,  but are assumed to be  unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
-*>           then LDA must be at least max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in,out] B
-*> \verbatim
-*>          B is COMPLEX array of DIMENSION ( LDB, n ).
-*>           Before entry,  the leading  m by n part of the array  B must
-*>           contain the matrix  B,  and  on exit  is overwritten  by the
-*>           transformed matrix.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX ALPHA
-      INTEGER LDA,LDB,M,N
-      CHARACTER DIAG,SIDE,TRANSA,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),B(LDB,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,MAX
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP
-      INTEGER I,INFO,J,K,NROWA
-      LOGICAL LSIDE,NOCONJ,NOUNIT,UPPER
-*     ..
-*     .. Parameters ..
-      COMPLEX ONE
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*
-*     Test the input parameters.
-*
-      LSIDE = LSAME(SIDE,'L')
-      IF (LSIDE) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      NOCONJ = LSAME(TRANSA,'T')
-      NOUNIT = LSAME(DIAG,'N')
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.LSIDE) .AND. (.NOT.LSAME(SIDE,'R'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 2
-      ELSE IF ((.NOT.LSAME(TRANSA,'N')) .AND.
-     +         (.NOT.LSAME(TRANSA,'T')) .AND.
-     +         (.NOT.LSAME(TRANSA,'C'))) THEN
-          INFO = 3
-      ELSE IF ((.NOT.LSAME(DIAG,'U')) .AND. (.NOT.LSAME(DIAG,'N'))) THEN
-          INFO = 4
-      ELSE IF (M.LT.0) THEN
-          INFO = 5
-      ELSE IF (N.LT.0) THEN
-          INFO = 6
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 9
-      ELSE IF (LDB.LT.MAX(1,M)) THEN
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CTRMM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (M.EQ.0 .OR. N.EQ.0) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          DO 20 J = 1,N
-              DO 10 I = 1,M
-                  B(I,J) = ZERO
-   10         CONTINUE
-   20     CONTINUE
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSIDE) THEN
-          IF (LSAME(TRANSA,'N')) THEN
-*
-*           Form  B := alpha*A*B.
-*
-              IF (UPPER) THEN
-                  DO 50 J = 1,N
-                      DO 40 K = 1,M
-                          IF (B(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*B(K,J)
-                              DO 30 I = 1,K - 1
-                                  B(I,J) = B(I,J) + TEMP*A(I,K)
-   30                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP*A(K,K)
-                              B(K,J) = TEMP
-                          END IF
-   40                 CONTINUE
-   50             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 K = M,1,-1
-                          IF (B(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*B(K,J)
-                              B(K,J) = TEMP
-                              IF (NOUNIT) B(K,J) = B(K,J)*A(K,K)
-                              DO 60 I = K + 1,M
-                                  B(I,J) = B(I,J) + TEMP*A(I,K)
-   60                         CONTINUE
-                          END IF
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          ELSE
-*
-*           Form  B := alpha*A**T*B   or   B := alpha*A**H*B.
-*
-              IF (UPPER) THEN
-                  DO 120 J = 1,N
-                      DO 110 I = M,1,-1
-                          TEMP = B(I,J)
-                          IF (NOCONJ) THEN
-                              IF (NOUNIT) TEMP = TEMP*A(I,I)
-                              DO 90 K = 1,I - 1
-                                  TEMP = TEMP + A(K,I)*B(K,J)
-   90                         CONTINUE
-                          ELSE
-                              IF (NOUNIT) TEMP = TEMP*CONJG(A(I,I))
-                              DO 100 K = 1,I - 1
-                                  TEMP = TEMP + CONJG(A(K,I))*B(K,J)
-  100                         CONTINUE
-                          END IF
-                          B(I,J) = ALPHA*TEMP
-  110                 CONTINUE
-  120             CONTINUE
-              ELSE
-                  DO 160 J = 1,N
-                      DO 150 I = 1,M
-                          TEMP = B(I,J)
-                          IF (NOCONJ) THEN
-                              IF (NOUNIT) TEMP = TEMP*A(I,I)
-                              DO 130 K = I + 1,M
-                                  TEMP = TEMP + A(K,I)*B(K,J)
-  130                         CONTINUE
-                          ELSE
-                              IF (NOUNIT) TEMP = TEMP*CONJG(A(I,I))
-                              DO 140 K = I + 1,M
-                                  TEMP = TEMP + CONJG(A(K,I))*B(K,J)
-  140                         CONTINUE
-                          END IF
-                          B(I,J) = ALPHA*TEMP
-  150                 CONTINUE
-  160             CONTINUE
-              END IF
-          END IF
-      ELSE
-          IF (LSAME(TRANSA,'N')) THEN
-*
-*           Form  B := alpha*B*A.
-*
-              IF (UPPER) THEN
-                  DO 200 J = N,1,-1
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 170 I = 1,M
-                          B(I,J) = TEMP*B(I,J)
-  170                 CONTINUE
-                      DO 190 K = 1,J - 1
-                          IF (A(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*A(K,J)
-                              DO 180 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  180                         CONTINUE
-                          END IF
-  190                 CONTINUE
-  200             CONTINUE
-              ELSE
-                  DO 240 J = 1,N
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 210 I = 1,M
-                          B(I,J) = TEMP*B(I,J)
-  210                 CONTINUE
-                      DO 230 K = J + 1,N
-                          IF (A(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*A(K,J)
-                              DO 220 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  220                         CONTINUE
-                          END IF
-  230                 CONTINUE
-  240             CONTINUE
-              END IF
-          ELSE
-*
-*           Form  B := alpha*B*A**T   or   B := alpha*B*A**H.
-*
-              IF (UPPER) THEN
-                  DO 280 K = 1,N
-                      DO 260 J = 1,K - 1
-                          IF (A(J,K).NE.ZERO) THEN
-                              IF (NOCONJ) THEN
-                                  TEMP = ALPHA*A(J,K)
-                              ELSE
-                                  TEMP = ALPHA*CONJG(A(J,K))
-                              END IF
-                              DO 250 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  250                         CONTINUE
-                          END IF
-  260                 CONTINUE
-                      TEMP = ALPHA
-                      IF (NOUNIT) THEN
-                          IF (NOCONJ) THEN
-                              TEMP = TEMP*A(K,K)
-                          ELSE
-                              TEMP = TEMP*CONJG(A(K,K))
-                          END IF
-                      END IF
-                      IF (TEMP.NE.ONE) THEN
-                          DO 270 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  270                     CONTINUE
-                      END IF
-  280             CONTINUE
-              ELSE
-                  DO 320 K = N,1,-1
-                      DO 300 J = K + 1,N
-                          IF (A(J,K).NE.ZERO) THEN
-                              IF (NOCONJ) THEN
-                                  TEMP = ALPHA*A(J,K)
-                              ELSE
-                                  TEMP = ALPHA*CONJG(A(J,K))
-                              END IF
-                              DO 290 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  290                         CONTINUE
-                          END IF
-  300                 CONTINUE
-                      TEMP = ALPHA
-                      IF (NOUNIT) THEN
-                          IF (NOCONJ) THEN
-                              TEMP = TEMP*A(K,K)
-                          ELSE
-                              TEMP = TEMP*CONJG(A(K,K))
-                          END IF
-                      END IF
-                      IF (TEMP.NE.ONE) THEN
-                          DO 310 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  310                     CONTINUE
-                      END IF
-  320             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CTRMM .
-*
-      END
diff --git a/superlu/BLAS/ctrmv.f b/superlu/BLAS/ctrmv.f
deleted file mode 100644
index 8795e870..00000000
--- a/superlu/BLAS/ctrmv.f
+++ /dev/null
@@ -1,373 +0,0 @@
-*> \brief \b CTRMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,LDA,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CTRMV  performs one of the matrix-vector operations
-*>
-*>    x := A*x,   or   x := A**T*x,   or   x := A**H*x,
-*>
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>
-*>              TRANS = 'C' or 'c'   x := A**H*x.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular matrix and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular matrix and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced either, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,LDA,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP
-      INTEGER I,INFO,IX,J,JX,KX
-      LOGICAL NOCONJ,NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 6
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 8
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CTRMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOCONJ = LSAME(TRANS,'T')
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x := A*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          DO 10 I = 1,J - 1
-                              X(I) = X(I) + TEMP*A(I,J)
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(J,J)
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 30 I = 1,J - 1
-                              X(IX) = X(IX) + TEMP*A(I,J)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(J,J)
-                      END IF
-                      JX = JX + INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          DO 50 I = N,J + 1,-1
-                              X(I) = X(I) + TEMP*A(I,J)
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(J,J)
-                      END IF
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 70 I = N,J + 1,-1
-                              X(IX) = X(IX) + TEMP*A(I,J)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(J,J)
-                      END IF
-                      JX = JX - INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := A**T*x  or  x := A**H*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = N,1,-1
-                      TEMP = X(J)
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(J,J)
-                          DO 90 I = J - 1,1,-1
-                              TEMP = TEMP + A(I,J)*X(I)
-   90                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(A(J,J))
-                          DO 100 I = J - 1,1,-1
-                              TEMP = TEMP + CONJG(A(I,J))*X(I)
-  100                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-  110             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 140 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(J,J)
-                          DO 120 I = J - 1,1,-1
-                              IX = IX - INCX
-                              TEMP = TEMP + A(I,J)*X(IX)
-  120                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(A(J,J))
-                          DO 130 I = J - 1,1,-1
-                              IX = IX - INCX
-                              TEMP = TEMP + CONJG(A(I,J))*X(IX)
-  130                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  140             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = 1,N
-                      TEMP = X(J)
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(J,J)
-                          DO 150 I = J + 1,N
-                              TEMP = TEMP + A(I,J)*X(I)
-  150                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(A(J,J))
-                          DO 160 I = J + 1,N
-                              TEMP = TEMP + CONJG(A(I,J))*X(I)
-  160                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-  170             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 200 J = 1,N
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(J,J)
-                          DO 180 I = J + 1,N
-                              IX = IX + INCX
-                              TEMP = TEMP + A(I,J)*X(IX)
-  180                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*CONJG(A(J,J))
-                          DO 190 I = J + 1,N
-                              IX = IX + INCX
-                              TEMP = TEMP + CONJG(A(I,J))*X(IX)
-  190                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CTRMV .
-*
-      END
diff --git a/superlu/BLAS/ctrsm.f b/superlu/BLAS/ctrsm.f
deleted file mode 100644
index 7ee5c947..00000000
--- a/superlu/BLAS/ctrsm.f
+++ /dev/null
@@ -1,477 +0,0 @@
-*> \brief \b CTRSM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CTRSM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX ALPHA
-*       INTEGER LDA,LDB,M,N
-*       CHARACTER DIAG,SIDE,TRANSA,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),B(LDB,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CTRSM  solves one of the matrix equations
-*>
-*>    op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
-*>
-*> where alpha is a scalar, X and B are m by n matrices, A is a unit, or
-*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
-*>
-*>    op( A ) = A   or   op( A ) = A**T   or   op( A ) = A**H.
-*>
-*> The matrix X is overwritten on B.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry, SIDE specifies whether op( A ) appears on the left
-*>           or right of X as follows:
-*>
-*>              SIDE = 'L' or 'l'   op( A )*X = alpha*B.
-*>
-*>              SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix A is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSA = 'N' or 'n'   op( A ) = A.
-*>
-*>              TRANSA = 'T' or 't'   op( A ) = A**T.
-*>
-*>              TRANSA = 'C' or 'c'   op( A ) = A**H.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit triangular
-*>           as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of B. M must be at
-*>           least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of B.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX
-*>           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
-*>           zero then  A is not referenced and  B need not be set before
-*>           entry.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, k ),
-*>           where k is m when SIDE = 'L' or 'l'
-*>             and k is n when SIDE = 'R' or 'r'.
-*>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
-*>           upper triangular part of the array  A must contain the upper
-*>           triangular matrix  and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
-*>           lower triangular part of the array  A must contain the lower
-*>           triangular matrix  and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
-*>           A  are not referenced either,  but are assumed to be  unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
-*>           then LDA must be at least max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in,out] B
-*> \verbatim
-*>          B is COMPLEX array of DIMENSION ( LDB, n ).
-*>           Before entry,  the leading  m by n part of the array  B must
-*>           contain  the  right-hand  side  matrix  B,  and  on exit  is
-*>           overwritten by the solution matrix  X.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CTRSM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX ALPHA
-      INTEGER LDA,LDB,M,N
-      CHARACTER DIAG,SIDE,TRANSA,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),B(LDB,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,MAX
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP
-      INTEGER I,INFO,J,K,NROWA
-      LOGICAL LSIDE,NOCONJ,NOUNIT,UPPER
-*     ..
-*     .. Parameters ..
-      COMPLEX ONE
-      PARAMETER (ONE= (1.0E+0,0.0E+0))
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*
-*     Test the input parameters.
-*
-      LSIDE = LSAME(SIDE,'L')
-      IF (LSIDE) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      NOCONJ = LSAME(TRANSA,'T')
-      NOUNIT = LSAME(DIAG,'N')
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.LSIDE) .AND. (.NOT.LSAME(SIDE,'R'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 2
-      ELSE IF ((.NOT.LSAME(TRANSA,'N')) .AND.
-     +         (.NOT.LSAME(TRANSA,'T')) .AND.
-     +         (.NOT.LSAME(TRANSA,'C'))) THEN
-          INFO = 3
-      ELSE IF ((.NOT.LSAME(DIAG,'U')) .AND. (.NOT.LSAME(DIAG,'N'))) THEN
-          INFO = 4
-      ELSE IF (M.LT.0) THEN
-          INFO = 5
-      ELSE IF (N.LT.0) THEN
-          INFO = 6
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 9
-      ELSE IF (LDB.LT.MAX(1,M)) THEN
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CTRSM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (M.EQ.0 .OR. N.EQ.0) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          DO 20 J = 1,N
-              DO 10 I = 1,M
-                  B(I,J) = ZERO
-   10         CONTINUE
-   20     CONTINUE
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSIDE) THEN
-          IF (LSAME(TRANSA,'N')) THEN
-*
-*           Form  B := alpha*inv( A )*B.
-*
-              IF (UPPER) THEN
-                  DO 60 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 30 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-   30                     CONTINUE
-                      END IF
-                      DO 50 K = M,1,-1
-                          IF (B(K,J).NE.ZERO) THEN
-                              IF (NOUNIT) B(K,J) = B(K,J)/A(K,K)
-                              DO 40 I = 1,K - 1
-                                  B(I,J) = B(I,J) - B(K,J)*A(I,K)
-   40                         CONTINUE
-                          END IF
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 100 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 70 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-   70                     CONTINUE
-                      END IF
-                      DO 90 K = 1,M
-                          IF (B(K,J).NE.ZERO) THEN
-                              IF (NOUNIT) B(K,J) = B(K,J)/A(K,K)
-                              DO 80 I = K + 1,M
-                                  B(I,J) = B(I,J) - B(K,J)*A(I,K)
-   80                         CONTINUE
-                          END IF
-   90                 CONTINUE
-  100             CONTINUE
-              END IF
-          ELSE
-*
-*           Form  B := alpha*inv( A**T )*B
-*           or    B := alpha*inv( A**H )*B.
-*
-              IF (UPPER) THEN
-                  DO 140 J = 1,N
-                      DO 130 I = 1,M
-                          TEMP = ALPHA*B(I,J)
-                          IF (NOCONJ) THEN
-                              DO 110 K = 1,I - 1
-                                  TEMP = TEMP - A(K,I)*B(K,J)
-  110                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP/A(I,I)
-                          ELSE
-                              DO 120 K = 1,I - 1
-                                  TEMP = TEMP - CONJG(A(K,I))*B(K,J)
-  120                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP/CONJG(A(I,I))
-                          END IF
-                          B(I,J) = TEMP
-  130                 CONTINUE
-  140             CONTINUE
-              ELSE
-                  DO 180 J = 1,N
-                      DO 170 I = M,1,-1
-                          TEMP = ALPHA*B(I,J)
-                          IF (NOCONJ) THEN
-                              DO 150 K = I + 1,M
-                                  TEMP = TEMP - A(K,I)*B(K,J)
-  150                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP/A(I,I)
-                          ELSE
-                              DO 160 K = I + 1,M
-                                  TEMP = TEMP - CONJG(A(K,I))*B(K,J)
-  160                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP/CONJG(A(I,I))
-                          END IF
-                          B(I,J) = TEMP
-  170                 CONTINUE
-  180             CONTINUE
-              END IF
-          END IF
-      ELSE
-          IF (LSAME(TRANSA,'N')) THEN
-*
-*           Form  B := alpha*B*inv( A ).
-*
-              IF (UPPER) THEN
-                  DO 230 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 190 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-  190                     CONTINUE
-                      END IF
-                      DO 210 K = 1,J - 1
-                          IF (A(K,J).NE.ZERO) THEN
-                              DO 200 I = 1,M
-                                  B(I,J) = B(I,J) - A(K,J)*B(I,K)
-  200                         CONTINUE
-                          END IF
-  210                 CONTINUE
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(J,J)
-                          DO 220 I = 1,M
-                              B(I,J) = TEMP*B(I,J)
-  220                     CONTINUE
-                      END IF
-  230             CONTINUE
-              ELSE
-                  DO 280 J = N,1,-1
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 240 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-  240                     CONTINUE
-                      END IF
-                      DO 260 K = J + 1,N
-                          IF (A(K,J).NE.ZERO) THEN
-                              DO 250 I = 1,M
-                                  B(I,J) = B(I,J) - A(K,J)*B(I,K)
-  250                         CONTINUE
-                          END IF
-  260                 CONTINUE
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(J,J)
-                          DO 270 I = 1,M
-                              B(I,J) = TEMP*B(I,J)
-  270                     CONTINUE
-                      END IF
-  280             CONTINUE
-              END IF
-          ELSE
-*
-*           Form  B := alpha*B*inv( A**T )
-*           or    B := alpha*B*inv( A**H ).
-*
-              IF (UPPER) THEN
-                  DO 330 K = N,1,-1
-                      IF (NOUNIT) THEN
-                          IF (NOCONJ) THEN
-                              TEMP = ONE/A(K,K)
-                          ELSE
-                              TEMP = ONE/CONJG(A(K,K))
-                          END IF
-                          DO 290 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  290                     CONTINUE
-                      END IF
-                      DO 310 J = 1,K - 1
-                          IF (A(J,K).NE.ZERO) THEN
-                              IF (NOCONJ) THEN
-                                  TEMP = A(J,K)
-                              ELSE
-                                  TEMP = CONJG(A(J,K))
-                              END IF
-                              DO 300 I = 1,M
-                                  B(I,J) = B(I,J) - TEMP*B(I,K)
-  300                         CONTINUE
-                          END IF
-  310                 CONTINUE
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 320 I = 1,M
-                              B(I,K) = ALPHA*B(I,K)
-  320                     CONTINUE
-                      END IF
-  330             CONTINUE
-              ELSE
-                  DO 380 K = 1,N
-                      IF (NOUNIT) THEN
-                          IF (NOCONJ) THEN
-                              TEMP = ONE/A(K,K)
-                          ELSE
-                              TEMP = ONE/CONJG(A(K,K))
-                          END IF
-                          DO 340 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  340                     CONTINUE
-                      END IF
-                      DO 360 J = K + 1,N
-                          IF (A(J,K).NE.ZERO) THEN
-                              IF (NOCONJ) THEN
-                                  TEMP = A(J,K)
-                              ELSE
-                                  TEMP = CONJG(A(J,K))
-                              END IF
-                              DO 350 I = 1,M
-                                  B(I,J) = B(I,J) - TEMP*B(I,K)
-  350                         CONTINUE
-                          END IF
-  360                 CONTINUE
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 370 I = 1,M
-                              B(I,K) = ALPHA*B(I,K)
-  370                     CONTINUE
-                      END IF
-  380             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CTRSM .
-*
-      END
diff --git a/superlu/BLAS/ctrsv.f b/superlu/BLAS/ctrsv.f
deleted file mode 100644
index 7981a21d..00000000
--- a/superlu/BLAS/ctrsv.f
+++ /dev/null
@@ -1,375 +0,0 @@
-*> \brief \b CTRSV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE CTRSV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,LDA,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> CTRSV  solves one of the systems of equations
-*>
-*>    A*x = b,   or   A**T*x = b,   or   A**H*x = b,
-*>
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular matrix.
-*>
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>
-*>              TRANS = 'C' or 'c'   A**H*x = b.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular matrix and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular matrix and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced either, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE CTRSV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,LDA,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX ZERO
-      PARAMETER (ZERO= (0.0E+0,0.0E+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX TEMP
-      INTEGER I,INFO,IX,J,JX,KX
-      LOGICAL NOCONJ,NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CONJG,MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 6
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 8
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('CTRSV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOCONJ = LSAME(TRANS,'T')
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x := inv( A )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/A(J,J)
-                          TEMP = X(J)
-                          DO 10 I = J - 1,1,-1
-                              X(I) = X(I) - TEMP*A(I,J)
-   10                     CONTINUE
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 40 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/A(J,J)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 30 I = J - 1,1,-1
-                              IX = IX - INCX
-                              X(IX) = X(IX) - TEMP*A(I,J)
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/A(J,J)
-                          TEMP = X(J)
-                          DO 50 I = J + 1,N
-                              X(I) = X(I) - TEMP*A(I,J)
-   50                     CONTINUE
-                      END IF
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/A(J,J)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 70 I = J + 1,N
-                              IX = IX + INCX
-                              X(IX) = X(IX) - TEMP*A(I,J)
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := inv( A**T )*x  or  x := inv( A**H )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = 1,N
-                      TEMP = X(J)
-                      IF (NOCONJ) THEN
-                          DO 90 I = 1,J - 1
-                              TEMP = TEMP - A(I,J)*X(I)
-   90                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      ELSE
-                          DO 100 I = 1,J - 1
-                              TEMP = TEMP - CONJG(A(I,J))*X(I)
-  100                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(A(J,J))
-                      END IF
-                      X(J) = TEMP
-  110             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 140 J = 1,N
-                      IX = KX
-                      TEMP = X(JX)
-                      IF (NOCONJ) THEN
-                          DO 120 I = 1,J - 1
-                              TEMP = TEMP - A(I,J)*X(IX)
-                              IX = IX + INCX
-  120                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      ELSE
-                          DO 130 I = 1,J - 1
-                              TEMP = TEMP - CONJG(A(I,J))*X(IX)
-                              IX = IX + INCX
-  130                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(A(J,J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  140             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = N,1,-1
-                      TEMP = X(J)
-                      IF (NOCONJ) THEN
-                          DO 150 I = N,J + 1,-1
-                              TEMP = TEMP - A(I,J)*X(I)
-  150                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      ELSE
-                          DO 160 I = N,J + 1,-1
-                              TEMP = TEMP - CONJG(A(I,J))*X(I)
-  160                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(A(J,J))
-                      END IF
-                      X(J) = TEMP
-  170             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 200 J = N,1,-1
-                      IX = KX
-                      TEMP = X(JX)
-                      IF (NOCONJ) THEN
-                          DO 180 I = N,J + 1,-1
-                              TEMP = TEMP - A(I,J)*X(IX)
-                              IX = IX - INCX
-  180                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      ELSE
-                          DO 190 I = N,J + 1,-1
-                              TEMP = TEMP - CONJG(A(I,J))*X(IX)
-                              IX = IX - INCX
-  190                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/CONJG(A(J,J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of CTRSV .
-*
-      END
diff --git a/superlu/BLAS/dasum.f b/superlu/BLAS/dasum.f
deleted file mode 100644
index fd3d9104..00000000
--- a/superlu/BLAS/dasum.f
+++ /dev/null
@@ -1,111 +0,0 @@
-*> \brief \b DASUM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       DOUBLE PRECISION FUNCTION DASUM(N,DX,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION DX(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    DASUM takes the sum of the absolute values.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      DOUBLE PRECISION FUNCTION DASUM(N,DX,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION DX(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      DOUBLE PRECISION DTEMP
-      INTEGER I,M,MP1,NINCX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DABS,MOD
-*     ..
-      DASUM = 0.0d0
-      DTEMP = 0.0d0
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-*
-*
-*        clean-up loop
-*
-         M = MOD(N,6)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               DTEMP = DTEMP + DABS(DX(I))
-            END DO
-            IF (N.LT.6) THEN
-               DASUM = DTEMP
-               RETURN
-            END IF
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,6
-            DTEMP = DTEMP + DABS(DX(I)) + DABS(DX(I+1)) +
-     $              DABS(DX(I+2)) + DABS(DX(I+3)) +
-     $              DABS(DX(I+4)) + DABS(DX(I+5))
-         END DO
-      ELSE
-*
-*        code for increment not equal to 1
-*
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            DTEMP = DTEMP + DABS(DX(I))
-         END DO
-      END IF
-      DASUM = DTEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/daxpy.f b/superlu/BLAS/daxpy.f
deleted file mode 100644
index 5203e50c..00000000
--- a/superlu/BLAS/daxpy.f
+++ /dev/null
@@ -1,115 +0,0 @@
-*> \brief \b DAXPY
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION DA
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION DX(*),DY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    DAXPY constant times a vector plus a vector.
-*>    uses unrolled loops for increments equal to one.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DAXPY(N,DA,DX,INCX,DY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION DA
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION DX(*),DY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER I,IX,IY,M,MP1
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MOD
-*     ..
-      IF (N.LE.0) RETURN
-      IF (DA.EQ.0.0d0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-*
-*        clean-up loop
-*
-         M = MOD(N,4)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               DY(I) = DY(I) + DA*DX(I)
-            END DO
-         END IF
-         IF (N.LT.4) RETURN
-         MP1 = M + 1
-         DO I = MP1,N,4
-            DY(I) = DY(I) + DA*DX(I)
-            DY(I+1) = DY(I+1) + DA*DX(I+1)
-            DY(I+2) = DY(I+2) + DA*DX(I+2)
-            DY(I+3) = DY(I+3) + DA*DX(I+3)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-          DY(IY) = DY(IY) + DA*DX(IX)
-          IX = IX + INCX
-          IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/dcabs1.f b/superlu/BLAS/dcabs1.f
deleted file mode 100644
index d71fe7af..00000000
--- a/superlu/BLAS/dcabs1.f
+++ /dev/null
@@ -1,58 +0,0 @@
-*> \brief \b DCABS1
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       DOUBLE PRECISION FUNCTION DCABS1(Z)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 Z
-*       ..
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DCABS1 computes |Re(.)| + |Im(.)| of a double complex number
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level1
-*
-*  =====================================================================
-      DOUBLE PRECISION FUNCTION DCABS1(Z)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 Z
-*     ..
-*     ..
-*  =====================================================================
-*
-*     .. Intrinsic Functions ..
-      INTRINSIC ABS,DBLE,DIMAG
-*
-      DCABS1 = ABS(DBLE(Z)) + ABS(DIMAG(Z))
-      RETURN
-      END
diff --git a/superlu/BLAS/dcopy.f b/superlu/BLAS/dcopy.f
deleted file mode 100644
index bbc38a75..00000000
--- a/superlu/BLAS/dcopy.f
+++ /dev/null
@@ -1,115 +0,0 @@
-*> \brief \b DCOPY
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DCOPY(N,DX,INCX,DY,INCY)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION DX(*),DY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    DCOPY copies a vector, x, to a vector, y.
-*>    uses unrolled loops for increments equal to one.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DCOPY(N,DX,INCX,DY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION DX(*),DY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER I,IX,IY,M,MP1
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MOD
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-*
-*        clean-up loop
-*
-         M = MOD(N,7)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               DY(I) = DX(I)
-            END DO
-            IF (N.LT.7) RETURN
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,7
-            DY(I) = DX(I)
-            DY(I+1) = DX(I+1)
-            DY(I+2) = DX(I+2)
-            DY(I+3) = DX(I+3)
-            DY(I+4) = DX(I+4)
-            DY(I+5) = DX(I+5)
-            DY(I+6) = DX(I+6)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            DY(IY) = DX(IX)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/ddot.f b/superlu/BLAS/ddot.f
deleted file mode 100644
index 1aea8240..00000000
--- a/superlu/BLAS/ddot.f
+++ /dev/null
@@ -1,117 +0,0 @@
-*> \brief \b DDOT
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION DX(*),DY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    DDOT forms the dot product of two vectors.
-*>    uses unrolled loops for increments equal to one.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION DX(*),DY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      DOUBLE PRECISION DTEMP
-      INTEGER I,IX,IY,M,MP1
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MOD
-*     ..
-      DDOT = 0.0d0
-      DTEMP = 0.0d0
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-*
-*        clean-up loop
-*
-         M = MOD(N,5)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               DTEMP = DTEMP + DX(I)*DY(I)
-            END DO
-            IF (N.LT.5) THEN
-               DDOT=DTEMP
-            RETURN
-            END IF
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,5
-          DTEMP = DTEMP + DX(I)*DY(I) + DX(I+1)*DY(I+1) +
-     $            DX(I+2)*DY(I+2) + DX(I+3)*DY(I+3) + DX(I+4)*DY(I+4)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            DTEMP = DTEMP + DX(IX)*DY(IY)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      DDOT = DTEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/dgbmv.f b/superlu/BLAS/dgbmv.f
deleted file mode 100644
index 3769e18b..00000000
--- a/superlu/BLAS/dgbmv.f
+++ /dev/null
@@ -1,370 +0,0 @@
-*> \brief \b DGBMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA,BETA
-*       INTEGER INCX,INCY,KL,KU,LDA,M,N
-*       CHARACTER TRANS
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGBMV  performs one of the matrix-vector operations
-*>
-*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are vectors and A is an
-*> m by n band matrix, with kl sub-diagonals and ku super-diagonals.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
-*>
-*>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
-*>
-*>              TRANS = 'C' or 'c'   y := alpha*A**T*x + beta*y.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] KL
-*> \verbatim
-*>          KL is INTEGER
-*>           On entry, KL specifies the number of sub-diagonals of the
-*>           matrix A. KL must satisfy  0 .le. KL.
-*> \endverbatim
-*>
-*> \param[in] KU
-*> \verbatim
-*>          KU is INTEGER
-*>           On entry, KU specifies the number of super-diagonals of the
-*>           matrix A. KU must satisfy  0 .le. KU.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading ( kl + ku + 1 ) by n part of the
-*>           array A must contain the matrix of coefficients, supplied
-*>           column by column, with the leading diagonal of the matrix in
-*>           row ( ku + 1 ) of the array, the first super-diagonal
-*>           starting at position 2 in row ku, the first sub-diagonal
-*>           starting at position 1 in row ( ku + 2 ), and so on.
-*>           Elements in the array A that do not correspond to elements
-*>           in the band matrix (such as the top left ku by ku triangle)
-*>           are not referenced.
-*>           The following program segment will transfer a band matrix
-*>           from conventional full matrix storage to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    K = KU + 1 - J
-*>                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL )
-*>                       A( K + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( kl + ku + 1 ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array of DIMENSION at least
-*>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
-*>           Before entry, the incremented array Y must contain the
-*>           vector y. On exit, Y is overwritten by the updated vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA,BETA
-      INTEGER INCX,INCY,KL,KU,LDA,M,N
-      CHARACTER TRANS
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ONE,ZERO
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,IX,IY,J,JX,JY,K,KUP1,KX,KY,LENX,LENY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX,MIN
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +    .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 1
-      ELSE IF (M.LT.0) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (KL.LT.0) THEN
-          INFO = 4
-      ELSE IF (KU.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (KL+KU+1)) THEN
-          INFO = 8
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 10
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 13
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DGBMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
-*     up the start points in  X  and  Y.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          LENX = N
-          LENY = M
-      ELSE
-          LENX = M
-          LENY = N
-      END IF
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (LENX-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (LENY-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the band part of A.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,LENY
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,LENY
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,LENY
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,LENY
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      KUP1 = KU + 1
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  y := alpha*A*x + y.
-*
-          JX = KX
-          IF (INCY.EQ.1) THEN
-              DO 60 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  K = KUP1 - J
-                  DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
-                      Y(I) = Y(I) + TEMP*A(K+I,J)
-   50             CONTINUE
-                  JX = JX + INCX
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  IY = KY
-                  K = KUP1 - J
-                  DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
-                      Y(IY) = Y(IY) + TEMP*A(K+I,J)
-                      IY = IY + INCY
-   70             CONTINUE
-                  JX = JX + INCX
-                  IF (J.GT.KU) KY = KY + INCY
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y := alpha*A**T*x + y.
-*
-          JY = KY
-          IF (INCX.EQ.1) THEN
-              DO 100 J = 1,N
-                  TEMP = ZERO
-                  K = KUP1 - J
-                  DO 90 I = MAX(1,J-KU),MIN(M,J+KL)
-                      TEMP = TEMP + A(K+I,J)*X(I)
-   90             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  100         CONTINUE
-          ELSE
-              DO 120 J = 1,N
-                  TEMP = ZERO
-                  IX = KX
-                  K = KUP1 - J
-                  DO 110 I = MAX(1,J-KU),MIN(M,J+KL)
-                      TEMP = TEMP + A(K+I,J)*X(IX)
-                      IX = IX + INCX
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-                  IF (J.GT.KU) KX = KX + INCX
-  120         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DGBMV .
-*
-      END
diff --git a/superlu/BLAS/dgemm.f b/superlu/BLAS/dgemm.f
deleted file mode 100644
index 5c5a2ac2..00000000
--- a/superlu/BLAS/dgemm.f
+++ /dev/null
@@ -1,384 +0,0 @@
-*> \brief \b DGEMM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA,BETA
-*       INTEGER K,LDA,LDB,LDC,M,N
-*       CHARACTER TRANSA,TRANSB
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGEMM  performs one of the matrix-matrix operations
-*>
-*>    C := alpha*op( A )*op( B ) + beta*C,
-*>
-*> where  op( X ) is one of
-*>
-*>    op( X ) = X   or   op( X ) = X**T,
-*>
-*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
-*> an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSA = 'N' or 'n',  op( A ) = A.
-*>
-*>              TRANSA = 'T' or 't',  op( A ) = A**T.
-*>
-*>              TRANSA = 'C' or 'c',  op( A ) = A**T.
-*> \endverbatim
-*>
-*> \param[in] TRANSB
-*> \verbatim
-*>          TRANSB is CHARACTER*1
-*>           On entry, TRANSB specifies the form of op( B ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSB = 'N' or 'n',  op( B ) = B.
-*>
-*>              TRANSB = 'T' or 't',  op( B ) = B**T.
-*>
-*>              TRANSB = 'C' or 'c',  op( B ) = B**T.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies  the number  of rows  of the  matrix
-*>           op( A )  and of the  matrix  C.  M  must  be at least  zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N  specifies the number  of columns of the matrix
-*>           op( B ) and the number of columns of the matrix C. N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry,  K  specifies  the number of columns of the matrix
-*>           op( A ) and the number of rows of the matrix op( B ). K must
-*>           be at least  zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
-*>           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by m  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
-*>           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
-*>           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  n by k  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
-*>           LDB must be at least  max( 1, k ), otherwise  LDB must be at
-*>           least  max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is DOUBLE PRECISION array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n  matrix
-*>           ( alpha*op( A )*op( B ) + beta*C ).
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA,BETA
-      INTEGER K,LDA,LDB,LDC,M,N
-      CHARACTER TRANSA,TRANSB
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB
-      LOGICAL NOTA,NOTB
-*     ..
-*     .. Parameters ..
-      DOUBLE PRECISION ONE,ZERO
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*
-*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
-*     transposed and set  NROWA, NCOLA and  NROWB  as the number of rows
-*     and  columns of  A  and the  number of  rows  of  B  respectively.
-*
-      NOTA = LSAME(TRANSA,'N')
-      NOTB = LSAME(TRANSB,'N')
-      IF (NOTA) THEN
-          NROWA = M
-          NCOLA = K
-      ELSE
-          NROWA = K
-          NCOLA = M
-      END IF
-      IF (NOTB) THEN
-          NROWB = K
-      ELSE
-          NROWB = N
-      END IF
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND.
-     +    (.NOT.LSAME(TRANSA,'T'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND.
-     +         (.NOT.LSAME(TRANSB,'T'))) THEN
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 8
-      ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
-          INFO = 10
-      ELSE IF (LDC.LT.MAX(1,M)) THEN
-          INFO = 13
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DGEMM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And if  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (NOTB) THEN
-          IF (NOTA) THEN
-*
-*           Form  C := alpha*A*B + beta*C.
-*
-              DO 90 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 50 I = 1,M
-                          C(I,J) = ZERO
-   50                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 60 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-   60                 CONTINUE
-                  END IF
-                  DO 80 L = 1,K
-                      TEMP = ALPHA*B(L,J)
-                      DO 70 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-   70                 CONTINUE
-   80             CONTINUE
-   90         CONTINUE
-          ELSE
-*
-*           Form  C := alpha*A**T*B + beta*C
-*
-              DO 120 J = 1,N
-                  DO 110 I = 1,M
-                      TEMP = ZERO
-                      DO 100 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(L,J)
-  100                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  110             CONTINUE
-  120         CONTINUE
-          END IF
-      ELSE
-          IF (NOTA) THEN
-*
-*           Form  C := alpha*A*B**T + beta*C
-*
-              DO 170 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 130 I = 1,M
-                          C(I,J) = ZERO
-  130                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 140 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-  140                 CONTINUE
-                  END IF
-                  DO 160 L = 1,K
-                      TEMP = ALPHA*B(J,L)
-                      DO 150 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-  150                 CONTINUE
-  160             CONTINUE
-  170         CONTINUE
-          ELSE
-*
-*           Form  C := alpha*A**T*B**T + beta*C
-*
-              DO 200 J = 1,N
-                  DO 190 I = 1,M
-                      TEMP = ZERO
-                      DO 180 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(J,L)
-  180                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  190             CONTINUE
-  200         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DGEMM .
-*
-      END
diff --git a/superlu/BLAS/dgemv.f b/superlu/BLAS/dgemv.f
deleted file mode 100644
index dd14c350..00000000
--- a/superlu/BLAS/dgemv.f
+++ /dev/null
@@ -1,330 +0,0 @@
-*> \brief \b DGEMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA,BETA
-*       INTEGER INCX,INCY,LDA,M,N
-*       CHARACTER TRANS
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGEMV  performs one of the matrix-vector operations
-*>
-*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are vectors and A is an
-*> m by n matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
-*>
-*>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
-*>
-*>              TRANS = 'C' or 'c'   y := alpha*A**T*x + beta*y.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array of DIMENSION at least
-*>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
-*>           Before entry with BETA non-zero, the incremented array Y
-*>           must contain the vector y. On exit, Y is overwritten by the
-*>           updated vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA,BETA
-      INTEGER INCX,INCY,LDA,M,N
-      CHARACTER TRANS
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ONE,ZERO
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +    .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 1
-      ELSE IF (M.LT.0) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (LDA.LT.MAX(1,M)) THEN
-          INFO = 6
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 8
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DGEMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
-*     up the start points in  X  and  Y.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          LENX = N
-          LENY = M
-      ELSE
-          LENX = M
-          LENY = N
-      END IF
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (LENX-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (LENY-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,LENY
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,LENY
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,LENY
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,LENY
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  y := alpha*A*x + y.
-*
-          JX = KX
-          IF (INCY.EQ.1) THEN
-              DO 60 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  DO 50 I = 1,M
-                      Y(I) = Y(I) + TEMP*A(I,J)
-   50             CONTINUE
-                  JX = JX + INCX
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  IY = KY
-                  DO 70 I = 1,M
-                      Y(IY) = Y(IY) + TEMP*A(I,J)
-                      IY = IY + INCY
-   70             CONTINUE
-                  JX = JX + INCX
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y := alpha*A**T*x + y.
-*
-          JY = KY
-          IF (INCX.EQ.1) THEN
-              DO 100 J = 1,N
-                  TEMP = ZERO
-                  DO 90 I = 1,M
-                      TEMP = TEMP + A(I,J)*X(I)
-   90             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  100         CONTINUE
-          ELSE
-              DO 120 J = 1,N
-                  TEMP = ZERO
-                  IX = KX
-                  DO 110 I = 1,M
-                      TEMP = TEMP + A(I,J)*X(IX)
-                      IX = IX + INCX
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DGEMV .
-*
-      END
diff --git a/superlu/BLAS/dger.f b/superlu/BLAS/dger.f
deleted file mode 100644
index 289141e8..00000000
--- a/superlu/BLAS/dger.f
+++ /dev/null
@@ -1,227 +0,0 @@
-*> \brief \b DGER
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DGER(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA
-*       INTEGER INCX,INCY,LDA,M,N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DGER   performs the rank 1 operation
-*>
-*>    A := alpha*x*y**T + A,
-*>
-*> where alpha is a scalar, x is an m element vector, y is an n element
-*> vector and A is an m by n matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the m
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients. On exit, A is
-*>           overwritten by the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DGER(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA
-      INTEGER INCX,INCY,LDA,M,N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ZERO
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,IX,J,JY,KX
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (M.LT.0) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 7
-      ELSE IF (LDA.LT.MAX(1,M)) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DGER  ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (INCY.GT.0) THEN
-          JY = 1
-      ELSE
-          JY = 1 - (N-1)*INCY
-      END IF
-      IF (INCX.EQ.1) THEN
-          DO 20 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  DO 10 I = 1,M
-                      A(I,J) = A(I,J) + X(I)*TEMP
-   10             CONTINUE
-              END IF
-              JY = JY + INCY
-   20     CONTINUE
-      ELSE
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (M-1)*INCX
-          END IF
-          DO 40 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  IX = KX
-                  DO 30 I = 1,M
-                      A(I,J) = A(I,J) + X(IX)*TEMP
-                      IX = IX + INCX
-   30             CONTINUE
-              END IF
-              JY = JY + INCY
-   40     CONTINUE
-      END IF
-*
-      RETURN
-*
-*     End of DGER  .
-*
-      END
diff --git a/superlu/BLAS/dnrm2.f b/superlu/BLAS/dnrm2.f
deleted file mode 100644
index 0d7062fd..00000000
--- a/superlu/BLAS/dnrm2.f
+++ /dev/null
@@ -1,112 +0,0 @@
-*> \brief \b DNRM2
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       DOUBLE PRECISION FUNCTION DNRM2(N,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DNRM2 returns the euclidean norm of a vector via the function
-*> name, so that
-*>
-*>    DNRM2 := sqrt( x'*x )
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  -- This version written on 25-October-1982.
-*>     Modified on 14-October-1993 to inline the call to DLASSQ.
-*>     Sven Hammarling, Nag Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      DOUBLE PRECISION FUNCTION DNRM2(N,X,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ONE,ZERO
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION ABSXI,NORM,SCALE,SSQ
-      INTEGER IX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC ABS,SQRT
-*     ..
-      IF (N.LT.1 .OR. INCX.LT.1) THEN
-          NORM = ZERO
-      ELSE IF (N.EQ.1) THEN
-          NORM = ABS(X(1))
-      ELSE
-          SCALE = ZERO
-          SSQ = ONE
-*        The following loop is equivalent to this call to the LAPACK
-*        auxiliary routine:
-*        CALL DLASSQ( N, X, INCX, SCALE, SSQ )
-*
-          DO 10 IX = 1,1 + (N-1)*INCX,INCX
-              IF (X(IX).NE.ZERO) THEN
-                  ABSXI = ABS(X(IX))
-                  IF (SCALE.LT.ABSXI) THEN
-                      SSQ = ONE + SSQ* (SCALE/ABSXI)**2
-                      SCALE = ABSXI
-                  ELSE
-                      SSQ = SSQ + (ABSXI/SCALE)**2
-                  END IF
-              END IF
-   10     CONTINUE
-          NORM = SCALE*SQRT(SSQ)
-      END IF
-*
-      DNRM2 = NORM
-      RETURN
-*
-*     End of DNRM2.
-*
-      END
diff --git a/superlu/BLAS/drot.f b/superlu/BLAS/drot.f
deleted file mode 100644
index baaae5c9..00000000
--- a/superlu/BLAS/drot.f
+++ /dev/null
@@ -1,101 +0,0 @@
-*> \brief \b DROT
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DROT(N,DX,INCX,DY,INCY,C,S)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION C,S
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION DX(*),DY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    DROT applies a plane rotation.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DROT(N,DX,INCX,DY,INCY,C,S)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION C,S
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION DX(*),DY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      DOUBLE PRECISION DTEMP
-      INTEGER I,IX,IY
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*       code for both increments equal to 1
-*
-         DO I = 1,N
-            DTEMP = C*DX(I) + S*DY(I)
-            DY(I) = C*DY(I) - S*DX(I)
-            DX(I) = DTEMP
-         END DO
-      ELSE
-*
-*       code for unequal increments or equal increments not equal
-*         to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            DTEMP = C*DX(IX) + S*DY(IY)
-            DY(IY) = C*DY(IY) - S*DX(IX)
-            DX(IX) = DTEMP
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/drotg.f b/superlu/BLAS/drotg.f
deleted file mode 100644
index 85d04cd8..00000000
--- a/superlu/BLAS/drotg.f
+++ /dev/null
@@ -1,86 +0,0 @@
-*> \brief \b DROTG
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DROTG(DA,DB,C,S)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION C,DA,DB,S
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    DROTG construct givens plane rotation.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DROTG(DA,DB,C,S)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION C,DA,DB,S
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      DOUBLE PRECISION R,ROE,SCALE,Z
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DABS,DSIGN,DSQRT
-*     ..
-      ROE = DB
-      IF (DABS(DA).GT.DABS(DB)) ROE = DA
-      SCALE = DABS(DA) + DABS(DB)
-      IF (SCALE.EQ.0.0d0) THEN
-         C = 1.0d0
-         S = 0.0d0
-         R = 0.0d0
-         Z = 0.0d0
-      ELSE
-         R = SCALE*DSQRT((DA/SCALE)**2+ (DB/SCALE)**2)
-         R = DSIGN(1.0d0,ROE)*R
-         C = DA/R
-         S = DB/R
-         Z = 1.0d0
-         IF (DABS(DA).GT.DABS(DB)) Z = S
-         IF (DABS(DB).GE.DABS(DA) .AND. C.NE.0.0d0) Z = 1.0d0/C
-      END IF
-      DA = R
-      DB = Z
-      RETURN
-      END
diff --git a/superlu/BLAS/drotm.f b/superlu/BLAS/drotm.f
deleted file mode 100644
index b006dbd5..00000000
--- a/superlu/BLAS/drotm.f
+++ /dev/null
@@ -1,202 +0,0 @@
-*> \brief \b DROTM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DROTM(N,DX,INCX,DY,INCY,DPARAM)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION DPARAM(5),DX(*),DY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX
-*>
-*>    (DX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF DX ARE IN
-*>    (DY**T)
-*>
-*>    DX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE
-*>    LX = (-INCX)*N, AND SIMILARLY FOR SY USING LY AND INCY.
-*>    WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS..
-*>
-*>    DFLAG=-1.D0     DFLAG=0.D0        DFLAG=1.D0     DFLAG=-2.D0
-*>
-*>      (DH11  DH12)    (1.D0  DH12)    (DH11  1.D0)    (1.D0  0.D0)
-*>    H=(          )    (          )    (          )    (          )
-*>      (DH21  DH22),   (DH21  1.D0),   (-1.D0 DH22),   (0.D0  1.D0).
-*>    SEE DROTMG FOR A DESCRIPTION OF DATA STORAGE IN DPARAM.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>         number of elements in input vector(s)
-*> \endverbatim
-*>
-*> \param[in,out] DX
-*> \verbatim
-*>          DX is DOUBLE PRECISION array, dimension N
-*>         double precision vector with N elements
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>         storage spacing between elements of DX
-*> \endverbatim
-*>
-*> \param[in,out] DY
-*> \verbatim
-*>          DY is DOUBLE PRECISION array, dimension N
-*>         double precision vector with N elements
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>         storage spacing between elements of DY
-*> \endverbatim
-*>
-*> \param[in,out] DPARAM
-*> \verbatim
-*>          DPARAM is DOUBLE PRECISION array, dimension 5
-*>     DPARAM(1)=DFLAG
-*>     DPARAM(2)=DH11
-*>     DPARAM(3)=DH21
-*>     DPARAM(4)=DH12
-*>     DPARAM(5)=DH22
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level1
-*
-*  =====================================================================
-      SUBROUTINE DROTM(N,DX,INCX,DY,INCY,DPARAM)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION DPARAM(5),DX(*),DY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      DOUBLE PRECISION DFLAG,DH11,DH12,DH21,DH22,TWO,W,Z,ZERO
-      INTEGER I,KX,KY,NSTEPS
-*     ..
-*     .. Data statements ..
-      DATA ZERO,TWO/0.D0,2.D0/
-*     ..
-*
-      DFLAG = DPARAM(1)
-      IF (N.LE.0 .OR. (DFLAG+TWO.EQ.ZERO)) RETURN
-      IF (INCX.EQ.INCY.AND.INCX.GT.0) THEN
-*
-         NSTEPS = N*INCX
-         IF (DFLAG.LT.ZERO) THEN
-            DH11 = DPARAM(2)
-            DH12 = DPARAM(4)
-            DH21 = DPARAM(3)
-            DH22 = DPARAM(5)
-            DO I = 1,NSTEPS,INCX
-               W = DX(I)
-               Z = DY(I)
-               DX(I) = W*DH11 + Z*DH12
-               DY(I) = W*DH21 + Z*DH22
-            END DO
-         ELSE IF (DFLAG.EQ.ZERO) THEN
-            DH12 = DPARAM(4)
-            DH21 = DPARAM(3)
-            DO I = 1,NSTEPS,INCX
-               W = DX(I)
-               Z = DY(I)
-               DX(I) = W + Z*DH12
-               DY(I) = W*DH21 + Z
-            END DO
-         ELSE
-            DH11 = DPARAM(2)
-            DH22 = DPARAM(5)
-            DO I = 1,NSTEPS,INCX
-               W = DX(I)
-               Z = DY(I)
-               DX(I) = W*DH11 + Z
-               DY(I) = -W + DH22*Z
-            END DO
-         END IF
-      ELSE
-         KX = 1
-         KY = 1
-         IF (INCX.LT.0) KX = 1 + (1-N)*INCX
-         IF (INCY.LT.0) KY = 1 + (1-N)*INCY
-*
-         IF (DFLAG.LT.ZERO) THEN
-            DH11 = DPARAM(2)
-            DH12 = DPARAM(4)
-            DH21 = DPARAM(3)
-            DH22 = DPARAM(5)
-            DO I = 1,N
-               W = DX(KX)
-               Z = DY(KY)
-               DX(KX) = W*DH11 + Z*DH12
-               DY(KY) = W*DH21 + Z*DH22
-               KX = KX + INCX
-               KY = KY + INCY
-            END DO
-         ELSE IF (DFLAG.EQ.ZERO) THEN
-            DH12 = DPARAM(4)
-            DH21 = DPARAM(3)
-            DO I = 1,N
-               W = DX(KX)
-               Z = DY(KY)
-               DX(KX) = W + Z*DH12
-               DY(KY) = W*DH21 + Z
-               KX = KX + INCX
-               KY = KY + INCY
-            END DO
-         ELSE
-             DH11 = DPARAM(2)
-             DH22 = DPARAM(5)
-             DO I = 1,N
-                W = DX(KX)
-                Z = DY(KY)
-                DX(KX) = W*DH11 + Z
-                DY(KY) = -W + DH22*Z
-                KX = KX + INCX
-                KY = KY + INCY
-            END DO
-         END IF
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/drotmg.f b/superlu/BLAS/drotmg.f
deleted file mode 100644
index 1fb025fa..00000000
--- a/superlu/BLAS/drotmg.f
+++ /dev/null
@@ -1,251 +0,0 @@
-*> \brief \b DROTMG
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DROTMG(DD1,DD2,DX1,DY1,DPARAM)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION DD1,DD2,DX1,DY1
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION DPARAM(5)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS
-*>    THE SECOND COMPONENT OF THE 2-VECTOR  (DSQRT(DD1)*DX1,DSQRT(DD2)*>    
DY2)**T.
-*>    WITH DPARAM(1)=DFLAG, H HAS ONE OF THE FOLLOWING FORMS..
-*>
-*>    DFLAG=-1.D0     DFLAG=0.D0        DFLAG=1.D0     DFLAG=-2.D0
-*>
-*>      (DH11  DH12)    (1.D0  DH12)    (DH11  1.D0)    (1.D0  0.D0)
-*>    H=(          )    (          )    (          )    (          )
-*>      (DH21  DH22),   (DH21  1.D0),   (-1.D0 DH22),   (0.D0  1.D0).
-*>    LOCATIONS 2-4 OF DPARAM CONTAIN DH11, DH21, DH12, AND DH22
-*>    RESPECTIVELY. (VALUES OF 1.D0, -1.D0, OR 0.D0 IMPLIED BY THE
-*>    VALUE OF DPARAM(1) ARE NOT STORED IN DPARAM.)
-*>
-*>    THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE
-*>    INEXACT.  THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE
-*>    OF DD1 AND DD2.  ALL ACTUAL SCALING OF DATA IS DONE USING GAM.
-*>
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in,out] DD1
-*> \verbatim
-*>          DD1 is DOUBLE PRECISION
-*> \endverbatim
-*>
-*> \param[in,out] DD2
-*> \verbatim
-*>          DD2 is DOUBLE PRECISION
-*> \endverbatim
-*>
-*> \param[in,out] DX1
-*> \verbatim
-*>          DX1 is DOUBLE PRECISION
-*> \endverbatim
-*>
-*> \param[in] DY1
-*> \verbatim
-*>          DY1 is DOUBLE PRECISION
-*> \endverbatim
-*>
-*> \param[in,out] DPARAM
-*> \verbatim
-*>          DPARAM is DOUBLE PRECISION array, dimension 5
-*>     DPARAM(1)=DFLAG
-*>     DPARAM(2)=DH11
-*>     DPARAM(3)=DH21
-*>     DPARAM(4)=DH12
-*>     DPARAM(5)=DH22
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level1
-*
-*  =====================================================================
-      SUBROUTINE DROTMG(DD1,DD2,DX1,DY1,DPARAM)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION DD1,DD2,DX1,DY1
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION DPARAM(5)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      DOUBLE PRECISION DFLAG,DH11,DH12,DH21,DH22,DP1,DP2,DQ1,DQ2,DTEMP,
-     $                 DU,GAM,GAMSQ,ONE,RGAMSQ,TWO,ZERO
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DABS
-*     ..
-*     .. Data statements ..
-*
-      DATA ZERO,ONE,TWO/0.D0,1.D0,2.D0/
-      DATA GAM,GAMSQ,RGAMSQ/4096.D0,16777216.D0,5.9604645D-8/
-*     ..
-
-      IF (DD1.LT.ZERO) THEN
-*        GO ZERO-H-D-AND-DX1..
-         DFLAG = -ONE
-         DH11 = ZERO
-         DH12 = ZERO
-         DH21 = ZERO
-         DH22 = ZERO
-*
-         DD1 = ZERO
-         DD2 = ZERO
-         DX1 = ZERO
-      ELSE
-*        CASE-DD1-NONNEGATIVE
-         DP2 = DD2*DY1
-         IF (DP2.EQ.ZERO) THEN
-            DFLAG = -TWO
-            DPARAM(1) = DFLAG
-            RETURN
-         END IF
-*        REGULAR-CASE..
-         DP1 = DD1*DX1
-         DQ2 = DP2*DY1
-         DQ1 = DP1*DX1
-*
-         IF (DABS(DQ1).GT.DABS(DQ2)) THEN
-            DH21 = -DY1/DX1
-            DH12 = DP2/DP1
-*
-            DU = ONE - DH12*DH21
-*
-           IF (DU.GT.ZERO) THEN
-             DFLAG = ZERO
-             DD1 = DD1/DU
-             DD2 = DD2/DU
-             DX1 = DX1*DU
-           END IF
-         ELSE
-
-            IF (DQ2.LT.ZERO) THEN
-*              GO ZERO-H-D-AND-DX1..
-               DFLAG = -ONE
-               DH11 = ZERO
-               DH12 = ZERO
-               DH21 = ZERO
-               DH22 = ZERO
-*
-               DD1 = ZERO
-               DD2 = ZERO
-               DX1 = ZERO
-            ELSE
-               DFLAG = ONE
-               DH11 = DP1/DP2
-               DH22 = DX1/DY1
-               DU = ONE + DH11*DH22
-               DTEMP = DD2/DU
-               DD2 = DD1/DU
-               DD1 = DTEMP
-               DX1 = DY1*DU
-            END IF
-         END IF
-
-*     PROCEDURE..SCALE-CHECK
-         IF (DD1.NE.ZERO) THEN
-            DO WHILE ((DD1.LE.RGAMSQ) .OR. (DD1.GE.GAMSQ))
-               IF (DFLAG.EQ.ZERO) THEN
-                  DH11 = ONE
-                  DH22 = ONE
-                  DFLAG = -ONE
-               ELSE
-                  DH21 = -ONE
-                  DH12 = ONE
-                  DFLAG = -ONE
-               END IF
-               IF (DD1.LE.RGAMSQ) THEN
-                  DD1 = DD1*GAM**2
-                  DX1 = DX1/GAM
-                  DH11 = DH11/GAM
-                  DH12 = DH12/GAM
-               ELSE
-                  DD1 = DD1/GAM**2
-                  DX1 = DX1*GAM
-                  DH11 = DH11*GAM
-                  DH12 = DH12*GAM
-               END IF
-            ENDDO
-         END IF
-
-         IF (DD2.NE.ZERO) THEN
-            DO WHILE ( (DABS(DD2).LE.RGAMSQ) .OR. (DABS(DD2).GE.GAMSQ) )
-               IF (DFLAG.EQ.ZERO) THEN
-                  DH11 = ONE
-                  DH22 = ONE
-                  DFLAG = -ONE
-               ELSE
-                  DH21 = -ONE
-                  DH12 = ONE
-                  DFLAG = -ONE
-               END IF
-               IF (DABS(DD2).LE.RGAMSQ) THEN
-                  DD2 = DD2*GAM**2
-                  DH21 = DH21/GAM
-                  DH22 = DH22/GAM
-               ELSE
-                  DD2 = DD2/GAM**2
-                  DH21 = DH21*GAM
-                  DH22 = DH22*GAM
-               END IF
-            END DO
-         END IF
-
-      END IF
-
-      IF (DFLAG.LT.ZERO) THEN
-         DPARAM(2) = DH11
-         DPARAM(3) = DH21
-         DPARAM(4) = DH12
-         DPARAM(5) = DH22
-      ELSE IF (DFLAG.EQ.ZERO) THEN
-         DPARAM(3) = DH21
-         DPARAM(4) = DH12
-      ELSE
-         DPARAM(2) = DH11
-         DPARAM(5) = DH22
-      END IF
-
-      DPARAM(1) = DFLAG
-      RETURN
-      END
-
-
-
-
diff --git a/superlu/BLAS/dsbmv.f b/superlu/BLAS/dsbmv.f
deleted file mode 100644
index aea12134..00000000
--- a/superlu/BLAS/dsbmv.f
+++ /dev/null
@@ -1,375 +0,0 @@
-*> \brief \b DSBMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DSBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA,BETA
-*       INTEGER INCX,INCY,K,LDA,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DSBMV  performs the matrix-vector  operation
-*>
-*>    y := alpha*A*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n symmetric band matrix, with k super-diagonals.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the band matrix A is being supplied as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  being supplied.
-*>
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  being supplied.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry, K specifies the number of super-diagonals of the
-*>           matrix A. K must satisfy  0 .le. K.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the symmetric matrix, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer the upper
-*>           triangular part of a symmetric band matrix from conventional
-*>           full matrix storage to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the symmetric matrix, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer the lower
-*>           triangular part of a symmetric band matrix from conventional
-*>           full matrix storage to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the
-*>           vector y. On exit, Y is overwritten by the updated vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DSBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA,BETA
-      INTEGER INCX,INCY,K,LDA,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ONE,ZERO
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,KPLUS1,KX,KY,L
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX,MIN
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (K.LT.0) THEN
-          INFO = 3
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 6
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 8
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DSBMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     Set up the start points in  X  and  Y.
-*
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of the array A
-*     are accessed sequentially with one pass through A.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  y  when upper triangle of A is stored.
-*
-          KPLUS1 = K + 1
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  L = KPLUS1 - J
-                  DO 50 I = MAX(1,J-K),J - 1
-                      Y(I) = Y(I) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + A(L+I,J)*X(I)
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*A(KPLUS1,J) + ALPHA*TEMP2
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  L = KPLUS1 - J
-                  DO 70 I = MAX(1,J-K),J - 1
-                      Y(IY) = Y(IY) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + A(L+I,J)*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*A(KPLUS1,J) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  IF (J.GT.K) THEN
-                      KX = KX + INCX
-                      KY = KY + INCY
-                  END IF
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y  when lower triangle of A is stored.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*A(1,J)
-                  L = 1 - J
-                  DO 90 I = J + 1,MIN(N,J+K)
-                      Y(I) = Y(I) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + A(L+I,J)*X(I)
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*A(1,J)
-                  L = 1 - J
-                  IX = JX
-                  IY = JY
-                  DO 110 I = J + 1,MIN(N,J+K)
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + A(L+I,J)*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DSBMV .
-*
-      END
diff --git a/superlu/BLAS/dscal.f b/superlu/BLAS/dscal.f
deleted file mode 100644
index 8bbfec6f..00000000
--- a/superlu/BLAS/dscal.f
+++ /dev/null
@@ -1,110 +0,0 @@
-*> \brief \b DSCAL
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DSCAL(N,DA,DX,INCX)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION DA
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION DX(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    DSCAL scales a vector by a constant.
-*>    uses unrolled loops for increment equal to one.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DSCAL(N,DA,DX,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION DA
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION DX(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER I,M,MP1,NINCX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MOD
-*     ..
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*
-*        code for increment equal to 1
-*
-*
-*        clean-up loop
-*
-         M = MOD(N,5)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               DX(I) = DA*DX(I)
-            END DO
-            IF (N.LT.5) RETURN
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,5
-            DX(I) = DA*DX(I)
-            DX(I+1) = DA*DX(I+1)
-            DX(I+2) = DA*DX(I+2)
-            DX(I+3) = DA*DX(I+3)
-            DX(I+4) = DA*DX(I+4)
-         END DO
-      ELSE
-*
-*        code for increment not equal to 1
-*
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            DX(I) = DA*DX(I)
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/dsdot.f b/superlu/BLAS/dsdot.f
deleted file mode 100644
index f9cb4980..00000000
--- a/superlu/BLAS/dsdot.f
+++ /dev/null
@@ -1,172 +0,0 @@
-*> \brief \b DSDOT
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       DOUBLE PRECISION FUNCTION DSDOT(N,SX,INCX,SY,INCY)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*),SY(*)
-*       ..
-*
-*    AUTHORS
-*    =======
-*    Lawson, C. L., (JPL), Hanson, R. J., (SNLA),
-*    Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL)
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> Compute the inner product of two vectors with extended
-*> precision accumulation and result.
-*>
-*> Returns D.P. dot product accumulated in D.P., for S.P. SX and SY
-*> DSDOT = sum for I = 0 to N-1 of  SX(LX+I*INCX) * SY(LY+I*INCY),
-*> where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
-*> defined in a similar way using INCY.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>         number of elements in input vector(s)
-*> \endverbatim
-*>
-*> \param[in] SX
-*> \verbatim
-*>          SX is REAL array, dimension(N)
-*>         single precision vector with N elements
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>          storage spacing between elements of SX
-*> \endverbatim
-*>
-*> \param[in] SY
-*> \verbatim
-*>          SY is REAL array, dimension(N)
-*>         single precision vector with N elements
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>         storage spacing between elements of SY
-*> \endverbatim
-*>
-*> \result DSDOT
-*> \verbatim
-*>          DSDOT is DOUBLE PRECISION
-*>         DSDOT  double precision dot product (zero if N.LE.0)
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*> \endverbatim
-*
-*> \par References:
-*  ================
-*>
-*> \verbatim
-*>
-*>
-*>  C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T.
-*>  Krogh, Basic linear algebra subprograms for Fortran
-*>  usage, Algorithm No. 539, Transactions on Mathematical
-*>  Software 5, 3 (September 1979), pp. 308-323.
-*>
-*>  REVISION HISTORY  (YYMMDD)
-*>
-*>  791001  DATE WRITTEN
-*>  890831  Modified array declarations.  (WRB)
-*>  890831  REVISION DATE from Version 3.2
-*>  891214  Prologue converted to Version 4.0 format.  (BAB)
-*>  920310  Corrected definition of LX in DESCRIPTION.  (WRB)
-*>  920501  Reformatted the REFERENCES section.  (WRB)
-*>  070118  Reformat to LAPACK style (JL)
-*> \endverbatim
-*>
-*  =====================================================================
-      DOUBLE PRECISION FUNCTION DSDOT(N,SX,INCX,SY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*),SY(*)
-*     ..
-*
-*  Authors:
-*  ========
-*  Lawson, C. L., (JPL), Hanson, R. J., (SNLA),
-*  Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL)
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER I,KX,KY,NS
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DBLE
-*     ..
-      DSDOT = 0.0D0
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.INCY .AND. INCX.GT.0) THEN
-*
-*     Code for equal, positive, non-unit increments.
-*
-         NS = N*INCX
-         DO I = 1,NS,INCX
-            DSDOT = DSDOT + DBLE(SX(I))*DBLE(SY(I))
-         END DO
-      ELSE
-*
-*     Code for unequal or nonpositive increments.
-*
-         KX = 1
-         KY = 1
-         IF (INCX.LT.0) KX = 1 + (1-N)*INCX
-         IF (INCY.LT.0) KY = 1 + (1-N)*INCY
-         DO I = 1,N
-            DSDOT = DSDOT + DBLE(SX(KX))*DBLE(SY(KY))
-            KX = KX + INCX
-            KY = KY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/dspmv.f b/superlu/BLAS/dspmv.f
deleted file mode 100644
index 72a28fed..00000000
--- a/superlu/BLAS/dspmv.f
+++ /dev/null
@@ -1,331 +0,0 @@
-*> \brief \b DSPMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DSPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA,BETA
-*       INTEGER INCX,INCY,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION AP(*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DSPMV  performs the matrix-vector operation
-*>
-*>    y := alpha*A*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n symmetric matrix, supplied in packed form.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] AP
-*> \verbatim
-*>          AP is DOUBLE PRECISION array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y. On exit, Y is overwritten by the updated
-*>           vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DSPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA,BETA
-      INTEGER INCX,INCY,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION AP(*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ONE,ZERO
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 6
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DSPMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     Set up the start points in  X  and  Y.
-*
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  y  when AP contains the upper triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  K = KK
-                  DO 50 I = 1,J - 1
-                      Y(I) = Y(I) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + AP(K)*X(I)
-                      K = K + 1
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*AP(KK+J-1) + ALPHA*TEMP2
-                  KK = KK + J
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  DO 70 K = KK,KK + J - 2
-                      Y(IY) = Y(IY) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + AP(K)*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*AP(KK+J-1) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + J
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y  when AP contains the lower triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*AP(KK)
-                  K = KK + 1
-                  DO 90 I = J + 1,N
-                      Y(I) = Y(I) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + AP(K)*X(I)
-                      K = K + 1
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-                  KK = KK + (N-J+1)
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*AP(KK)
-                  IX = JX
-                  IY = JY
-                  DO 110 K = KK + 1,KK + N - J
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + AP(K)*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + (N-J+1)
-  120         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DSPMV .
-*
-      END
diff --git a/superlu/BLAS/dspr.f b/superlu/BLAS/dspr.f
deleted file mode 100644
index e89f87d4..00000000
--- a/superlu/BLAS/dspr.f
+++ /dev/null
@@ -1,261 +0,0 @@
-*> \brief \b DSPR
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DSPR(UPLO,N,ALPHA,X,INCX,AP)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA
-*       INTEGER INCX,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION AP(*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DSPR    performs the symmetric rank 1 operation
-*>
-*>    A := alpha*x*x**T + A,
-*>
-*> where alpha is a real scalar, x is an n element vector and A is an
-*> n by n symmetric matrix, supplied in packed form.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] AP
-*> \verbatim
-*>          AP is DOUBLE PRECISION array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the upper triangular part of the
-*>           updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the lower triangular part of the
-*>           updated matrix.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DSPR(UPLO,N,ALPHA,X,INCX,AP)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA
-      INTEGER INCX,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION AP(*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ZERO
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,IX,J,JX,K,KK,KX
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DSPR  ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Set the start point in X if the increment is not unity.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-*
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  A  when upper triangle is stored in AP.
-*
-          IF (INCX.EQ.1) THEN
-              DO 20 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*X(J)
-                      K = KK
-                      DO 10 I = 1,J
-                          AP(K) = AP(K) + X(I)*TEMP
-                          K = K + 1
-   10                 CONTINUE
-                  END IF
-                  KK = KK + J
-   20         CONTINUE
-          ELSE
-              JX = KX
-              DO 40 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IX = KX
-                      DO 30 K = KK,KK + J - 1
-                          AP(K) = AP(K) + X(IX)*TEMP
-                          IX = IX + INCX
-   30                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  KK = KK + J
-   40         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  A  when lower triangle is stored in AP.
-*
-          IF (INCX.EQ.1) THEN
-              DO 60 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*X(J)
-                      K = KK
-                      DO 50 I = J,N
-                          AP(K) = AP(K) + X(I)*TEMP
-                          K = K + 1
-   50                 CONTINUE
-                  END IF
-                  KK = KK + N - J + 1
-   60         CONTINUE
-          ELSE
-              JX = KX
-              DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IX = JX
-                      DO 70 K = KK,KK + N - J
-                          AP(K) = AP(K) + X(IX)*TEMP
-                          IX = IX + INCX
-   70                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  KK = KK + N - J + 1
-   80         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DSPR  .
-*
-      END
diff --git a/superlu/BLAS/dspr2.f b/superlu/BLAS/dspr2.f
deleted file mode 100644
index 4cd416f5..00000000
--- a/superlu/BLAS/dspr2.f
+++ /dev/null
@@ -1,296 +0,0 @@
-*> \brief \b DSPR2
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DSPR2(UPLO,N,ALPHA,X,INCX,Y,INCY,AP)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA
-*       INTEGER INCX,INCY,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION AP(*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DSPR2  performs the symmetric rank 2 operation
-*>
-*>    A := alpha*x*y**T + alpha*y*x**T + A,
-*>
-*> where alpha is a scalar, x and y are n element vectors and A is an
-*> n by n symmetric matrix, supplied in packed form.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] AP
-*> \verbatim
-*>          AP is DOUBLE PRECISION array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the upper triangular part of the
-*>           updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the lower triangular part of the
-*>           updated matrix.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DSPR2(UPLO,N,ALPHA,X,INCX,Y,INCY,AP)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA
-      INTEGER INCX,INCY,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION AP(*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ZERO
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DSPR2 ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Set up the start points in X and Y if the increments are not both
-*     unity.
-*
-      IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (N-1)*INCX
-          END IF
-          IF (INCY.GT.0) THEN
-              KY = 1
-          ELSE
-              KY = 1 - (N-1)*INCY
-          END IF
-          JX = KX
-          JY = KY
-      END IF
-*
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-*
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  A  when upper triangle is stored in AP.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 20 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(J)
-                      TEMP2 = ALPHA*X(J)
-                      K = KK
-                      DO 10 I = 1,J
-                          AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2
-                          K = K + 1
-   10                 CONTINUE
-                  END IF
-                  KK = KK + J
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(JY)
-                      TEMP2 = ALPHA*X(JX)
-                      IX = KX
-                      IY = KY
-                      DO 30 K = KK,KK + J - 1
-                          AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   30                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + J
-   40         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  A  when lower triangle is stored in AP.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(J)
-                      TEMP2 = ALPHA*X(J)
-                      K = KK
-                      DO 50 I = J,N
-                          AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2
-                          K = K + 1
-   50                 CONTINUE
-                  END IF
-                  KK = KK + N - J + 1
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(JY)
-                      TEMP2 = ALPHA*X(JX)
-                      IX = JX
-                      IY = JY
-                      DO 70 K = KK,KK + N - J
-                          AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   70                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + N - J + 1
-   80         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DSPR2 .
-*
-      END
diff --git a/superlu/BLAS/dswap.f b/superlu/BLAS/dswap.f
deleted file mode 100644
index 5bd8f7d2..00000000
--- a/superlu/BLAS/dswap.f
+++ /dev/null
@@ -1,122 +0,0 @@
-*> \brief \b DSWAP
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DSWAP(N,DX,INCX,DY,INCY)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION DX(*),DY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    interchanges two vectors.
-*>    uses unrolled loops for increments equal one.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DSWAP(N,DX,INCX,DY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION DX(*),DY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      DOUBLE PRECISION DTEMP
-      INTEGER I,IX,IY,M,MP1
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MOD
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*       code for both increments equal to 1
-*
-*
-*       clean-up loop
-*
-         M = MOD(N,3)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               DTEMP = DX(I)
-               DX(I) = DY(I)
-               DY(I) = DTEMP
-            END DO
-            IF (N.LT.3) RETURN
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,3
-            DTEMP = DX(I)
-            DX(I) = DY(I)
-            DY(I) = DTEMP
-            DTEMP = DX(I+1)
-            DX(I+1) = DY(I+1)
-            DY(I+1) = DTEMP
-            DTEMP = DX(I+2)
-            DX(I+2) = DY(I+2)
-            DY(I+2) = DTEMP
-         END DO
-      ELSE
-*
-*       code for unequal increments or equal increments not equal
-*         to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            DTEMP = DX(IX)
-            DX(IX) = DY(IY)
-            DY(IY) = DTEMP
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/dsymm.f b/superlu/BLAS/dsymm.f
deleted file mode 100644
index 77c797ea..00000000
--- a/superlu/BLAS/dsymm.f
+++ /dev/null
@@ -1,367 +0,0 @@
-*> \brief \b DSYMM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DSYMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA,BETA
-*       INTEGER LDA,LDB,LDC,M,N
-*       CHARACTER SIDE,UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DSYMM  performs one of the matrix-matrix operations
-*>
-*>    C := alpha*A*B + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*B*A + beta*C,
-*>
-*> where alpha and beta are scalars,  A is a symmetric matrix and  B and
-*> C are  m by n matrices.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE  specifies whether  the  symmetric matrix  A
-*>           appears on the  left or right  in the  operation as follows:
-*>
-*>              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
-*>
-*>              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of  the  symmetric  matrix   A  is  to  be
-*>           referenced as follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of the
-*>                                  symmetric matrix is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of the
-*>                                  symmetric matrix is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies the number of rows of the matrix  C.
-*>           M  must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix C.
-*>           N  must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
-*>           m  when  SIDE = 'L' or 'l'  and is  n otherwise.
-*>           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
-*>           the array  A  must contain the  symmetric matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading m by m upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  symmetric matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  m by m  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  symmetric
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*>           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
-*>           the array  A  must contain the  symmetric matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading n by n upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  symmetric matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  n by n  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  symmetric
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least  max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is DOUBLE PRECISION array of DIMENSION ( LDB, n ).
-*>           Before entry, the leading  m by n part of the array  B  must
-*>           contain the matrix B.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is DOUBLE PRECISION array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n updated
-*>           matrix.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DSYMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA,BETA
-      INTEGER LDA,LDB,LDC,M,N
-      CHARACTER SIDE,UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP1,TEMP2
-      INTEGER I,INFO,J,K,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      DOUBLE PRECISION ONE,ZERO
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*
-*     Set NROWA as the number of rows of A.
-*
-      IF (LSAME(SIDE,'L')) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF ((.NOT.LSAME(SIDE,'L')) .AND. (.NOT.LSAME(SIDE,'R'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDB.LT.MAX(1,M)) THEN
-          INFO = 9
-      ELSE IF (LDC.LT.MAX(1,M)) THEN
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DSYMM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(SIDE,'L')) THEN
-*
-*        Form  C := alpha*A*B + beta*C.
-*
-          IF (UPPER) THEN
-              DO 70 J = 1,N
-                  DO 60 I = 1,M
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 50 K = 1,I - 1
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*A(K,I)
-   50                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) +
-     +                             ALPHA*TEMP2
-                      END IF
-   60             CONTINUE
-   70         CONTINUE
-          ELSE
-              DO 100 J = 1,N
-                  DO 90 I = M,1,-1
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 80 K = I + 1,M
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*A(K,I)
-   80                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) +
-     +                             ALPHA*TEMP2
-                      END IF
-   90             CONTINUE
-  100         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*B*A + beta*C.
-*
-          DO 170 J = 1,N
-              TEMP1 = ALPHA*A(J,J)
-              IF (BETA.EQ.ZERO) THEN
-                  DO 110 I = 1,M
-                      C(I,J) = TEMP1*B(I,J)
-  110             CONTINUE
-              ELSE
-                  DO 120 I = 1,M
-                      C(I,J) = BETA*C(I,J) + TEMP1*B(I,J)
-  120             CONTINUE
-              END IF
-              DO 140 K = 1,J - 1
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(K,J)
-                  ELSE
-                      TEMP1 = ALPHA*A(J,K)
-                  END IF
-                  DO 130 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  130             CONTINUE
-  140         CONTINUE
-              DO 160 K = J + 1,N
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(J,K)
-                  ELSE
-                      TEMP1 = ALPHA*A(K,J)
-                  END IF
-                  DO 150 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  150             CONTINUE
-  160         CONTINUE
-  170     CONTINUE
-      END IF
-*
-      RETURN
-*
-*     End of DSYMM .
-*
-      END
diff --git a/superlu/BLAS/dsymv.f b/superlu/BLAS/dsymv.f
deleted file mode 100644
index af2dfd2a..00000000
--- a/superlu/BLAS/dsymv.f
+++ /dev/null
@@ -1,333 +0,0 @@
-*> \brief \b DSYMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DSYMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA,BETA
-*       INTEGER INCX,INCY,LDA,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DSYMV  performs the matrix-vector  operation
-*>
-*>    y := alpha*A*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n symmetric matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the symmetric matrix and the strictly
-*>           lower triangular part of A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the symmetric matrix and the strictly
-*>           upper triangular part of A is not referenced.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y. On exit, Y is overwritten by the updated
-*>           vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DSYMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA,BETA
-      INTEGER INCX,INCY,LDA,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ONE,ZERO
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 5
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 7
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DSYMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     Set up the start points in  X  and  Y.
-*
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  y  when A is stored in upper triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  DO 50 I = 1,J - 1
-                      Y(I) = Y(I) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + A(I,J)*X(I)
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*A(J,J) + ALPHA*TEMP2
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  DO 70 I = 1,J - 1
-                      Y(IY) = Y(IY) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + A(I,J)*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*A(J,J) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y  when A is stored in lower triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*A(J,J)
-                  DO 90 I = J + 1,N
-                      Y(I) = Y(I) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + A(I,J)*X(I)
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*A(J,J)
-                  IX = JX
-                  IY = JY
-                  DO 110 I = J + 1,N
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + A(I,J)*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DSYMV .
-*
-      END
diff --git a/superlu/BLAS/dsyr.f b/superlu/BLAS/dsyr.f
deleted file mode 100644
index c998ee82..00000000
--- a/superlu/BLAS/dsyr.f
+++ /dev/null
@@ -1,263 +0,0 @@
-*> \brief \b DSYR
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DSYR(UPLO,N,ALPHA,X,INCX,A,LDA)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA
-*       INTEGER INCX,LDA,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DSYR   performs the symmetric rank 1 operation
-*>
-*>    A := alpha*x*x**T + A,
-*>
-*> where alpha is a real scalar, x is an n element vector and A is an
-*> n by n symmetric matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the symmetric matrix and the strictly
-*>           lower triangular part of A is not referenced. On exit, the
-*>           upper triangular part of the array A is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the symmetric matrix and the strictly
-*>           upper triangular part of A is not referenced. On exit, the
-*>           lower triangular part of the array A is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DSYR(UPLO,N,ALPHA,X,INCX,A,LDA)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA
-      INTEGER INCX,LDA,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ZERO
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,IX,J,JX,KX
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DSYR  ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Set the start point in X if the increment is not unity.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-*
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  A  when A is stored in upper triangle.
-*
-          IF (INCX.EQ.1) THEN
-              DO 20 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*X(J)
-                      DO 10 I = 1,J
-                          A(I,J) = A(I,J) + X(I)*TEMP
-   10                 CONTINUE
-                  END IF
-   20         CONTINUE
-          ELSE
-              JX = KX
-              DO 40 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IX = KX
-                      DO 30 I = 1,J
-                          A(I,J) = A(I,J) + X(IX)*TEMP
-                          IX = IX + INCX
-   30                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-   40         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  A  when A is stored in lower triangle.
-*
-          IF (INCX.EQ.1) THEN
-              DO 60 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*X(J)
-                      DO 50 I = J,N
-                          A(I,J) = A(I,J) + X(I)*TEMP
-   50                 CONTINUE
-                  END IF
-   60         CONTINUE
-          ELSE
-              JX = KX
-              DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IX = JX
-                      DO 70 I = J,N
-                          A(I,J) = A(I,J) + X(IX)*TEMP
-                          IX = IX + INCX
-   70                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-   80         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DSYR  .
-*
-      END
diff --git a/superlu/BLAS/dsyr2.f b/superlu/BLAS/dsyr2.f
deleted file mode 100644
index 8bfa5fe0..00000000
--- a/superlu/BLAS/dsyr2.f
+++ /dev/null
@@ -1,298 +0,0 @@
-*> \brief \b DSYR2
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DSYR2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA
-*       INTEGER INCX,INCY,LDA,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DSYR2  performs the symmetric rank 2 operation
-*>
-*>    A := alpha*x*y**T + alpha*y*x**T + A,
-*>
-*> where alpha is a scalar, x and y are n element vectors and A is an n
-*> by n symmetric matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the symmetric matrix and the strictly
-*>           lower triangular part of A is not referenced. On exit, the
-*>           upper triangular part of the array A is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the symmetric matrix and the strictly
-*>           upper triangular part of A is not referenced. On exit, the
-*>           lower triangular part of the array A is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DSYR2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA
-      INTEGER INCX,INCY,LDA,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ZERO
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 7
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DSYR2 ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Set up the start points in X and Y if the increments are not both
-*     unity.
-*
-      IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (N-1)*INCX
-          END IF
-          IF (INCY.GT.0) THEN
-              KY = 1
-          ELSE
-              KY = 1 - (N-1)*INCY
-          END IF
-          JX = KX
-          JY = KY
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-*
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  A  when A is stored in the upper triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 20 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(J)
-                      TEMP2 = ALPHA*X(J)
-                      DO 10 I = 1,J
-                          A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2
-   10                 CONTINUE
-                  END IF
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(JY)
-                      TEMP2 = ALPHA*X(JX)
-                      IX = KX
-                      IY = KY
-                      DO 30 I = 1,J
-                          A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   30                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-   40         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  A  when A is stored in the lower triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(J)
-                      TEMP2 = ALPHA*X(J)
-                      DO 50 I = J,N
-                          A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2
-   50                 CONTINUE
-                  END IF
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(JY)
-                      TEMP2 = ALPHA*X(JX)
-                      IX = JX
-                      IY = JY
-                      DO 70 I = J,N
-                          A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   70                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-   80         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DSYR2 .
-*
-      END
diff --git a/superlu/BLAS/dsyr2k.f b/superlu/BLAS/dsyr2k.f
deleted file mode 100644
index 6dd7ca29..00000000
--- a/superlu/BLAS/dsyr2k.f
+++ /dev/null
@@ -1,399 +0,0 @@
-*> \brief \b DSYR2K
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA,BETA
-*       INTEGER K,LDA,LDB,LDC,N
-*       CHARACTER TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DSYR2K  performs one of the symmetric rank 2k operations
-*>
-*>    C := alpha*A*B**T + alpha*B*A**T + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*A**T*B + alpha*B**T*A + beta*C,
-*>
-*> where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
-*> and  A and B  are  n by k  matrices  in the  first  case  and  k by n
-*> matrices in the second case.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   C := alpha*A*B**T + alpha*B*A**T +
-*>                                        beta*C.
-*>
-*>              TRANS = 'T' or 't'   C := alpha*A**T*B + alpha*B**T*A +
-*>                                        beta*C.
-*>
-*>              TRANS = 'C' or 'c'   C := alpha*A**T*B + alpha*B**T*A +
-*>                                        beta*C.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns  of the  matrices  A and B,  and on  entry  with
-*>           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
-*>           of rows of the matrices  A and B.  K must be at least  zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  k by n  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDB must be at least  max( 1, n ), otherwise  LDB must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is DOUBLE PRECISION array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA,BETA
-      INTEGER K,LDA,LDB,LDC,N
-      CHARACTER TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP1,TEMP2
-      INTEGER I,INFO,J,L,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      DOUBLE PRECISION ONE,ZERO
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*
-*     Test the input parameters.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND.
-     +         (.NOT.LSAME(TRANS,'T')) .AND.
-     +         (.NOT.LSAME(TRANS,'C'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDB.LT.MAX(1,NROWA)) THEN
-          INFO = 9
-      ELSE IF (LDC.LT.MAX(1,N)) THEN
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DSYR2K',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 I = J,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  C := alpha*A*B**T + alpha*B*A**T + C.
-*
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                  END IF
-                  DO 120 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*B(J,L)
-                          TEMP2 = ALPHA*A(J,L)
-                          DO 110 I = 1,J
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  110                     CONTINUE
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  END IF
-                  DO 170 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*B(J,L)
-                          TEMP2 = ALPHA*A(J,L)
-                          DO 160 I = J,N
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*A**T*B + alpha*B**T*A + C.
-*
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 190 L = 1,K
-                          TEMP1 = TEMP1 + A(L,I)*B(L,J)
-                          TEMP2 = TEMP2 + B(L,I)*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                             ALPHA*TEMP2
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 220 L = 1,K
-                          TEMP1 = TEMP1 + A(L,I)*B(L,J)
-                          TEMP2 = TEMP2 + B(L,I)*A(L,J)
-  220                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                             ALPHA*TEMP2
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DSYR2K.
-*
-      END
diff --git a/superlu/BLAS/dsyrk.f b/superlu/BLAS/dsyrk.f
deleted file mode 100644
index bd70dfba..00000000
--- a/superlu/BLAS/dsyrk.f
+++ /dev/null
@@ -1,364 +0,0 @@
-*> \brief \b DSYRK
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA,BETA
-*       INTEGER K,LDA,LDC,N
-*       CHARACTER TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DSYRK  performs one of the symmetric rank k operations
-*>
-*>    C := alpha*A*A**T + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*A**T*A + beta*C,
-*>
-*> where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
-*> and  A  is an  n by k  matrix in the first case and a  k by n  matrix
-*> in the second case.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   C := alpha*A*A**T + beta*C.
-*>
-*>              TRANS = 'T' or 't'   C := alpha*A**T*A + beta*C.
-*>
-*>              TRANS = 'C' or 'c'   C := alpha*A**T*A + beta*C.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns   of  the   matrix   A,   and  on   entry   with
-*>           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
-*>           of rows of the matrix  A.  K must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is DOUBLE PRECISION array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA,BETA
-      INTEGER K,LDA,LDC,N
-      CHARACTER TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,J,L,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      DOUBLE PRECISION ONE,ZERO
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*
-*     Test the input parameters.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND.
-     +         (.NOT.LSAME(TRANS,'T')) .AND.
-     +         (.NOT.LSAME(TRANS,'C'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDC.LT.MAX(1,N)) THEN
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DSYRK ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 I = J,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  C := alpha*A*A**T + beta*C.
-*
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                  END IF
-                  DO 120 L = 1,K
-                      IF (A(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*A(J,L)
-                          DO 110 I = 1,J
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  110                     CONTINUE
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  END IF
-                  DO 170 L = 1,K
-                      IF (A(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*A(J,L)
-                          DO 160 I = J,N
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*A**T*A + beta*C.
-*
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP = ZERO
-                      DO 190 L = 1,K
-                          TEMP = TEMP + A(L,I)*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP = ZERO
-                      DO 220 L = 1,K
-                          TEMP = TEMP + A(L,I)*A(L,J)
-  220                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DSYRK .
-*
-      END
diff --git a/superlu/BLAS/dtbmv.f b/superlu/BLAS/dtbmv.f
deleted file mode 100644
index 20dd83ea..00000000
--- a/superlu/BLAS/dtbmv.f
+++ /dev/null
@@ -1,398 +0,0 @@
-*> \brief \b DTBMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DTBMV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,K,LDA,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DTBMV  performs one of the matrix-vector operations
-*>
-*>    x := A*x,   or   x := A**T*x,
-*>
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular band matrix, with ( k + 1 ) diagonals.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>
-*>              TRANS = 'C' or 'c'   x := A**T*x.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with UPLO = 'U' or 'u', K specifies the number of
-*>           super-diagonals of the matrix A.
-*>           On entry with UPLO = 'L' or 'l', K specifies the number of
-*>           sub-diagonals of the matrix A.
-*>           K must satisfy  0 .le. K.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer an upper
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer a lower
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Note that when DIAG = 'U' or 'u' the elements of the array A
-*>           corresponding to the diagonal elements of the matrix are not
-*>           referenced, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DTBMV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,K,LDA,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ZERO
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,IX,J,JX,KPLUS1,KX,L
-      LOGICAL NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX,MIN
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 7
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DTBMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX   too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*         Form  x := A*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          L = KPLUS1 - J
-                          DO 10 I = MAX(1,J-K),J - 1
-                              X(I) = X(I) + TEMP*A(L+I,J)
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(KPLUS1,J)
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          L = KPLUS1 - J
-                          DO 30 I = MAX(1,J-K),J - 1
-                              X(IX) = X(IX) + TEMP*A(L+I,J)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(KPLUS1,J)
-                      END IF
-                      JX = JX + INCX
-                      IF (J.GT.K) KX = KX + INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          L = 1 - J
-                          DO 50 I = MIN(N,J+K),J + 1,-1
-                              X(I) = X(I) + TEMP*A(L+I,J)
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(1,J)
-                      END IF
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          L = 1 - J
-                          DO 70 I = MIN(N,J+K),J + 1,-1
-                              X(IX) = X(IX) + TEMP*A(L+I,J)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(1,J)
-                      END IF
-                      JX = JX - INCX
-                      IF ((N-J).GE.K) KX = KX - INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := A**T*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = N,1,-1
-                      TEMP = X(J)
-                      L = KPLUS1 - J
-                      IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
-                      DO 90 I = J - 1,MAX(1,J-K),-1
-                          TEMP = TEMP + A(L+I,J)*X(I)
-   90                 CONTINUE
-                      X(J) = TEMP
-  100             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 120 J = N,1,-1
-                      TEMP = X(JX)
-                      KX = KX - INCX
-                      IX = KX
-                      L = KPLUS1 - J
-                      IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
-                      DO 110 I = J - 1,MAX(1,J-K),-1
-                          TEMP = TEMP + A(L+I,J)*X(IX)
-                          IX = IX - INCX
-  110                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  120             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = 1,N
-                      TEMP = X(J)
-                      L = 1 - J
-                      IF (NOUNIT) TEMP = TEMP*A(1,J)
-                      DO 130 I = J + 1,MIN(N,J+K)
-                          TEMP = TEMP + A(L+I,J)*X(I)
-  130                 CONTINUE
-                      X(J) = TEMP
-  140             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 160 J = 1,N
-                      TEMP = X(JX)
-                      KX = KX + INCX
-                      IX = KX
-                      L = 1 - J
-                      IF (NOUNIT) TEMP = TEMP*A(1,J)
-                      DO 150 I = J + 1,MIN(N,J+K)
-                          TEMP = TEMP + A(L+I,J)*X(IX)
-                          IX = IX + INCX
-  150                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DTBMV .
-*
-      END
diff --git a/superlu/BLAS/dtbsv.f b/superlu/BLAS/dtbsv.f
deleted file mode 100644
index ad468288..00000000
--- a/superlu/BLAS/dtbsv.f
+++ /dev/null
@@ -1,401 +0,0 @@
-*> \brief \b DTBSV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DTBSV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,K,LDA,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DTBSV  solves one of the systems of equations
-*>
-*>    A*x = b,   or   A**T*x = b,
-*>
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular band matrix, with ( k + 1 )
-*> diagonals.
-*>
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>
-*>              TRANS = 'C' or 'c'   A**T*x = b.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with UPLO = 'U' or 'u', K specifies the number of
-*>           super-diagonals of the matrix A.
-*>           On entry with UPLO = 'L' or 'l', K specifies the number of
-*>           sub-diagonals of the matrix A.
-*>           K must satisfy  0 .le. K.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer an upper
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer a lower
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Note that when DIAG = 'U' or 'u' the elements of the array A
-*>           corresponding to the diagonal elements of the matrix are not
-*>           referenced, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DTBSV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,K,LDA,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ZERO
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,IX,J,JX,KPLUS1,KX,L
-      LOGICAL NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX,MIN
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 7
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DTBSV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed by sequentially with one pass through A.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x := inv( A )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          L = KPLUS1 - J
-                          IF (NOUNIT) X(J) = X(J)/A(KPLUS1,J)
-                          TEMP = X(J)
-                          DO 10 I = J - 1,MAX(1,J-K),-1
-                              X(I) = X(I) - TEMP*A(L+I,J)
-   10                     CONTINUE
-                      END IF
-   20             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 40 J = N,1,-1
-                      KX = KX - INCX
-                      IF (X(JX).NE.ZERO) THEN
-                          IX = KX
-                          L = KPLUS1 - J
-                          IF (NOUNIT) X(JX) = X(JX)/A(KPLUS1,J)
-                          TEMP = X(JX)
-                          DO 30 I = J - 1,MAX(1,J-K),-1
-                              X(IX) = X(IX) - TEMP*A(L+I,J)
-                              IX = IX - INCX
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          L = 1 - J
-                          IF (NOUNIT) X(J) = X(J)/A(1,J)
-                          TEMP = X(J)
-                          DO 50 I = J + 1,MIN(N,J+K)
-                              X(I) = X(I) - TEMP*A(L+I,J)
-   50                     CONTINUE
-                      END IF
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      KX = KX + INCX
-                      IF (X(JX).NE.ZERO) THEN
-                          IX = KX
-                          L = 1 - J
-                          IF (NOUNIT) X(JX) = X(JX)/A(1,J)
-                          TEMP = X(JX)
-                          DO 70 I = J + 1,MIN(N,J+K)
-                              X(IX) = X(IX) - TEMP*A(L+I,J)
-                              IX = IX + INCX
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := inv( A**T)*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = 1,N
-                      TEMP = X(J)
-                      L = KPLUS1 - J
-                      DO 90 I = MAX(1,J-K),J - 1
-                          TEMP = TEMP - A(L+I,J)*X(I)
-   90                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J)
-                      X(J) = TEMP
-  100             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 120 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      L = KPLUS1 - J
-                      DO 110 I = MAX(1,J-K),J - 1
-                          TEMP = TEMP - A(L+I,J)*X(IX)
-                          IX = IX + INCX
-  110                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J)
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      IF (J.GT.K) KX = KX + INCX
-  120             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = N,1,-1
-                      TEMP = X(J)
-                      L = 1 - J
-                      DO 130 I = MIN(N,J+K),J + 1,-1
-                          TEMP = TEMP - A(L+I,J)*X(I)
-  130                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(1,J)
-                      X(J) = TEMP
-  140             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 160 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      L = 1 - J
-                      DO 150 I = MIN(N,J+K),J + 1,-1
-                          TEMP = TEMP - A(L+I,J)*X(IX)
-                          IX = IX - INCX
-  150                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(1,J)
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      IF ((N-J).GE.K) KX = KX - INCX
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DTBSV .
-*
-      END
diff --git a/superlu/BLAS/dtpmv.f b/superlu/BLAS/dtpmv.f
deleted file mode 100644
index 3b0e6209..00000000
--- a/superlu/BLAS/dtpmv.f
+++ /dev/null
@@ -1,352 +0,0 @@
-*> \brief \b DTPMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DTPMV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION AP(*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DTPMV  performs one of the matrix-vector operations
-*>
-*>    x := A*x,   or   x := A**T*x,
-*>
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular matrix, supplied in packed form.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>
-*>              TRANS = 'C' or 'c'   x := A**T*x.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] AP
-*> \verbatim
-*>          AP is DOUBLE PRECISION array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
-*>           respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
-*>           respectively, and so on.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DTPMV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION AP(*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ZERO
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,IX,J,JX,K,KK,KX
-      LOGICAL NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DTPMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of AP are
-*     accessed sequentially with one pass through AP.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x:= A*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          K = KK
-                          DO 10 I = 1,J - 1
-                              X(I) = X(I) + TEMP*AP(K)
-                              K = K + 1
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*AP(KK+J-1)
-                      END IF
-                      KK = KK + J
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 30 K = KK,KK + J - 2
-                              X(IX) = X(IX) + TEMP*AP(K)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*AP(KK+J-1)
-                      END IF
-                      JX = JX + INCX
-                      KK = KK + J
-   40             CONTINUE
-              END IF
-          ELSE
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          K = KK
-                          DO 50 I = N,J + 1,-1
-                              X(I) = X(I) + TEMP*AP(K)
-                              K = K - 1
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*AP(KK-N+J)
-                      END IF
-                      KK = KK - (N-J+1)
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 70 K = KK,KK - (N- (J+1)),-1
-                              X(IX) = X(IX) + TEMP*AP(K)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*AP(KK-N+J)
-                      END IF
-                      JX = JX - INCX
-                      KK = KK - (N-J+1)
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := A**T*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = N,1,-1
-                      TEMP = X(J)
-                      IF (NOUNIT) TEMP = TEMP*AP(KK)
-                      K = KK - 1
-                      DO 90 I = J - 1,1,-1
-                          TEMP = TEMP + AP(K)*X(I)
-                          K = K - 1
-   90                 CONTINUE
-                      X(J) = TEMP
-                      KK = KK - J
-  100             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 120 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOUNIT) TEMP = TEMP*AP(KK)
-                      DO 110 K = KK - 1,KK - J + 1,-1
-                          IX = IX - INCX
-                          TEMP = TEMP + AP(K)*X(IX)
-  110                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      KK = KK - J
-  120             CONTINUE
-              END IF
-          ELSE
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = 1,N
-                      TEMP = X(J)
-                      IF (NOUNIT) TEMP = TEMP*AP(KK)
-                      K = KK + 1
-                      DO 130 I = J + 1,N
-                          TEMP = TEMP + AP(K)*X(I)
-                          K = K + 1
-  130                 CONTINUE
-                      X(J) = TEMP
-                      KK = KK + (N-J+1)
-  140             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 160 J = 1,N
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOUNIT) TEMP = TEMP*AP(KK)
-                      DO 150 K = KK + 1,KK + N - J
-                          IX = IX + INCX
-                          TEMP = TEMP + AP(K)*X(IX)
-  150                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      KK = KK + (N-J+1)
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DTPMV .
-*
-      END
diff --git a/superlu/BLAS/dtpsv.f b/superlu/BLAS/dtpsv.f
deleted file mode 100644
index a5d9faa4..00000000
--- a/superlu/BLAS/dtpsv.f
+++ /dev/null
@@ -1,354 +0,0 @@
-*> \brief \b DTPSV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DTPSV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION AP(*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DTPSV  solves one of the systems of equations
-*>
-*>    A*x = b,   or   A**T*x = b,
-*>
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular matrix, supplied in packed form.
-*>
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>
-*>              TRANS = 'C' or 'c'   A**T*x = b.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] AP
-*> \verbatim
-*>          AP is DOUBLE PRECISION array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
-*>           respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
-*>           respectively, and so on.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DTPSV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION AP(*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ZERO
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,IX,J,JX,K,KK,KX
-      LOGICAL NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DTPSV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of AP are
-*     accessed sequentially with one pass through AP.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x := inv( A )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/AP(KK)
-                          TEMP = X(J)
-                          K = KK - 1
-                          DO 10 I = J - 1,1,-1
-                              X(I) = X(I) - TEMP*AP(K)
-                              K = K - 1
-   10                     CONTINUE
-                      END IF
-                      KK = KK - J
-   20             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 40 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/AP(KK)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 30 K = KK - 1,KK - J + 1,-1
-                              IX = IX - INCX
-                              X(IX) = X(IX) - TEMP*AP(K)
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-                      KK = KK - J
-   40             CONTINUE
-              END IF
-          ELSE
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/AP(KK)
-                          TEMP = X(J)
-                          K = KK + 1
-                          DO 50 I = J + 1,N
-                              X(I) = X(I) - TEMP*AP(K)
-                              K = K + 1
-   50                     CONTINUE
-                      END IF
-                      KK = KK + (N-J+1)
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/AP(KK)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 70 K = KK + 1,KK + N - J
-                              IX = IX + INCX
-                              X(IX) = X(IX) - TEMP*AP(K)
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-                      KK = KK + (N-J+1)
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := inv( A**T )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = 1,N
-                      TEMP = X(J)
-                      K = KK
-                      DO 90 I = 1,J - 1
-                          TEMP = TEMP - AP(K)*X(I)
-                          K = K + 1
-   90                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/AP(KK+J-1)
-                      X(J) = TEMP
-                      KK = KK + J
-  100             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 120 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      DO 110 K = KK,KK + J - 2
-                          TEMP = TEMP - AP(K)*X(IX)
-                          IX = IX + INCX
-  110                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/AP(KK+J-1)
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      KK = KK + J
-  120             CONTINUE
-              END IF
-          ELSE
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = N,1,-1
-                      TEMP = X(J)
-                      K = KK
-                      DO 130 I = N,J + 1,-1
-                          TEMP = TEMP - AP(K)*X(I)
-                          K = K - 1
-  130                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/AP(KK-N+J)
-                      X(J) = TEMP
-                      KK = KK - (N-J+1)
-  140             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 160 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      DO 150 K = KK,KK - (N- (J+1)),-1
-                          TEMP = TEMP - AP(K)*X(IX)
-                          IX = IX - INCX
-  150                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/AP(KK-N+J)
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      KK = KK - (N-J+1)
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DTPSV .
-*
-      END
diff --git a/superlu/BLAS/dtrmm.f b/superlu/BLAS/dtrmm.f
deleted file mode 100644
index e315d596..00000000
--- a/superlu/BLAS/dtrmm.f
+++ /dev/null
@@ -1,415 +0,0 @@
-*> \brief \b DTRMM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA
-*       INTEGER LDA,LDB,M,N
-*       CHARACTER DIAG,SIDE,TRANSA,UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),B(LDB,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DTRMM  performs one of the matrix-matrix operations
-*>
-*>    B := alpha*op( A )*B,   or   B := alpha*B*op( A ),
-*>
-*> where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
-*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
-*>
-*>    op( A ) = A   or   op( A ) = A**T.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE specifies whether  op( A ) multiplies B from
-*>           the left or right as follows:
-*>
-*>              SIDE = 'L' or 'l'   B := alpha*op( A )*B.
-*>
-*>              SIDE = 'R' or 'r'   B := alpha*B*op( A ).
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix A is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSA = 'N' or 'n'   op( A ) = A.
-*>
-*>              TRANSA = 'T' or 't'   op( A ) = A**T.
-*>
-*>              TRANSA = 'C' or 'c'   op( A ) = A**T.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit triangular
-*>           as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of B. M must be at
-*>           least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of B.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
-*>           zero then  A is not referenced and  B need not be set before
-*>           entry.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>           A is DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m
-*>           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
-*>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
-*>           upper triangular part of the array  A must contain the upper
-*>           triangular matrix  and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
-*>           lower triangular part of the array  A must contain the lower
-*>           triangular matrix  and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
-*>           A  are not referenced either,  but are assumed to be  unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
-*>           then LDA must be at least max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in,out] B
-*> \verbatim
-*>          B is DOUBLE PRECISION array of DIMENSION ( LDB, n ).
-*>           Before entry,  the leading  m by n part of the array  B must
-*>           contain the matrix  B,  and  on exit  is overwritten  by the
-*>           transformed matrix.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA
-      INTEGER LDA,LDB,M,N
-      CHARACTER DIAG,SIDE,TRANSA,UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),B(LDB,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,J,K,NROWA
-      LOGICAL LSIDE,NOUNIT,UPPER
-*     ..
-*     .. Parameters ..
-      DOUBLE PRECISION ONE,ZERO
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*
-*     Test the input parameters.
-*
-      LSIDE = LSAME(SIDE,'L')
-      IF (LSIDE) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      NOUNIT = LSAME(DIAG,'N')
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.LSIDE) .AND. (.NOT.LSAME(SIDE,'R'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 2
-      ELSE IF ((.NOT.LSAME(TRANSA,'N')) .AND.
-     +         (.NOT.LSAME(TRANSA,'T')) .AND.
-     +         (.NOT.LSAME(TRANSA,'C'))) THEN
-          INFO = 3
-      ELSE IF ((.NOT.LSAME(DIAG,'U')) .AND. (.NOT.LSAME(DIAG,'N'))) THEN
-          INFO = 4
-      ELSE IF (M.LT.0) THEN
-          INFO = 5
-      ELSE IF (N.LT.0) THEN
-          INFO = 6
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 9
-      ELSE IF (LDB.LT.MAX(1,M)) THEN
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DTRMM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (M.EQ.0 .OR. N.EQ.0) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          DO 20 J = 1,N
-              DO 10 I = 1,M
-                  B(I,J) = ZERO
-   10         CONTINUE
-   20     CONTINUE
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSIDE) THEN
-          IF (LSAME(TRANSA,'N')) THEN
-*
-*           Form  B := alpha*A*B.
-*
-              IF (UPPER) THEN
-                  DO 50 J = 1,N
-                      DO 40 K = 1,M
-                          IF (B(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*B(K,J)
-                              DO 30 I = 1,K - 1
-                                  B(I,J) = B(I,J) + TEMP*A(I,K)
-   30                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP*A(K,K)
-                              B(K,J) = TEMP
-                          END IF
-   40                 CONTINUE
-   50             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 K = M,1,-1
-                          IF (B(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*B(K,J)
-                              B(K,J) = TEMP
-                              IF (NOUNIT) B(K,J) = B(K,J)*A(K,K)
-                              DO 60 I = K + 1,M
-                                  B(I,J) = B(I,J) + TEMP*A(I,K)
-   60                         CONTINUE
-                          END IF
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          ELSE
-*
-*           Form  B := alpha*A**T*B.
-*
-              IF (UPPER) THEN
-                  DO 110 J = 1,N
-                      DO 100 I = M,1,-1
-                          TEMP = B(I,J)
-                          IF (NOUNIT) TEMP = TEMP*A(I,I)
-                          DO 90 K = 1,I - 1
-                              TEMP = TEMP + A(K,I)*B(K,J)
-   90                     CONTINUE
-                          B(I,J) = ALPHA*TEMP
-  100                 CONTINUE
-  110             CONTINUE
-              ELSE
-                  DO 140 J = 1,N
-                      DO 130 I = 1,M
-                          TEMP = B(I,J)
-                          IF (NOUNIT) TEMP = TEMP*A(I,I)
-                          DO 120 K = I + 1,M
-                              TEMP = TEMP + A(K,I)*B(K,J)
-  120                     CONTINUE
-                          B(I,J) = ALPHA*TEMP
-  130                 CONTINUE
-  140             CONTINUE
-              END IF
-          END IF
-      ELSE
-          IF (LSAME(TRANSA,'N')) THEN
-*
-*           Form  B := alpha*B*A.
-*
-              IF (UPPER) THEN
-                  DO 180 J = N,1,-1
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 150 I = 1,M
-                          B(I,J) = TEMP*B(I,J)
-  150                 CONTINUE
-                      DO 170 K = 1,J - 1
-                          IF (A(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*A(K,J)
-                              DO 160 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  160                         CONTINUE
-                          END IF
-  170                 CONTINUE
-  180             CONTINUE
-              ELSE
-                  DO 220 J = 1,N
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 190 I = 1,M
-                          B(I,J) = TEMP*B(I,J)
-  190                 CONTINUE
-                      DO 210 K = J + 1,N
-                          IF (A(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*A(K,J)
-                              DO 200 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  200                         CONTINUE
-                          END IF
-  210                 CONTINUE
-  220             CONTINUE
-              END IF
-          ELSE
-*
-*           Form  B := alpha*B*A**T.
-*
-              IF (UPPER) THEN
-                  DO 260 K = 1,N
-                      DO 240 J = 1,K - 1
-                          IF (A(J,K).NE.ZERO) THEN
-                              TEMP = ALPHA*A(J,K)
-                              DO 230 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  230                         CONTINUE
-                          END IF
-  240                 CONTINUE
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(K,K)
-                      IF (TEMP.NE.ONE) THEN
-                          DO 250 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  250                     CONTINUE
-                      END IF
-  260             CONTINUE
-              ELSE
-                  DO 300 K = N,1,-1
-                      DO 280 J = K + 1,N
-                          IF (A(J,K).NE.ZERO) THEN
-                              TEMP = ALPHA*A(J,K)
-                              DO 270 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  270                         CONTINUE
-                          END IF
-  280                 CONTINUE
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(K,K)
-                      IF (TEMP.NE.ONE) THEN
-                          DO 290 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  290                     CONTINUE
-                      END IF
-  300             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DTRMM .
-*
-      END
diff --git a/superlu/BLAS/dtrmv.f b/superlu/BLAS/dtrmv.f
deleted file mode 100644
index 83959d06..00000000
--- a/superlu/BLAS/dtrmv.f
+++ /dev/null
@@ -1,342 +0,0 @@
-*> \brief \b DTRMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,LDA,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DTRMV  performs one of the matrix-vector operations
-*>
-*>    x := A*x,   or   x := A**T*x,
-*>
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>
-*>              TRANS = 'C' or 'c'   x := A**T*x.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular matrix and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular matrix and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced either, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,LDA,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ZERO
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,IX,J,JX,KX
-      LOGICAL NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 6
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 8
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DTRMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x := A*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          DO 10 I = 1,J - 1
-                              X(I) = X(I) + TEMP*A(I,J)
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(J,J)
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 30 I = 1,J - 1
-                              X(IX) = X(IX) + TEMP*A(I,J)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(J,J)
-                      END IF
-                      JX = JX + INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          DO 50 I = N,J + 1,-1
-                              X(I) = X(I) + TEMP*A(I,J)
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(J,J)
-                      END IF
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 70 I = N,J + 1,-1
-                              X(IX) = X(IX) + TEMP*A(I,J)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(J,J)
-                      END IF
-                      JX = JX - INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := A**T*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = N,1,-1
-                      TEMP = X(J)
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 90 I = J - 1,1,-1
-                          TEMP = TEMP + A(I,J)*X(I)
-   90                 CONTINUE
-                      X(J) = TEMP
-  100             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 120 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 110 I = J - 1,1,-1
-                          IX = IX - INCX
-                          TEMP = TEMP + A(I,J)*X(IX)
-  110                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  120             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = 1,N
-                      TEMP = X(J)
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 130 I = J + 1,N
-                          TEMP = TEMP + A(I,J)*X(I)
-  130                 CONTINUE
-                      X(J) = TEMP
-  140             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 160 J = 1,N
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 150 I = J + 1,N
-                          IX = IX + INCX
-                          TEMP = TEMP + A(I,J)*X(IX)
-  150                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DTRMV .
-*
-      END
diff --git a/superlu/BLAS/dtrsm.f b/superlu/BLAS/dtrsm.f
deleted file mode 100644
index bc440f06..00000000
--- a/superlu/BLAS/dtrsm.f
+++ /dev/null
@@ -1,443 +0,0 @@
-*> \brief \b DTRSM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DTRSM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA
-*       INTEGER LDA,LDB,M,N
-*       CHARACTER DIAG,SIDE,TRANSA,UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),B(LDB,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DTRSM  solves one of the matrix equations
-*>
-*>    op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
-*>
-*> where alpha is a scalar, X and B are m by n matrices, A is a unit, or
-*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
-*>
-*>    op( A ) = A   or   op( A ) = A**T.
-*>
-*> The matrix X is overwritten on B.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry, SIDE specifies whether op( A ) appears on the left
-*>           or right of X as follows:
-*>
-*>              SIDE = 'L' or 'l'   op( A )*X = alpha*B.
-*>
-*>              SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix A is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSA = 'N' or 'n'   op( A ) = A.
-*>
-*>              TRANSA = 'T' or 't'   op( A ) = A**T.
-*>
-*>              TRANSA = 'C' or 'c'   op( A ) = A**T.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit triangular
-*>           as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of B. M must be at
-*>           least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of B.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
-*>           zero then  A is not referenced and  B need not be set before
-*>           entry.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, k ),
-*>           where k is m when SIDE = 'L' or 'l'
-*>             and k is n when SIDE = 'R' or 'r'.
-*>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
-*>           upper triangular part of the array  A must contain the upper
-*>           triangular matrix  and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
-*>           lower triangular part of the array  A must contain the lower
-*>           triangular matrix  and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
-*>           A  are not referenced either,  but are assumed to be  unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
-*>           then LDA must be at least max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in,out] B
-*> \verbatim
-*>          B is DOUBLE PRECISION array of DIMENSION ( LDB, n ).
-*>           Before entry,  the leading  m by n part of the array  B must
-*>           contain  the  right-hand  side  matrix  B,  and  on exit  is
-*>           overwritten by the solution matrix  X.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE DTRSM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA
-      INTEGER LDA,LDB,M,N
-      CHARACTER DIAG,SIDE,TRANSA,UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),B(LDB,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,J,K,NROWA
-      LOGICAL LSIDE,NOUNIT,UPPER
-*     ..
-*     .. Parameters ..
-      DOUBLE PRECISION ONE,ZERO
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*
-*     Test the input parameters.
-*
-      LSIDE = LSAME(SIDE,'L')
-      IF (LSIDE) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      NOUNIT = LSAME(DIAG,'N')
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.LSIDE) .AND. (.NOT.LSAME(SIDE,'R'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 2
-      ELSE IF ((.NOT.LSAME(TRANSA,'N')) .AND.
-     +         (.NOT.LSAME(TRANSA,'T')) .AND.
-     +         (.NOT.LSAME(TRANSA,'C'))) THEN
-          INFO = 3
-      ELSE IF ((.NOT.LSAME(DIAG,'U')) .AND. (.NOT.LSAME(DIAG,'N'))) THEN
-          INFO = 4
-      ELSE IF (M.LT.0) THEN
-          INFO = 5
-      ELSE IF (N.LT.0) THEN
-          INFO = 6
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 9
-      ELSE IF (LDB.LT.MAX(1,M)) THEN
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DTRSM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (M.EQ.0 .OR. N.EQ.0) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          DO 20 J = 1,N
-              DO 10 I = 1,M
-                  B(I,J) = ZERO
-   10         CONTINUE
-   20     CONTINUE
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSIDE) THEN
-          IF (LSAME(TRANSA,'N')) THEN
-*
-*           Form  B := alpha*inv( A )*B.
-*
-              IF (UPPER) THEN
-                  DO 60 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 30 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-   30                     CONTINUE
-                      END IF
-                      DO 50 K = M,1,-1
-                          IF (B(K,J).NE.ZERO) THEN
-                              IF (NOUNIT) B(K,J) = B(K,J)/A(K,K)
-                              DO 40 I = 1,K - 1
-                                  B(I,J) = B(I,J) - B(K,J)*A(I,K)
-   40                         CONTINUE
-                          END IF
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 100 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 70 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-   70                     CONTINUE
-                      END IF
-                      DO 90 K = 1,M
-                          IF (B(K,J).NE.ZERO) THEN
-                              IF (NOUNIT) B(K,J) = B(K,J)/A(K,K)
-                              DO 80 I = K + 1,M
-                                  B(I,J) = B(I,J) - B(K,J)*A(I,K)
-   80                         CONTINUE
-                          END IF
-   90                 CONTINUE
-  100             CONTINUE
-              END IF
-          ELSE
-*
-*           Form  B := alpha*inv( A**T )*B.
-*
-              IF (UPPER) THEN
-                  DO 130 J = 1,N
-                      DO 120 I = 1,M
-                          TEMP = ALPHA*B(I,J)
-                          DO 110 K = 1,I - 1
-                              TEMP = TEMP - A(K,I)*B(K,J)
-  110                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(I,I)
-                          B(I,J) = TEMP
-  120                 CONTINUE
-  130             CONTINUE
-              ELSE
-                  DO 160 J = 1,N
-                      DO 150 I = M,1,-1
-                          TEMP = ALPHA*B(I,J)
-                          DO 140 K = I + 1,M
-                              TEMP = TEMP - A(K,I)*B(K,J)
-  140                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(I,I)
-                          B(I,J) = TEMP
-  150                 CONTINUE
-  160             CONTINUE
-              END IF
-          END IF
-      ELSE
-          IF (LSAME(TRANSA,'N')) THEN
-*
-*           Form  B := alpha*B*inv( A ).
-*
-              IF (UPPER) THEN
-                  DO 210 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 170 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-  170                     CONTINUE
-                      END IF
-                      DO 190 K = 1,J - 1
-                          IF (A(K,J).NE.ZERO) THEN
-                              DO 180 I = 1,M
-                                  B(I,J) = B(I,J) - A(K,J)*B(I,K)
-  180                         CONTINUE
-                          END IF
-  190                 CONTINUE
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(J,J)
-                          DO 200 I = 1,M
-                              B(I,J) = TEMP*B(I,J)
-  200                     CONTINUE
-                      END IF
-  210             CONTINUE
-              ELSE
-                  DO 260 J = N,1,-1
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 220 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-  220                     CONTINUE
-                      END IF
-                      DO 240 K = J + 1,N
-                          IF (A(K,J).NE.ZERO) THEN
-                              DO 230 I = 1,M
-                                  B(I,J) = B(I,J) - A(K,J)*B(I,K)
-  230                         CONTINUE
-                          END IF
-  240                 CONTINUE
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(J,J)
-                          DO 250 I = 1,M
-                              B(I,J) = TEMP*B(I,J)
-  250                     CONTINUE
-                      END IF
-  260             CONTINUE
-              END IF
-          ELSE
-*
-*           Form  B := alpha*B*inv( A**T ).
-*
-              IF (UPPER) THEN
-                  DO 310 K = N,1,-1
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(K,K)
-                          DO 270 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  270                     CONTINUE
-                      END IF
-                      DO 290 J = 1,K - 1
-                          IF (A(J,K).NE.ZERO) THEN
-                              TEMP = A(J,K)
-                              DO 280 I = 1,M
-                                  B(I,J) = B(I,J) - TEMP*B(I,K)
-  280                         CONTINUE
-                          END IF
-  290                 CONTINUE
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 300 I = 1,M
-                              B(I,K) = ALPHA*B(I,K)
-  300                     CONTINUE
-                      END IF
-  310             CONTINUE
-              ELSE
-                  DO 360 K = 1,N
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(K,K)
-                          DO 320 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  320                     CONTINUE
-                      END IF
-                      DO 340 J = K + 1,N
-                          IF (A(J,K).NE.ZERO) THEN
-                              TEMP = A(J,K)
-                              DO 330 I = 1,M
-                                  B(I,J) = B(I,J) - TEMP*B(I,K)
-  330                         CONTINUE
-                          END IF
-  340                 CONTINUE
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 350 I = 1,M
-                              B(I,K) = ALPHA*B(I,K)
-  350                     CONTINUE
-                      END IF
-  360             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DTRSM .
-*
-      END
diff --git a/superlu/BLAS/dtrsv.f b/superlu/BLAS/dtrsv.f
deleted file mode 100644
index cab3fd98..00000000
--- a/superlu/BLAS/dtrsv.f
+++ /dev/null
@@ -1,338 +0,0 @@
-*> \brief \b DTRSV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE DTRSV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,LDA,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DTRSV  solves one of the systems of equations
-*>
-*>    A*x = b,   or   A**T*x = b,
-*>
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular matrix.
-*>
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>
-*>              TRANS = 'C' or 'c'   A**T*x = b.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is DOUBLE PRECISION array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular matrix and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular matrix and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced either, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is DOUBLE PRECISION array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level1
-*
-*  =====================================================================
-      SUBROUTINE DTRSV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,LDA,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ZERO
-      PARAMETER (ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION TEMP
-      INTEGER I,INFO,IX,J,JX,KX
-      LOGICAL NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 6
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 8
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('DTRSV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x := inv( A )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/A(J,J)
-                          TEMP = X(J)
-                          DO 10 I = J - 1,1,-1
-                              X(I) = X(I) - TEMP*A(I,J)
-   10                     CONTINUE
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 40 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/A(J,J)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 30 I = J - 1,1,-1
-                              IX = IX - INCX
-                              X(IX) = X(IX) - TEMP*A(I,J)
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/A(J,J)
-                          TEMP = X(J)
-                          DO 50 I = J + 1,N
-                              X(I) = X(I) - TEMP*A(I,J)
-   50                     CONTINUE
-                      END IF
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/A(J,J)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 70 I = J + 1,N
-                              IX = IX + INCX
-                              X(IX) = X(IX) - TEMP*A(I,J)
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := inv( A**T )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = 1,N
-                      TEMP = X(J)
-                      DO 90 I = 1,J - 1
-                          TEMP = TEMP - A(I,J)*X(I)
-   90                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      X(J) = TEMP
-  100             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 120 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      DO 110 I = 1,J - 1
-                          TEMP = TEMP - A(I,J)*X(IX)
-                          IX = IX + INCX
-  110                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  120             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = N,1,-1
-                      TEMP = X(J)
-                      DO 130 I = N,J + 1,-1
-                          TEMP = TEMP - A(I,J)*X(I)
-  130                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      X(J) = TEMP
-  140             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 160 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      DO 150 I = N,J + 1,-1
-                          TEMP = TEMP - A(I,J)*X(IX)
-                          IX = IX - INCX
-  150                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of DTRSV .
-*
-      END
diff --git a/superlu/BLAS/dzasum.f b/superlu/BLAS/dzasum.f
deleted file mode 100644
index 9f0d3fd0..00000000
--- a/superlu/BLAS/dzasum.f
+++ /dev/null
@@ -1,98 +0,0 @@
-*> \brief \b DZASUM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       DOUBLE PRECISION FUNCTION DZASUM(N,ZX,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    DZASUM takes the sum of the (|Re(.)| + |Im(.)|)'s of a complex vector and
-*>    returns a single precision result.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      DOUBLE PRECISION FUNCTION DZASUM(N,ZX,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      DOUBLE PRECISION STEMP
-      INTEGER I,NINCX
-*     ..
-*     .. External Functions ..
-      DOUBLE PRECISION DCABS1
-      EXTERNAL DCABS1
-*     ..
-      DZASUM = 0.0d0
-      STEMP = 0.0d0
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*
-*        code for increment equal to 1
-*
-         DO I = 1,N
-            STEMP = STEMP + DCABS1(ZX(I))
-         END DO
-      ELSE
-*
-*        code for increment not equal to 1
-*
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            STEMP = STEMP + DCABS1(ZX(I))
-         END DO
-      END IF
-      DZASUM = STEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/dznrm2.f b/superlu/BLAS/dznrm2.f
deleted file mode 100644
index 3b6bf613..00000000
--- a/superlu/BLAS/dznrm2.f
+++ /dev/null
@@ -1,119 +0,0 @@
-*> \brief \b DZNRM2
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       DOUBLE PRECISION FUNCTION DZNRM2(N,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> DZNRM2 returns the euclidean norm of a vector via the function
-*> name, so that
-*>
-*>    DZNRM2 := sqrt( x**H*x )
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup double_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  -- This version written on 25-October-1982.
-*>     Modified on 14-October-1993 to inline the call to ZLASSQ.
-*>     Sven Hammarling, Nag Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      DOUBLE PRECISION FUNCTION DZNRM2(N,X,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      DOUBLE PRECISION ONE,ZERO
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*     .. Local Scalars ..
-      DOUBLE PRECISION NORM,SCALE,SSQ,TEMP
-      INTEGER IX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC ABS,DBLE,DIMAG,SQRT
-*     ..
-      IF (N.LT.1 .OR. INCX.LT.1) THEN
-          NORM = ZERO
-      ELSE
-          SCALE = ZERO
-          SSQ = ONE
-*        The following loop is equivalent to this call to the LAPACK
-*        auxiliary routine:
-*        CALL ZLASSQ( N, X, INCX, SCALE, SSQ )
-*
-          DO 10 IX = 1,1 + (N-1)*INCX,INCX
-              IF (DBLE(X(IX)).NE.ZERO) THEN
-                  TEMP = ABS(DBLE(X(IX)))
-                  IF (SCALE.LT.TEMP) THEN
-                      SSQ = ONE + SSQ* (SCALE/TEMP)**2
-                      SCALE = TEMP
-                  ELSE
-                      SSQ = SSQ + (TEMP/SCALE)**2
-                  END IF
-              END IF
-              IF (DIMAG(X(IX)).NE.ZERO) THEN
-                  TEMP = ABS(DIMAG(X(IX)))
-                  IF (SCALE.LT.TEMP) THEN
-                      SSQ = ONE + SSQ* (SCALE/TEMP)**2
-                      SCALE = TEMP
-                  ELSE
-                      SSQ = SSQ + (TEMP/SCALE)**2
-                  END IF
-              END IF
-   10     CONTINUE
-          NORM = SCALE*SQRT(SSQ)
-      END IF
-*
-      DZNRM2 = NORM
-      RETURN
-*
-*     End of DZNRM2.
-*
-      END
diff --git a/superlu/BLAS/icamax.f b/superlu/BLAS/icamax.f
deleted file mode 100644
index 37035c7a..00000000
--- a/superlu/BLAS/icamax.f
+++ /dev/null
@@ -1,107 +0,0 @@
-*> \brief \b ICAMAX
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       INTEGER FUNCTION ICAMAX(N,CX,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    ICAMAX finds the index of the first element having maximum |Re(.)| + 
|Im(.)|
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup aux_blas
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      INTEGER FUNCTION ICAMAX(N,CX,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      REAL SMAX
-      INTEGER I,IX
-*     ..
-*     .. External Functions ..
-      REAL SCABS1
-      EXTERNAL SCABS1
-*     ..
-      ICAMAX = 0
-      IF (N.LT.1 .OR. INCX.LE.0) RETURN
-      ICAMAX = 1
-      IF (N.EQ.1) RETURN
-      IF (INCX.EQ.1) THEN
-*
-*        code for increment equal to 1
-*
-         SMAX = SCABS1(CX(1))
-         DO I = 2,N
-            IF (SCABS1(CX(I)).GT.SMAX) THEN
-               ICAMAX = I
-               SMAX = SCABS1(CX(I))
-            END IF
-         END DO
-      ELSE
-*
-*        code for increment not equal to 1
-*
-         IX = 1
-         SMAX = SCABS1(CX(1))
-         IX = IX + INCX
-         DO I = 2,N
-            IF (SCABS1(CX(IX)).GT.SMAX) THEN
-               ICAMAX = I
-               SMAX = SCABS1(CX(IX))
-            END IF
-            IX = IX + INCX
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/idamax.f b/superlu/BLAS/idamax.f
deleted file mode 100644
index 95856602..00000000
--- a/superlu/BLAS/idamax.f
+++ /dev/null
@@ -1,106 +0,0 @@
-*> \brief \b IDAMAX
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       INTEGER FUNCTION IDAMAX(N,DX,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       DOUBLE PRECISION DX(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    IDAMAX finds the index of the first element having maximum absolute 
value.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup aux_blas
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      INTEGER FUNCTION IDAMAX(N,DX,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      DOUBLE PRECISION DX(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      DOUBLE PRECISION DMAX
-      INTEGER I,IX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DABS
-*     ..
-      IDAMAX = 0
-      IF (N.LT.1 .OR. INCX.LE.0) RETURN
-      IDAMAX = 1
-      IF (N.EQ.1) RETURN
-      IF (INCX.EQ.1) THEN
-*
-*        code for increment equal to 1
-*
-         DMAX = DABS(DX(1))
-         DO I = 2,N
-            IF (DABS(DX(I)).GT.DMAX) THEN
-               IDAMAX = I
-               DMAX = DABS(DX(I))
-            END IF
-         END DO
-      ELSE
-*
-*        code for increment not equal to 1
-*
-         IX = 1
-         DMAX = DABS(DX(1))
-         IX = IX + INCX
-         DO I = 2,N
-            IF (DABS(DX(IX)).GT.DMAX) THEN
-               IDAMAX = I
-               DMAX = DABS(DX(IX))
-            END IF
-            IX = IX + INCX
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/isamax.f b/superlu/BLAS/isamax.f
deleted file mode 100644
index e7312235..00000000
--- a/superlu/BLAS/isamax.f
+++ /dev/null
@@ -1,106 +0,0 @@
-*> \brief \b ISAMAX
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       INTEGER FUNCTION ISAMAX(N,SX,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    ISAMAX finds the index of the first element having maximum absolute 
value.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup aux_blas
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      INTEGER FUNCTION ISAMAX(N,SX,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      REAL SMAX
-      INTEGER I,IX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC ABS
-*     ..
-      ISAMAX = 0
-      IF (N.LT.1 .OR. INCX.LE.0) RETURN
-      ISAMAX = 1
-      IF (N.EQ.1) RETURN
-      IF (INCX.EQ.1) THEN
-*
-*        code for increment equal to 1
-*
-         SMAX = ABS(SX(1))
-         DO I = 2,N
-            IF (ABS(SX(I)).GT.SMAX) THEN
-               ISAMAX = I
-               SMAX = ABS(SX(I))
-            END IF
-         END DO
-      ELSE
-*
-*        code for increment not equal to 1
-*
-         IX = 1
-         SMAX = ABS(SX(1))
-         IX = IX + INCX
-         DO I = 2,N
-            IF (ABS(SX(IX)).GT.SMAX) THEN
-               ISAMAX = I
-               SMAX = ABS(SX(IX))
-            END IF
-            IX = IX + INCX
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/izamax.f b/superlu/BLAS/izamax.f
deleted file mode 100644
index 2ee9b664..00000000
--- a/superlu/BLAS/izamax.f
+++ /dev/null
@@ -1,107 +0,0 @@
-*> \brief \b IZAMAX
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       INTEGER FUNCTION IZAMAX(N,ZX,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    IZAMAX finds the index of the first element having maximum |Re(.)| + 
|Im(.)|
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup aux_blas
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, 1/15/85.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      INTEGER FUNCTION IZAMAX(N,ZX,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      DOUBLE PRECISION DMAX
-      INTEGER I,IX
-*     ..
-*     .. External Functions ..
-      DOUBLE PRECISION DCABS1
-      EXTERNAL DCABS1
-*     ..
-      IZAMAX = 0
-      IF (N.LT.1 .OR. INCX.LE.0) RETURN
-      IZAMAX = 1
-      IF (N.EQ.1) RETURN
-      IF (INCX.EQ.1) THEN
-*
-*        code for increment equal to 1
-*
-         DMAX = DCABS1(ZX(1))
-         DO I = 2,N
-            IF (DCABS1(ZX(I)).GT.DMAX) THEN
-               IZAMAX = I
-               DMAX = DCABS1(ZX(I))
-            END IF
-         END DO
-      ELSE
-*
-*        code for increment not equal to 1
-*
-         IX = 1
-         DMAX = DCABS1(ZX(1))
-         IX = IX + INCX
-         DO I = 2,N
-            IF (DCABS1(ZX(IX)).GT.DMAX) THEN
-               IZAMAX = I
-               DMAX = DCABS1(ZX(IX))
-            END IF
-            IX = IX + INCX
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/lsame.f b/superlu/BLAS/lsame.f
deleted file mode 100644
index d8194786..00000000
--- a/superlu/BLAS/lsame.f
+++ /dev/null
@@ -1,125 +0,0 @@
-*> \brief \b LSAME
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       LOGICAL FUNCTION LSAME(CA,CB)
-*
-*       .. Scalar Arguments ..
-*       CHARACTER CA,CB
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> LSAME returns .TRUE. if CA is the same letter as CB regardless of
-*> case.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] CA
-*> \verbatim
-*>          CA is CHARACTER*1
-*> \endverbatim
-*>
-*> \param[in] CB
-*> \verbatim
-*>          CB is CHARACTER*1
-*>          CA and CB specify the single characters to be compared.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup aux_blas
-*
-*  =====================================================================
-      LOGICAL FUNCTION LSAME(CA,CB)
-*
-*  -- Reference BLAS level1 routine (version 3.1) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      CHARACTER CA,CB
-*     ..
-*
-* =====================================================================
-*
-*     .. Intrinsic Functions ..
-      INTRINSIC ICHAR
-*     ..
-*     .. Local Scalars ..
-      INTEGER INTA,INTB,ZCODE
-*     ..
-*
-*     Test if the characters are equal
-*
-      LSAME = CA .EQ. CB
-      IF (LSAME) RETURN
-*
-*     Now test for equivalence if both characters are alphabetic.
-*
-      ZCODE = ICHAR('Z')
-*
-*     Use 'Z' rather than 'A' so that ASCII can be detected on Prime
-*     machines, on which ICHAR returns a value with bit 8 set.
-*     ICHAR('A') on Prime machines returns 193 which is the same as
-*     ICHAR('A') on an EBCDIC machine.
-*
-      INTA = ICHAR(CA)
-      INTB = ICHAR(CB)
-*
-      IF (ZCODE.EQ.90 .OR. ZCODE.EQ.122) THEN
-*
-*        ASCII is assumed - ZCODE is the ASCII code of either lower or
-*        upper case 'Z'.
-*
-          IF (INTA.GE.97 .AND. INTA.LE.122) INTA = INTA - 32
-          IF (INTB.GE.97 .AND. INTB.LE.122) INTB = INTB - 32
-*
-      ELSE IF (ZCODE.EQ.233 .OR. ZCODE.EQ.169) THEN
-*
-*        EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or
-*        upper case 'Z'.
-*
-          IF (INTA.GE.129 .AND. INTA.LE.137 .OR.
-     +        INTA.GE.145 .AND. INTA.LE.153 .OR.
-     +        INTA.GE.162 .AND. INTA.LE.169) INTA = INTA + 64
-          IF (INTB.GE.129 .AND. INTB.LE.137 .OR.
-     +        INTB.GE.145 .AND. INTB.LE.153 .OR.
-     +        INTB.GE.162 .AND. INTB.LE.169) INTB = INTB + 64
-*
-      ELSE IF (ZCODE.EQ.218 .OR. ZCODE.EQ.250) THEN
-*
-*        ASCII is assumed, on Prime machines - ZCODE is the ASCII code
-*        plus 128 of either lower or upper case 'Z'.
-*
-          IF (INTA.GE.225 .AND. INTA.LE.250) INTA = INTA - 32
-          IF (INTB.GE.225 .AND. INTB.LE.250) INTB = INTB - 32
-      END IF
-      LSAME = INTA .EQ. INTB
-*
-*     RETURN
-*
-*     End of LSAME
-*
-      END
diff --git a/superlu/BLAS/sasum.f b/superlu/BLAS/sasum.f
deleted file mode 100644
index a453ff70..00000000
--- a/superlu/BLAS/sasum.f
+++ /dev/null
@@ -1,112 +0,0 @@
-*> \brief \b SASUM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       REAL FUNCTION SASUM(N,SX,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    SASUM takes the sum of the absolute values.
-*>    uses unrolled loops for increment equal to one.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      REAL FUNCTION SASUM(N,SX,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      REAL STEMP
-      INTEGER I,M,MP1,NINCX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC ABS,MOD
-*     ..
-      SASUM = 0.0e0
-      STEMP = 0.0e0
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*        code for increment equal to 1
-*
-*
-*        clean-up loop
-*
-         M = MOD(N,6)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               STEMP = STEMP + ABS(SX(I))
-            END DO
-            IF (N.LT.6) THEN
-               SASUM = STEMP
-               RETURN
-            END IF
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,6
-            STEMP = STEMP + ABS(SX(I)) + ABS(SX(I+1)) +
-     $              ABS(SX(I+2)) + ABS(SX(I+3)) +
-     $              ABS(SX(I+4)) + ABS(SX(I+5))
-         END DO
-      ELSE
-*
-*        code for increment not equal to 1
-*
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            STEMP = STEMP + ABS(SX(I))
-         END DO
-      END IF
-      SASUM = STEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/saxpy.f b/superlu/BLAS/saxpy.f
deleted file mode 100644
index 610dfe79..00000000
--- a/superlu/BLAS/saxpy.f
+++ /dev/null
@@ -1,115 +0,0 @@
-*> \brief \b SAXPY
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SAXPY(N,SA,SX,INCX,SY,INCY)
-*
-*       .. Scalar Arguments ..
-*       REAL SA
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*),SY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    SAXPY constant times a vector plus a vector.
-*>    uses unrolled loops for increments equal to one.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SAXPY(N,SA,SX,INCX,SY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL SA
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*),SY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER I,IX,IY,M,MP1
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MOD
-*     ..
-      IF (N.LE.0) RETURN
-      IF (SA.EQ.0.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-*
-*        clean-up loop
-*
-         M = MOD(N,4)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               SY(I) = SY(I) + SA*SX(I)
-            END DO
-         END IF
-         IF (N.LT.4) RETURN
-         MP1 = M + 1
-         DO I = MP1,N,4
-            SY(I) = SY(I) + SA*SX(I)
-            SY(I+1) = SY(I+1) + SA*SX(I+1)
-            SY(I+2) = SY(I+2) + SA*SX(I+2)
-            SY(I+3) = SY(I+3) + SA*SX(I+3)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-          SY(IY) = SY(IY) + SA*SX(IX)
-          IX = IX + INCX
-          IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/scabs1.f b/superlu/BLAS/scabs1.f
deleted file mode 100644
index b68f76f2..00000000
--- a/superlu/BLAS/scabs1.f
+++ /dev/null
@@ -1,57 +0,0 @@
-*> \brief \b SCABS1
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       REAL FUNCTION SCABS1(Z)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX Z
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SCABS1 computes |Re(.)| + |Im(.)| of a complex number
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level1
-*
-*  =====================================================================
-      REAL FUNCTION SCABS1(Z)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX Z
-*     ..
-*
-*  =====================================================================
-*
-*     .. Intrinsic Functions ..
-      INTRINSIC ABS,AIMAG,REAL
-*     ..
-      SCABS1 = ABS(REAL(Z)) + ABS(AIMAG(Z))
-      RETURN
-      END
diff --git a/superlu/BLAS/scasum.f b/superlu/BLAS/scasum.f
deleted file mode 100644
index 5fc1a56a..00000000
--- a/superlu/BLAS/scasum.f
+++ /dev/null
@@ -1,97 +0,0 @@
-*> \brief \b SCASUM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       REAL FUNCTION SCASUM(N,CX,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX CX(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    SCASUM takes the sum of the (|Re(.)| + |Im(.)|)'s of a complex vector and
-*>    returns a single precision result.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      REAL FUNCTION SCASUM(N,CX,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX CX(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      REAL STEMP
-      INTEGER I,NINCX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC ABS,AIMAG,REAL
-*     ..
-      SCASUM = 0.0e0
-      STEMP = 0.0e0
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*
-*        code for increment equal to 1
-*
-         DO I = 1,N
-            STEMP = STEMP + ABS(REAL(CX(I))) + ABS(AIMAG(CX(I)))
-         END DO
-      ELSE
-*
-*        code for increment not equal to 1
-*
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            STEMP = STEMP + ABS(REAL(CX(I))) + ABS(AIMAG(CX(I)))
-         END DO
-      END IF
-      SCASUM = STEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/scnrm2.f b/superlu/BLAS/scnrm2.f
deleted file mode 100644
index 4f1f03a5..00000000
--- a/superlu/BLAS/scnrm2.f
+++ /dev/null
@@ -1,119 +0,0 @@
-*> \brief \b SCNRM2
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       REAL FUNCTION SCNRM2(N,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SCNRM2 returns the euclidean norm of a vector via the function
-*> name, so that
-*>
-*>    SCNRM2 := sqrt( x**H*x )
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  -- This version written on 25-October-1982.
-*>     Modified on 14-October-1993 to inline the call to CLASSQ.
-*>     Sven Hammarling, Nag Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      REAL FUNCTION SCNRM2(N,X,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ONE,ZERO
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL NORM,SCALE,SSQ,TEMP
-      INTEGER IX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC ABS,AIMAG,REAL,SQRT
-*     ..
-      IF (N.LT.1 .OR. INCX.LT.1) THEN
-          NORM = ZERO
-      ELSE
-          SCALE = ZERO
-          SSQ = ONE
-*        The following loop is equivalent to this call to the LAPACK
-*        auxiliary routine:
-*        CALL CLASSQ( N, X, INCX, SCALE, SSQ )
-*
-          DO 10 IX = 1,1 + (N-1)*INCX,INCX
-              IF (REAL(X(IX)).NE.ZERO) THEN
-                  TEMP = ABS(REAL(X(IX)))
-                  IF (SCALE.LT.TEMP) THEN
-                      SSQ = ONE + SSQ* (SCALE/TEMP)**2
-                      SCALE = TEMP
-                  ELSE
-                      SSQ = SSQ + (TEMP/SCALE)**2
-                  END IF
-              END IF
-              IF (AIMAG(X(IX)).NE.ZERO) THEN
-                  TEMP = ABS(AIMAG(X(IX)))
-                  IF (SCALE.LT.TEMP) THEN
-                      SSQ = ONE + SSQ* (SCALE/TEMP)**2
-                      SCALE = TEMP
-                  ELSE
-                      SSQ = SSQ + (TEMP/SCALE)**2
-                  END IF
-              END IF
-   10     CONTINUE
-          NORM = SCALE*SQRT(SSQ)
-      END IF
-*
-      SCNRM2 = NORM
-      RETURN
-*
-*     End of SCNRM2.
-*
-      END
diff --git a/superlu/BLAS/scopy.f b/superlu/BLAS/scopy.f
deleted file mode 100644
index 47557971..00000000
--- a/superlu/BLAS/scopy.f
+++ /dev/null
@@ -1,115 +0,0 @@
-*> \brief \b SCOPY
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SCOPY(N,SX,INCX,SY,INCY)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*),SY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    SCOPY copies a vector, x, to a vector, y.
-*>    uses unrolled loops for increments equal to 1.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SCOPY(N,SX,INCX,SY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*),SY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER I,IX,IY,M,MP1
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MOD
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-*
-*        clean-up loop
-*
-         M = MOD(N,7)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               SY(I) = SX(I)
-            END DO
-            IF (N.LT.7) RETURN
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,7
-            SY(I) = SX(I)
-            SY(I+1) = SX(I+1)
-            SY(I+2) = SX(I+2)
-            SY(I+3) = SX(I+3)
-            SY(I+4) = SX(I+4)
-            SY(I+5) = SX(I+5)
-            SY(I+6) = SX(I+6)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            SY(IY) = SX(IX)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/sdot.f b/superlu/BLAS/sdot.f
deleted file mode 100644
index 5a54ee24..00000000
--- a/superlu/BLAS/sdot.f
+++ /dev/null
@@ -1,117 +0,0 @@
-*> \brief \b SDOT
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       REAL FUNCTION SDOT(N,SX,INCX,SY,INCY)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*),SY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    SDOT forms the dot product of two vectors.
-*>    uses unrolled loops for increments equal to one.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      REAL FUNCTION SDOT(N,SX,INCX,SY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*),SY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      REAL STEMP
-      INTEGER I,IX,IY,M,MP1
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MOD
-*     ..
-      STEMP = 0.0e0
-      SDOT = 0.0e0
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-*
-*        clean-up loop
-*
-         M = MOD(N,5)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               STEMP = STEMP + SX(I)*SY(I)
-            END DO
-            IF (N.LT.5) THEN
-               SDOT=STEMP
-            RETURN
-            END IF
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,5
-          STEMP = STEMP + SX(I)*SY(I) + SX(I+1)*SY(I+1) +
-     $            SX(I+2)*SY(I+2) + SX(I+3)*SY(I+3) + SX(I+4)*SY(I+4)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            STEMP = STEMP + SX(IX)*SY(IY)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      SDOT = STEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/sdsdot.f b/superlu/BLAS/sdsdot.f
deleted file mode 100644
index 7ee6ad6b..00000000
--- a/superlu/BLAS/sdsdot.f
+++ /dev/null
@@ -1,255 +0,0 @@
-*> \brief \b SDSDOT
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       REAL FUNCTION SDSDOT(N,SB,SX,INCX,SY,INCY)
-*
-*       .. Scalar Arguments ..
-*       REAL SB
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*),SY(*)
-*       ..
-*
-*    PURPOSE
-*    =======
-*
-*    Compute the inner product of two vectors with extended
-*    precision accumulation.
-*
-*    Returns S.P. result with dot product accumulated in D.P.
-*    SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY),
-*    where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
-*    defined in a similar way using INCY.
-*
-*    AUTHOR
-*    ======
-*    Lawson, C. L., (JPL), Hanson, R. J., (SNLA),
-*    Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL)
-*
-*    ARGUMENTS
-*    =========
-*
-*    N      (input) INTEGER
-*           number of elements in input vector(s)
-*
-*    SB     (input) REAL
-*           single precision scalar to be added to inner product
-*
-*    SX     (input) REAL array, dimension (N)
-*           single precision vector with N elements
-*
-*    INCX   (input) INTEGER
-*           storage spacing between elements of SX
-*
-*    SY     (input) REAL array, dimension (N)
-*           single precision vector with N elements
-*
-*    INCY   (input) INTEGER
-*           storage spacing between elements of SY
-*
-*    SDSDOT (output) REAL
-*           single precision dot product (SB if N .LE. 0)
-*
-*    Further Details
-*    ===============
-*
-*    REFERENCES
-*
-*    C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T.
-*    Krogh, Basic linear algebra subprograms for Fortran
-*    usage, Algorithm No. 539, Transactions on Mathematical
-*    Software 5, 3 (September 1979), pp. 308-323.
-*
-*    REVISION HISTORY  (YYMMDD)
-*
-*    791001  DATE WRITTEN
-*    890531  Changed all specific intrinsics to generic.  (WRB)
-*    890831  Modified array declarations.  (WRB)
-*    890831  REVISION DATE from Version 3.2
-*    891214  Prologue converted to Version 4.0 format.  (BAB)
-*    920310  Corrected definition of LX in DESCRIPTION.  (WRB)
-*    920501  Reformatted the REFERENCES section.  (WRB)
-*    070118  Reformat to LAPACK coding style
-*
-*    =====================================================================
-*
-*       .. Local Scalars ..
-*       DOUBLE PRECISION DSDOT
-*       INTEGER I,KX,KY,NS
-*       ..
-*       .. Intrinsic Functions ..
-*       INTRINSIC DBLE
-*       ..
-*       DSDOT = SB
-*       IF (N.LE.0) THEN
-*          SDSDOT = DSDOT
-*          RETURN
-*       END IF
-*       IF (INCX.EQ.INCY .AND. INCX.GT.0) THEN
-*
-*       Code for equal and positive increments.
-*
-*          NS = N*INCX
-*          DO I = 1,NS,INCX
-*             DSDOT = DSDOT + DBLE(SX(I))*DBLE(SY(I))
-*          END DO
-*       ELSE
-*
-*       Code for unequal or nonpositive increments.
-*
-*          KX = 1
-*          KY = 1
-*          IF (INCX.LT.0) KX = 1 + (1-N)*INCX
-*          IF (INCY.LT.0) KY = 1 + (1-N)*INCY
-*          DO I = 1,N
-*             DSDOT = DSDOT + DBLE(SX(KX))*DBLE(SY(KY))
-*             KX = KX + INCX
-*             KY = KY + INCY
-*          END DO
-*       END IF
-*       SDSDOT = DSDOT
-*       RETURN
-*       END
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level1
-*
-*  =====================================================================
-      REAL FUNCTION SDSDOT(N,SB,SX,INCX,SY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL SB
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*),SY(*)
-*     ..
-*
-*  PURPOSE
-*  =======
-*
-*  Compute the inner product of two vectors with extended
-*  precision accumulation.
-*
-*  Returns S.P. result with dot product accumulated in D.P.
-*  SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY),
-*  where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
-*  defined in a similar way using INCY.
-*
-*  AUTHOR
-*  ======
-*  Lawson, C. L., (JPL), Hanson, R. J., (SNLA),
-*  Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL)
-*
-*  ARGUMENTS
-*  =========
-*
-*  N      (input) INTEGER
-*         number of elements in input vector(s)
-*
-*  SB     (input) REAL
-*         single precision scalar to be added to inner product
-*
-*  SX     (input) REAL array, dimension (N)
-*         single precision vector with N elements
-*
-*  INCX   (input) INTEGER
-*         storage spacing between elements of SX
-*
-*  SY     (input) REAL array, dimension (N)
-*         single precision vector with N elements
-*
-*  INCY   (input) INTEGER
-*         storage spacing between elements of SY
-*
-*  SDSDOT (output) REAL
-*         single precision dot product (SB if N .LE. 0)
-*
-*  Further Details
-*  ===============
-*
-*  REFERENCES
-*
-*  C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T.
-*  Krogh, Basic linear algebra subprograms for Fortran
-*  usage, Algorithm No. 539, Transactions on Mathematical
-*  Software 5, 3 (September 1979), pp. 308-323.
-*
-*  REVISION HISTORY  (YYMMDD)
-*
-*  791001  DATE WRITTEN
-*  890531  Changed all specific intrinsics to generic.  (WRB)
-*  890831  Modified array declarations.  (WRB)
-*  890831  REVISION DATE from Version 3.2
-*  891214  Prologue converted to Version 4.0 format.  (BAB)
-*  920310  Corrected definition of LX in DESCRIPTION.  (WRB)
-*  920501  Reformatted the REFERENCES section.  (WRB)
-*  070118  Reformat to LAPACK coding style
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      DOUBLE PRECISION DSDOT
-      INTEGER I,KX,KY,NS
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DBLE
-*     ..
-      DSDOT = SB
-      IF (N.LE.0) THEN
-         SDSDOT = DSDOT
-         RETURN
-      END IF
-      IF (INCX.EQ.INCY .AND. INCX.GT.0) THEN
-*
-*     Code for equal and positive increments.
-*
-         NS = N*INCX
-         DO I = 1,NS,INCX
-            DSDOT = DSDOT + DBLE(SX(I))*DBLE(SY(I))
-         END DO
-      ELSE
-*
-*     Code for unequal or nonpositive increments.
-*
-         KX = 1
-         KY = 1
-         IF (INCX.LT.0) KX = 1 + (1-N)*INCX
-         IF (INCY.LT.0) KY = 1 + (1-N)*INCY
-         DO I = 1,N
-            DSDOT = DSDOT + DBLE(SX(KX))*DBLE(SY(KY))
-            KX = KX + INCX
-            KY = KY + INCY
-         END DO
-      END IF
-      SDSDOT = DSDOT
-      RETURN
-      END
diff --git a/superlu/BLAS/sgbmv.f b/superlu/BLAS/sgbmv.f
deleted file mode 100644
index 92896324..00000000
--- a/superlu/BLAS/sgbmv.f
+++ /dev/null
@@ -1,370 +0,0 @@
-*> \brief \b SGBMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA,BETA
-*       INTEGER INCX,INCY,KL,KU,LDA,M,N
-*       CHARACTER TRANS
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SGBMV  performs one of the matrix-vector operations
-*>
-*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are vectors and A is an
-*> m by n band matrix, with kl sub-diagonals and ku super-diagonals.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
-*>
-*>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
-*>
-*>              TRANS = 'C' or 'c'   y := alpha*A**T*x + beta*y.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] KL
-*> \verbatim
-*>          KL is INTEGER
-*>           On entry, KL specifies the number of sub-diagonals of the
-*>           matrix A. KL must satisfy  0 .le. KL.
-*> \endverbatim
-*>
-*> \param[in] KU
-*> \verbatim
-*>          KU is INTEGER
-*>           On entry, KU specifies the number of super-diagonals of the
-*>           matrix A. KU must satisfy  0 .le. KU.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading ( kl + ku + 1 ) by n part of the
-*>           array A must contain the matrix of coefficients, supplied
-*>           column by column, with the leading diagonal of the matrix in
-*>           row ( ku + 1 ) of the array, the first super-diagonal
-*>           starting at position 2 in row ku, the first sub-diagonal
-*>           starting at position 1 in row ( ku + 2 ), and so on.
-*>           Elements in the array A that do not correspond to elements
-*>           in the band matrix (such as the top left ku by ku triangle)
-*>           are not referenced.
-*>           The following program segment will transfer a band matrix
-*>           from conventional full matrix storage to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    K = KU + 1 - J
-*>                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL )
-*>                       A( K + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( kl + ku + 1 ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is REAL array of DIMENSION at least
-*>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
-*>           Before entry, the incremented array Y must contain the
-*>           vector y. On exit, Y is overwritten by the updated vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA,BETA
-      INTEGER INCX,INCY,KL,KU,LDA,M,N
-      CHARACTER TRANS
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ONE,ZERO
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-      INTEGER I,INFO,IX,IY,J,JX,JY,K,KUP1,KX,KY,LENX,LENY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX,MIN
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +    .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 1
-      ELSE IF (M.LT.0) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (KL.LT.0) THEN
-          INFO = 4
-      ELSE IF (KU.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (KL+KU+1)) THEN
-          INFO = 8
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 10
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 13
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SGBMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
-*     up the start points in  X  and  Y.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          LENX = N
-          LENY = M
-      ELSE
-          LENX = M
-          LENY = N
-      END IF
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (LENX-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (LENY-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the band part of A.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,LENY
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,LENY
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,LENY
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,LENY
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      KUP1 = KU + 1
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  y := alpha*A*x + y.
-*
-          JX = KX
-          IF (INCY.EQ.1) THEN
-              DO 60 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  K = KUP1 - J
-                  DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
-                      Y(I) = Y(I) + TEMP*A(K+I,J)
-   50             CONTINUE
-                  JX = JX + INCX
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  IY = KY
-                  K = KUP1 - J
-                  DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
-                      Y(IY) = Y(IY) + TEMP*A(K+I,J)
-                      IY = IY + INCY
-   70             CONTINUE
-                  JX = JX + INCX
-                  IF (J.GT.KU) KY = KY + INCY
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y := alpha*A**T*x + y.
-*
-          JY = KY
-          IF (INCX.EQ.1) THEN
-              DO 100 J = 1,N
-                  TEMP = ZERO
-                  K = KUP1 - J
-                  DO 90 I = MAX(1,J-KU),MIN(M,J+KL)
-                      TEMP = TEMP + A(K+I,J)*X(I)
-   90             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  100         CONTINUE
-          ELSE
-              DO 120 J = 1,N
-                  TEMP = ZERO
-                  IX = KX
-                  K = KUP1 - J
-                  DO 110 I = MAX(1,J-KU),MIN(M,J+KL)
-                      TEMP = TEMP + A(K+I,J)*X(IX)
-                      IX = IX + INCX
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-                  IF (J.GT.KU) KX = KX + INCX
-  120         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of SGBMV .
-*
-      END
diff --git a/superlu/BLAS/sgemm.f b/superlu/BLAS/sgemm.f
deleted file mode 100644
index d7bdb9c4..00000000
--- a/superlu/BLAS/sgemm.f
+++ /dev/null
@@ -1,384 +0,0 @@
-*> \brief \b SGEMM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA,BETA
-*       INTEGER K,LDA,LDB,LDC,M,N
-*       CHARACTER TRANSA,TRANSB
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SGEMM  performs one of the matrix-matrix operations
-*>
-*>    C := alpha*op( A )*op( B ) + beta*C,
-*>
-*> where  op( X ) is one of
-*>
-*>    op( X ) = X   or   op( X ) = X**T,
-*>
-*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
-*> an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSA = 'N' or 'n',  op( A ) = A.
-*>
-*>              TRANSA = 'T' or 't',  op( A ) = A**T.
-*>
-*>              TRANSA = 'C' or 'c',  op( A ) = A**T.
-*> \endverbatim
-*>
-*> \param[in] TRANSB
-*> \verbatim
-*>          TRANSB is CHARACTER*1
-*>           On entry, TRANSB specifies the form of op( B ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSB = 'N' or 'n',  op( B ) = B.
-*>
-*>              TRANSB = 'T' or 't',  op( B ) = B**T.
-*>
-*>              TRANSB = 'C' or 'c',  op( B ) = B**T.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies  the number  of rows  of the  matrix
-*>           op( A )  and of the  matrix  C.  M  must  be at least  zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N  specifies the number  of columns of the matrix
-*>           op( B ) and the number of columns of the matrix C. N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry,  K  specifies  the number of columns of the matrix
-*>           op( A ) and the number of rows of the matrix op( B ). K must
-*>           be at least  zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
-*>           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by m  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is REAL array of DIMENSION ( LDB, kb ), where kb is
-*>           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
-*>           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  n by k  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
-*>           LDB must be at least  max( 1, k ), otherwise  LDB must be at
-*>           least  max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is REAL array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n  matrix
-*>           ( alpha*op( A )*op( B ) + beta*C ).
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA,BETA
-      INTEGER K,LDA,LDB,LDC,M,N
-      CHARACTER TRANSA,TRANSB
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-      INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB
-      LOGICAL NOTA,NOTB
-*     ..
-*     .. Parameters ..
-      REAL ONE,ZERO
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*
-*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
-*     transposed and set  NROWA, NCOLA and  NROWB  as the number of rows
-*     and  columns of  A  and the  number of  rows  of  B  respectively.
-*
-      NOTA = LSAME(TRANSA,'N')
-      NOTB = LSAME(TRANSB,'N')
-      IF (NOTA) THEN
-          NROWA = M
-          NCOLA = K
-      ELSE
-          NROWA = K
-          NCOLA = M
-      END IF
-      IF (NOTB) THEN
-          NROWB = K
-      ELSE
-          NROWB = N
-      END IF
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF ((.NOT.NOTA) .AND. (.NOT.LSAME(TRANSA,'C')) .AND.
-     +    (.NOT.LSAME(TRANSA,'T'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.NOTB) .AND. (.NOT.LSAME(TRANSB,'C')) .AND.
-     +         (.NOT.LSAME(TRANSB,'T'))) THEN
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 8
-      ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
-          INFO = 10
-      ELSE IF (LDC.LT.MAX(1,M)) THEN
-          INFO = 13
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SGEMM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And if  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (NOTB) THEN
-          IF (NOTA) THEN
-*
-*           Form  C := alpha*A*B + beta*C.
-*
-              DO 90 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 50 I = 1,M
-                          C(I,J) = ZERO
-   50                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 60 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-   60                 CONTINUE
-                  END IF
-                  DO 80 L = 1,K
-                      TEMP = ALPHA*B(L,J)
-                      DO 70 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-   70                 CONTINUE
-   80             CONTINUE
-   90         CONTINUE
-          ELSE
-*
-*           Form  C := alpha*A**T*B + beta*C
-*
-              DO 120 J = 1,N
-                  DO 110 I = 1,M
-                      TEMP = ZERO
-                      DO 100 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(L,J)
-  100                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  110             CONTINUE
-  120         CONTINUE
-          END IF
-      ELSE
-          IF (NOTA) THEN
-*
-*           Form  C := alpha*A*B**T + beta*C
-*
-              DO 170 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 130 I = 1,M
-                          C(I,J) = ZERO
-  130                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 140 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-  140                 CONTINUE
-                  END IF
-                  DO 160 L = 1,K
-                      TEMP = ALPHA*B(J,L)
-                      DO 150 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-  150                 CONTINUE
-  160             CONTINUE
-  170         CONTINUE
-          ELSE
-*
-*           Form  C := alpha*A**T*B**T + beta*C
-*
-              DO 200 J = 1,N
-                  DO 190 I = 1,M
-                      TEMP = ZERO
-                      DO 180 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(J,L)
-  180                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  190             CONTINUE
-  200         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of SGEMM .
-*
-      END
diff --git a/superlu/BLAS/sgemv.f b/superlu/BLAS/sgemv.f
deleted file mode 100644
index 0dfb1fc0..00000000
--- a/superlu/BLAS/sgemv.f
+++ /dev/null
@@ -1,330 +0,0 @@
-*> \brief \b SGEMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA,BETA
-*       INTEGER INCX,INCY,LDA,M,N
-*       CHARACTER TRANS
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SGEMV  performs one of the matrix-vector operations
-*>
-*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are vectors and A is an
-*> m by n matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
-*>
-*>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
-*>
-*>              TRANS = 'C' or 'c'   y := alpha*A**T*x + beta*y.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is REAL array of DIMENSION at least
-*>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
-*>           Before entry with BETA non-zero, the incremented array Y
-*>           must contain the vector y. On exit, Y is overwritten by the
-*>           updated vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA,BETA
-      INTEGER INCX,INCY,LDA,M,N
-      CHARACTER TRANS
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ONE,ZERO
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-      INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +    .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 1
-      ELSE IF (M.LT.0) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (LDA.LT.MAX(1,M)) THEN
-          INFO = 6
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 8
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SGEMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
-*     up the start points in  X  and  Y.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          LENX = N
-          LENY = M
-      ELSE
-          LENX = M
-          LENY = N
-      END IF
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (LENX-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (LENY-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,LENY
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,LENY
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,LENY
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,LENY
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  y := alpha*A*x + y.
-*
-          JX = KX
-          IF (INCY.EQ.1) THEN
-              DO 60 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  DO 50 I = 1,M
-                      Y(I) = Y(I) + TEMP*A(I,J)
-   50             CONTINUE
-                  JX = JX + INCX
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  IY = KY
-                  DO 70 I = 1,M
-                      Y(IY) = Y(IY) + TEMP*A(I,J)
-                      IY = IY + INCY
-   70             CONTINUE
-                  JX = JX + INCX
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y := alpha*A**T*x + y.
-*
-          JY = KY
-          IF (INCX.EQ.1) THEN
-              DO 100 J = 1,N
-                  TEMP = ZERO
-                  DO 90 I = 1,M
-                      TEMP = TEMP + A(I,J)*X(I)
-   90             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  100         CONTINUE
-          ELSE
-              DO 120 J = 1,N
-                  TEMP = ZERO
-                  IX = KX
-                  DO 110 I = 1,M
-                      TEMP = TEMP + A(I,J)*X(IX)
-                      IX = IX + INCX
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of SGEMV .
-*
-      END
diff --git a/superlu/BLAS/sger.f b/superlu/BLAS/sger.f
deleted file mode 100644
index c2a9958f..00000000
--- a/superlu/BLAS/sger.f
+++ /dev/null
@@ -1,227 +0,0 @@
-*> \brief \b SGER
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SGER(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       INTEGER INCX,INCY,LDA,M,N
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SGER   performs the rank 1 operation
-*>
-*>    A := alpha*x*y**T + A,
-*>
-*> where alpha is a scalar, x is an m element vector, y is an n element
-*> vector and A is an m by n matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the m
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients. On exit, A is
-*>           overwritten by the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SGER(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA
-      INTEGER INCX,INCY,LDA,M,N
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-      INTEGER I,INFO,IX,J,JY,KX
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (M.LT.0) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 7
-      ELSE IF (LDA.LT.MAX(1,M)) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SGER  ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (INCY.GT.0) THEN
-          JY = 1
-      ELSE
-          JY = 1 - (N-1)*INCY
-      END IF
-      IF (INCX.EQ.1) THEN
-          DO 20 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  DO 10 I = 1,M
-                      A(I,J) = A(I,J) + X(I)*TEMP
-   10             CONTINUE
-              END IF
-              JY = JY + INCY
-   20     CONTINUE
-      ELSE
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (M-1)*INCX
-          END IF
-          DO 40 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  IX = KX
-                  DO 30 I = 1,M
-                      A(I,J) = A(I,J) + X(IX)*TEMP
-                      IX = IX + INCX
-   30             CONTINUE
-              END IF
-              JY = JY + INCY
-   40     CONTINUE
-      END IF
-*
-      RETURN
-*
-*     End of SGER  .
-*
-      END
diff --git a/superlu/BLAS/snrm2.f b/superlu/BLAS/snrm2.f
deleted file mode 100644
index 7de03d22..00000000
--- a/superlu/BLAS/snrm2.f
+++ /dev/null
@@ -1,112 +0,0 @@
-*> \brief \b SNRM2
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       REAL FUNCTION SNRM2(N,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       REAL X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SNRM2 returns the euclidean norm of a vector via the function
-*> name, so that
-*>
-*>    SNRM2 := sqrt( x'*x ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  -- This version written on 25-October-1982.
-*>     Modified on 14-October-1993 to inline the call to SLASSQ.
-*>     Sven Hammarling, Nag Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      REAL FUNCTION SNRM2(N,X,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      REAL X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ONE,ZERO
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL ABSXI,NORM,SCALE,SSQ
-      INTEGER IX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC ABS,SQRT
-*     ..
-      IF (N.LT.1 .OR. INCX.LT.1) THEN
-          NORM = ZERO
-      ELSE IF (N.EQ.1) THEN
-          NORM = ABS(X(1))
-      ELSE
-          SCALE = ZERO
-          SSQ = ONE
-*        The following loop is equivalent to this call to the LAPACK
-*        auxiliary routine:
-*        CALL SLASSQ( N, X, INCX, SCALE, SSQ )
-*
-          DO 10 IX = 1,1 + (N-1)*INCX,INCX
-              IF (X(IX).NE.ZERO) THEN
-                  ABSXI = ABS(X(IX))
-                  IF (SCALE.LT.ABSXI) THEN
-                      SSQ = ONE + SSQ* (SCALE/ABSXI)**2
-                      SCALE = ABSXI
-                  ELSE
-                      SSQ = SSQ + (ABSXI/SCALE)**2
-                  END IF
-              END IF
-   10     CONTINUE
-          NORM = SCALE*SQRT(SSQ)
-      END IF
-*
-      SNRM2 = NORM
-      RETURN
-*
-*     End of SNRM2.
-*
-      END
diff --git a/superlu/BLAS/srot.f b/superlu/BLAS/srot.f
deleted file mode 100644
index fa8e2958..00000000
--- a/superlu/BLAS/srot.f
+++ /dev/null
@@ -1,101 +0,0 @@
-*> \brief \b SROT
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SROT(N,SX,INCX,SY,INCY,C,S)
-*
-*       .. Scalar Arguments ..
-*       REAL C,S
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*),SY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    applies a plane rotation.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SROT(N,SX,INCX,SY,INCY,C,S)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL C,S
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*),SY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      REAL STEMP
-      INTEGER I,IX,IY
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*       code for both increments equal to 1
-*
-         DO I = 1,N
-            STEMP = C*SX(I) + S*SY(I)
-            SY(I) = C*SY(I) - S*SX(I)
-            SX(I) = STEMP
-         END DO
-      ELSE
-*
-*       code for unequal increments or equal increments not equal
-*         to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            STEMP = C*SX(IX) + S*SY(IY)
-            SY(IY) = C*SY(IY) - S*SX(IX)
-            SX(IX) = STEMP
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/srotg.f b/superlu/BLAS/srotg.f
deleted file mode 100644
index b4484fb3..00000000
--- a/superlu/BLAS/srotg.f
+++ /dev/null
@@ -1,86 +0,0 @@
-*> \brief \b SROTG
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SROTG(SA,SB,C,S)
-*
-*       .. Scalar Arguments ..
-*       REAL C,S,SA,SB
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    SROTG construct givens plane rotation.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SROTG(SA,SB,C,S)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL C,S,SA,SB
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      REAL R,ROE,SCALE,Z
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC ABS,SIGN,SQRT
-*     ..
-      ROE = SB
-      IF (ABS(SA).GT.ABS(SB)) ROE = SA
-      SCALE = ABS(SA) + ABS(SB)
-      IF (SCALE.EQ.0.0) THEN
-         C = 1.0
-         S = 0.0
-         R = 0.0
-         Z = 0.0
-      ELSE
-         R = SCALE*SQRT((SA/SCALE)**2+ (SB/SCALE)**2)
-         R = SIGN(1.0,ROE)*R
-         C = SA/R
-         S = SB/R
-         Z = 1.0
-         IF (ABS(SA).GT.ABS(SB)) Z = S
-         IF (ABS(SB).GE.ABS(SA) .AND. C.NE.0.0) Z = 1.0/C
-      END IF
-      SA = R
-      SB = Z
-      RETURN
-      END
diff --git a/superlu/BLAS/srotm.f b/superlu/BLAS/srotm.f
deleted file mode 100644
index c71f7f01..00000000
--- a/superlu/BLAS/srotm.f
+++ /dev/null
@@ -1,203 +0,0 @@
-*> \brief \b SROTM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SROTM(N,SX,INCX,SY,INCY,SPARAM)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       REAL SPARAM(5),SX(*),SY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    APPLY THE MODIFIED GIVENS TRANSFORMATION, H, TO THE 2 BY N MATRIX
-*>
-*>    (SX**T) , WHERE **T INDICATES TRANSPOSE. THE ELEMENTS OF SX ARE IN
-*>    (SX**T)
-*>
-*>    SX(LX+I*INCX), I = 0 TO N-1, WHERE LX = 1 IF INCX .GE. 0, ELSE
-*>    LX = (-INCX)*N, AND SIMILARLY FOR SY USING USING LY AND INCY.
-*>    WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS..
-*>
-*>    SFLAG=-1.E0     SFLAG=0.E0        SFLAG=1.E0     SFLAG=-2.E0
-*>
-*>      (SH11  SH12)    (1.E0  SH12)    (SH11  1.E0)    (1.E0  0.E0)
-*>    H=(          )    (          )    (          )    (          )
-*>      (SH21  SH22),   (SH21  1.E0),   (-1.E0 SH22),   (0.E0  1.E0).
-*>    SEE  SROTMG FOR A DESCRIPTION OF DATA STORAGE IN SPARAM.
-*>
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>         number of elements in input vector(s)
-*> \endverbatim
-*>
-*> \param[in,out] SX
-*> \verbatim
-*>          SX is REAL array, dimension N
-*>         double precision vector with N elements
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>         storage spacing between elements of SX
-*> \endverbatim
-*>
-*> \param[in,out] SY
-*> \verbatim
-*>          SY is REAL array, dimension N
-*>         double precision vector with N elements
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>         storage spacing between elements of SY
-*> \endverbatim
-*>
-*> \param[in,out] SPARAM
-*> \verbatim
-*>          SPARAM is REAL array, dimension 5
-*>     SPARAM(1)=SFLAG
-*>     SPARAM(2)=SH11
-*>     SPARAM(3)=SH21
-*>     SPARAM(4)=SH12
-*>     SPARAM(5)=SH22
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level1
-*
-*  =====================================================================
-      SUBROUTINE SROTM(N,SX,INCX,SY,INCY,SPARAM)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      REAL SPARAM(5),SX(*),SY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      REAL SFLAG,SH11,SH12,SH21,SH22,TWO,W,Z,ZERO
-      INTEGER I,KX,KY,NSTEPS
-*     ..
-*     .. Data statements ..
-      DATA ZERO,TWO/0.E0,2.E0/
-*     ..
-*
-      SFLAG = SPARAM(1)
-      IF (N.LE.0 .OR. (SFLAG+TWO.EQ.ZERO)) RETURN
-      IF (INCX.EQ.INCY.AND.INCX.GT.0) THEN
-*
-         NSTEPS = N*INCX
-         IF (SFLAG.LT.ZERO) THEN
-            SH11 = SPARAM(2)
-            SH12 = SPARAM(4)
-            SH21 = SPARAM(3)
-            SH22 = SPARAM(5)
-            DO I = 1,NSTEPS,INCX
-               W = SX(I)
-               Z = SY(I)
-               SX(I) = W*SH11 + Z*SH12
-               SY(I) = W*SH21 + Z*SH22
-            END DO
-         ELSE IF (SFLAG.EQ.ZERO) THEN
-            SH12 = SPARAM(4)
-            SH21 = SPARAM(3)
-            DO I = 1,NSTEPS,INCX
-               W = SX(I)
-               Z = SY(I)
-               SX(I) = W + Z*SH12
-               SY(I) = W*SH21 + Z
-            END DO
-         ELSE
-            SH11 = SPARAM(2)
-            SH22 = SPARAM(5)
-            DO I = 1,NSTEPS,INCX
-               W = SX(I)
-               Z = SY(I)
-               SX(I) = W*SH11 + Z
-               SY(I) = -W + SH22*Z
-            END DO
-         END IF
-      ELSE
-         KX = 1
-         KY = 1
-         IF (INCX.LT.0) KX = 1 + (1-N)*INCX
-         IF (INCY.LT.0) KY = 1 + (1-N)*INCY
-*
-         IF (SFLAG.LT.ZERO) THEN
-            SH11 = SPARAM(2)
-            SH12 = SPARAM(4)
-            SH21 = SPARAM(3)
-            SH22 = SPARAM(5)
-            DO I = 1,N
-               W = SX(KX)
-               Z = SY(KY)
-               SX(KX) = W*SH11 + Z*SH12
-               SY(KY) = W*SH21 + Z*SH22
-               KX = KX + INCX
-               KY = KY + INCY
-            END DO
-         ELSE IF (SFLAG.EQ.ZERO) THEN
-            SH12 = SPARAM(4)
-            SH21 = SPARAM(3)
-            DO I = 1,N
-               W = SX(KX)
-               Z = SY(KY)
-               SX(KX) = W + Z*SH12
-               SY(KY) = W*SH21 + Z
-               KX = KX + INCX
-               KY = KY + INCY
-            END DO
-         ELSE
-             SH11 = SPARAM(2)
-             SH22 = SPARAM(5)
-             DO I = 1,N
-                W = SX(KX)
-                Z = SY(KY)
-                SX(KX) = W*SH11 + Z
-                SY(KY) = -W + SH22*Z
-                KX = KX + INCX
-                KY = KY + INCY
-            END DO
-         END IF
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/srotmg.f b/superlu/BLAS/srotmg.f
deleted file mode 100644
index a5077c06..00000000
--- a/superlu/BLAS/srotmg.f
+++ /dev/null
@@ -1,251 +0,0 @@
-*> \brief \b SROTMG
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SROTMG(SD1,SD2,SX1,SY1,SPARAM)
-*
-*       .. Scalar Arguments ..
-*       REAL SD1,SD2,SX1,SY1
-*       ..
-*       .. Array Arguments ..
-*       REAL SPARAM(5)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    CONSTRUCT THE MODIFIED GIVENS TRANSFORMATION MATRIX H WHICH ZEROS
-*>    THE SECOND COMPONENT OF THE 2-VECTOR  (SQRT(SD1)*SX1,SQRT(SD2)*>    
SY2)**T.
-*>    WITH SPARAM(1)=SFLAG, H HAS ONE OF THE FOLLOWING FORMS..
-*>
-*>    SFLAG=-1.E0     SFLAG=0.E0        SFLAG=1.E0     SFLAG=-2.E0
-*>
-*>      (SH11  SH12)    (1.E0  SH12)    (SH11  1.E0)    (1.E0  0.E0)
-*>    H=(          )    (          )    (          )    (          )
-*>      (SH21  SH22),   (SH21  1.E0),   (-1.E0 SH22),   (0.E0  1.E0).
-*>    LOCATIONS 2-4 OF SPARAM CONTAIN SH11,SH21,SH12, AND SH22
-*>    RESPECTIVELY. (VALUES OF 1.E0, -1.E0, OR 0.E0 IMPLIED BY THE
-*>    VALUE OF SPARAM(1) ARE NOT STORED IN SPARAM.)
-*>
-*>    THE VALUES OF GAMSQ AND RGAMSQ SET IN THE DATA STATEMENT MAY BE
-*>    INEXACT.  THIS IS OK AS THEY ARE ONLY USED FOR TESTING THE SIZE
-*>    OF SD1 AND SD2.  ALL ACTUAL SCALING OF DATA IS DONE USING GAM.
-*>
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in,out] SD1
-*> \verbatim
-*>          SD1 is REAL
-*> \endverbatim
-*>
-*> \param[in,out] SD2
-*> \verbatim
-*>          SD2 is REAL
-*> \endverbatim
-*>
-*> \param[in,out] SX1
-*> \verbatim
-*>          SX1 is REAL
-*> \endverbatim
-*>
-*> \param[in] SY1
-*> \verbatim
-*>          SY1 is REAL
-*> \endverbatim
-*>
-*> \param[in,out] SPARAM
-*> \verbatim
-*>          SPARAM is REAL array, dimension 5
-*>     SPARAM(1)=SFLAG
-*>     SPARAM(2)=SH11
-*>     SPARAM(3)=SH21
-*>     SPARAM(4)=SH12
-*>     SPARAM(5)=SH22
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level1
-*
-*  =====================================================================
-      SUBROUTINE SROTMG(SD1,SD2,SX1,SY1,SPARAM)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL SD1,SD2,SX1,SY1
-*     ..
-*     .. Array Arguments ..
-      REAL SPARAM(5)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      REAL GAM,GAMSQ,ONE,RGAMSQ,SFLAG,SH11,SH12,SH21,SH22,SP1,SP2,SQ1,
-     $     SQ2,STEMP,SU,TWO,ZERO
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC ABS
-*     ..
-*     .. Data statements ..
-*
-      DATA ZERO,ONE,TWO/0.E0,1.E0,2.E0/
-      DATA GAM,GAMSQ,RGAMSQ/4096.E0,1.67772E7,5.96046E-8/
-*     ..
-
-      IF (SD1.LT.ZERO) THEN
-*        GO ZERO-H-D-AND-SX1..
-         SFLAG = -ONE
-         SH11 = ZERO
-         SH12 = ZERO
-         SH21 = ZERO
-         SH22 = ZERO
-*
-         SD1 = ZERO
-         SD2 = ZERO
-         SX1 = ZERO
-      ELSE
-*        CASE-SD1-NONNEGATIVE
-         SP2 = SD2*SY1
-         IF (SP2.EQ.ZERO) THEN
-            SFLAG = -TWO
-            SPARAM(1) = SFLAG
-            RETURN
-         END IF
-*        REGULAR-CASE..
-         SP1 = SD1*SX1
-         SQ2 = SP2*SY1
-         SQ1 = SP1*SX1
-*
-         IF (ABS(SQ1).GT.ABS(SQ2)) THEN
-            SH21 = -SY1/SX1
-            SH12 = SP2/SP1
-*
-            SU = ONE - SH12*SH21
-*
-           IF (SU.GT.ZERO) THEN
-             SFLAG = ZERO
-             SD1 = SD1/SU
-             SD2 = SD2/SU
-             SX1 = SX1*SU
-           END IF
-         ELSE
-
-            IF (SQ2.LT.ZERO) THEN
-*              GO ZERO-H-D-AND-SX1..
-               SFLAG = -ONE
-               SH11 = ZERO
-               SH12 = ZERO
-               SH21 = ZERO
-               SH22 = ZERO
-*
-               SD1 = ZERO
-               SD2 = ZERO
-               SX1 = ZERO
-            ELSE
-               SFLAG = ONE
-               SH11 = SP1/SP2
-               SH22 = SX1/SY1
-               SU = ONE + SH11*SH22
-               STEMP = SD2/SU
-               SD2 = SD1/SU
-               SD1 = STEMP
-               SX1 = SY1*SU
-            END IF
-         END IF
-
-*     PROCESURE..SCALE-CHECK
-         IF (SD1.NE.ZERO) THEN
-            DO WHILE ((SD1.LE.RGAMSQ) .OR. (SD1.GE.GAMSQ))
-               IF (SFLAG.EQ.ZERO) THEN
-                  SH11 = ONE
-                  SH22 = ONE
-                  SFLAG = -ONE
-               ELSE
-                  SH21 = -ONE
-                  SH12 = ONE
-                  SFLAG = -ONE
-               END IF
-               IF (SD1.LE.RGAMSQ) THEN
-                  SD1 = SD1*GAM**2
-                  SX1 = SX1/GAM
-                  SH11 = SH11/GAM
-                  SH12 = SH12/GAM
-               ELSE
-                  SD1 = SD1/GAM**2
-                  SX1 = SX1*GAM
-                  SH11 = SH11*GAM
-                  SH12 = SH12*GAM
-               END IF
-            ENDDO
-         END IF
-
-         IF (SD2.NE.ZERO) THEN
-            DO WHILE ( (ABS(SD2).LE.RGAMSQ) .OR. (ABS(SD2).GE.GAMSQ) )
-               IF (SFLAG.EQ.ZERO) THEN
-                  SH11 = ONE
-                  SH22 = ONE
-                  SFLAG = -ONE
-               ELSE
-                  SH21 = -ONE
-                  SH12 = ONE
-                  SFLAG = -ONE
-               END IF
-               IF (ABS(SD2).LE.RGAMSQ) THEN
-                  SD2 = SD2*GAM**2
-                  SH21 = SH21/GAM
-                  SH22 = SH22/GAM
-               ELSE
-                  SD2 = SD2/GAM**2
-                  SH21 = SH21*GAM
-                  SH22 = SH22*GAM
-               END IF
-            END DO
-         END IF
-
-      END IF
-
-      IF (SFLAG.LT.ZERO) THEN
-         SPARAM(2) = SH11
-         SPARAM(3) = SH21
-         SPARAM(4) = SH12
-         SPARAM(5) = SH22
-      ELSE IF (SFLAG.EQ.ZERO) THEN
-         SPARAM(3) = SH21
-         SPARAM(4) = SH12
-      ELSE
-         SPARAM(2) = SH11
-         SPARAM(5) = SH22
-      END IF
-
-      SPARAM(1) = SFLAG
-      RETURN
-      END
-
-
-
-
diff --git a/superlu/BLAS/ssbmv.f b/superlu/BLAS/ssbmv.f
deleted file mode 100644
index b711d8b0..00000000
--- a/superlu/BLAS/ssbmv.f
+++ /dev/null
@@ -1,375 +0,0 @@
-*> \brief \b SSBMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SSBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA,BETA
-*       INTEGER INCX,INCY,K,LDA,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SSBMV  performs the matrix-vector  operation
-*>
-*>    y := alpha*A*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n symmetric band matrix, with k super-diagonals.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the band matrix A is being supplied as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  being supplied.
-*>
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  being supplied.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry, K specifies the number of super-diagonals of the
-*>           matrix A. K must satisfy  0 .le. K.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the symmetric matrix, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer the upper
-*>           triangular part of a symmetric band matrix from conventional
-*>           full matrix storage to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the symmetric matrix, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer the lower
-*>           triangular part of a symmetric band matrix from conventional
-*>           full matrix storage to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is REAL array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the
-*>           vector y. On exit, Y is overwritten by the updated vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SSBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA,BETA
-      INTEGER INCX,INCY,K,LDA,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ONE,ZERO
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,KPLUS1,KX,KY,L
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX,MIN
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (K.LT.0) THEN
-          INFO = 3
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 6
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 8
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SSBMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     Set up the start points in  X  and  Y.
-*
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of the array A
-*     are accessed sequentially with one pass through A.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  y  when upper triangle of A is stored.
-*
-          KPLUS1 = K + 1
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  L = KPLUS1 - J
-                  DO 50 I = MAX(1,J-K),J - 1
-                      Y(I) = Y(I) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + A(L+I,J)*X(I)
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*A(KPLUS1,J) + ALPHA*TEMP2
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  L = KPLUS1 - J
-                  DO 70 I = MAX(1,J-K),J - 1
-                      Y(IY) = Y(IY) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + A(L+I,J)*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*A(KPLUS1,J) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  IF (J.GT.K) THEN
-                      KX = KX + INCX
-                      KY = KY + INCY
-                  END IF
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y  when lower triangle of A is stored.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*A(1,J)
-                  L = 1 - J
-                  DO 90 I = J + 1,MIN(N,J+K)
-                      Y(I) = Y(I) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + A(L+I,J)*X(I)
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*A(1,J)
-                  L = 1 - J
-                  IX = JX
-                  IY = JY
-                  DO 110 I = J + 1,MIN(N,J+K)
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + A(L+I,J)*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of SSBMV .
-*
-      END
diff --git a/superlu/BLAS/sscal.f b/superlu/BLAS/sscal.f
deleted file mode 100644
index 2ffc1a25..00000000
--- a/superlu/BLAS/sscal.f
+++ /dev/null
@@ -1,110 +0,0 @@
-*> \brief \b SSCAL
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SSCAL(N,SA,SX,INCX)
-*
-*       .. Scalar Arguments ..
-*       REAL SA
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    scales a vector by a constant.
-*>    uses unrolled loops for increment equal to 1.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SSCAL(N,SA,SX,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL SA
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER I,M,MP1,NINCX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MOD
-*     ..
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*
-*        code for increment equal to 1
-*
-*
-*        clean-up loop
-*
-         M = MOD(N,5)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               SX(I) = SA*SX(I)
-            END DO
-            IF (N.LT.5) RETURN
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,5
-            SX(I) = SA*SX(I)
-            SX(I+1) = SA*SX(I+1)
-            SX(I+2) = SA*SX(I+2)
-            SX(I+3) = SA*SX(I+3)
-            SX(I+4) = SA*SX(I+4)
-         END DO
-      ELSE
-*
-*        code for increment not equal to 1
-*
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            SX(I) = SA*SX(I)
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/sspmv.f b/superlu/BLAS/sspmv.f
deleted file mode 100644
index bc8af3d4..00000000
--- a/superlu/BLAS/sspmv.f
+++ /dev/null
@@ -1,331 +0,0 @@
-*> \brief \b SSPMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SSPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA,BETA
-*       INTEGER INCX,INCY,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL AP(*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SSPMV  performs the matrix-vector operation
-*>
-*>    y := alpha*A*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n symmetric matrix, supplied in packed form.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] AP
-*> \verbatim
-*>          AP is REAL array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y. On exit, Y is overwritten by the updated
-*>           vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SSPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA,BETA
-      INTEGER INCX,INCY,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL AP(*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ONE,ZERO
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 6
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SSPMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     Set up the start points in  X  and  Y.
-*
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  y  when AP contains the upper triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  K = KK
-                  DO 50 I = 1,J - 1
-                      Y(I) = Y(I) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + AP(K)*X(I)
-                      K = K + 1
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*AP(KK+J-1) + ALPHA*TEMP2
-                  KK = KK + J
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  DO 70 K = KK,KK + J - 2
-                      Y(IY) = Y(IY) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + AP(K)*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*AP(KK+J-1) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + J
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y  when AP contains the lower triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*AP(KK)
-                  K = KK + 1
-                  DO 90 I = J + 1,N
-                      Y(I) = Y(I) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + AP(K)*X(I)
-                      K = K + 1
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-                  KK = KK + (N-J+1)
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*AP(KK)
-                  IX = JX
-                  IY = JY
-                  DO 110 K = KK + 1,KK + N - J
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + AP(K)*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + (N-J+1)
-  120         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of SSPMV .
-*
-      END
diff --git a/superlu/BLAS/sspr.f b/superlu/BLAS/sspr.f
deleted file mode 100644
index 52cb7317..00000000
--- a/superlu/BLAS/sspr.f
+++ /dev/null
@@ -1,261 +0,0 @@
-*> \brief \b SSPR
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SSPR(UPLO,N,ALPHA,X,INCX,AP)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       INTEGER INCX,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL AP(*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SSPR    performs the symmetric rank 1 operation
-*>
-*>    A := alpha*x*x**T + A,
-*>
-*> where alpha is a real scalar, x is an n element vector and A is an
-*> n by n symmetric matrix, supplied in packed form.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] AP
-*> \verbatim
-*>          AP is REAL array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the upper triangular part of the
-*>           updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the lower triangular part of the
-*>           updated matrix.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SSPR(UPLO,N,ALPHA,X,INCX,AP)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA
-      INTEGER INCX,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL AP(*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-      INTEGER I,INFO,IX,J,JX,K,KK,KX
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SSPR  ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Set the start point in X if the increment is not unity.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-*
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  A  when upper triangle is stored in AP.
-*
-          IF (INCX.EQ.1) THEN
-              DO 20 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*X(J)
-                      K = KK
-                      DO 10 I = 1,J
-                          AP(K) = AP(K) + X(I)*TEMP
-                          K = K + 1
-   10                 CONTINUE
-                  END IF
-                  KK = KK + J
-   20         CONTINUE
-          ELSE
-              JX = KX
-              DO 40 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IX = KX
-                      DO 30 K = KK,KK + J - 1
-                          AP(K) = AP(K) + X(IX)*TEMP
-                          IX = IX + INCX
-   30                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  KK = KK + J
-   40         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  A  when lower triangle is stored in AP.
-*
-          IF (INCX.EQ.1) THEN
-              DO 60 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*X(J)
-                      K = KK
-                      DO 50 I = J,N
-                          AP(K) = AP(K) + X(I)*TEMP
-                          K = K + 1
-   50                 CONTINUE
-                  END IF
-                  KK = KK + N - J + 1
-   60         CONTINUE
-          ELSE
-              JX = KX
-              DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IX = JX
-                      DO 70 K = KK,KK + N - J
-                          AP(K) = AP(K) + X(IX)*TEMP
-                          IX = IX + INCX
-   70                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  KK = KK + N - J + 1
-   80         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of SSPR  .
-*
-      END
diff --git a/superlu/BLAS/sspr2.f b/superlu/BLAS/sspr2.f
deleted file mode 100644
index b4c81187..00000000
--- a/superlu/BLAS/sspr2.f
+++ /dev/null
@@ -1,296 +0,0 @@
-*> \brief \b SSPR2
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SSPR2(UPLO,N,ALPHA,X,INCX,Y,INCY,AP)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       INTEGER INCX,INCY,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL AP(*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SSPR2  performs the symmetric rank 2 operation
-*>
-*>    A := alpha*x*y**T + alpha*y*x**T + A,
-*>
-*> where alpha is a scalar, x and y are n element vectors and A is an
-*> n by n symmetric matrix, supplied in packed form.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] AP
-*> \verbatim
-*>          AP is REAL array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the upper triangular part of the
-*>           updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the symmetric matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the lower triangular part of the
-*>           updated matrix.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SSPR2(UPLO,N,ALPHA,X,INCX,Y,INCY,AP)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA
-      INTEGER INCX,INCY,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL AP(*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SSPR2 ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Set up the start points in X and Y if the increments are not both
-*     unity.
-*
-      IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (N-1)*INCX
-          END IF
-          IF (INCY.GT.0) THEN
-              KY = 1
-          ELSE
-              KY = 1 - (N-1)*INCY
-          END IF
-          JX = KX
-          JY = KY
-      END IF
-*
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-*
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  A  when upper triangle is stored in AP.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 20 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(J)
-                      TEMP2 = ALPHA*X(J)
-                      K = KK
-                      DO 10 I = 1,J
-                          AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2
-                          K = K + 1
-   10                 CONTINUE
-                  END IF
-                  KK = KK + J
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(JY)
-                      TEMP2 = ALPHA*X(JX)
-                      IX = KX
-                      IY = KY
-                      DO 30 K = KK,KK + J - 1
-                          AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   30                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + J
-   40         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  A  when lower triangle is stored in AP.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(J)
-                      TEMP2 = ALPHA*X(J)
-                      K = KK
-                      DO 50 I = J,N
-                          AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2
-                          K = K + 1
-   50                 CONTINUE
-                  END IF
-                  KK = KK + N - J + 1
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(JY)
-                      TEMP2 = ALPHA*X(JX)
-                      IX = JX
-                      IY = JY
-                      DO 70 K = KK,KK + N - J
-                          AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   70                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + N - J + 1
-   80         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of SSPR2 .
-*
-      END
diff --git a/superlu/BLAS/sswap.f b/superlu/BLAS/sswap.f
deleted file mode 100644
index f821a1e7..00000000
--- a/superlu/BLAS/sswap.f
+++ /dev/null
@@ -1,122 +0,0 @@
-*> \brief \b SSWAP
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SSWAP(N,SX,INCX,SY,INCY)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       REAL SX(*),SY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    interchanges two vectors.
-*>    uses unrolled loops for increments equal to 1.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SSWAP(N,SX,INCX,SY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      REAL SX(*),SY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      REAL STEMP
-      INTEGER I,IX,IY,M,MP1
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MOD
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*       code for both increments equal to 1
-*
-*
-*       clean-up loop
-*
-         M = MOD(N,3)
-         IF (M.NE.0) THEN
-            DO I = 1,M
-               STEMP = SX(I)
-               SX(I) = SY(I)
-               SY(I) = STEMP
-            END DO
-            IF (N.LT.3) RETURN
-         END IF
-         MP1 = M + 1
-         DO I = MP1,N,3
-            STEMP = SX(I)
-            SX(I) = SY(I)
-            SY(I) = STEMP
-            STEMP = SX(I+1)
-            SX(I+1) = SY(I+1)
-            SY(I+1) = STEMP
-            STEMP = SX(I+2)
-            SX(I+2) = SY(I+2)
-            SY(I+2) = STEMP
-         END DO
-      ELSE
-*
-*       code for unequal increments or equal increments not equal
-*         to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            STEMP = SX(IX)
-            SX(IX) = SY(IY)
-            SY(IY) = STEMP
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/ssymm.f b/superlu/BLAS/ssymm.f
deleted file mode 100644
index d3a193f7..00000000
--- a/superlu/BLAS/ssymm.f
+++ /dev/null
@@ -1,367 +0,0 @@
-*> \brief \b SSYMM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SSYMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA,BETA
-*       INTEGER LDA,LDB,LDC,M,N
-*       CHARACTER SIDE,UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SSYMM  performs one of the matrix-matrix operations
-*>
-*>    C := alpha*A*B + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*B*A + beta*C,
-*>
-*> where alpha and beta are scalars,  A is a symmetric matrix and  B and
-*> C are  m by n matrices.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE  specifies whether  the  symmetric matrix  A
-*>           appears on the  left or right  in the  operation as follows:
-*>
-*>              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
-*>
-*>              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of  the  symmetric  matrix   A  is  to  be
-*>           referenced as follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of the
-*>                                  symmetric matrix is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of the
-*>                                  symmetric matrix is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies the number of rows of the matrix  C.
-*>           M  must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix C.
-*>           N  must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, ka ), where ka is
-*>           m  when  SIDE = 'L' or 'l'  and is  n otherwise.
-*>           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
-*>           the array  A  must contain the  symmetric matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading m by m upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  symmetric matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  m by m  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  symmetric
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*>           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
-*>           the array  A  must contain the  symmetric matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading n by n upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  symmetric matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  n by n  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  symmetric
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least  max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is REAL array of DIMENSION ( LDB, n ).
-*>           Before entry, the leading  m by n part of the array  B  must
-*>           contain the matrix B.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is REAL array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n updated
-*>           matrix.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SSYMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA,BETA
-      INTEGER LDA,LDB,LDC,M,N
-      CHARACTER SIDE,UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP1,TEMP2
-      INTEGER I,INFO,J,K,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      REAL ONE,ZERO
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*
-*     Set NROWA as the number of rows of A.
-*
-      IF (LSAME(SIDE,'L')) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF ((.NOT.LSAME(SIDE,'L')) .AND. (.NOT.LSAME(SIDE,'R'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDB.LT.MAX(1,M)) THEN
-          INFO = 9
-      ELSE IF (LDC.LT.MAX(1,M)) THEN
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SSYMM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(SIDE,'L')) THEN
-*
-*        Form  C := alpha*A*B + beta*C.
-*
-          IF (UPPER) THEN
-              DO 70 J = 1,N
-                  DO 60 I = 1,M
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 50 K = 1,I - 1
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*A(K,I)
-   50                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) +
-     +                             ALPHA*TEMP2
-                      END IF
-   60             CONTINUE
-   70         CONTINUE
-          ELSE
-              DO 100 J = 1,N
-                  DO 90 I = M,1,-1
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 80 K = I + 1,M
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*A(K,I)
-   80                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) +
-     +                             ALPHA*TEMP2
-                      END IF
-   90             CONTINUE
-  100         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*B*A + beta*C.
-*
-          DO 170 J = 1,N
-              TEMP1 = ALPHA*A(J,J)
-              IF (BETA.EQ.ZERO) THEN
-                  DO 110 I = 1,M
-                      C(I,J) = TEMP1*B(I,J)
-  110             CONTINUE
-              ELSE
-                  DO 120 I = 1,M
-                      C(I,J) = BETA*C(I,J) + TEMP1*B(I,J)
-  120             CONTINUE
-              END IF
-              DO 140 K = 1,J - 1
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(K,J)
-                  ELSE
-                      TEMP1 = ALPHA*A(J,K)
-                  END IF
-                  DO 130 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  130             CONTINUE
-  140         CONTINUE
-              DO 160 K = J + 1,N
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(J,K)
-                  ELSE
-                      TEMP1 = ALPHA*A(K,J)
-                  END IF
-                  DO 150 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  150             CONTINUE
-  160         CONTINUE
-  170     CONTINUE
-      END IF
-*
-      RETURN
-*
-*     End of SSYMM .
-*
-      END
diff --git a/superlu/BLAS/ssymv.f b/superlu/BLAS/ssymv.f
deleted file mode 100644
index a1fa54f1..00000000
--- a/superlu/BLAS/ssymv.f
+++ /dev/null
@@ -1,333 +0,0 @@
-*> \brief \b SSYMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SSYMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA,BETA
-*       INTEGER INCX,INCY,LDA,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SSYMV  performs the matrix-vector  operation
-*>
-*>    y := alpha*A*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n symmetric matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the symmetric matrix and the strictly
-*>           lower triangular part of A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the symmetric matrix and the strictly
-*>           upper triangular part of A is not referenced.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y. On exit, Y is overwritten by the updated
-*>           vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SSYMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA,BETA
-      INTEGER INCX,INCY,LDA,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ONE,ZERO
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 5
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 7
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SSYMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     Set up the start points in  X  and  Y.
-*
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  y  when A is stored in upper triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  DO 50 I = 1,J - 1
-                      Y(I) = Y(I) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + A(I,J)*X(I)
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*A(J,J) + ALPHA*TEMP2
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  DO 70 I = 1,J - 1
-                      Y(IY) = Y(IY) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + A(I,J)*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*A(J,J) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y  when A is stored in lower triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*A(J,J)
-                  DO 90 I = J + 1,N
-                      Y(I) = Y(I) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + A(I,J)*X(I)
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*A(J,J)
-                  IX = JX
-                  IY = JY
-                  DO 110 I = J + 1,N
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + A(I,J)*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of SSYMV .
-*
-      END
diff --git a/superlu/BLAS/ssyr.f b/superlu/BLAS/ssyr.f
deleted file mode 100644
index 9d73f868..00000000
--- a/superlu/BLAS/ssyr.f
+++ /dev/null
@@ -1,263 +0,0 @@
-*> \brief \b SSYR
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SSYR(UPLO,N,ALPHA,X,INCX,A,LDA)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       INTEGER INCX,LDA,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SSYR   performs the symmetric rank 1 operation
-*>
-*>    A := alpha*x*x**T + A,
-*>
-*> where alpha is a real scalar, x is an n element vector and A is an
-*> n by n symmetric matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the symmetric matrix and the strictly
-*>           lower triangular part of A is not referenced. On exit, the
-*>           upper triangular part of the array A is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the symmetric matrix and the strictly
-*>           upper triangular part of A is not referenced. On exit, the
-*>           lower triangular part of the array A is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SSYR(UPLO,N,ALPHA,X,INCX,A,LDA)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA
-      INTEGER INCX,LDA,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-      INTEGER I,INFO,IX,J,JX,KX
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SSYR  ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Set the start point in X if the increment is not unity.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-*
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  A  when A is stored in upper triangle.
-*
-          IF (INCX.EQ.1) THEN
-              DO 20 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*X(J)
-                      DO 10 I = 1,J
-                          A(I,J) = A(I,J) + X(I)*TEMP
-   10                 CONTINUE
-                  END IF
-   20         CONTINUE
-          ELSE
-              JX = KX
-              DO 40 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IX = KX
-                      DO 30 I = 1,J
-                          A(I,J) = A(I,J) + X(IX)*TEMP
-                          IX = IX + INCX
-   30                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-   40         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  A  when A is stored in lower triangle.
-*
-          IF (INCX.EQ.1) THEN
-              DO 60 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*X(J)
-                      DO 50 I = J,N
-                          A(I,J) = A(I,J) + X(I)*TEMP
-   50                 CONTINUE
-                  END IF
-   60         CONTINUE
-          ELSE
-              JX = KX
-              DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*X(JX)
-                      IX = JX
-                      DO 70 I = J,N
-                          A(I,J) = A(I,J) + X(IX)*TEMP
-                          IX = IX + INCX
-   70                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-   80         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of SSYR  .
-*
-      END
diff --git a/superlu/BLAS/ssyr2.f b/superlu/BLAS/ssyr2.f
deleted file mode 100644
index a2a083ad..00000000
--- a/superlu/BLAS/ssyr2.f
+++ /dev/null
@@ -1,298 +0,0 @@
-*> \brief \b SSYR2
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SSYR2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       INTEGER INCX,INCY,LDA,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SSYR2  performs the symmetric rank 2 operation
-*>
-*>    A := alpha*x*y**T + alpha*y*x**T + A,
-*>
-*> where alpha is a scalar, x and y are n element vectors and A is an n
-*> by n symmetric matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the symmetric matrix and the strictly
-*>           lower triangular part of A is not referenced. On exit, the
-*>           upper triangular part of the array A is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the symmetric matrix and the strictly
-*>           upper triangular part of A is not referenced. On exit, the
-*>           lower triangular part of the array A is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SSYR2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA
-      INTEGER INCX,INCY,LDA,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 7
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SSYR2 ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Set up the start points in X and Y if the increments are not both
-*     unity.
-*
-      IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (N-1)*INCX
-          END IF
-          IF (INCY.GT.0) THEN
-              KY = 1
-          ELSE
-              KY = 1 - (N-1)*INCY
-          END IF
-          JX = KX
-          JY = KY
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-*
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  A  when A is stored in the upper triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 20 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(J)
-                      TEMP2 = ALPHA*X(J)
-                      DO 10 I = 1,J
-                          A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2
-   10                 CONTINUE
-                  END IF
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(JY)
-                      TEMP2 = ALPHA*X(JX)
-                      IX = KX
-                      IY = KY
-                      DO 30 I = 1,J
-                          A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   30                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-   40         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  A  when A is stored in the lower triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(J)
-                      TEMP2 = ALPHA*X(J)
-                      DO 50 I = J,N
-                          A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2
-   50                 CONTINUE
-                  END IF
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*Y(JY)
-                      TEMP2 = ALPHA*X(JX)
-                      IX = JX
-                      IY = JY
-                      DO 70 I = J,N
-                          A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   70                 CONTINUE
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-   80         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of SSYR2 .
-*
-      END
diff --git a/superlu/BLAS/ssyr2k.f b/superlu/BLAS/ssyr2k.f
deleted file mode 100644
index 4a705f79..00000000
--- a/superlu/BLAS/ssyr2k.f
+++ /dev/null
@@ -1,399 +0,0 @@
-*> \brief \b SSYR2K
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA,BETA
-*       INTEGER K,LDA,LDB,LDC,N
-*       CHARACTER TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SSYR2K  performs one of the symmetric rank 2k operations
-*>
-*>    C := alpha*A*B**T + alpha*B*A**T + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*A**T*B + alpha*B**T*A + beta*C,
-*>
-*> where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
-*> and  A and B  are  n by k  matrices  in the  first  case  and  k by n
-*> matrices in the second case.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   C := alpha*A*B**T + alpha*B*A**T +
-*>                                        beta*C.
-*>
-*>              TRANS = 'T' or 't'   C := alpha*A**T*B + alpha*B**T*A +
-*>                                        beta*C.
-*>
-*>              TRANS = 'C' or 'c'   C := alpha*A**T*B + alpha*B**T*A +
-*>                                        beta*C.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns  of the  matrices  A and B,  and on  entry  with
-*>           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
-*>           of rows of the matrices  A and B.  K must be at least  zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is REAL array of DIMENSION ( LDB, kb ), where kb is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  k by n  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDB must be at least  max( 1, n ), otherwise  LDB must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is REAL array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA,BETA
-      INTEGER K,LDA,LDB,LDC,N
-      CHARACTER TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP1,TEMP2
-      INTEGER I,INFO,J,L,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      REAL ONE,ZERO
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*
-*     Test the input parameters.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND.
-     +         (.NOT.LSAME(TRANS,'T')) .AND.
-     +         (.NOT.LSAME(TRANS,'C'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDB.LT.MAX(1,NROWA)) THEN
-          INFO = 9
-      ELSE IF (LDC.LT.MAX(1,N)) THEN
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SSYR2K',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 I = J,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  C := alpha*A*B**T + alpha*B*A**T + C.
-*
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                  END IF
-                  DO 120 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*B(J,L)
-                          TEMP2 = ALPHA*A(J,L)
-                          DO 110 I = 1,J
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  110                     CONTINUE
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  END IF
-                  DO 170 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*B(J,L)
-                          TEMP2 = ALPHA*A(J,L)
-                          DO 160 I = J,N
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*A**T*B + alpha*B**T*A + C.
-*
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 190 L = 1,K
-                          TEMP1 = TEMP1 + A(L,I)*B(L,J)
-                          TEMP2 = TEMP2 + B(L,I)*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                             ALPHA*TEMP2
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 220 L = 1,K
-                          TEMP1 = TEMP1 + A(L,I)*B(L,J)
-                          TEMP2 = TEMP2 + B(L,I)*A(L,J)
-  220                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                             ALPHA*TEMP2
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of SSYR2K.
-*
-      END
diff --git a/superlu/BLAS/ssyrk.f b/superlu/BLAS/ssyrk.f
deleted file mode 100644
index ecb1e4f1..00000000
--- a/superlu/BLAS/ssyrk.f
+++ /dev/null
@@ -1,364 +0,0 @@
-*> \brief \b SSYRK
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE SSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA,BETA
-*       INTEGER K,LDA,LDC,N
-*       CHARACTER TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> SSYRK  performs one of the symmetric rank k operations
-*>
-*>    C := alpha*A*A**T + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*A**T*A + beta*C,
-*>
-*> where  alpha and beta  are scalars, C is an  n by n  symmetric matrix
-*> and  A  is an  n by k  matrix in the first case and a  k by n  matrix
-*> in the second case.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   C := alpha*A*A**T + beta*C.
-*>
-*>              TRANS = 'T' or 't'   C := alpha*A**T*A + beta*C.
-*>
-*>              TRANS = 'C' or 'c'   C := alpha*A**T*A + beta*C.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns   of  the   matrix   A,   and  on   entry   with
-*>           TRANS = 'T' or 't' or 'C' or 'c',  K  specifies  the  number
-*>           of rows of the matrix  A.  K must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is REAL
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is REAL array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE SSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA,BETA
-      INTEGER K,LDA,LDC,N
-      CHARACTER TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-      INTEGER I,INFO,J,L,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      REAL ONE,ZERO
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*
-*     Test the input parameters.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND.
-     +         (.NOT.LSAME(TRANS,'T')) .AND.
-     +         (.NOT.LSAME(TRANS,'C'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDC.LT.MAX(1,N)) THEN
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('SSYRK ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 I = J,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  C := alpha*A*A**T + beta*C.
-*
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                  END IF
-                  DO 120 L = 1,K
-                      IF (A(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*A(J,L)
-                          DO 110 I = 1,J
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  110                     CONTINUE
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  END IF
-                  DO 170 L = 1,K
-                      IF (A(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*A(J,L)
-                          DO 160 I = J,N
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*A**T*A + beta*C.
-*
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP = ZERO
-                      DO 190 L = 1,K
-                          TEMP = TEMP + A(L,I)*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP = ZERO
-                      DO 220 L = 1,K
-                          TEMP = TEMP + A(L,I)*A(L,J)
-  220                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of SSYRK .
-*
-      END
diff --git a/superlu/BLAS/stbmv.f b/superlu/BLAS/stbmv.f
deleted file mode 100644
index 4323864e..00000000
--- a/superlu/BLAS/stbmv.f
+++ /dev/null
@@ -1,398 +0,0 @@
-*> \brief \b STBMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE STBMV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,K,LDA,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> STBMV  performs one of the matrix-vector operations
-*>
-*>    x := A*x,   or   x := A**T*x,
-*>
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular band matrix, with ( k + 1 ) diagonals.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>
-*>              TRANS = 'C' or 'c'   x := A**T*x.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with UPLO = 'U' or 'u', K specifies the number of
-*>           super-diagonals of the matrix A.
-*>           On entry with UPLO = 'L' or 'l', K specifies the number of
-*>           sub-diagonals of the matrix A.
-*>           K must satisfy  0 .le. K.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer an upper
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer a lower
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Note that when DIAG = 'U' or 'u' the elements of the array A
-*>           corresponding to the diagonal elements of the matrix are not
-*>           referenced, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE STBMV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,K,LDA,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-      INTEGER I,INFO,IX,J,JX,KPLUS1,KX,L
-      LOGICAL NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX,MIN
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 7
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('STBMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX   too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*         Form  x := A*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          L = KPLUS1 - J
-                          DO 10 I = MAX(1,J-K),J - 1
-                              X(I) = X(I) + TEMP*A(L+I,J)
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(KPLUS1,J)
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          L = KPLUS1 - J
-                          DO 30 I = MAX(1,J-K),J - 1
-                              X(IX) = X(IX) + TEMP*A(L+I,J)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(KPLUS1,J)
-                      END IF
-                      JX = JX + INCX
-                      IF (J.GT.K) KX = KX + INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          L = 1 - J
-                          DO 50 I = MIN(N,J+K),J + 1,-1
-                              X(I) = X(I) + TEMP*A(L+I,J)
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(1,J)
-                      END IF
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          L = 1 - J
-                          DO 70 I = MIN(N,J+K),J + 1,-1
-                              X(IX) = X(IX) + TEMP*A(L+I,J)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(1,J)
-                      END IF
-                      JX = JX - INCX
-                      IF ((N-J).GE.K) KX = KX - INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := A**T*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = N,1,-1
-                      TEMP = X(J)
-                      L = KPLUS1 - J
-                      IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
-                      DO 90 I = J - 1,MAX(1,J-K),-1
-                          TEMP = TEMP + A(L+I,J)*X(I)
-   90                 CONTINUE
-                      X(J) = TEMP
-  100             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 120 J = N,1,-1
-                      TEMP = X(JX)
-                      KX = KX - INCX
-                      IX = KX
-                      L = KPLUS1 - J
-                      IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
-                      DO 110 I = J - 1,MAX(1,J-K),-1
-                          TEMP = TEMP + A(L+I,J)*X(IX)
-                          IX = IX - INCX
-  110                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  120             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = 1,N
-                      TEMP = X(J)
-                      L = 1 - J
-                      IF (NOUNIT) TEMP = TEMP*A(1,J)
-                      DO 130 I = J + 1,MIN(N,J+K)
-                          TEMP = TEMP + A(L+I,J)*X(I)
-  130                 CONTINUE
-                      X(J) = TEMP
-  140             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 160 J = 1,N
-                      TEMP = X(JX)
-                      KX = KX + INCX
-                      IX = KX
-                      L = 1 - J
-                      IF (NOUNIT) TEMP = TEMP*A(1,J)
-                      DO 150 I = J + 1,MIN(N,J+K)
-                          TEMP = TEMP + A(L+I,J)*X(IX)
-                          IX = IX + INCX
-  150                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of STBMV .
-*
-      END
diff --git a/superlu/BLAS/stbsv.f b/superlu/BLAS/stbsv.f
deleted file mode 100644
index 00aaeba6..00000000
--- a/superlu/BLAS/stbsv.f
+++ /dev/null
@@ -1,401 +0,0 @@
-*> \brief \b STBSV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE STBSV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,K,LDA,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> STBSV  solves one of the systems of equations
-*>
-*>    A*x = b,   or   A**T*x = b,
-*>
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular band matrix, with ( k + 1 )
-*> diagonals.
-*>
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>
-*>              TRANS = 'C' or 'c'   A**T*x = b.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with UPLO = 'U' or 'u', K specifies the number of
-*>           super-diagonals of the matrix A.
-*>           On entry with UPLO = 'L' or 'l', K specifies the number of
-*>           sub-diagonals of the matrix A.
-*>           K must satisfy  0 .le. K.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer an upper
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer a lower
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Note that when DIAG = 'U' or 'u' the elements of the array A
-*>           corresponding to the diagonal elements of the matrix are not
-*>           referenced, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE STBSV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,K,LDA,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-      INTEGER I,INFO,IX,J,JX,KPLUS1,KX,L
-      LOGICAL NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX,MIN
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 7
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('STBSV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed by sequentially with one pass through A.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x := inv( A )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          L = KPLUS1 - J
-                          IF (NOUNIT) X(J) = X(J)/A(KPLUS1,J)
-                          TEMP = X(J)
-                          DO 10 I = J - 1,MAX(1,J-K),-1
-                              X(I) = X(I) - TEMP*A(L+I,J)
-   10                     CONTINUE
-                      END IF
-   20             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 40 J = N,1,-1
-                      KX = KX - INCX
-                      IF (X(JX).NE.ZERO) THEN
-                          IX = KX
-                          L = KPLUS1 - J
-                          IF (NOUNIT) X(JX) = X(JX)/A(KPLUS1,J)
-                          TEMP = X(JX)
-                          DO 30 I = J - 1,MAX(1,J-K),-1
-                              X(IX) = X(IX) - TEMP*A(L+I,J)
-                              IX = IX - INCX
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          L = 1 - J
-                          IF (NOUNIT) X(J) = X(J)/A(1,J)
-                          TEMP = X(J)
-                          DO 50 I = J + 1,MIN(N,J+K)
-                              X(I) = X(I) - TEMP*A(L+I,J)
-   50                     CONTINUE
-                      END IF
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      KX = KX + INCX
-                      IF (X(JX).NE.ZERO) THEN
-                          IX = KX
-                          L = 1 - J
-                          IF (NOUNIT) X(JX) = X(JX)/A(1,J)
-                          TEMP = X(JX)
-                          DO 70 I = J + 1,MIN(N,J+K)
-                              X(IX) = X(IX) - TEMP*A(L+I,J)
-                              IX = IX + INCX
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := inv( A**T)*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = 1,N
-                      TEMP = X(J)
-                      L = KPLUS1 - J
-                      DO 90 I = MAX(1,J-K),J - 1
-                          TEMP = TEMP - A(L+I,J)*X(I)
-   90                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J)
-                      X(J) = TEMP
-  100             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 120 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      L = KPLUS1 - J
-                      DO 110 I = MAX(1,J-K),J - 1
-                          TEMP = TEMP - A(L+I,J)*X(IX)
-                          IX = IX + INCX
-  110                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J)
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      IF (J.GT.K) KX = KX + INCX
-  120             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = N,1,-1
-                      TEMP = X(J)
-                      L = 1 - J
-                      DO 130 I = MIN(N,J+K),J + 1,-1
-                          TEMP = TEMP - A(L+I,J)*X(I)
-  130                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(1,J)
-                      X(J) = TEMP
-  140             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 160 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      L = 1 - J
-                      DO 150 I = MIN(N,J+K),J + 1,-1
-                          TEMP = TEMP - A(L+I,J)*X(IX)
-                          IX = IX - INCX
-  150                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(1,J)
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      IF ((N-J).GE.K) KX = KX - INCX
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of STBSV .
-*
-      END
diff --git a/superlu/BLAS/stpmv.f b/superlu/BLAS/stpmv.f
deleted file mode 100644
index 765e7f91..00000000
--- a/superlu/BLAS/stpmv.f
+++ /dev/null
@@ -1,352 +0,0 @@
-*> \brief \b STPMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE STPMV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL AP(*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> STPMV  performs one of the matrix-vector operations
-*>
-*>    x := A*x,   or   x := A**T*x,
-*>
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular matrix, supplied in packed form.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>
-*>              TRANS = 'C' or 'c'   x := A**T*x.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] AP
-*> \verbatim
-*>          AP is REAL array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
-*>           respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
-*>           respectively, and so on.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE STPMV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL AP(*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-      INTEGER I,INFO,IX,J,JX,K,KK,KX
-      LOGICAL NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('STPMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of AP are
-*     accessed sequentially with one pass through AP.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x:= A*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          K = KK
-                          DO 10 I = 1,J - 1
-                              X(I) = X(I) + TEMP*AP(K)
-                              K = K + 1
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*AP(KK+J-1)
-                      END IF
-                      KK = KK + J
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 30 K = KK,KK + J - 2
-                              X(IX) = X(IX) + TEMP*AP(K)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*AP(KK+J-1)
-                      END IF
-                      JX = JX + INCX
-                      KK = KK + J
-   40             CONTINUE
-              END IF
-          ELSE
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          K = KK
-                          DO 50 I = N,J + 1,-1
-                              X(I) = X(I) + TEMP*AP(K)
-                              K = K - 1
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*AP(KK-N+J)
-                      END IF
-                      KK = KK - (N-J+1)
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 70 K = KK,KK - (N- (J+1)),-1
-                              X(IX) = X(IX) + TEMP*AP(K)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*AP(KK-N+J)
-                      END IF
-                      JX = JX - INCX
-                      KK = KK - (N-J+1)
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := A**T*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = N,1,-1
-                      TEMP = X(J)
-                      IF (NOUNIT) TEMP = TEMP*AP(KK)
-                      K = KK - 1
-                      DO 90 I = J - 1,1,-1
-                          TEMP = TEMP + AP(K)*X(I)
-                          K = K - 1
-   90                 CONTINUE
-                      X(J) = TEMP
-                      KK = KK - J
-  100             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 120 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOUNIT) TEMP = TEMP*AP(KK)
-                      DO 110 K = KK - 1,KK - J + 1,-1
-                          IX = IX - INCX
-                          TEMP = TEMP + AP(K)*X(IX)
-  110                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      KK = KK - J
-  120             CONTINUE
-              END IF
-          ELSE
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = 1,N
-                      TEMP = X(J)
-                      IF (NOUNIT) TEMP = TEMP*AP(KK)
-                      K = KK + 1
-                      DO 130 I = J + 1,N
-                          TEMP = TEMP + AP(K)*X(I)
-                          K = K + 1
-  130                 CONTINUE
-                      X(J) = TEMP
-                      KK = KK + (N-J+1)
-  140             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 160 J = 1,N
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOUNIT) TEMP = TEMP*AP(KK)
-                      DO 150 K = KK + 1,KK + N - J
-                          IX = IX + INCX
-                          TEMP = TEMP + AP(K)*X(IX)
-  150                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      KK = KK + (N-J+1)
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of STPMV .
-*
-      END
diff --git a/superlu/BLAS/stpsv.f b/superlu/BLAS/stpsv.f
deleted file mode 100644
index 5a29776d..00000000
--- a/superlu/BLAS/stpsv.f
+++ /dev/null
@@ -1,354 +0,0 @@
-*> \brief \b STPSV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE STPSV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL AP(*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> STPSV  solves one of the systems of equations
-*>
-*>    A*x = b,   or   A**T*x = b,
-*>
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular matrix, supplied in packed form.
-*>
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>
-*>              TRANS = 'C' or 'c'   A**T*x = b.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] AP
-*> \verbatim
-*>          AP is REAL array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
-*>           respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
-*>           respectively, and so on.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE STPSV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL AP(*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-      INTEGER I,INFO,IX,J,JX,K,KK,KX
-      LOGICAL NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('STPSV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of AP are
-*     accessed sequentially with one pass through AP.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x := inv( A )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/AP(KK)
-                          TEMP = X(J)
-                          K = KK - 1
-                          DO 10 I = J - 1,1,-1
-                              X(I) = X(I) - TEMP*AP(K)
-                              K = K - 1
-   10                     CONTINUE
-                      END IF
-                      KK = KK - J
-   20             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 40 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/AP(KK)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 30 K = KK - 1,KK - J + 1,-1
-                              IX = IX - INCX
-                              X(IX) = X(IX) - TEMP*AP(K)
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-                      KK = KK - J
-   40             CONTINUE
-              END IF
-          ELSE
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/AP(KK)
-                          TEMP = X(J)
-                          K = KK + 1
-                          DO 50 I = J + 1,N
-                              X(I) = X(I) - TEMP*AP(K)
-                              K = K + 1
-   50                     CONTINUE
-                      END IF
-                      KK = KK + (N-J+1)
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/AP(KK)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 70 K = KK + 1,KK + N - J
-                              IX = IX + INCX
-                              X(IX) = X(IX) - TEMP*AP(K)
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-                      KK = KK + (N-J+1)
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := inv( A**T )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = 1,N
-                      TEMP = X(J)
-                      K = KK
-                      DO 90 I = 1,J - 1
-                          TEMP = TEMP - AP(K)*X(I)
-                          K = K + 1
-   90                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/AP(KK+J-1)
-                      X(J) = TEMP
-                      KK = KK + J
-  100             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 120 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      DO 110 K = KK,KK + J - 2
-                          TEMP = TEMP - AP(K)*X(IX)
-                          IX = IX + INCX
-  110                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/AP(KK+J-1)
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      KK = KK + J
-  120             CONTINUE
-              END IF
-          ELSE
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = N,1,-1
-                      TEMP = X(J)
-                      K = KK
-                      DO 130 I = N,J + 1,-1
-                          TEMP = TEMP - AP(K)*X(I)
-                          K = K - 1
-  130                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/AP(KK-N+J)
-                      X(J) = TEMP
-                      KK = KK - (N-J+1)
-  140             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 160 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      DO 150 K = KK,KK - (N- (J+1)),-1
-                          TEMP = TEMP - AP(K)*X(IX)
-                          IX = IX - INCX
-  150                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/AP(KK-N+J)
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      KK = KK - (N-J+1)
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of STPSV .
-*
-      END
diff --git a/superlu/BLAS/strmm.f b/superlu/BLAS/strmm.f
deleted file mode 100644
index dd208721..00000000
--- a/superlu/BLAS/strmm.f
+++ /dev/null
@@ -1,415 +0,0 @@
-*> \brief \b STRMM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE STRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       INTEGER LDA,LDB,M,N
-*       CHARACTER DIAG,SIDE,TRANSA,UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),B(LDB,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> STRMM  performs one of the matrix-matrix operations
-*>
-*>    B := alpha*op( A )*B,   or   B := alpha*B*op( A ),
-*>
-*> where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
-*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
-*>
-*>    op( A ) = A   or   op( A ) = A**T.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE specifies whether  op( A ) multiplies B from
-*>           the left or right as follows:
-*>
-*>              SIDE = 'L' or 'l'   B := alpha*op( A )*B.
-*>
-*>              SIDE = 'R' or 'r'   B := alpha*B*op( A ).
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix A is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSA = 'N' or 'n'   op( A ) = A.
-*>
-*>              TRANSA = 'T' or 't'   op( A ) = A**T.
-*>
-*>              TRANSA = 'C' or 'c'   op( A ) = A**T.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit triangular
-*>           as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of B. M must be at
-*>           least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of B.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
-*>           zero then  A is not referenced and  B need not be set before
-*>           entry.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, k ), where k is m
-*>           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
-*>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
-*>           upper triangular part of the array  A must contain the upper
-*>           triangular matrix  and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
-*>           lower triangular part of the array  A must contain the lower
-*>           triangular matrix  and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
-*>           A  are not referenced either,  but are assumed to be  unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
-*>           then LDA must be at least max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in,out] B
-*> \verbatim
-*>          B is REAL array of DIMENSION ( LDB, n ).
-*>           Before entry,  the leading  m by n part of the array  B must
-*>           contain the matrix  B,  and  on exit  is overwritten  by the
-*>           transformed matrix.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE STRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA
-      INTEGER LDA,LDB,M,N
-      CHARACTER DIAG,SIDE,TRANSA,UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),B(LDB,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-      INTEGER I,INFO,J,K,NROWA
-      LOGICAL LSIDE,NOUNIT,UPPER
-*     ..
-*     .. Parameters ..
-      REAL ONE,ZERO
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*
-*     Test the input parameters.
-*
-      LSIDE = LSAME(SIDE,'L')
-      IF (LSIDE) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      NOUNIT = LSAME(DIAG,'N')
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.LSIDE) .AND. (.NOT.LSAME(SIDE,'R'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 2
-      ELSE IF ((.NOT.LSAME(TRANSA,'N')) .AND.
-     +         (.NOT.LSAME(TRANSA,'T')) .AND.
-     +         (.NOT.LSAME(TRANSA,'C'))) THEN
-          INFO = 3
-      ELSE IF ((.NOT.LSAME(DIAG,'U')) .AND. (.NOT.LSAME(DIAG,'N'))) THEN
-          INFO = 4
-      ELSE IF (M.LT.0) THEN
-          INFO = 5
-      ELSE IF (N.LT.0) THEN
-          INFO = 6
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 9
-      ELSE IF (LDB.LT.MAX(1,M)) THEN
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('STRMM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (M.EQ.0 .OR. N.EQ.0) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          DO 20 J = 1,N
-              DO 10 I = 1,M
-                  B(I,J) = ZERO
-   10         CONTINUE
-   20     CONTINUE
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSIDE) THEN
-          IF (LSAME(TRANSA,'N')) THEN
-*
-*           Form  B := alpha*A*B.
-*
-              IF (UPPER) THEN
-                  DO 50 J = 1,N
-                      DO 40 K = 1,M
-                          IF (B(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*B(K,J)
-                              DO 30 I = 1,K - 1
-                                  B(I,J) = B(I,J) + TEMP*A(I,K)
-   30                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP*A(K,K)
-                              B(K,J) = TEMP
-                          END IF
-   40                 CONTINUE
-   50             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 K = M,1,-1
-                          IF (B(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*B(K,J)
-                              B(K,J) = TEMP
-                              IF (NOUNIT) B(K,J) = B(K,J)*A(K,K)
-                              DO 60 I = K + 1,M
-                                  B(I,J) = B(I,J) + TEMP*A(I,K)
-   60                         CONTINUE
-                          END IF
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          ELSE
-*
-*           Form  B := alpha*A**T*B.
-*
-              IF (UPPER) THEN
-                  DO 110 J = 1,N
-                      DO 100 I = M,1,-1
-                          TEMP = B(I,J)
-                          IF (NOUNIT) TEMP = TEMP*A(I,I)
-                          DO 90 K = 1,I - 1
-                              TEMP = TEMP + A(K,I)*B(K,J)
-   90                     CONTINUE
-                          B(I,J) = ALPHA*TEMP
-  100                 CONTINUE
-  110             CONTINUE
-              ELSE
-                  DO 140 J = 1,N
-                      DO 130 I = 1,M
-                          TEMP = B(I,J)
-                          IF (NOUNIT) TEMP = TEMP*A(I,I)
-                          DO 120 K = I + 1,M
-                              TEMP = TEMP + A(K,I)*B(K,J)
-  120                     CONTINUE
-                          B(I,J) = ALPHA*TEMP
-  130                 CONTINUE
-  140             CONTINUE
-              END IF
-          END IF
-      ELSE
-          IF (LSAME(TRANSA,'N')) THEN
-*
-*           Form  B := alpha*B*A.
-*
-              IF (UPPER) THEN
-                  DO 180 J = N,1,-1
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 150 I = 1,M
-                          B(I,J) = TEMP*B(I,J)
-  150                 CONTINUE
-                      DO 170 K = 1,J - 1
-                          IF (A(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*A(K,J)
-                              DO 160 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  160                         CONTINUE
-                          END IF
-  170                 CONTINUE
-  180             CONTINUE
-              ELSE
-                  DO 220 J = 1,N
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 190 I = 1,M
-                          B(I,J) = TEMP*B(I,J)
-  190                 CONTINUE
-                      DO 210 K = J + 1,N
-                          IF (A(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*A(K,J)
-                              DO 200 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  200                         CONTINUE
-                          END IF
-  210                 CONTINUE
-  220             CONTINUE
-              END IF
-          ELSE
-*
-*           Form  B := alpha*B*A**T.
-*
-              IF (UPPER) THEN
-                  DO 260 K = 1,N
-                      DO 240 J = 1,K - 1
-                          IF (A(J,K).NE.ZERO) THEN
-                              TEMP = ALPHA*A(J,K)
-                              DO 230 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  230                         CONTINUE
-                          END IF
-  240                 CONTINUE
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(K,K)
-                      IF (TEMP.NE.ONE) THEN
-                          DO 250 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  250                     CONTINUE
-                      END IF
-  260             CONTINUE
-              ELSE
-                  DO 300 K = N,1,-1
-                      DO 280 J = K + 1,N
-                          IF (A(J,K).NE.ZERO) THEN
-                              TEMP = ALPHA*A(J,K)
-                              DO 270 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  270                         CONTINUE
-                          END IF
-  280                 CONTINUE
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(K,K)
-                      IF (TEMP.NE.ONE) THEN
-                          DO 290 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  290                     CONTINUE
-                      END IF
-  300             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of STRMM .
-*
-      END
diff --git a/superlu/BLAS/strmv.f b/superlu/BLAS/strmv.f
deleted file mode 100644
index ba3d7b6a..00000000
--- a/superlu/BLAS/strmv.f
+++ /dev/null
@@ -1,342 +0,0 @@
-*> \brief \b STRMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE STRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,LDA,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> STRMV  performs one of the matrix-vector operations
-*>
-*>    x := A*x,   or   x := A**T*x,
-*>
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>
-*>              TRANS = 'C' or 'c'   x := A**T*x.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular matrix and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular matrix and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced either, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE STRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,LDA,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-      INTEGER I,INFO,IX,J,JX,KX
-      LOGICAL NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 6
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 8
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('STRMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x := A*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          DO 10 I = 1,J - 1
-                              X(I) = X(I) + TEMP*A(I,J)
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(J,J)
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 30 I = 1,J - 1
-                              X(IX) = X(IX) + TEMP*A(I,J)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(J,J)
-                      END IF
-                      JX = JX + INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          DO 50 I = N,J + 1,-1
-                              X(I) = X(I) + TEMP*A(I,J)
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(J,J)
-                      END IF
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 70 I = N,J + 1,-1
-                              X(IX) = X(IX) + TEMP*A(I,J)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(J,J)
-                      END IF
-                      JX = JX - INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := A**T*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = N,1,-1
-                      TEMP = X(J)
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 90 I = J - 1,1,-1
-                          TEMP = TEMP + A(I,J)*X(I)
-   90                 CONTINUE
-                      X(J) = TEMP
-  100             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 120 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 110 I = J - 1,1,-1
-                          IX = IX - INCX
-                          TEMP = TEMP + A(I,J)*X(IX)
-  110                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  120             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = 1,N
-                      TEMP = X(J)
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 130 I = J + 1,N
-                          TEMP = TEMP + A(I,J)*X(I)
-  130                 CONTINUE
-                      X(J) = TEMP
-  140             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 160 J = 1,N
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 150 I = J + 1,N
-                          IX = IX + INCX
-                          TEMP = TEMP + A(I,J)*X(IX)
-  150                 CONTINUE
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of STRMV .
-*
-      END
diff --git a/superlu/BLAS/strsm.f b/superlu/BLAS/strsm.f
deleted file mode 100644
index f2927fe3..00000000
--- a/superlu/BLAS/strsm.f
+++ /dev/null
@@ -1,443 +0,0 @@
-*> \brief \b STRSM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE STRSM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-*
-*       .. Scalar Arguments ..
-*       REAL ALPHA
-*       INTEGER LDA,LDB,M,N
-*       CHARACTER DIAG,SIDE,TRANSA,UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),B(LDB,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> STRSM  solves one of the matrix equations
-*>
-*>    op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
-*>
-*> where alpha is a scalar, X and B are m by n matrices, A is a unit, or
-*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
-*>
-*>    op( A ) = A   or   op( A ) = A**T.
-*>
-*> The matrix X is overwritten on B.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry, SIDE specifies whether op( A ) appears on the left
-*>           or right of X as follows:
-*>
-*>              SIDE = 'L' or 'l'   op( A )*X = alpha*B.
-*>
-*>              SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix A is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSA = 'N' or 'n'   op( A ) = A.
-*>
-*>              TRANSA = 'T' or 't'   op( A ) = A**T.
-*>
-*>              TRANSA = 'C' or 'c'   op( A ) = A**T.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit triangular
-*>           as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of B. M must be at
-*>           least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of B.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is REAL
-*>           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
-*>           zero then  A is not referenced and  B need not be set before
-*>           entry.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, k ),
-*>           where k is m when SIDE = 'L' or 'l'
-*>             and k is n when SIDE = 'R' or 'r'.
-*>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
-*>           upper triangular part of the array  A must contain the upper
-*>           triangular matrix  and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
-*>           lower triangular part of the array  A must contain the lower
-*>           triangular matrix  and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
-*>           A  are not referenced either,  but are assumed to be  unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
-*>           then LDA must be at least max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in,out] B
-*> \verbatim
-*>          B is REAL array of DIMENSION ( LDB, n ).
-*>           Before entry,  the leading  m by n part of the array  B must
-*>           contain  the  right-hand  side  matrix  B,  and  on exit  is
-*>           overwritten by the solution matrix  X.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE STRSM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      REAL ALPHA
-      INTEGER LDA,LDB,M,N
-      CHARACTER DIAG,SIDE,TRANSA,UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),B(LDB,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-      INTEGER I,INFO,J,K,NROWA
-      LOGICAL LSIDE,NOUNIT,UPPER
-*     ..
-*     .. Parameters ..
-      REAL ONE,ZERO
-      PARAMETER (ONE=1.0E+0,ZERO=0.0E+0)
-*     ..
-*
-*     Test the input parameters.
-*
-      LSIDE = LSAME(SIDE,'L')
-      IF (LSIDE) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      NOUNIT = LSAME(DIAG,'N')
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.LSIDE) .AND. (.NOT.LSAME(SIDE,'R'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 2
-      ELSE IF ((.NOT.LSAME(TRANSA,'N')) .AND.
-     +         (.NOT.LSAME(TRANSA,'T')) .AND.
-     +         (.NOT.LSAME(TRANSA,'C'))) THEN
-          INFO = 3
-      ELSE IF ((.NOT.LSAME(DIAG,'U')) .AND. (.NOT.LSAME(DIAG,'N'))) THEN
-          INFO = 4
-      ELSE IF (M.LT.0) THEN
-          INFO = 5
-      ELSE IF (N.LT.0) THEN
-          INFO = 6
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 9
-      ELSE IF (LDB.LT.MAX(1,M)) THEN
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('STRSM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (M.EQ.0 .OR. N.EQ.0) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          DO 20 J = 1,N
-              DO 10 I = 1,M
-                  B(I,J) = ZERO
-   10         CONTINUE
-   20     CONTINUE
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSIDE) THEN
-          IF (LSAME(TRANSA,'N')) THEN
-*
-*           Form  B := alpha*inv( A )*B.
-*
-              IF (UPPER) THEN
-                  DO 60 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 30 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-   30                     CONTINUE
-                      END IF
-                      DO 50 K = M,1,-1
-                          IF (B(K,J).NE.ZERO) THEN
-                              IF (NOUNIT) B(K,J) = B(K,J)/A(K,K)
-                              DO 40 I = 1,K - 1
-                                  B(I,J) = B(I,J) - B(K,J)*A(I,K)
-   40                         CONTINUE
-                          END IF
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 100 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 70 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-   70                     CONTINUE
-                      END IF
-                      DO 90 K = 1,M
-                          IF (B(K,J).NE.ZERO) THEN
-                              IF (NOUNIT) B(K,J) = B(K,J)/A(K,K)
-                              DO 80 I = K + 1,M
-                                  B(I,J) = B(I,J) - B(K,J)*A(I,K)
-   80                         CONTINUE
-                          END IF
-   90                 CONTINUE
-  100             CONTINUE
-              END IF
-          ELSE
-*
-*           Form  B := alpha*inv( A**T )*B.
-*
-              IF (UPPER) THEN
-                  DO 130 J = 1,N
-                      DO 120 I = 1,M
-                          TEMP = ALPHA*B(I,J)
-                          DO 110 K = 1,I - 1
-                              TEMP = TEMP - A(K,I)*B(K,J)
-  110                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(I,I)
-                          B(I,J) = TEMP
-  120                 CONTINUE
-  130             CONTINUE
-              ELSE
-                  DO 160 J = 1,N
-                      DO 150 I = M,1,-1
-                          TEMP = ALPHA*B(I,J)
-                          DO 140 K = I + 1,M
-                              TEMP = TEMP - A(K,I)*B(K,J)
-  140                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(I,I)
-                          B(I,J) = TEMP
-  150                 CONTINUE
-  160             CONTINUE
-              END IF
-          END IF
-      ELSE
-          IF (LSAME(TRANSA,'N')) THEN
-*
-*           Form  B := alpha*B*inv( A ).
-*
-              IF (UPPER) THEN
-                  DO 210 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 170 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-  170                     CONTINUE
-                      END IF
-                      DO 190 K = 1,J - 1
-                          IF (A(K,J).NE.ZERO) THEN
-                              DO 180 I = 1,M
-                                  B(I,J) = B(I,J) - A(K,J)*B(I,K)
-  180                         CONTINUE
-                          END IF
-  190                 CONTINUE
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(J,J)
-                          DO 200 I = 1,M
-                              B(I,J) = TEMP*B(I,J)
-  200                     CONTINUE
-                      END IF
-  210             CONTINUE
-              ELSE
-                  DO 260 J = N,1,-1
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 220 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-  220                     CONTINUE
-                      END IF
-                      DO 240 K = J + 1,N
-                          IF (A(K,J).NE.ZERO) THEN
-                              DO 230 I = 1,M
-                                  B(I,J) = B(I,J) - A(K,J)*B(I,K)
-  230                         CONTINUE
-                          END IF
-  240                 CONTINUE
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(J,J)
-                          DO 250 I = 1,M
-                              B(I,J) = TEMP*B(I,J)
-  250                     CONTINUE
-                      END IF
-  260             CONTINUE
-              END IF
-          ELSE
-*
-*           Form  B := alpha*B*inv( A**T ).
-*
-              IF (UPPER) THEN
-                  DO 310 K = N,1,-1
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(K,K)
-                          DO 270 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  270                     CONTINUE
-                      END IF
-                      DO 290 J = 1,K - 1
-                          IF (A(J,K).NE.ZERO) THEN
-                              TEMP = A(J,K)
-                              DO 280 I = 1,M
-                                  B(I,J) = B(I,J) - TEMP*B(I,K)
-  280                         CONTINUE
-                          END IF
-  290                 CONTINUE
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 300 I = 1,M
-                              B(I,K) = ALPHA*B(I,K)
-  300                     CONTINUE
-                      END IF
-  310             CONTINUE
-              ELSE
-                  DO 360 K = 1,N
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(K,K)
-                          DO 320 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  320                     CONTINUE
-                      END IF
-                      DO 340 J = K + 1,N
-                          IF (A(J,K).NE.ZERO) THEN
-                              TEMP = A(J,K)
-                              DO 330 I = 1,M
-                                  B(I,J) = B(I,J) - TEMP*B(I,K)
-  330                         CONTINUE
-                          END IF
-  340                 CONTINUE
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 350 I = 1,M
-                              B(I,K) = ALPHA*B(I,K)
-  350                     CONTINUE
-                      END IF
-  360             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of STRSM .
-*
-      END
diff --git a/superlu/BLAS/strsv.f b/superlu/BLAS/strsv.f
deleted file mode 100644
index a31651b9..00000000
--- a/superlu/BLAS/strsv.f
+++ /dev/null
@@ -1,344 +0,0 @@
-*> \brief \b STRSV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE STRSV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,LDA,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       REAL A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> STRSV  solves one of the systems of equations
-*>
-*>    A*x = b,   or   A**T*x = b,
-*>
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular matrix.
-*>
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>
-*>              TRANS = 'C' or 'c'   A**T*x = b.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is REAL array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular matrix and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular matrix and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced either, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is REAL array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup single_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE STRSV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,LDA,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      REAL A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      REAL ZERO
-      PARAMETER (ZERO=0.0E+0)
-*     ..
-*     .. Local Scalars ..
-      REAL TEMP
-      INTEGER I,INFO,IX,J,JX,KX
-      LOGICAL NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 6
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 8
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('STRSV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x := inv( A )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/A(J,J)
-                          TEMP = X(J)
-                          DO 10 I = J - 1,1,-1
-                              X(I) = X(I) - TEMP*A(I,J)
-   10                     CONTINUE
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 40 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/A(J,J)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 30 I = J - 1,1,-1
-                              IX = IX - INCX
-                              X(IX) = X(IX) - TEMP*A(I,J)
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/A(J,J)
-                          TEMP = X(J)
-                          DO 50 I = J + 1,N
-                              X(I) = X(I) - TEMP*A(I,J)
-   50                     CONTINUE
-                      END IF
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/A(J,J)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 70 I = J + 1,N
-                              IX = IX + INCX
-                              X(IX) = X(IX) - TEMP*A(I,J)
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := inv( A**T )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 100 J = 1,N
-                      TEMP = X(J)
-                      DO 90 I = 1,J - 1
-                          TEMP = TEMP - A(I,J)*X(I)
-   90                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      X(J) = TEMP
-  100             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 120 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      DO 110 I = 1,J - 1
-                          TEMP = TEMP - A(I,J)*X(IX)
-                          IX = IX + INCX
-  110                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  120             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 140 J = N,1,-1
-                      TEMP = X(J)
-                      DO 130 I = N,J + 1,-1
-                          TEMP = TEMP - A(I,J)*X(I)
-  130                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      X(J) = TEMP
-  140             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 160 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      DO 150 I = N,J + 1,-1
-                          TEMP = TEMP - A(I,J)*X(IX)
-                          IX = IX - INCX
-  150                 CONTINUE
-                      IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  160             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of STRSV .
-*
-      END
diff --git a/superlu/BLAS/xerbla.f b/superlu/BLAS/xerbla.f
deleted file mode 100644
index bbe6cceb..00000000
--- a/superlu/BLAS/xerbla.f
+++ /dev/null
@@ -1,89 +0,0 @@
-*> \brief \b XERBLA
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE XERBLA( SRNAME, INFO )
-*
-*       .. Scalar Arguments ..
-*       CHARACTER*(*)      SRNAME
-*       INTEGER            INFO
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> XERBLA  is an error handler for the LAPACK routines.
-*> It is called by an LAPACK routine if an input parameter has an
-*> invalid value.  A message is printed and execution stops.
-*>
-*> Installers may consider modifying the STOP statement in order to
-*> call system-specific exception-handling facilities.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] SRNAME
-*> \verbatim
-*>          SRNAME is CHARACTER*(*)
-*>          The name of the routine which called XERBLA.
-*> \endverbatim
-*>
-*> \param[in] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          The position of the invalid parameter in the parameter list
-*>          of the calling routine.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup aux_blas
-*
-*  =====================================================================
-      SUBROUTINE XERBLA( SRNAME, INFO )
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      CHARACTER*(*)      SRNAME
-      INTEGER            INFO
-*     ..
-*
-* =====================================================================
-*
-*     .. Intrinsic Functions ..
-      INTRINSIC          LEN_TRIM
-*     ..
-*     .. Executable Statements ..
-*
-      WRITE( *, FMT = 9999 )SRNAME( 1:LEN_TRIM( SRNAME ) ), INFO
-*
-      STOP
-*
- 9999 FORMAT( ' ** On entry to ', A, ' parameter number ', I2, ' had ',
-     $      'an illegal value' )
-*
-*     End of XERBLA
-*
-      END
diff --git a/superlu/BLAS/xerbla_array.f b/superlu/BLAS/xerbla_array.f
deleted file mode 100644
index df4e6273..00000000
--- a/superlu/BLAS/xerbla_array.f
+++ /dev/null
@@ -1,119 +0,0 @@
-*> \brief \b XERBLA_ARRAY
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE XERBLA_ARRAY(SRNAME_ARRAY, SRNAME_LEN, INFO)
-*
-*       .. Scalar Arguments ..
-*       INTEGER SRNAME_LEN, INFO
-*       ..
-*       .. Array Arguments ..
-*       CHARACTER(1) SRNAME_ARRAY(SRNAME_LEN)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> XERBLA_ARRAY assists other languages in calling XERBLA, the LAPACK
-*> and BLAS error handler.  Rather than taking a Fortran string argument
-*> as the function's name, XERBLA_ARRAY takes an array of single
-*> characters along with the array's length.  XERBLA_ARRAY then copies
-*> up to 32 characters of that array into a Fortran string and passes
-*> that to XERBLA.  If called with a non-positive SRNAME_LEN,
-*> XERBLA_ARRAY will call XERBLA with a string of all blank characters.
-*>
-*> Say some macro or other device makes XERBLA_ARRAY available to C99
-*> by a name lapack_xerbla and with a common Fortran calling convention.
-*> Then a C99 program could invoke XERBLA via:
-*>    {
-*>      int flen = strlen(__func__);
-*>      lapack_xerbla(__func__, &flen, &info);
-*>    }
-*>
-*> Providing XERBLA_ARRAY is not necessary for intercepting LAPACK
-*> errors.  XERBLA_ARRAY calls XERBLA.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] SRNAME_ARRAY
-*> \verbatim
-*>          SRNAME_ARRAY is CHARACTER(1) array, dimension (SRNAME_LEN)
-*>          The name of the routine which called XERBLA_ARRAY.
-*> \endverbatim
-*>
-*> \param[in] SRNAME_LEN
-*> \verbatim
-*>          SRNAME_LEN is INTEGER
-*>          The length of the name in SRNAME_ARRAY.
-*> \endverbatim
-*>
-*> \param[in] INFO
-*> \verbatim
-*>          INFO is INTEGER
-*>          The position of the invalid parameter in the parameter list
-*>          of the calling routine.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup aux_blas
-*
-*  =====================================================================
-      SUBROUTINE XERBLA_ARRAY(SRNAME_ARRAY, SRNAME_LEN, INFO)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER SRNAME_LEN, INFO
-*     ..
-*     .. Array Arguments ..
-      CHARACTER(1) SRNAME_ARRAY(SRNAME_LEN)
-*     ..
-*
-* =====================================================================
-*
-*     ..
-*     .. Local Scalars ..
-      INTEGER I
-*     ..
-*     .. Local Arrays ..
-      CHARACTER*32 SRNAME
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MIN, LEN
-*     ..
-*     .. External Functions ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Executable Statements ..
-      SRNAME = ''
-      DO I = 1, MIN( SRNAME_LEN, LEN( SRNAME ) )
-         SRNAME( I:I ) = SRNAME_ARRAY( I )
-      END DO
-
-      CALL XERBLA( SRNAME, INFO )
-
-      RETURN
-      END
diff --git a/superlu/BLAS/zaxpy.f b/superlu/BLAS/zaxpy.f
deleted file mode 100644
index bca78fb7..00000000
--- a/superlu/BLAS/zaxpy.f
+++ /dev/null
@@ -1,102 +0,0 @@
-*> \brief \b ZAXPY
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZAXPY(N,ZA,ZX,INCX,ZY,INCY)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ZA
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*),ZY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    ZAXPY constant times a vector plus a vector.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZAXPY(N,ZA,ZX,INCX,ZY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ZA
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*),ZY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER I,IX,IY
-*     ..
-*     .. External Functions ..
-      DOUBLE PRECISION DCABS1
-      EXTERNAL DCABS1
-*     ..
-      IF (N.LE.0) RETURN
-      IF (DCABS1(ZA).EQ.0.0d0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-         DO I = 1,N
-            ZY(I) = ZY(I) + ZA*ZX(I)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            ZY(IY) = ZY(IY) + ZA*ZX(IX)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-*
-      RETURN
-      END
diff --git a/superlu/BLAS/zcopy.f b/superlu/BLAS/zcopy.f
deleted file mode 100644
index 830548ab..00000000
--- a/superlu/BLAS/zcopy.f
+++ /dev/null
@@ -1,94 +0,0 @@
-*> \brief \b ZCOPY
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZCOPY(N,ZX,INCX,ZY,INCY)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*),ZY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    ZCOPY copies a vector, x, to a vector, y.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, linpack, 4/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZCOPY(N,ZX,INCX,ZY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*),ZY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER I,IX,IY
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-         DO I = 1,N
-          ZY(I) = ZX(I)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            ZY(IY) = ZX(IX)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/zdotc.f b/superlu/BLAS/zdotc.f
deleted file mode 100644
index 70119ec5..00000000
--- a/superlu/BLAS/zdotc.f
+++ /dev/null
@@ -1,103 +0,0 @@
-*> \brief \b ZDOTC
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       COMPLEX*16 FUNCTION ZDOTC(N,ZX,INCX,ZY,INCY)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*),ZY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZDOTC forms the dot product of two complex vectors
-*>      ZDOTC = X^H * Y
-*>
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      COMPLEX*16 FUNCTION ZDOTC(N,ZX,INCX,ZY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*),ZY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      COMPLEX*16 ZTEMP
-      INTEGER I,IX,IY
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DCONJG
-*     ..
-      ZTEMP = (0.0d0,0.0d0)
-      ZDOTC = (0.0d0,0.0d0)
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-         DO I = 1,N
-            ZTEMP = ZTEMP + DCONJG(ZX(I))*ZY(I)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            ZTEMP = ZTEMP + DCONJG(ZX(IX))*ZY(IY)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      ZDOTC = ZTEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/zdotu.f b/superlu/BLAS/zdotu.f
deleted file mode 100644
index 318fae24..00000000
--- a/superlu/BLAS/zdotu.f
+++ /dev/null
@@ -1,100 +0,0 @@
-*> \brief \b ZDOTU
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       COMPLEX*16 FUNCTION ZDOTU(N,ZX,INCX,ZY,INCY)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*),ZY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZDOTU forms the dot product of two complex vectors
-*>      ZDOTU = X^T * Y
-*>
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      COMPLEX*16 FUNCTION ZDOTU(N,ZX,INCX,ZY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*),ZY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      COMPLEX*16 ZTEMP
-      INTEGER I,IX,IY
-*     ..
-      ZTEMP = (0.0d0,0.0d0)
-      ZDOTU = (0.0d0,0.0d0)
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*        code for both increments equal to 1
-*
-         DO I = 1,N
-            ZTEMP = ZTEMP + ZX(I)*ZY(I)
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments
-*          not equal to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            ZTEMP = ZTEMP + ZX(IX)*ZY(IY)
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      ZDOTU = ZTEMP
-      RETURN
-      END
diff --git a/superlu/BLAS/zdrot.f b/superlu/BLAS/zdrot.f
deleted file mode 100644
index 8a4cf652..00000000
--- a/superlu/BLAS/zdrot.f
+++ /dev/null
@@ -1,153 +0,0 @@
-*> \brief \b ZDROT
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZDROT( N, CX, INCX, CY, INCY, C, S )
-*
-*       .. Scalar Arguments ..
-*       INTEGER            INCX, INCY, N
-*       DOUBLE PRECISION   C, S
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16         CX( * ), CY( * )
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> Applies a plane rotation, where the cos and sin (c and s) are real
-*> and the vectors cx and cy are complex.
-*> jack dongarra, linpack, 3/11/78.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the vectors cx and cy.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in,out] CX
-*> \verbatim
-*>          CX is COMPLEX*16 array, dimension at least
-*>           ( 1 + ( N - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array CX must contain the n
-*>           element vector cx. On exit, CX is overwritten by the updated
-*>           vector cx.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           CX. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] CY
-*> \verbatim
-*>          CY is COMPLEX*16 array, dimension at least
-*>           ( 1 + ( N - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array CY must contain the n
-*>           element vector cy. On exit, CY is overwritten by the updated
-*>           vector cy.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           CY. INCY must not be zero.
-*> \endverbatim
-*>
-*> \param[in] C
-*> \verbatim
-*>          C is DOUBLE PRECISION
-*>           On entry, C specifies the cosine, cos.
-*> \endverbatim
-*>
-*> \param[in] S
-*> \verbatim
-*>          S is DOUBLE PRECISION
-*>           On entry, S specifies the sine, sin.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level1
-*
-*  =====================================================================
-      SUBROUTINE ZDROT( N, CX, INCX, CY, INCY, C, S )
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER            INCX, INCY, N
-      DOUBLE PRECISION   C, S
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16         CX( * ), CY( * )
-*     ..
-*
-* =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER            I, IX, IY
-      COMPLEX*16         CTEMP
-*     ..
-*     .. Executable Statements ..
-*
-      IF( N.LE.0 )
-     $   RETURN
-      IF( INCX.EQ.1 .AND. INCY.EQ.1 ) THEN
-*
-*        code for both increments equal to 1
-*
-         DO I = 1, N
-            CTEMP = C*CX( I ) + S*CY( I )
-            CY( I ) = C*CY( I ) - S*CX( I )
-            CX( I ) = CTEMP
-         END DO
-      ELSE
-*
-*        code for unequal increments or equal increments not equal
-*          to 1
-*
-         IX = 1
-         IY = 1
-         IF( INCX.LT.0 )
-     $      IX = ( -N+1 )*INCX + 1
-         IF( INCY.LT.0 )
-     $      IY = ( -N+1 )*INCY + 1
-         DO I = 1, N
-            CTEMP = C*CX( IX ) + S*CY( IY )
-            CY( IY ) = C*CY( IY ) - S*CX( IX )
-            CX( IX ) = CTEMP
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/zdscal.f b/superlu/BLAS/zdscal.f
deleted file mode 100644
index def90785..00000000
--- a/superlu/BLAS/zdscal.f
+++ /dev/null
@@ -1,94 +0,0 @@
-*> \brief \b ZDSCAL
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZDSCAL(N,DA,ZX,INCX)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION DA
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    ZDSCAL scales a vector by a constant.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZDSCAL(N,DA,ZX,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION DA
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER I,NINCX
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DCMPLX
-*     ..
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*
-*        code for increment equal to 1
-*
-         DO I = 1,N
-            ZX(I) = DCMPLX(DA,0.0d0)*ZX(I)
-         END DO
-      ELSE
-*
-*        code for increment not equal to 1
-*
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            ZX(I) = DCMPLX(DA,0.0d0)*ZX(I)
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/zgbmv.f b/superlu/BLAS/zgbmv.f
deleted file mode 100644
index f49da221..00000000
--- a/superlu/BLAS/zgbmv.f
+++ /dev/null
@@ -1,390 +0,0 @@
-*> \brief \b ZGBMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA,BETA
-*       INTEGER INCX,INCY,KL,KU,LDA,M,N
-*       CHARACTER TRANS
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZGBMV  performs one of the matrix-vector operations
-*>
-*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,   or
-*>
-*>    y := alpha*A**H*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are vectors and A is an
-*> m by n band matrix, with kl sub-diagonals and ku super-diagonals.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
-*>
-*>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
-*>
-*>              TRANS = 'C' or 'c'   y := alpha*A**H*x + beta*y.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] KL
-*> \verbatim
-*>          KL is INTEGER
-*>           On entry, KL specifies the number of sub-diagonals of the
-*>           matrix A. KL must satisfy  0 .le. KL.
-*> \endverbatim
-*>
-*> \param[in] KU
-*> \verbatim
-*>          KU is INTEGER
-*>           On entry, KU specifies the number of super-diagonals of the
-*>           matrix A. KU must satisfy  0 .le. KU.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading ( kl + ku + 1 ) by n part of the
-*>           array A must contain the matrix of coefficients, supplied
-*>           column by column, with the leading diagonal of the matrix in
-*>           row ( ku + 1 ) of the array, the first super-diagonal
-*>           starting at position 2 in row ku, the first sub-diagonal
-*>           starting at position 1 in row ( ku + 2 ), and so on.
-*>           Elements in the array A that do not correspond to elements
-*>           in the band matrix (such as the top left ku by ku triangle)
-*>           are not referenced.
-*>           The following program segment will transfer a band matrix
-*>           from conventional full matrix storage to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    K = KU + 1 - J
-*>                    DO 10, I = MAX( 1, J - KU ), MIN( M, J + KL )
-*>                       A( K + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( kl + ku + 1 ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of DIMENSION at least
-*>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
-*>           Before entry, the incremented array Y must contain the
-*>           vector y. On exit, Y is overwritten by the updated vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ALPHA,BETA
-      INTEGER INCX,INCY,KL,KU,LDA,M,N
-      CHARACTER TRANS
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-      INTEGER I,INFO,IX,IY,J,JX,JY,K,KUP1,KX,KY,LENX,LENY
-      LOGICAL NOCONJ
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DCONJG,MAX,MIN
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +    .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 1
-      ELSE IF (M.LT.0) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (KL.LT.0) THEN
-          INFO = 4
-      ELSE IF (KU.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (KL+KU+1)) THEN
-          INFO = 8
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 10
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 13
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZGBMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-      NOCONJ = LSAME(TRANS,'T')
-*
-*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
-*     up the start points in  X  and  Y.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          LENX = N
-          LENY = M
-      ELSE
-          LENX = M
-          LENY = N
-      END IF
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (LENX-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (LENY-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the band part of A.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,LENY
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,LENY
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,LENY
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,LENY
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      KUP1 = KU + 1
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  y := alpha*A*x + y.
-*
-          JX = KX
-          IF (INCY.EQ.1) THEN
-              DO 60 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  K = KUP1 - J
-                  DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
-                      Y(I) = Y(I) + TEMP*A(K+I,J)
-   50             CONTINUE
-                  JX = JX + INCX
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  IY = KY
-                  K = KUP1 - J
-                  DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
-                      Y(IY) = Y(IY) + TEMP*A(K+I,J)
-                      IY = IY + INCY
-   70             CONTINUE
-                  JX = JX + INCX
-                  IF (J.GT.KU) KY = KY + INCY
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y := alpha*A**T*x + y  or  y := alpha*A**H*x + y.
-*
-          JY = KY
-          IF (INCX.EQ.1) THEN
-              DO 110 J = 1,N
-                  TEMP = ZERO
-                  K = KUP1 - J
-                  IF (NOCONJ) THEN
-                      DO 90 I = MAX(1,J-KU),MIN(M,J+KL)
-                          TEMP = TEMP + A(K+I,J)*X(I)
-   90                 CONTINUE
-                  ELSE
-                      DO 100 I = MAX(1,J-KU),MIN(M,J+KL)
-                          TEMP = TEMP + DCONJG(A(K+I,J))*X(I)
-  100                 CONTINUE
-                  END IF
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  110         CONTINUE
-          ELSE
-              DO 140 J = 1,N
-                  TEMP = ZERO
-                  IX = KX
-                  K = KUP1 - J
-                  IF (NOCONJ) THEN
-                      DO 120 I = MAX(1,J-KU),MIN(M,J+KL)
-                          TEMP = TEMP + A(K+I,J)*X(IX)
-                          IX = IX + INCX
-  120                 CONTINUE
-                  ELSE
-                      DO 130 I = MAX(1,J-KU),MIN(M,J+KL)
-                          TEMP = TEMP + DCONJG(A(K+I,J))*X(IX)
-                          IX = IX + INCX
-  130                 CONTINUE
-                  END IF
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-                  IF (J.GT.KU) KX = KX + INCX
-  140         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZGBMV .
-*
-      END
diff --git a/superlu/BLAS/zgemm.f b/superlu/BLAS/zgemm.f
deleted file mode 100644
index a1726321..00000000
--- a/superlu/BLAS/zgemm.f
+++ /dev/null
@@ -1,483 +0,0 @@
-*> \brief \b ZGEMM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA,BETA
-*       INTEGER K,LDA,LDB,LDC,M,N
-*       CHARACTER TRANSA,TRANSB
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZGEMM  performs one of the matrix-matrix operations
-*>
-*>    C := alpha*op( A )*op( B ) + beta*C,
-*>
-*> where  op( X ) is one of
-*>
-*>    op( X ) = X   or   op( X ) = X**T   or   op( X ) = X**H,
-*>
-*> alpha and beta are scalars, and A, B and C are matrices, with op( A )
-*> an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSA = 'N' or 'n',  op( A ) = A.
-*>
-*>              TRANSA = 'T' or 't',  op( A ) = A**T.
-*>
-*>              TRANSA = 'C' or 'c',  op( A ) = A**H.
-*> \endverbatim
-*>
-*> \param[in] TRANSB
-*> \verbatim
-*>          TRANSB is CHARACTER*1
-*>           On entry, TRANSB specifies the form of op( B ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSB = 'N' or 'n',  op( B ) = B.
-*>
-*>              TRANSB = 'T' or 't',  op( B ) = B**T.
-*>
-*>              TRANSB = 'C' or 'c',  op( B ) = B**H.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies  the number  of rows  of the  matrix
-*>           op( A )  and of the  matrix  C.  M  must  be at least  zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N  specifies the number  of columns of the matrix
-*>           op( B ) and the number of columns of the matrix C. N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry,  K  specifies  the number of columns of the matrix
-*>           op( A ) and the number of rows of the matrix op( B ). K must
-*>           be at least  zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANSA = 'N' or 'n',  and is  m  otherwise.
-*>           Before entry with  TRANSA = 'N' or 'n',  the leading  m by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by m  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. When  TRANSA = 'N' or 'n' then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is
-*>           n  when  TRANSB = 'N' or 'n',  and is  k  otherwise.
-*>           Before entry with  TRANSB = 'N' or 'n',  the leading  k by n
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  n by k  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in the calling (sub) program. When  TRANSB = 'N' or 'n' then
-*>           LDB must be at least  max( 1, k ), otherwise  LDB must be at
-*>           least  max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX*16 array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n  matrix
-*>           ( alpha*op( A )*op( B ) + beta*C ).
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ALPHA,BETA
-      INTEGER K,LDA,LDB,LDC,M,N
-      CHARACTER TRANSA,TRANSB
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DCONJG,MAX
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-      INTEGER I,INFO,J,L,NCOLA,NROWA,NROWB
-      LOGICAL CONJA,CONJB,NOTA,NOTB
-*     ..
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*
-*     Set  NOTA  and  NOTB  as  true if  A  and  B  respectively are not
-*     conjugated or transposed, set  CONJA and CONJB  as true if  A  and
-*     B  respectively are to be  transposed but  not conjugated  and set
-*     NROWA, NCOLA and  NROWB  as the number of rows and  columns  of  A
-*     and the number of rows of  B  respectively.
-*
-      NOTA = LSAME(TRANSA,'N')
-      NOTB = LSAME(TRANSB,'N')
-      CONJA = LSAME(TRANSA,'C')
-      CONJB = LSAME(TRANSB,'C')
-      IF (NOTA) THEN
-          NROWA = M
-          NCOLA = K
-      ELSE
-          NROWA = K
-          NCOLA = M
-      END IF
-      IF (NOTB) THEN
-          NROWB = K
-      ELSE
-          NROWB = N
-      END IF
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF ((.NOT.NOTA) .AND. (.NOT.CONJA) .AND.
-     +    (.NOT.LSAME(TRANSA,'T'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.NOTB) .AND. (.NOT.CONJB) .AND.
-     +         (.NOT.LSAME(TRANSB,'T'))) THEN
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 8
-      ELSE IF (LDB.LT.MAX(1,NROWB)) THEN
-          INFO = 10
-      ELSE IF (LDC.LT.MAX(1,M)) THEN
-          INFO = 13
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZGEMM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    (((ALPHA.EQ.ZERO).OR. (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (NOTB) THEN
-          IF (NOTA) THEN
-*
-*           Form  C := alpha*A*B + beta*C.
-*
-              DO 90 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 50 I = 1,M
-                          C(I,J) = ZERO
-   50                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 60 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-   60                 CONTINUE
-                  END IF
-                  DO 80 L = 1,K
-                      TEMP = ALPHA*B(L,J)
-                      DO 70 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-   70                 CONTINUE
-   80             CONTINUE
-   90         CONTINUE
-          ELSE IF (CONJA) THEN
-*
-*           Form  C := alpha*A**H*B + beta*C.
-*
-              DO 120 J = 1,N
-                  DO 110 I = 1,M
-                      TEMP = ZERO
-                      DO 100 L = 1,K
-                          TEMP = TEMP + DCONJG(A(L,I))*B(L,J)
-  100                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  110             CONTINUE
-  120         CONTINUE
-          ELSE
-*
-*           Form  C := alpha*A**T*B + beta*C
-*
-              DO 150 J = 1,N
-                  DO 140 I = 1,M
-                      TEMP = ZERO
-                      DO 130 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(L,J)
-  130                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  140             CONTINUE
-  150         CONTINUE
-          END IF
-      ELSE IF (NOTA) THEN
-          IF (CONJB) THEN
-*
-*           Form  C := alpha*A*B**H + beta*C.
-*
-              DO 200 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 160 I = 1,M
-                          C(I,J) = ZERO
-  160                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 170 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-  170                 CONTINUE
-                  END IF
-                  DO 190 L = 1,K
-                      TEMP = ALPHA*DCONJG(B(J,L))
-                      DO 180 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-  180                 CONTINUE
-  190             CONTINUE
-  200         CONTINUE
-          ELSE
-*
-*           Form  C := alpha*A*B**T + beta*C
-*
-              DO 250 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 210 I = 1,M
-                          C(I,J) = ZERO
-  210                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 220 I = 1,M
-                          C(I,J) = BETA*C(I,J)
-  220                 CONTINUE
-                  END IF
-                  DO 240 L = 1,K
-                      TEMP = ALPHA*B(J,L)
-                      DO 230 I = 1,M
-                          C(I,J) = C(I,J) + TEMP*A(I,L)
-  230                 CONTINUE
-  240             CONTINUE
-  250         CONTINUE
-          END IF
-      ELSE IF (CONJA) THEN
-          IF (CONJB) THEN
-*
-*           Form  C := alpha*A**H*B**H + beta*C.
-*
-              DO 280 J = 1,N
-                  DO 270 I = 1,M
-                      TEMP = ZERO
-                      DO 260 L = 1,K
-                          TEMP = TEMP + DCONJG(A(L,I))*DCONJG(B(J,L))
-  260                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  270             CONTINUE
-  280         CONTINUE
-          ELSE
-*
-*           Form  C := alpha*A**H*B**T + beta*C
-*
-              DO 310 J = 1,N
-                  DO 300 I = 1,M
-                      TEMP = ZERO
-                      DO 290 L = 1,K
-                          TEMP = TEMP + DCONJG(A(L,I))*B(J,L)
-  290                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  300             CONTINUE
-  310         CONTINUE
-          END IF
-      ELSE
-          IF (CONJB) THEN
-*
-*           Form  C := alpha*A**T*B**H + beta*C
-*
-              DO 340 J = 1,N
-                  DO 330 I = 1,M
-                      TEMP = ZERO
-                      DO 320 L = 1,K
-                          TEMP = TEMP + A(L,I)*DCONJG(B(J,L))
-  320                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  330             CONTINUE
-  340         CONTINUE
-          ELSE
-*
-*           Form  C := alpha*A**T*B**T + beta*C
-*
-              DO 370 J = 1,N
-                  DO 360 I = 1,M
-                      TEMP = ZERO
-                      DO 350 L = 1,K
-                          TEMP = TEMP + A(L,I)*B(J,L)
-  350                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  360             CONTINUE
-  370         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZGEMM .
-*
-      END
diff --git a/superlu/BLAS/zgemv.f b/superlu/BLAS/zgemv.f
deleted file mode 100644
index 01e44d46..00000000
--- a/superlu/BLAS/zgemv.f
+++ /dev/null
@@ -1,350 +0,0 @@
-*> \brief \b ZGEMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA,BETA
-*       INTEGER INCX,INCY,LDA,M,N
-*       CHARACTER TRANS
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZGEMV  performs one of the matrix-vector operations
-*>
-*>    y := alpha*A*x + beta*y,   or   y := alpha*A**T*x + beta*y,   or
-*>
-*>    y := alpha*A**H*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are vectors and A is an
-*> m by n matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.
-*>
-*>              TRANS = 'T' or 't'   y := alpha*A**T*x + beta*y.
-*>
-*>              TRANS = 'C' or 'c'   y := alpha*A**H*x + beta*y.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of DIMENSION at least
-*>           ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
-*>           and at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.
-*>           Before entry with BETA non-zero, the incremented array Y
-*>           must contain the vector y. On exit, Y is overwritten by the
-*>           updated vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ALPHA,BETA
-      INTEGER INCX,INCY,LDA,M,N
-      CHARACTER TRANS
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-      INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY,LENX,LENY
-      LOGICAL NOCONJ
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DCONJG,MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +    .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 1
-      ELSE IF (M.LT.0) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (LDA.LT.MAX(1,M)) THEN
-          INFO = 6
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 8
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZGEMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-      NOCONJ = LSAME(TRANS,'T')
-*
-*     Set  LENX  and  LENY, the lengths of the vectors x and y, and set
-*     up the start points in  X  and  Y.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          LENX = N
-          LENY = M
-      ELSE
-          LENX = M
-          LENY = N
-      END IF
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (LENX-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (LENY-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,LENY
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,LENY
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,LENY
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,LENY
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  y := alpha*A*x + y.
-*
-          JX = KX
-          IF (INCY.EQ.1) THEN
-              DO 60 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  DO 50 I = 1,M
-                      Y(I) = Y(I) + TEMP*A(I,J)
-   50             CONTINUE
-                  JX = JX + INCX
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  TEMP = ALPHA*X(JX)
-                  IY = KY
-                  DO 70 I = 1,M
-                      Y(IY) = Y(IY) + TEMP*A(I,J)
-                      IY = IY + INCY
-   70             CONTINUE
-                  JX = JX + INCX
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y := alpha*A**T*x + y  or  y := alpha*A**H*x + y.
-*
-          JY = KY
-          IF (INCX.EQ.1) THEN
-              DO 110 J = 1,N
-                  TEMP = ZERO
-                  IF (NOCONJ) THEN
-                      DO 90 I = 1,M
-                          TEMP = TEMP + A(I,J)*X(I)
-   90                 CONTINUE
-                  ELSE
-                      DO 100 I = 1,M
-                          TEMP = TEMP + DCONJG(A(I,J))*X(I)
-  100                 CONTINUE
-                  END IF
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  110         CONTINUE
-          ELSE
-              DO 140 J = 1,N
-                  TEMP = ZERO
-                  IX = KX
-                  IF (NOCONJ) THEN
-                      DO 120 I = 1,M
-                          TEMP = TEMP + A(I,J)*X(IX)
-                          IX = IX + INCX
-  120                 CONTINUE
-                  ELSE
-                      DO 130 I = 1,M
-                          TEMP = TEMP + DCONJG(A(I,J))*X(IX)
-                          IX = IX + INCX
-  130                 CONTINUE
-                  END IF
-                  Y(JY) = Y(JY) + ALPHA*TEMP
-                  JY = JY + INCY
-  140         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZGEMV .
-*
-      END
diff --git a/superlu/BLAS/zgerc.f b/superlu/BLAS/zgerc.f
deleted file mode 100644
index cf8e17d3..00000000
--- a/superlu/BLAS/zgerc.f
+++ /dev/null
@@ -1,227 +0,0 @@
-*> \brief \b ZGERC
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZGERC(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA
-*       INTEGER INCX,INCY,LDA,M,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZGERC  performs the rank 1 operation
-*>
-*>    A := alpha*x*y**H + A,
-*>
-*> where alpha is a scalar, x is an m element vector, y is an n element
-*> vector and A is an m by n matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the m
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients. On exit, A is
-*>           overwritten by the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZGERC(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ALPHA
-      INTEGER INCX,INCY,LDA,M,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-      INTEGER I,INFO,IX,J,JY,KX
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DCONJG,MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (M.LT.0) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 7
-      ELSE IF (LDA.LT.MAX(1,M)) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZGERC ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (INCY.GT.0) THEN
-          JY = 1
-      ELSE
-          JY = 1 - (N-1)*INCY
-      END IF
-      IF (INCX.EQ.1) THEN
-          DO 20 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*DCONJG(Y(JY))
-                  DO 10 I = 1,M
-                      A(I,J) = A(I,J) + X(I)*TEMP
-   10             CONTINUE
-              END IF
-              JY = JY + INCY
-   20     CONTINUE
-      ELSE
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (M-1)*INCX
-          END IF
-          DO 40 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*DCONJG(Y(JY))
-                  IX = KX
-                  DO 30 I = 1,M
-                      A(I,J) = A(I,J) + X(IX)*TEMP
-                      IX = IX + INCX
-   30             CONTINUE
-              END IF
-              JY = JY + INCY
-   40     CONTINUE
-      END IF
-*
-      RETURN
-*
-*     End of ZGERC .
-*
-      END
diff --git a/superlu/BLAS/zgeru.f b/superlu/BLAS/zgeru.f
deleted file mode 100644
index d191740c..00000000
--- a/superlu/BLAS/zgeru.f
+++ /dev/null
@@ -1,227 +0,0 @@
-*> \brief \b ZGERU
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZGERU(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA
-*       INTEGER INCX,INCY,LDA,M,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZGERU  performs the rank 1 operation
-*>
-*>    A := alpha*x*y**T + A,
-*>
-*> where alpha is a scalar, x is an m element vector, y is an n element
-*> vector and A is an m by n matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of the matrix A.
-*>           M must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( m - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the m
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry, the leading m by n part of the array A must
-*>           contain the matrix of coefficients. On exit, A is
-*>           overwritten by the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZGERU(M,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ALPHA
-      INTEGER INCX,INCY,LDA,M,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-      INTEGER I,INFO,IX,J,JY,KX
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (M.LT.0) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 7
-      ELSE IF (LDA.LT.MAX(1,M)) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZGERU ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (INCY.GT.0) THEN
-          JY = 1
-      ELSE
-          JY = 1 - (N-1)*INCY
-      END IF
-      IF (INCX.EQ.1) THEN
-          DO 20 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  DO 10 I = 1,M
-                      A(I,J) = A(I,J) + X(I)*TEMP
-   10             CONTINUE
-              END IF
-              JY = JY + INCY
-   20     CONTINUE
-      ELSE
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (M-1)*INCX
-          END IF
-          DO 40 J = 1,N
-              IF (Y(JY).NE.ZERO) THEN
-                  TEMP = ALPHA*Y(JY)
-                  IX = KX
-                  DO 30 I = 1,M
-                      A(I,J) = A(I,J) + X(IX)*TEMP
-                      IX = IX + INCX
-   30             CONTINUE
-              END IF
-              JY = JY + INCY
-   40     CONTINUE
-      END IF
-*
-      RETURN
-*
-*     End of ZGERU .
-*
-      END
diff --git a/superlu/BLAS/zhbmv.f b/superlu/BLAS/zhbmv.f
deleted file mode 100644
index 87422152..00000000
--- a/superlu/BLAS/zhbmv.f
+++ /dev/null
@@ -1,380 +0,0 @@
-*> \brief \b ZHBMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZHBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA,BETA
-*       INTEGER INCX,INCY,K,LDA,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZHBMV  performs the matrix-vector  operation
-*>
-*>    y := alpha*A*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n hermitian band matrix, with k super-diagonals.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the band matrix A is being supplied as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  being supplied.
-*>
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  being supplied.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry, K specifies the number of super-diagonals of the
-*>           matrix A. K must satisfy  0 .le. K.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the hermitian matrix, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer the upper
-*>           triangular part of a hermitian band matrix from conventional
-*>           full matrix storage to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the hermitian matrix, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer the lower
-*>           triangular part of a hermitian band matrix from conventional
-*>           full matrix storage to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set and are assumed to be zero.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the
-*>           vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of DIMENSION at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the
-*>           vector y. On exit, Y is overwritten by the updated vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZHBMV(UPLO,N,K,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ALPHA,BETA
-      INTEGER INCX,INCY,K,LDA,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,KPLUS1,KX,KY,L
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DBLE,DCONJG,MAX,MIN
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (K.LT.0) THEN
-          INFO = 3
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 6
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 8
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZHBMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     Set up the start points in  X  and  Y.
-*
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of the array A
-*     are accessed sequentially with one pass through A.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  y  when upper triangle of A is stored.
-*
-          KPLUS1 = K + 1
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  L = KPLUS1 - J
-                  DO 50 I = MAX(1,J-K),J - 1
-                      Y(I) = Y(I) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + DCONJG(A(L+I,J))*X(I)
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*DBLE(A(KPLUS1,J)) + ALPHA*TEMP2
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  L = KPLUS1 - J
-                  DO 70 I = MAX(1,J-K),J - 1
-                      Y(IY) = Y(IY) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + DCONJG(A(L+I,J))*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*DBLE(A(KPLUS1,J)) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  IF (J.GT.K) THEN
-                      KX = KX + INCX
-                      KY = KY + INCY
-                  END IF
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y  when lower triangle of A is stored.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*DBLE(A(1,J))
-                  L = 1 - J
-                  DO 90 I = J + 1,MIN(N,J+K)
-                      Y(I) = Y(I) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + DCONJG(A(L+I,J))*X(I)
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*DBLE(A(1,J))
-                  L = 1 - J
-                  IX = JX
-                  IY = JY
-                  DO 110 I = J + 1,MIN(N,J+K)
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*A(L+I,J)
-                      TEMP2 = TEMP2 + DCONJG(A(L+I,J))*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZHBMV .
-*
-      END
diff --git a/superlu/BLAS/zhemm.f b/superlu/BLAS/zhemm.f
deleted file mode 100644
index 45a5eabd..00000000
--- a/superlu/BLAS/zhemm.f
+++ /dev/null
@@ -1,371 +0,0 @@
-*> \brief \b ZHEMM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZHEMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA,BETA
-*       INTEGER LDA,LDB,LDC,M,N
-*       CHARACTER SIDE,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZHEMM  performs one of the matrix-matrix operations
-*>
-*>    C := alpha*A*B + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*B*A + beta*C,
-*>
-*> where alpha and beta are scalars, A is an hermitian matrix and  B and
-*> C are m by n matrices.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE  specifies whether  the  hermitian matrix  A
-*>           appears on the  left or right  in the  operation as follows:
-*>
-*>              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
-*>
-*>              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of  the  hermitian  matrix   A  is  to  be
-*>           referenced as follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of the
-*>                                  hermitian matrix is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of the
-*>                                  hermitian matrix is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies the number of rows of the matrix  C.
-*>           M  must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix C.
-*>           N  must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is
-*>           m  when  SIDE = 'L' or 'l'  and is n  otherwise.
-*>           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
-*>           the array  A  must contain the  hermitian matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading m by m upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  hermitian matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  m by m  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  hermitian
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*>           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
-*>           the array  A  must contain the  hermitian matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading n by n upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  hermitian matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  n by n  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  hermitian
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*>           Note that the imaginary parts  of the diagonal elements need
-*>           not be set, they are assumed to be zero.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX*16 array of DIMENSION ( LDB, n ).
-*>           Before entry, the leading  m by n part of the array  B  must
-*>           contain the matrix B.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX*16 array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n updated
-*>           matrix.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZHEMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ALPHA,BETA
-      INTEGER LDA,LDB,LDC,M,N
-      CHARACTER SIDE,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DBLE,DCONJG,MAX
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP1,TEMP2
-      INTEGER I,INFO,J,K,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*
-*     Set NROWA as the number of rows of A.
-*
-      IF (LSAME(SIDE,'L')) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF ((.NOT.LSAME(SIDE,'L')) .AND. (.NOT.LSAME(SIDE,'R'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDB.LT.MAX(1,M)) THEN
-          INFO = 9
-      ELSE IF (LDC.LT.MAX(1,M)) THEN
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZHEMM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(SIDE,'L')) THEN
-*
-*        Form  C := alpha*A*B + beta*C.
-*
-          IF (UPPER) THEN
-              DO 70 J = 1,N
-                  DO 60 I = 1,M
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 50 K = 1,I - 1
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*DCONJG(A(K,I))
-   50                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*DBLE(A(I,I)) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*DBLE(A(I,I)) +
-     +                             ALPHA*TEMP2
-                      END IF
-   60             CONTINUE
-   70         CONTINUE
-          ELSE
-              DO 100 J = 1,N
-                  DO 90 I = M,1,-1
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 80 K = I + 1,M
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*DCONJG(A(K,I))
-   80                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*DBLE(A(I,I)) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*DBLE(A(I,I)) +
-     +                             ALPHA*TEMP2
-                      END IF
-   90             CONTINUE
-  100         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*B*A + beta*C.
-*
-          DO 170 J = 1,N
-              TEMP1 = ALPHA*DBLE(A(J,J))
-              IF (BETA.EQ.ZERO) THEN
-                  DO 110 I = 1,M
-                      C(I,J) = TEMP1*B(I,J)
-  110             CONTINUE
-              ELSE
-                  DO 120 I = 1,M
-                      C(I,J) = BETA*C(I,J) + TEMP1*B(I,J)
-  120             CONTINUE
-              END IF
-              DO 140 K = 1,J - 1
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(K,J)
-                  ELSE
-                      TEMP1 = ALPHA*DCONJG(A(J,K))
-                  END IF
-                  DO 130 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  130             CONTINUE
-  140         CONTINUE
-              DO 160 K = J + 1,N
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*DCONJG(A(J,K))
-                  ELSE
-                      TEMP1 = ALPHA*A(K,J)
-                  END IF
-                  DO 150 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  150             CONTINUE
-  160         CONTINUE
-  170     CONTINUE
-      END IF
-*
-      RETURN
-*
-*     End of ZHEMM .
-*
-      END
diff --git a/superlu/BLAS/zhemv.f b/superlu/BLAS/zhemv.f
deleted file mode 100644
index 37917459..00000000
--- a/superlu/BLAS/zhemv.f
+++ /dev/null
@@ -1,337 +0,0 @@
-*> \brief \b ZHEMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZHEMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA,BETA
-*       INTEGER INCX,INCY,LDA,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZHEMV  performs the matrix-vector  operation
-*>
-*>    y := alpha*A*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n hermitian matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the hermitian matrix and the strictly
-*>           lower triangular part of A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the hermitian matrix and the strictly
-*>           upper triangular part of A is not referenced.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set and are assumed to be zero.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y. On exit, Y is overwritten by the updated
-*>           vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZHEMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ALPHA,BETA
-      INTEGER INCX,INCY,LDA,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DBLE,DCONJG,MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 5
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 7
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZHEMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     Set up the start points in  X  and  Y.
-*
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  y  when A is stored in upper triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  DO 50 I = 1,J - 1
-                      Y(I) = Y(I) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + DCONJG(A(I,J))*X(I)
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*DBLE(A(J,J)) + ALPHA*TEMP2
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  DO 70 I = 1,J - 1
-                      Y(IY) = Y(IY) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + DCONJG(A(I,J))*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*DBLE(A(J,J)) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y  when A is stored in lower triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*DBLE(A(J,J))
-                  DO 90 I = J + 1,N
-                      Y(I) = Y(I) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + DCONJG(A(I,J))*X(I)
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*DBLE(A(J,J))
-                  IX = JX
-                  IY = JY
-                  DO 110 I = J + 1,N
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*A(I,J)
-                      TEMP2 = TEMP2 + DCONJG(A(I,J))*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-  120         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZHEMV .
-*
-      END
diff --git a/superlu/BLAS/zher.f b/superlu/BLAS/zher.f
deleted file mode 100644
index f7def760..00000000
--- a/superlu/BLAS/zher.f
+++ /dev/null
@@ -1,278 +0,0 @@
-*> \brief \b ZHER
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZHER(UPLO,N,ALPHA,X,INCX,A,LDA)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA
-*       INTEGER INCX,LDA,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZHER   performs the hermitian rank 1 operation
-*>
-*>    A := alpha*x*x**H + A,
-*>
-*> where alpha is a real scalar, x is an n element vector and A is an
-*> n by n hermitian matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the hermitian matrix and the strictly
-*>           lower triangular part of A is not referenced. On exit, the
-*>           upper triangular part of the array A is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the hermitian matrix and the strictly
-*>           upper triangular part of A is not referenced. On exit, the
-*>           lower triangular part of the array A is overwritten by the
-*>           lower triangular part of the updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set, they are assumed to be zero, and on exit they
-*>           are set to zero.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZHER(UPLO,N,ALPHA,X,INCX,A,LDA)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA
-      INTEGER INCX,LDA,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-      INTEGER I,INFO,IX,J,JX,KX
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DBLE,DCONJG,MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZHER  ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.DBLE(ZERO))) RETURN
-*
-*     Set the start point in X if the increment is not unity.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-*
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  A  when A is stored in upper triangle.
-*
-          IF (INCX.EQ.1) THEN
-              DO 20 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*DCONJG(X(J))
-                      DO 10 I = 1,J - 1
-                          A(I,J) = A(I,J) + X(I)*TEMP
-   10                 CONTINUE
-                      A(J,J) = DBLE(A(J,J)) + DBLE(X(J)*TEMP)
-                  ELSE
-                      A(J,J) = DBLE(A(J,J))
-                  END IF
-   20         CONTINUE
-          ELSE
-              JX = KX
-              DO 40 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*DCONJG(X(JX))
-                      IX = KX
-                      DO 30 I = 1,J - 1
-                          A(I,J) = A(I,J) + X(IX)*TEMP
-                          IX = IX + INCX
-   30                 CONTINUE
-                      A(J,J) = DBLE(A(J,J)) + DBLE(X(JX)*TEMP)
-                  ELSE
-                      A(J,J) = DBLE(A(J,J))
-                  END IF
-                  JX = JX + INCX
-   40         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  A  when A is stored in lower triangle.
-*
-          IF (INCX.EQ.1) THEN
-              DO 60 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*DCONJG(X(J))
-                      A(J,J) = DBLE(A(J,J)) + DBLE(TEMP*X(J))
-                      DO 50 I = J + 1,N
-                          A(I,J) = A(I,J) + X(I)*TEMP
-   50                 CONTINUE
-                  ELSE
-                      A(J,J) = DBLE(A(J,J))
-                  END IF
-   60         CONTINUE
-          ELSE
-              JX = KX
-              DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*DCONJG(X(JX))
-                      A(J,J) = DBLE(A(J,J)) + DBLE(TEMP*X(JX))
-                      IX = JX
-                      DO 70 I = J + 1,N
-                          IX = IX + INCX
-                          A(I,J) = A(I,J) + X(IX)*TEMP
-   70                 CONTINUE
-                  ELSE
-                      A(J,J) = DBLE(A(J,J))
-                  END IF
-                  JX = JX + INCX
-   80         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZHER  .
-*
-      END
diff --git a/superlu/BLAS/zher2.f b/superlu/BLAS/zher2.f
deleted file mode 100644
index 94c132c4..00000000
--- a/superlu/BLAS/zher2.f
+++ /dev/null
@@ -1,317 +0,0 @@
-*> \brief \b ZHER2
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZHER2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA
-*       INTEGER INCX,INCY,LDA,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZHER2  performs the hermitian rank 2 operation
-*>
-*>    A := alpha*x*y**H + conjg( alpha )*y*x**H + A,
-*>
-*> where alpha is a scalar, x and y are n element vectors and A is an n
-*> by n hermitian matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the array A is to be referenced as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of A
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of A
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular part of the hermitian matrix and the strictly
-*>           lower triangular part of A is not referenced. On exit, the
-*>           upper triangular part of the array A is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular part of the hermitian matrix and the strictly
-*>           upper triangular part of A is not referenced. On exit, the
-*>           lower triangular part of the array A is overwritten by the
-*>           lower triangular part of the updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set, they are assumed to be zero, and on exit they
-*>           are set to zero.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZHER2(UPLO,N,ALPHA,X,INCX,Y,INCY,A,LDA)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ALPHA
-      INTEGER INCX,INCY,LDA,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,KX,KY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DBLE,DCONJG,MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 7
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZHER2 ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Set up the start points in X and Y if the increments are not both
-*     unity.
-*
-      IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (N-1)*INCX
-          END IF
-          IF (INCY.GT.0) THEN
-              KY = 1
-          ELSE
-              KY = 1 - (N-1)*INCY
-          END IF
-          JX = KX
-          JY = KY
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through the triangular part
-*     of A.
-*
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  A  when A is stored in the upper triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 20 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*DCONJG(Y(J))
-                      TEMP2 = DCONJG(ALPHA*X(J))
-                      DO 10 I = 1,J - 1
-                          A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2
-   10                 CONTINUE
-                      A(J,J) = DBLE(A(J,J)) +
-     +                         DBLE(X(J)*TEMP1+Y(J)*TEMP2)
-                  ELSE
-                      A(J,J) = DBLE(A(J,J))
-                  END IF
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*DCONJG(Y(JY))
-                      TEMP2 = DCONJG(ALPHA*X(JX))
-                      IX = KX
-                      IY = KY
-                      DO 30 I = 1,J - 1
-                          A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   30                 CONTINUE
-                      A(J,J) = DBLE(A(J,J)) +
-     +                         DBLE(X(JX)*TEMP1+Y(JY)*TEMP2)
-                  ELSE
-                      A(J,J) = DBLE(A(J,J))
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-   40         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  A  when A is stored in the lower triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*DCONJG(Y(J))
-                      TEMP2 = DCONJG(ALPHA*X(J))
-                      A(J,J) = DBLE(A(J,J)) +
-     +                         DBLE(X(J)*TEMP1+Y(J)*TEMP2)
-                      DO 50 I = J + 1,N
-                          A(I,J) = A(I,J) + X(I)*TEMP1 + Y(I)*TEMP2
-   50                 CONTINUE
-                  ELSE
-                      A(J,J) = DBLE(A(J,J))
-                  END IF
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*DCONJG(Y(JY))
-                      TEMP2 = DCONJG(ALPHA*X(JX))
-                      A(J,J) = DBLE(A(J,J)) +
-     +                         DBLE(X(JX)*TEMP1+Y(JY)*TEMP2)
-                      IX = JX
-                      IY = JY
-                      DO 70 I = J + 1,N
-                          IX = IX + INCX
-                          IY = IY + INCY
-                          A(I,J) = A(I,J) + X(IX)*TEMP1 + Y(IY)*TEMP2
-   70                 CONTINUE
-                  ELSE
-                      A(J,J) = DBLE(A(J,J))
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-   80         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZHER2 .
-*
-      END
diff --git a/superlu/BLAS/zher2k.f b/superlu/BLAS/zher2k.f
deleted file mode 100644
index 407e8db5..00000000
--- a/superlu/BLAS/zher2k.f
+++ /dev/null
@@ -1,443 +0,0 @@
-*> \brief \b ZHER2K
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZHER2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA
-*       DOUBLE PRECISION BETA
-*       INTEGER K,LDA,LDB,LDC,N
-*       CHARACTER TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZHER2K  performs one of the hermitian rank 2k operations
-*>
-*>    C := alpha*A*B**H + conjg( alpha )*B*A**H + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*A**H*B + conjg( alpha )*B**H*A + beta*C,
-*>
-*> where  alpha and beta  are scalars with  beta  real,  C is an  n by n
-*> hermitian matrix and  A and B  are  n by k matrices in the first case
-*> and  k by n  matrices in the second case.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'    C := alpha*A*B**H          +
-*>                                         conjg( alpha )*B*A**H +
-*>                                         beta*C.
-*>
-*>              TRANS = 'C' or 'c'    C := alpha*A**H*B          +
-*>                                         conjg( alpha )*B**H*A +
-*>                                         beta*C.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns  of the  matrices  A and B,  and on  entry  with
-*>           TRANS = 'C' or 'c',  K  specifies  the number of rows of the
-*>           matrices  A and B.  K must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16 .
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  k by n  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDB must be at least  max( 1, n ), otherwise  LDB must
-*>           be at least  max( 1, k ).
-*>           Unchanged on exit.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION .
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX*16 array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  hermitian matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  hermitian matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set,  they are assumed to be zero,  and on exit they
-*>           are set to zero.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*>
-*>  -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1.
-*>     Ed Anderson, Cray Research Inc.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZHER2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ALPHA
-      DOUBLE PRECISION BETA
-      INTEGER K,LDA,LDB,LDC,N
-      CHARACTER TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DBLE,DCONJG,MAX
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP1,TEMP2
-      INTEGER I,INFO,J,L,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      DOUBLE PRECISION ONE
-      PARAMETER (ONE=1.0D+0)
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*
-*     Test the input parameters.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND.
-     +         (.NOT.LSAME(TRANS,'C'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDB.LT.MAX(1,NROWA)) THEN
-          INFO = 9
-      ELSE IF (LDC.LT.MAX(1,N)) THEN
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZHER2K',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (UPPER) THEN
-              IF (BETA.EQ.DBLE(ZERO)) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J - 1
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-                      C(J,J) = BETA*DBLE(C(J,J))
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.DBLE(ZERO)) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      C(J,J) = BETA*DBLE(C(J,J))
-                      DO 70 I = J + 1,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  C := alpha*A*B**H + conjg( alpha )*B*A**H +
-*                   C.
-*
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.DBLE(ZERO)) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J - 1
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                      C(J,J) = BETA*DBLE(C(J,J))
-                  ELSE
-                      C(J,J) = DBLE(C(J,J))
-                  END IF
-                  DO 120 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*DCONJG(B(J,L))
-                          TEMP2 = DCONJG(ALPHA*A(J,L))
-                          DO 110 I = 1,J - 1
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  110                     CONTINUE
-                          C(J,J) = DBLE(C(J,J)) +
-     +                             DBLE(A(J,L)*TEMP1+B(J,L)*TEMP2)
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.DBLE(ZERO)) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J + 1,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                      C(J,J) = BETA*DBLE(C(J,J))
-                  ELSE
-                      C(J,J) = DBLE(C(J,J))
-                  END IF
-                  DO 170 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*DCONJG(B(J,L))
-                          TEMP2 = DCONJG(ALPHA*A(J,L))
-                          DO 160 I = J + 1,N
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  160                     CONTINUE
-                          C(J,J) = DBLE(C(J,J)) +
-     +                             DBLE(A(J,L)*TEMP1+B(J,L)*TEMP2)
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*A**H*B + conjg( alpha )*B**H*A +
-*                   C.
-*
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 190 L = 1,K
-                          TEMP1 = TEMP1 + DCONJG(A(L,I))*B(L,J)
-                          TEMP2 = TEMP2 + DCONJG(B(L,I))*A(L,J)
-  190                 CONTINUE
-                      IF (I.EQ.J) THEN
-                          IF (BETA.EQ.DBLE(ZERO)) THEN
-                              C(J,J) = DBLE(ALPHA*TEMP1+
-     +                                 DCONJG(ALPHA)*TEMP2)
-                          ELSE
-                              C(J,J) = BETA*DBLE(C(J,J)) +
-     +                                 DBLE(ALPHA*TEMP1+
-     +                                 DCONJG(ALPHA)*TEMP2)
-                          END IF
-                      ELSE
-                          IF (BETA.EQ.DBLE(ZERO)) THEN
-                              C(I,J) = ALPHA*TEMP1 + DCONJG(ALPHA)*TEMP2
-                          ELSE
-                              C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                                 DCONJG(ALPHA)*TEMP2
-                          END IF
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 220 L = 1,K
-                          TEMP1 = TEMP1 + DCONJG(A(L,I))*B(L,J)
-                          TEMP2 = TEMP2 + DCONJG(B(L,I))*A(L,J)
-  220                 CONTINUE
-                      IF (I.EQ.J) THEN
-                          IF (BETA.EQ.DBLE(ZERO)) THEN
-                              C(J,J) = DBLE(ALPHA*TEMP1+
-     +                                 DCONJG(ALPHA)*TEMP2)
-                          ELSE
-                              C(J,J) = BETA*DBLE(C(J,J)) +
-     +                                 DBLE(ALPHA*TEMP1+
-     +                                 DCONJG(ALPHA)*TEMP2)
-                          END IF
-                      ELSE
-                          IF (BETA.EQ.DBLE(ZERO)) THEN
-                              C(I,J) = ALPHA*TEMP1 + DCONJG(ALPHA)*TEMP2
-                          ELSE
-                              C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                                 DCONJG(ALPHA)*TEMP2
-                          END IF
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZHER2K.
-*
-      END
diff --git a/superlu/BLAS/zherk.f b/superlu/BLAS/zherk.f
deleted file mode 100644
index d181ca0a..00000000
--- a/superlu/BLAS/zherk.f
+++ /dev/null
@@ -1,396 +0,0 @@
-*> \brief \b ZHERK
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZHERK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA,BETA
-*       INTEGER K,LDA,LDC,N
-*       CHARACTER TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZHERK  performs one of the hermitian rank k operations
-*>
-*>    C := alpha*A*A**H + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*A**H*A + beta*C,
-*>
-*> where  alpha and beta  are  real scalars,  C is an  n by n  hermitian
-*> matrix and  A  is an  n by k  matrix in the  first case and a  k by n
-*> matrix in the second case.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   C := alpha*A*A**H + beta*C.
-*>
-*>              TRANS = 'C' or 'c'   C := alpha*A**H*A + beta*C.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns   of  the   matrix   A,   and  on   entry   with
-*>           TRANS = 'C' or 'c',  K  specifies  the number of rows of the
-*>           matrix A.  K must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION .
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is DOUBLE PRECISION.
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX*16 array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  hermitian matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  hermitian matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set,  they are assumed to be zero,  and on exit they
-*>           are set to zero.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*>
-*>  -- Modified 8-Nov-93 to set C(J,J) to DBLE( C(J,J) ) when BETA = 1.
-*>     Ed Anderson, Cray Research Inc.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZHERK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA,BETA
-      INTEGER K,LDA,LDC,N
-      CHARACTER TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DBLE,DCMPLX,DCONJG,MAX
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-      DOUBLE PRECISION RTEMP
-      INTEGER I,INFO,J,L,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      DOUBLE PRECISION ONE,ZERO
-      PARAMETER (ONE=1.0D+0,ZERO=0.0D+0)
-*     ..
-*
-*     Test the input parameters.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND.
-     +         (.NOT.LSAME(TRANS,'C'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDC.LT.MAX(1,N)) THEN
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZHERK ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J - 1
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-                      C(J,J) = BETA*DBLE(C(J,J))
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      C(J,J) = BETA*DBLE(C(J,J))
-                      DO 70 I = J + 1,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  C := alpha*A*A**H + beta*C.
-*
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J - 1
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                      C(J,J) = BETA*DBLE(C(J,J))
-                  ELSE
-                      C(J,J) = DBLE(C(J,J))
-                  END IF
-                  DO 120 L = 1,K
-                      IF (A(J,L).NE.DCMPLX(ZERO)) THEN
-                          TEMP = ALPHA*DCONJG(A(J,L))
-                          DO 110 I = 1,J - 1
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  110                     CONTINUE
-                          C(J,J) = DBLE(C(J,J)) + DBLE(TEMP*A(I,L))
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      C(J,J) = BETA*DBLE(C(J,J))
-                      DO 150 I = J + 1,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  ELSE
-                      C(J,J) = DBLE(C(J,J))
-                  END IF
-                  DO 170 L = 1,K
-                      IF (A(J,L).NE.DCMPLX(ZERO)) THEN
-                          TEMP = ALPHA*DCONJG(A(J,L))
-                          C(J,J) = DBLE(C(J,J)) + DBLE(TEMP*A(J,L))
-                          DO 160 I = J + 1,N
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*A**H*A + beta*C.
-*
-          IF (UPPER) THEN
-              DO 220 J = 1,N
-                  DO 200 I = 1,J - 1
-                      TEMP = ZERO
-                      DO 190 L = 1,K
-                          TEMP = TEMP + DCONJG(A(L,I))*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  200             CONTINUE
-                  RTEMP = ZERO
-                  DO 210 L = 1,K
-                      RTEMP = RTEMP + DCONJG(A(L,J))*A(L,J)
-  210             CONTINUE
-                  IF (BETA.EQ.ZERO) THEN
-                      C(J,J) = ALPHA*RTEMP
-                  ELSE
-                      C(J,J) = ALPHA*RTEMP + BETA*DBLE(C(J,J))
-                  END IF
-  220         CONTINUE
-          ELSE
-              DO 260 J = 1,N
-                  RTEMP = ZERO
-                  DO 230 L = 1,K
-                      RTEMP = RTEMP + DCONJG(A(L,J))*A(L,J)
-  230             CONTINUE
-                  IF (BETA.EQ.ZERO) THEN
-                      C(J,J) = ALPHA*RTEMP
-                  ELSE
-                      C(J,J) = ALPHA*RTEMP + BETA*DBLE(C(J,J))
-                  END IF
-                  DO 250 I = J + 1,N
-                      TEMP = ZERO
-                      DO 240 L = 1,K
-                          TEMP = TEMP + DCONJG(A(L,I))*A(L,J)
-  240                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  250             CONTINUE
-  260         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZHERK .
-*
-      END
diff --git a/superlu/BLAS/zhpmv.f b/superlu/BLAS/zhpmv.f
deleted file mode 100644
index 0d5d325b..00000000
--- a/superlu/BLAS/zhpmv.f
+++ /dev/null
@@ -1,338 +0,0 @@
-*> \brief \b ZHPMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZHPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA,BETA
-*       INTEGER INCX,INCY,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 AP(*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZHPMV  performs the matrix-vector operation
-*>
-*>    y := alpha*A*x + beta*y,
-*>
-*> where alpha and beta are scalars, x and y are n element vectors and
-*> A is an n by n hermitian matrix, supplied in packed form.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] AP
-*> \verbatim
-*>          AP is COMPLEX*16 array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set and are assumed to be zero.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry, BETA specifies the scalar beta. When BETA is
-*>           supplied as zero then Y need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y. On exit, Y is overwritten by the updated
-*>           vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZHPMV(UPLO,N,ALPHA,AP,X,INCX,BETA,Y,INCY)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ALPHA,BETA
-      INTEGER INCX,INCY,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 AP(*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DBLE,DCONJG
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 6
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZHPMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     Set up the start points in  X  and  Y.
-*
-      IF (INCX.GT.0) THEN
-          KX = 1
-      ELSE
-          KX = 1 - (N-1)*INCX
-      END IF
-      IF (INCY.GT.0) THEN
-          KY = 1
-      ELSE
-          KY = 1 - (N-1)*INCY
-      END IF
-*
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-*
-*     First form  y := beta*y.
-*
-      IF (BETA.NE.ONE) THEN
-          IF (INCY.EQ.1) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 10 I = 1,N
-                      Y(I) = ZERO
-   10             CONTINUE
-              ELSE
-                  DO 20 I = 1,N
-                      Y(I) = BETA*Y(I)
-   20             CONTINUE
-              END IF
-          ELSE
-              IY = KY
-              IF (BETA.EQ.ZERO) THEN
-                  DO 30 I = 1,N
-                      Y(IY) = ZERO
-                      IY = IY + INCY
-   30             CONTINUE
-              ELSE
-                  DO 40 I = 1,N
-                      Y(IY) = BETA*Y(IY)
-                      IY = IY + INCY
-   40             CONTINUE
-              END IF
-          END IF
-      END IF
-      IF (ALPHA.EQ.ZERO) RETURN
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  y  when AP contains the upper triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  K = KK
-                  DO 50 I = 1,J - 1
-                      Y(I) = Y(I) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + DCONJG(AP(K))*X(I)
-                      K = K + 1
-   50             CONTINUE
-                  Y(J) = Y(J) + TEMP1*DBLE(AP(KK+J-1)) + ALPHA*TEMP2
-                  KK = KK + J
-   60         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 80 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  IX = KX
-                  IY = KY
-                  DO 70 K = KK,KK + J - 2
-                      Y(IY) = Y(IY) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + DCONJG(AP(K))*X(IX)
-                      IX = IX + INCX
-                      IY = IY + INCY
-   70             CONTINUE
-                  Y(JY) = Y(JY) + TEMP1*DBLE(AP(KK+J-1)) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + J
-   80         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  y  when AP contains the lower triangle.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 100 J = 1,N
-                  TEMP1 = ALPHA*X(J)
-                  TEMP2 = ZERO
-                  Y(J) = Y(J) + TEMP1*DBLE(AP(KK))
-                  K = KK + 1
-                  DO 90 I = J + 1,N
-                      Y(I) = Y(I) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + DCONJG(AP(K))*X(I)
-                      K = K + 1
-   90             CONTINUE
-                  Y(J) = Y(J) + ALPHA*TEMP2
-                  KK = KK + (N-J+1)
-  100         CONTINUE
-          ELSE
-              JX = KX
-              JY = KY
-              DO 120 J = 1,N
-                  TEMP1 = ALPHA*X(JX)
-                  TEMP2 = ZERO
-                  Y(JY) = Y(JY) + TEMP1*DBLE(AP(KK))
-                  IX = JX
-                  IY = JY
-                  DO 110 K = KK + 1,KK + N - J
-                      IX = IX + INCX
-                      IY = IY + INCY
-                      Y(IY) = Y(IY) + TEMP1*AP(K)
-                      TEMP2 = TEMP2 + DCONJG(AP(K))*X(IX)
-  110             CONTINUE
-                  Y(JY) = Y(JY) + ALPHA*TEMP2
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + (N-J+1)
-  120         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZHPMV .
-*
-      END
diff --git a/superlu/BLAS/zhpr.f b/superlu/BLAS/zhpr.f
deleted file mode 100644
index 70051c8a..00000000
--- a/superlu/BLAS/zhpr.f
+++ /dev/null
@@ -1,279 +0,0 @@
-*> \brief \b ZHPR
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZHPR(UPLO,N,ALPHA,X,INCX,AP)
-*
-*       .. Scalar Arguments ..
-*       DOUBLE PRECISION ALPHA
-*       INTEGER INCX,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 AP(*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZHPR    performs the hermitian rank 1 operation
-*>
-*>    A := alpha*x*x**H + A,
-*>
-*> where alpha is a real scalar, x is an n element vector and A is an
-*> n by n hermitian matrix, supplied in packed form.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is DOUBLE PRECISION.
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] AP
-*> \verbatim
-*>          AP is COMPLEX*16 array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the upper triangular part of the
-*>           updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the lower triangular part of the
-*>           updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set, they are assumed to be zero, and on exit they
-*>           are set to zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZHPR(UPLO,N,ALPHA,X,INCX,AP)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      DOUBLE PRECISION ALPHA
-      INTEGER INCX,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 AP(*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-      INTEGER I,INFO,IX,J,JX,K,KK,KX
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DBLE,DCONJG
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZHPR  ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.DBLE(ZERO))) RETURN
-*
-*     Set the start point in X if the increment is not unity.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-*
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  A  when upper triangle is stored in AP.
-*
-          IF (INCX.EQ.1) THEN
-              DO 20 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*DCONJG(X(J))
-                      K = KK
-                      DO 10 I = 1,J - 1
-                          AP(K) = AP(K) + X(I)*TEMP
-                          K = K + 1
-   10                 CONTINUE
-                      AP(KK+J-1) = DBLE(AP(KK+J-1)) + DBLE(X(J)*TEMP)
-                  ELSE
-                      AP(KK+J-1) = DBLE(AP(KK+J-1))
-                  END IF
-                  KK = KK + J
-   20         CONTINUE
-          ELSE
-              JX = KX
-              DO 40 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*DCONJG(X(JX))
-                      IX = KX
-                      DO 30 K = KK,KK + J - 2
-                          AP(K) = AP(K) + X(IX)*TEMP
-                          IX = IX + INCX
-   30                 CONTINUE
-                      AP(KK+J-1) = DBLE(AP(KK+J-1)) + DBLE(X(JX)*TEMP)
-                  ELSE
-                      AP(KK+J-1) = DBLE(AP(KK+J-1))
-                  END IF
-                  JX = JX + INCX
-                  KK = KK + J
-   40         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  A  when lower triangle is stored in AP.
-*
-          IF (INCX.EQ.1) THEN
-              DO 60 J = 1,N
-                  IF (X(J).NE.ZERO) THEN
-                      TEMP = ALPHA*DCONJG(X(J))
-                      AP(KK) = DBLE(AP(KK)) + DBLE(TEMP*X(J))
-                      K = KK + 1
-                      DO 50 I = J + 1,N
-                          AP(K) = AP(K) + X(I)*TEMP
-                          K = K + 1
-   50                 CONTINUE
-                  ELSE
-                      AP(KK) = DBLE(AP(KK))
-                  END IF
-                  KK = KK + N - J + 1
-   60         CONTINUE
-          ELSE
-              JX = KX
-              DO 80 J = 1,N
-                  IF (X(JX).NE.ZERO) THEN
-                      TEMP = ALPHA*DCONJG(X(JX))
-                      AP(KK) = DBLE(AP(KK)) + DBLE(TEMP*X(JX))
-                      IX = JX
-                      DO 70 K = KK + 1,KK + N - J
-                          IX = IX + INCX
-                          AP(K) = AP(K) + X(IX)*TEMP
-   70                 CONTINUE
-                  ELSE
-                      AP(KK) = DBLE(AP(KK))
-                  END IF
-                  JX = JX + INCX
-                  KK = KK + N - J + 1
-   80         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZHPR  .
-*
-      END
diff --git a/superlu/BLAS/zhpr2.f b/superlu/BLAS/zhpr2.f
deleted file mode 100644
index c9fb7585..00000000
--- a/superlu/BLAS/zhpr2.f
+++ /dev/null
@@ -1,318 +0,0 @@
-*> \brief \b ZHPR2
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZHPR2(UPLO,N,ALPHA,X,INCX,Y,INCY,AP)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA
-*       INTEGER INCX,INCY,N
-*       CHARACTER UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 AP(*),X(*),Y(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZHPR2  performs the hermitian rank 2 operation
-*>
-*>    A := alpha*x*y**H + conjg( alpha )*y*x**H + A,
-*>
-*> where alpha is a scalar, x and y are n element vectors and A is an
-*> n by n hermitian matrix, supplied in packed form.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the upper or lower
-*>           triangular part of the matrix A is supplied in the packed
-*>           array AP as follows:
-*>
-*>              UPLO = 'U' or 'u'   The upper triangular part of A is
-*>                                  supplied in AP.
-*>
-*>              UPLO = 'L' or 'l'   The lower triangular part of A is
-*>                                  supplied in AP.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*>
-*> \param[in] Y
-*> \verbatim
-*>          Y is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCY ) ).
-*>           Before entry, the incremented array Y must contain the n
-*>           element vector y.
-*> \endverbatim
-*>
-*> \param[in] INCY
-*> \verbatim
-*>          INCY is INTEGER
-*>           On entry, INCY specifies the increment for the elements of
-*>           Y. INCY must not be zero.
-*> \endverbatim
-*>
-*> \param[in,out] AP
-*> \verbatim
-*>          AP is COMPLEX*16 array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 1, 2 )
-*>           and a( 2, 2 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the upper triangular part of the
-*>           updated matrix.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular part of the hermitian matrix
-*>           packed sequentially, column by column, so that AP( 1 )
-*>           contains a( 1, 1 ), AP( 2 ) and AP( 3 ) contain a( 2, 1 )
-*>           and a( 3, 1 ) respectively, and so on. On exit, the array
-*>           AP is overwritten by the lower triangular part of the
-*>           updated matrix.
-*>           Note that the imaginary parts of the diagonal elements need
-*>           not be set, they are assumed to be zero, and on exit they
-*>           are set to zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZHPR2(UPLO,N,ALPHA,X,INCX,Y,INCY,AP)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ALPHA
-      INTEGER INCX,INCY,N
-      CHARACTER UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 AP(*),X(*),Y(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP1,TEMP2
-      INTEGER I,INFO,IX,IY,J,JX,JY,K,KK,KX,KY
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DBLE,DCONJG
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (N.LT.0) THEN
-          INFO = 2
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 5
-      ELSE IF (INCY.EQ.0) THEN
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZHPR2 ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (ALPHA.EQ.ZERO)) RETURN
-*
-*     Set up the start points in X and Y if the increments are not both
-*     unity.
-*
-      IF ((INCX.NE.1) .OR. (INCY.NE.1)) THEN
-          IF (INCX.GT.0) THEN
-              KX = 1
-          ELSE
-              KX = 1 - (N-1)*INCX
-          END IF
-          IF (INCY.GT.0) THEN
-              KY = 1
-          ELSE
-              KY = 1 - (N-1)*INCY
-          END IF
-          JX = KX
-          JY = KY
-      END IF
-*
-*     Start the operations. In this version the elements of the array AP
-*     are accessed sequentially with one pass through AP.
-*
-      KK = 1
-      IF (LSAME(UPLO,'U')) THEN
-*
-*        Form  A  when upper triangle is stored in AP.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 20 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*DCONJG(Y(J))
-                      TEMP2 = DCONJG(ALPHA*X(J))
-                      K = KK
-                      DO 10 I = 1,J - 1
-                          AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2
-                          K = K + 1
-   10                 CONTINUE
-                      AP(KK+J-1) = DBLE(AP(KK+J-1)) +
-     +                             DBLE(X(J)*TEMP1+Y(J)*TEMP2)
-                  ELSE
-                      AP(KK+J-1) = DBLE(AP(KK+J-1))
-                  END IF
-                  KK = KK + J
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*DCONJG(Y(JY))
-                      TEMP2 = DCONJG(ALPHA*X(JX))
-                      IX = KX
-                      IY = KY
-                      DO 30 K = KK,KK + J - 2
-                          AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2
-                          IX = IX + INCX
-                          IY = IY + INCY
-   30                 CONTINUE
-                      AP(KK+J-1) = DBLE(AP(KK+J-1)) +
-     +                             DBLE(X(JX)*TEMP1+Y(JY)*TEMP2)
-                  ELSE
-                      AP(KK+J-1) = DBLE(AP(KK+J-1))
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + J
-   40         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  A  when lower triangle is stored in AP.
-*
-          IF ((INCX.EQ.1) .AND. (INCY.EQ.1)) THEN
-              DO 60 J = 1,N
-                  IF ((X(J).NE.ZERO) .OR. (Y(J).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*DCONJG(Y(J))
-                      TEMP2 = DCONJG(ALPHA*X(J))
-                      AP(KK) = DBLE(AP(KK)) +
-     +                         DBLE(X(J)*TEMP1+Y(J)*TEMP2)
-                      K = KK + 1
-                      DO 50 I = J + 1,N
-                          AP(K) = AP(K) + X(I)*TEMP1 + Y(I)*TEMP2
-                          K = K + 1
-   50                 CONTINUE
-                  ELSE
-                      AP(KK) = DBLE(AP(KK))
-                  END IF
-                  KK = KK + N - J + 1
-   60         CONTINUE
-          ELSE
-              DO 80 J = 1,N
-                  IF ((X(JX).NE.ZERO) .OR. (Y(JY).NE.ZERO)) THEN
-                      TEMP1 = ALPHA*DCONJG(Y(JY))
-                      TEMP2 = DCONJG(ALPHA*X(JX))
-                      AP(KK) = DBLE(AP(KK)) +
-     +                         DBLE(X(JX)*TEMP1+Y(JY)*TEMP2)
-                      IX = JX
-                      IY = JY
-                      DO 70 K = KK + 1,KK + N - J
-                          IX = IX + INCX
-                          IY = IY + INCY
-                          AP(K) = AP(K) + X(IX)*TEMP1 + Y(IY)*TEMP2
-   70                 CONTINUE
-                  ELSE
-                      AP(KK) = DBLE(AP(KK))
-                  END IF
-                  JX = JX + INCX
-                  JY = JY + INCY
-                  KK = KK + N - J + 1
-   80         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZHPR2 .
-*
-      END
diff --git a/superlu/BLAS/zrotg.f b/superlu/BLAS/zrotg.f
deleted file mode 100644
index e5c406db..00000000
--- a/superlu/BLAS/zrotg.f
+++ /dev/null
@@ -1,75 +0,0 @@
-*> \brief \b ZROTG
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZROTG(CA,CB,C,S)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 CA,CB,S
-*       DOUBLE PRECISION C
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    ZROTG determines a double complex Givens rotation.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level1
-*
-*  =====================================================================
-      SUBROUTINE ZROTG(CA,CB,C,S)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 CA,CB,S
-      DOUBLE PRECISION C
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      COMPLEX*16 ALPHA
-      DOUBLE PRECISION NORM,SCALE
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC CDABS,DCMPLX,DCONJG,DSQRT
-*     ..
-      IF (CDABS(CA).EQ.0.0d0) THEN
-         C = 0.0d0
-         S = (1.0d0,0.0d0)
-         CA = CB
-      ELSE
-         SCALE = CDABS(CA) + CDABS(CB)
-         NORM = SCALE*DSQRT((CDABS(CA/DCMPLX(SCALE,0.0d0)))**2+
-     $       (CDABS(CB/DCMPLX(SCALE,0.0d0)))**2)
-         ALPHA = CA/CDABS(CA)
-         C = CDABS(CA)/NORM
-         S = ALPHA*DCONJG(CB)/NORM
-         CA = ALPHA*NORM
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/zscal.f b/superlu/BLAS/zscal.f
deleted file mode 100644
index ca038aac..00000000
--- a/superlu/BLAS/zscal.f
+++ /dev/null
@@ -1,91 +0,0 @@
-*> \brief \b ZSCAL
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZSCAL(N,ZA,ZX,INCX)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ZA
-*       INTEGER INCX,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    ZSCAL scales a vector by a constant.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, 3/11/78.
-*>     modified 3/93 to return if incx .le. 0.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZSCAL(N,ZA,ZX,INCX)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ZA
-      INTEGER INCX,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      INTEGER I,NINCX
-*     ..
-      IF (N.LE.0 .OR. INCX.LE.0) RETURN
-      IF (INCX.EQ.1) THEN
-*
-*        code for increment equal to 1
-*
-         DO I = 1,N
-            ZX(I) = ZA*ZX(I)
-         END DO
-      ELSE
-*
-*        code for increment not equal to 1
-*
-         NINCX = N*INCX
-         DO I = 1,NINCX,INCX
-            ZX(I) = ZA*ZX(I)
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/zswap.f b/superlu/BLAS/zswap.f
deleted file mode 100644
index 02a5b97e..00000000
--- a/superlu/BLAS/zswap.f
+++ /dev/null
@@ -1,98 +0,0 @@
-*> \brief \b ZSWAP
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZSWAP(N,ZX,INCX,ZY,INCY)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,INCY,N
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 ZX(*),ZY(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*>    ZSWAP interchanges two vectors.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level1
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>     jack dongarra, 3/11/78.
-*>     modified 12/3/93, array(1) declarations changed to array(*)
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZSWAP(N,ZX,INCX,ZY,INCY)
-*
-*  -- Reference BLAS level1 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,INCY,N
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 ZX(*),ZY(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Local Scalars ..
-      COMPLEX*16 ZTEMP
-      INTEGER I,IX,IY
-*     ..
-      IF (N.LE.0) RETURN
-      IF (INCX.EQ.1 .AND. INCY.EQ.1) THEN
-*
-*       code for both increments equal to 1
-         DO I = 1,N
-            ZTEMP = ZX(I)
-            ZX(I) = ZY(I)
-            ZY(I) = ZTEMP
-         END DO
-      ELSE
-*
-*       code for unequal increments or equal increments not equal
-*         to 1
-*
-         IX = 1
-         IY = 1
-         IF (INCX.LT.0) IX = (-N+1)*INCX + 1
-         IF (INCY.LT.0) IY = (-N+1)*INCY + 1
-         DO I = 1,N
-            ZTEMP = ZX(IX)
-            ZX(IX) = ZY(IY)
-            ZY(IY) = ZTEMP
-            IX = IX + INCX
-            IY = IY + INCY
-         END DO
-      END IF
-      RETURN
-      END
diff --git a/superlu/BLAS/zsymm.f b/superlu/BLAS/zsymm.f
deleted file mode 100644
index 1dc267a7..00000000
--- a/superlu/BLAS/zsymm.f
+++ /dev/null
@@ -1,369 +0,0 @@
-*> \brief \b ZSYMM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZSYMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA,BETA
-*       INTEGER LDA,LDB,LDC,M,N
-*       CHARACTER SIDE,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZSYMM  performs one of the matrix-matrix operations
-*>
-*>    C := alpha*A*B + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*B*A + beta*C,
-*>
-*> where  alpha and beta are scalars, A is a symmetric matrix and  B and
-*> C are m by n matrices.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE  specifies whether  the  symmetric matrix  A
-*>           appears on the  left or right  in the  operation as follows:
-*>
-*>              SIDE = 'L' or 'l'   C := alpha*A*B + beta*C,
-*>
-*>              SIDE = 'R' or 'r'   C := alpha*B*A + beta*C,
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of  the  symmetric  matrix   A  is  to  be
-*>           referenced as follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the upper triangular part of the
-*>                                  symmetric matrix is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the lower triangular part of the
-*>                                  symmetric matrix is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry,  M  specifies the number of rows of the matrix  C.
-*>           M  must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of the matrix C.
-*>           N  must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is
-*>           m  when  SIDE = 'L' or 'l'  and is n  otherwise.
-*>           Before entry  with  SIDE = 'L' or 'l',  the  m by m  part of
-*>           the array  A  must contain the  symmetric matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading m by m upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  symmetric matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  m by m  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  symmetric
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*>           Before entry  with  SIDE = 'R' or 'r',  the  n by n  part of
-*>           the array  A  must contain the  symmetric matrix,  such that
-*>           when  UPLO = 'U' or 'u', the leading n by n upper triangular
-*>           part of the array  A  must contain the upper triangular part
-*>           of the  symmetric matrix and the  strictly  lower triangular
-*>           part of  A  is not referenced,  and when  UPLO = 'L' or 'l',
-*>           the leading  n by n  lower triangular part  of the  array  A
-*>           must  contain  the  lower triangular part  of the  symmetric
-*>           matrix and the  strictly upper triangular part of  A  is not
-*>           referenced.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the  calling (sub) program. When  SIDE = 'L' or 'l'  then
-*>           LDA must be at least  max( 1, m ), otherwise  LDA must be at
-*>           least max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX*16 array of DIMENSION ( LDB, n ).
-*>           Before entry, the leading  m by n part of the array  B  must
-*>           contain the matrix B.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry,  BETA  specifies the scalar  beta.  When  BETA  is
-*>           supplied as zero then C need not be set on input.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX*16 array of DIMENSION ( LDC, n ).
-*>           Before entry, the leading  m by n  part of the array  C must
-*>           contain the matrix  C,  except when  beta  is zero, in which
-*>           case C need not be set on entry.
-*>           On exit, the array  C  is overwritten by the  m by n updated
-*>           matrix.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZSYMM(SIDE,UPLO,M,N,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ALPHA,BETA
-      INTEGER LDA,LDB,LDC,M,N
-      CHARACTER SIDE,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP1,TEMP2
-      INTEGER I,INFO,J,K,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*
-*     Set NROWA as the number of rows of A.
-*
-      IF (LSAME(SIDE,'L')) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF ((.NOT.LSAME(SIDE,'L')) .AND. (.NOT.LSAME(SIDE,'R'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 2
-      ELSE IF (M.LT.0) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDB.LT.MAX(1,M)) THEN
-          INFO = 9
-      ELSE IF (LDC.LT.MAX(1,M)) THEN
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZSYMM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((M.EQ.0) .OR. (N.EQ.0) .OR.
-     +    ((ALPHA.EQ.ZERO).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (BETA.EQ.ZERO) THEN
-              DO 20 J = 1,N
-                  DO 10 I = 1,M
-                      C(I,J) = ZERO
-   10             CONTINUE
-   20         CONTINUE
-          ELSE
-              DO 40 J = 1,N
-                  DO 30 I = 1,M
-                      C(I,J) = BETA*C(I,J)
-   30             CONTINUE
-   40         CONTINUE
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(SIDE,'L')) THEN
-*
-*        Form  C := alpha*A*B + beta*C.
-*
-          IF (UPPER) THEN
-              DO 70 J = 1,N
-                  DO 60 I = 1,M
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 50 K = 1,I - 1
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*A(K,I)
-   50                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) +
-     +                             ALPHA*TEMP2
-                      END IF
-   60             CONTINUE
-   70         CONTINUE
-          ELSE
-              DO 100 J = 1,N
-                  DO 90 I = M,1,-1
-                      TEMP1 = ALPHA*B(I,J)
-                      TEMP2 = ZERO
-                      DO 80 K = I + 1,M
-                          C(K,J) = C(K,J) + TEMP1*A(K,I)
-                          TEMP2 = TEMP2 + B(K,J)*A(K,I)
-   80                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = TEMP1*A(I,I) + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + TEMP1*A(I,I) +
-     +                             ALPHA*TEMP2
-                      END IF
-   90             CONTINUE
-  100         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*B*A + beta*C.
-*
-          DO 170 J = 1,N
-              TEMP1 = ALPHA*A(J,J)
-              IF (BETA.EQ.ZERO) THEN
-                  DO 110 I = 1,M
-                      C(I,J) = TEMP1*B(I,J)
-  110             CONTINUE
-              ELSE
-                  DO 120 I = 1,M
-                      C(I,J) = BETA*C(I,J) + TEMP1*B(I,J)
-  120             CONTINUE
-              END IF
-              DO 140 K = 1,J - 1
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(K,J)
-                  ELSE
-                      TEMP1 = ALPHA*A(J,K)
-                  END IF
-                  DO 130 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  130             CONTINUE
-  140         CONTINUE
-              DO 160 K = J + 1,N
-                  IF (UPPER) THEN
-                      TEMP1 = ALPHA*A(J,K)
-                  ELSE
-                      TEMP1 = ALPHA*A(K,J)
-                  END IF
-                  DO 150 I = 1,M
-                      C(I,J) = C(I,J) + TEMP1*B(I,K)
-  150             CONTINUE
-  160         CONTINUE
-  170     CONTINUE
-      END IF
-*
-      RETURN
-*
-*     End of ZSYMM .
-*
-      END
diff --git a/superlu/BLAS/zsyr2k.f b/superlu/BLAS/zsyr2k.f
deleted file mode 100644
index d358ed00..00000000
--- a/superlu/BLAS/zsyr2k.f
+++ /dev/null
@@ -1,396 +0,0 @@
-*> \brief \b ZSYR2K
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA,BETA
-*       INTEGER K,LDA,LDB,LDC,N
-*       CHARACTER TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZSYR2K  performs one of the symmetric rank 2k operations
-*>
-*>    C := alpha*A*B**T + alpha*B*A**T + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*A**T*B + alpha*B**T*A + beta*C,
-*>
-*> where  alpha and beta  are scalars,  C is an  n by n symmetric matrix
-*> and  A and B  are  n by k  matrices  in the  first  case  and  k by n
-*> matrices in the second case.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'    C := alpha*A*B**T + alpha*B*A**T +
-*>                                         beta*C.
-*>
-*>              TRANS = 'T' or 't'    C := alpha*A**T*B + alpha*B**T*A +
-*>                                         beta*C.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns  of the  matrices  A and B,  and on  entry  with
-*>           TRANS = 'T' or 't',  K  specifies  the number of rows of the
-*>           matrices  A and B.  K must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is COMPLEX*16 array of DIMENSION ( LDB, kb ), where kb is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  B  must contain the matrix  B,  otherwise
-*>           the leading  k by n  part of the array  B  must contain  the
-*>           matrix B.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDB must be at least  max( 1, n ), otherwise  LDB must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX*16 array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ALPHA,BETA
-      INTEGER K,LDA,LDB,LDC,N
-      CHARACTER TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),B(LDB,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP1,TEMP2
-      INTEGER I,INFO,J,L,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*
-*     Test the input parameters.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND.
-     +         (.NOT.LSAME(TRANS,'T'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDB.LT.MAX(1,NROWA)) THEN
-          INFO = 9
-      ELSE IF (LDC.LT.MAX(1,N)) THEN
-          INFO = 12
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZSYR2K',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 I = J,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  C := alpha*A*B**T + alpha*B*A**T + C.
-*
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                  END IF
-                  DO 120 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*B(J,L)
-                          TEMP2 = ALPHA*A(J,L)
-                          DO 110 I = 1,J
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  110                     CONTINUE
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  END IF
-                  DO 170 L = 1,K
-                      IF ((A(J,L).NE.ZERO) .OR. (B(J,L).NE.ZERO)) THEN
-                          TEMP1 = ALPHA*B(J,L)
-                          TEMP2 = ALPHA*A(J,L)
-                          DO 160 I = J,N
-                              C(I,J) = C(I,J) + A(I,L)*TEMP1 +
-     +                                 B(I,L)*TEMP2
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*A**T*B + alpha*B**T*A + C.
-*
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 190 L = 1,K
-                          TEMP1 = TEMP1 + A(L,I)*B(L,J)
-                          TEMP2 = TEMP2 + B(L,I)*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                             ALPHA*TEMP2
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP1 = ZERO
-                      TEMP2 = ZERO
-                      DO 220 L = 1,K
-                          TEMP1 = TEMP1 + A(L,I)*B(L,J)
-                          TEMP2 = TEMP2 + B(L,I)*A(L,J)
-  220                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP1 + ALPHA*TEMP2
-                      ELSE
-                          C(I,J) = BETA*C(I,J) + ALPHA*TEMP1 +
-     +                             ALPHA*TEMP2
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZSYR2K.
-*
-      END
diff --git a/superlu/BLAS/zsyrk.f b/superlu/BLAS/zsyrk.f
deleted file mode 100644
index 79591b45..00000000
--- a/superlu/BLAS/zsyrk.f
+++ /dev/null
@@ -1,363 +0,0 @@
-*> \brief \b ZSYRK
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA,BETA
-*       INTEGER K,LDA,LDC,N
-*       CHARACTER TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),C(LDC,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZSYRK  performs one of the symmetric rank k operations
-*>
-*>    C := alpha*A*A**T + beta*C,
-*>
-*> or
-*>
-*>    C := alpha*A**T*A + beta*C,
-*>
-*> where  alpha and beta  are scalars,  C is an  n by n symmetric matrix
-*> and  A  is an  n by k  matrix in the first case and a  k by n  matrix
-*> in the second case.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On  entry,   UPLO  specifies  whether  the  upper  or  lower
-*>           triangular  part  of the  array  C  is to be  referenced  as
-*>           follows:
-*>
-*>              UPLO = 'U' or 'u'   Only the  upper triangular part of  C
-*>                                  is to be referenced.
-*>
-*>              UPLO = 'L' or 'l'   Only the  lower triangular part of  C
-*>                                  is to be referenced.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry,  TRANS  specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   C := alpha*A*A**T + beta*C.
-*>
-*>              TRANS = 'T' or 't'   C := alpha*A**T*A + beta*C.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry,  N specifies the order of the matrix C.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with  TRANS = 'N' or 'n',  K  specifies  the number
-*>           of  columns   of  the   matrix   A,   and  on   entry   with
-*>           TRANS = 'T' or 't',  K  specifies  the number of rows of the
-*>           matrix A.  K must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry, ALPHA specifies the scalar alpha.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, ka ), where ka is
-*>           k  when  TRANS = 'N' or 'n',  and is  n  otherwise.
-*>           Before entry with  TRANS = 'N' or 'n',  the  leading  n by k
-*>           part of the array  A  must contain the matrix  A,  otherwise
-*>           the leading  k by n  part of the array  A  must contain  the
-*>           matrix A.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in  the  calling  (sub)  program.   When  TRANS = 'N' or 'n'
-*>           then  LDA must be at least  max( 1, n ), otherwise  LDA must
-*>           be at least  max( 1, k ).
-*> \endverbatim
-*>
-*> \param[in] BETA
-*> \verbatim
-*>          BETA is COMPLEX*16
-*>           On entry, BETA specifies the scalar beta.
-*> \endverbatim
-*>
-*> \param[in,out] C
-*> \verbatim
-*>          C is COMPLEX*16 array of DIMENSION ( LDC, n ).
-*>           Before entry  with  UPLO = 'U' or 'u',  the leading  n by n
-*>           upper triangular part of the array C must contain the upper
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           lower triangular part of C is not referenced.  On exit, the
-*>           upper triangular part of the array  C is overwritten by the
-*>           upper triangular part of the updated matrix.
-*>           Before entry  with  UPLO = 'L' or 'l',  the leading  n by n
-*>           lower triangular part of the array C must contain the lower
-*>           triangular part  of the  symmetric matrix  and the strictly
-*>           upper triangular part of C is not referenced.  On exit, the
-*>           lower triangular part of the array  C is overwritten by the
-*>           lower triangular part of the updated matrix.
-*> \endverbatim
-*>
-*> \param[in] LDC
-*> \verbatim
-*>          LDC is INTEGER
-*>           On entry, LDC specifies the first dimension of C as declared
-*>           in  the  calling  (sub)  program.   LDC  must  be  at  least
-*>           max( 1, n ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ALPHA,BETA
-      INTEGER K,LDA,LDC,N
-      CHARACTER TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),C(LDC,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC MAX
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-      INTEGER I,INFO,J,L,NROWA
-      LOGICAL UPPER
-*     ..
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*
-*     Test the input parameters.
-*
-      IF (LSAME(TRANS,'N')) THEN
-          NROWA = N
-      ELSE
-          NROWA = K
-      END IF
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.LSAME(TRANS,'N')) .AND.
-     +         (.NOT.LSAME(TRANS,'T'))) THEN
-          INFO = 2
-      ELSE IF (N.LT.0) THEN
-          INFO = 3
-      ELSE IF (K.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 7
-      ELSE IF (LDC.LT.MAX(1,N)) THEN
-          INFO = 10
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZSYRK ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF ((N.EQ.0) .OR. (((ALPHA.EQ.ZERO).OR.
-     +    (K.EQ.0)).AND. (BETA.EQ.ONE))) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          IF (UPPER) THEN
-              IF (BETA.EQ.ZERO) THEN
-                  DO 20 J = 1,N
-                      DO 10 I = 1,J
-                          C(I,J) = ZERO
-   10                 CONTINUE
-   20             CONTINUE
-              ELSE
-                  DO 40 J = 1,N
-                      DO 30 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-   30                 CONTINUE
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (BETA.EQ.ZERO) THEN
-                  DO 60 J = 1,N
-                      DO 50 I = J,N
-                          C(I,J) = ZERO
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 I = J,N
-                          C(I,J) = BETA*C(I,J)
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          END IF
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  C := alpha*A*A**T + beta*C.
-*
-          IF (UPPER) THEN
-              DO 130 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 90 I = 1,J
-                          C(I,J) = ZERO
-   90                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 100 I = 1,J
-                          C(I,J) = BETA*C(I,J)
-  100                 CONTINUE
-                  END IF
-                  DO 120 L = 1,K
-                      IF (A(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*A(J,L)
-                          DO 110 I = 1,J
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  110                     CONTINUE
-                      END IF
-  120             CONTINUE
-  130         CONTINUE
-          ELSE
-              DO 180 J = 1,N
-                  IF (BETA.EQ.ZERO) THEN
-                      DO 140 I = J,N
-                          C(I,J) = ZERO
-  140                 CONTINUE
-                  ELSE IF (BETA.NE.ONE) THEN
-                      DO 150 I = J,N
-                          C(I,J) = BETA*C(I,J)
-  150                 CONTINUE
-                  END IF
-                  DO 170 L = 1,K
-                      IF (A(J,L).NE.ZERO) THEN
-                          TEMP = ALPHA*A(J,L)
-                          DO 160 I = J,N
-                              C(I,J) = C(I,J) + TEMP*A(I,L)
-  160                     CONTINUE
-                      END IF
-  170             CONTINUE
-  180         CONTINUE
-          END IF
-      ELSE
-*
-*        Form  C := alpha*A**T*A + beta*C.
-*
-          IF (UPPER) THEN
-              DO 210 J = 1,N
-                  DO 200 I = 1,J
-                      TEMP = ZERO
-                      DO 190 L = 1,K
-                          TEMP = TEMP + A(L,I)*A(L,J)
-  190                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  200             CONTINUE
-  210         CONTINUE
-          ELSE
-              DO 240 J = 1,N
-                  DO 230 I = J,N
-                      TEMP = ZERO
-                      DO 220 L = 1,K
-                          TEMP = TEMP + A(L,I)*A(L,J)
-  220                 CONTINUE
-                      IF (BETA.EQ.ZERO) THEN
-                          C(I,J) = ALPHA*TEMP
-                      ELSE
-                          C(I,J) = ALPHA*TEMP + BETA*C(I,J)
-                      END IF
-  230             CONTINUE
-  240         CONTINUE
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZSYRK .
-*
-      END
diff --git a/superlu/BLAS/ztbmv.f b/superlu/BLAS/ztbmv.f
deleted file mode 100644
index 1e03f2ba..00000000
--- a/superlu/BLAS/ztbmv.f
+++ /dev/null
@@ -1,429 +0,0 @@
-*> \brief \b ZTBMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZTBMV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,K,LDA,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZTBMV  performs one of the matrix-vector operations
-*>
-*>    x := A*x,   or   x := A**T*x,   or   x := A**H*x,
-*>
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular band matrix, with ( k + 1 ) diagonals.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>
-*>              TRANS = 'C' or 'c'   x := A**H*x.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with UPLO = 'U' or 'u', K specifies the number of
-*>           super-diagonals of the matrix A.
-*>           On entry with UPLO = 'L' or 'l', K specifies the number of
-*>           sub-diagonals of the matrix A.
-*>           K must satisfy  0 .le. K.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer an upper
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer a lower
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Note that when DIAG = 'U' or 'u' the elements of the array A
-*>           corresponding to the diagonal elements of the matrix are not
-*>           referenced, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is (input/output) COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZTBMV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,K,LDA,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-      INTEGER I,INFO,IX,J,JX,KPLUS1,KX,L
-      LOGICAL NOCONJ,NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DCONJG,MAX,MIN
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 7
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZTBMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOCONJ = LSAME(TRANS,'T')
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX   too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*         Form  x := A*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          L = KPLUS1 - J
-                          DO 10 I = MAX(1,J-K),J - 1
-                              X(I) = X(I) + TEMP*A(L+I,J)
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(KPLUS1,J)
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          L = KPLUS1 - J
-                          DO 30 I = MAX(1,J-K),J - 1
-                              X(IX) = X(IX) + TEMP*A(L+I,J)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(KPLUS1,J)
-                      END IF
-                      JX = JX + INCX
-                      IF (J.GT.K) KX = KX + INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          L = 1 - J
-                          DO 50 I = MIN(N,J+K),J + 1,-1
-                              X(I) = X(I) + TEMP*A(L+I,J)
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(1,J)
-                      END IF
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          L = 1 - J
-                          DO 70 I = MIN(N,J+K),J + 1,-1
-                              X(IX) = X(IX) + TEMP*A(L+I,J)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(1,J)
-                      END IF
-                      JX = JX - INCX
-                      IF ((N-J).GE.K) KX = KX - INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := A**T*x  or  x := A**H*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = N,1,-1
-                      TEMP = X(J)
-                      L = KPLUS1 - J
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
-                          DO 90 I = J - 1,MAX(1,J-K),-1
-                              TEMP = TEMP + A(L+I,J)*X(I)
-   90                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(A(KPLUS1,J))
-                          DO 100 I = J - 1,MAX(1,J-K),-1
-                              TEMP = TEMP + DCONJG(A(L+I,J))*X(I)
-  100                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-  110             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 140 J = N,1,-1
-                      TEMP = X(JX)
-                      KX = KX - INCX
-                      IX = KX
-                      L = KPLUS1 - J
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(KPLUS1,J)
-                          DO 120 I = J - 1,MAX(1,J-K),-1
-                              TEMP = TEMP + A(L+I,J)*X(IX)
-                              IX = IX - INCX
-  120                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(A(KPLUS1,J))
-                          DO 130 I = J - 1,MAX(1,J-K),-1
-                              TEMP = TEMP + DCONJG(A(L+I,J))*X(IX)
-                              IX = IX - INCX
-  130                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  140             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = 1,N
-                      TEMP = X(J)
-                      L = 1 - J
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(1,J)
-                          DO 150 I = J + 1,MIN(N,J+K)
-                              TEMP = TEMP + A(L+I,J)*X(I)
-  150                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(A(1,J))
-                          DO 160 I = J + 1,MIN(N,J+K)
-                              TEMP = TEMP + DCONJG(A(L+I,J))*X(I)
-  160                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-  170             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 200 J = 1,N
-                      TEMP = X(JX)
-                      KX = KX + INCX
-                      IX = KX
-                      L = 1 - J
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(1,J)
-                          DO 180 I = J + 1,MIN(N,J+K)
-                              TEMP = TEMP + A(L+I,J)*X(IX)
-                              IX = IX + INCX
-  180                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(A(1,J))
-                          DO 190 I = J + 1,MIN(N,J+K)
-                              TEMP = TEMP + DCONJG(A(L+I,J))*X(IX)
-                              IX = IX + INCX
-  190                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZTBMV .
-*
-      END
diff --git a/superlu/BLAS/ztbsv.f b/superlu/BLAS/ztbsv.f
deleted file mode 100644
index 50c4bb42..00000000
--- a/superlu/BLAS/ztbsv.f
+++ /dev/null
@@ -1,432 +0,0 @@
-*> \brief \b ZTBSV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZTBSV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,K,LDA,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZTBSV  solves one of the systems of equations
-*>
-*>    A*x = b,   or   A**T*x = b,   or   A**H*x = b,
-*>
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular band matrix, with ( k + 1 )
-*> diagonals.
-*>
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>
-*>              TRANS = 'C' or 'c'   A**H*x = b.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] K
-*> \verbatim
-*>          K is INTEGER
-*>           On entry with UPLO = 'U' or 'u', K specifies the number of
-*>           super-diagonals of the matrix A.
-*>           On entry with UPLO = 'L' or 'l', K specifies the number of
-*>           sub-diagonals of the matrix A.
-*>           K must satisfy  0 .le. K.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry with UPLO = 'U' or 'u', the leading ( k + 1 )
-*>           by n part of the array A must contain the upper triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row
-*>           ( k + 1 ) of the array, the first super-diagonal starting at
-*>           position 2 in row k, and so on. The top left k by k triangle
-*>           of the array A is not referenced.
-*>           The following program segment will transfer an upper
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = K + 1 - J
-*>                    DO 10, I = MAX( 1, J - K ), J
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Before entry with UPLO = 'L' or 'l', the leading ( k + 1 )
-*>           by n part of the array A must contain the lower triangular
-*>           band part of the matrix of coefficients, supplied column by
-*>           column, with the leading diagonal of the matrix in row 1 of
-*>           the array, the first sub-diagonal starting at position 1 in
-*>           row 2, and so on. The bottom right k by k triangle of the
-*>           array A is not referenced.
-*>           The following program segment will transfer a lower
-*>           triangular band matrix from conventional full matrix storage
-*>           to band storage:
-*>
-*>                 DO 20, J = 1, N
-*>                    M = 1 - J
-*>                    DO 10, I = J, MIN( N, J + K )
-*>                       A( M + I, J ) = matrix( I, J )
-*>              10    CONTINUE
-*>              20 CONTINUE
-*>
-*>           Note that when DIAG = 'U' or 'u' the elements of the array A
-*>           corresponding to the diagonal elements of the matrix are not
-*>           referenced, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           ( k + 1 ).
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZTBSV(UPLO,TRANS,DIAG,N,K,A,LDA,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,K,LDA,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-      INTEGER I,INFO,IX,J,JX,KPLUS1,KX,L
-      LOGICAL NOCONJ,NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DCONJG,MAX,MIN
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (K.LT.0) THEN
-          INFO = 5
-      ELSE IF (LDA.LT. (K+1)) THEN
-          INFO = 7
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 9
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZTBSV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOCONJ = LSAME(TRANS,'T')
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed by sequentially with one pass through A.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x := inv( A )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          L = KPLUS1 - J
-                          IF (NOUNIT) X(J) = X(J)/A(KPLUS1,J)
-                          TEMP = X(J)
-                          DO 10 I = J - 1,MAX(1,J-K),-1
-                              X(I) = X(I) - TEMP*A(L+I,J)
-   10                     CONTINUE
-                      END IF
-   20             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 40 J = N,1,-1
-                      KX = KX - INCX
-                      IF (X(JX).NE.ZERO) THEN
-                          IX = KX
-                          L = KPLUS1 - J
-                          IF (NOUNIT) X(JX) = X(JX)/A(KPLUS1,J)
-                          TEMP = X(JX)
-                          DO 30 I = J - 1,MAX(1,J-K),-1
-                              X(IX) = X(IX) - TEMP*A(L+I,J)
-                              IX = IX - INCX
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          L = 1 - J
-                          IF (NOUNIT) X(J) = X(J)/A(1,J)
-                          TEMP = X(J)
-                          DO 50 I = J + 1,MIN(N,J+K)
-                              X(I) = X(I) - TEMP*A(L+I,J)
-   50                     CONTINUE
-                      END IF
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      KX = KX + INCX
-                      IF (X(JX).NE.ZERO) THEN
-                          IX = KX
-                          L = 1 - J
-                          IF (NOUNIT) X(JX) = X(JX)/A(1,J)
-                          TEMP = X(JX)
-                          DO 70 I = J + 1,MIN(N,J+K)
-                              X(IX) = X(IX) - TEMP*A(L+I,J)
-                              IX = IX + INCX
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := inv( A**T )*x  or  x := inv( A**H )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KPLUS1 = K + 1
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = 1,N
-                      TEMP = X(J)
-                      L = KPLUS1 - J
-                      IF (NOCONJ) THEN
-                          DO 90 I = MAX(1,J-K),J - 1
-                              TEMP = TEMP - A(L+I,J)*X(I)
-   90                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J)
-                      ELSE
-                          DO 100 I = MAX(1,J-K),J - 1
-                              TEMP = TEMP - DCONJG(A(L+I,J))*X(I)
-  100                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(A(KPLUS1,J))
-                      END IF
-                      X(J) = TEMP
-  110             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 140 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      L = KPLUS1 - J
-                      IF (NOCONJ) THEN
-                          DO 120 I = MAX(1,J-K),J - 1
-                              TEMP = TEMP - A(L+I,J)*X(IX)
-                              IX = IX + INCX
-  120                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(KPLUS1,J)
-                      ELSE
-                          DO 130 I = MAX(1,J-K),J - 1
-                              TEMP = TEMP - DCONJG(A(L+I,J))*X(IX)
-                              IX = IX + INCX
-  130                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(A(KPLUS1,J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      IF (J.GT.K) KX = KX + INCX
-  140             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = N,1,-1
-                      TEMP = X(J)
-                      L = 1 - J
-                      IF (NOCONJ) THEN
-                          DO 150 I = MIN(N,J+K),J + 1,-1
-                              TEMP = TEMP - A(L+I,J)*X(I)
-  150                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(1,J)
-                      ELSE
-                          DO 160 I = MIN(N,J+K),J + 1,-1
-                              TEMP = TEMP - DCONJG(A(L+I,J))*X(I)
-  160                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(A(1,J))
-                      END IF
-                      X(J) = TEMP
-  170             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 200 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      L = 1 - J
-                      IF (NOCONJ) THEN
-                          DO 180 I = MIN(N,J+K),J + 1,-1
-                              TEMP = TEMP - A(L+I,J)*X(IX)
-                              IX = IX - INCX
-  180                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(1,J)
-                      ELSE
-                          DO 190 I = MIN(N,J+K),J + 1,-1
-                              TEMP = TEMP - DCONJG(A(L+I,J))*X(IX)
-                              IX = IX - INCX
-  190                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(A(1,J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      IF ((N-J).GE.K) KX = KX - INCX
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZTBSV .
-*
-      END
diff --git a/superlu/BLAS/ztpmv.f b/superlu/BLAS/ztpmv.f
deleted file mode 100644
index d9aae425..00000000
--- a/superlu/BLAS/ztpmv.f
+++ /dev/null
@@ -1,388 +0,0 @@
-*> \brief \b ZTPMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZTPMV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 AP(*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZTPMV  performs one of the matrix-vector operations
-*>
-*>    x := A*x,   or   x := A**T*x,   or   x := A**H*x,
-*>
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular matrix, supplied in packed form.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>
-*>              TRANS = 'C' or 'c'   x := A**H*x.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] AP
-*> \verbatim
-*>          AP is COMPLEX*16 array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
-*>           respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
-*>           respectively, and so on.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is (input/output) COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZTPMV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 AP(*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-      INTEGER I,INFO,IX,J,JX,K,KK,KX
-      LOGICAL NOCONJ,NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DCONJG
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZTPMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOCONJ = LSAME(TRANS,'T')
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of AP are
-*     accessed sequentially with one pass through AP.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x:= A*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          K = KK
-                          DO 10 I = 1,J - 1
-                              X(I) = X(I) + TEMP*AP(K)
-                              K = K + 1
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*AP(KK+J-1)
-                      END IF
-                      KK = KK + J
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 30 K = KK,KK + J - 2
-                              X(IX) = X(IX) + TEMP*AP(K)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*AP(KK+J-1)
-                      END IF
-                      JX = JX + INCX
-                      KK = KK + J
-   40             CONTINUE
-              END IF
-          ELSE
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          K = KK
-                          DO 50 I = N,J + 1,-1
-                              X(I) = X(I) + TEMP*AP(K)
-                              K = K - 1
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*AP(KK-N+J)
-                      END IF
-                      KK = KK - (N-J+1)
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 70 K = KK,KK - (N- (J+1)),-1
-                              X(IX) = X(IX) + TEMP*AP(K)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*AP(KK-N+J)
-                      END IF
-                      JX = JX - INCX
-                      KK = KK - (N-J+1)
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := A**T*x  or  x := A**H*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = N,1,-1
-                      TEMP = X(J)
-                      K = KK - 1
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*AP(KK)
-                          DO 90 I = J - 1,1,-1
-                              TEMP = TEMP + AP(K)*X(I)
-                              K = K - 1
-   90                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(AP(KK))
-                          DO 100 I = J - 1,1,-1
-                              TEMP = TEMP + DCONJG(AP(K))*X(I)
-                              K = K - 1
-  100                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-                      KK = KK - J
-  110             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 140 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*AP(KK)
-                          DO 120 K = KK - 1,KK - J + 1,-1
-                              IX = IX - INCX
-                              TEMP = TEMP + AP(K)*X(IX)
-  120                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(AP(KK))
-                          DO 130 K = KK - 1,KK - J + 1,-1
-                              IX = IX - INCX
-                              TEMP = TEMP + DCONJG(AP(K))*X(IX)
-  130                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      KK = KK - J
-  140             CONTINUE
-              END IF
-          ELSE
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = 1,N
-                      TEMP = X(J)
-                      K = KK + 1
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*AP(KK)
-                          DO 150 I = J + 1,N
-                              TEMP = TEMP + AP(K)*X(I)
-                              K = K + 1
-  150                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(AP(KK))
-                          DO 160 I = J + 1,N
-                              TEMP = TEMP + DCONJG(AP(K))*X(I)
-                              K = K + 1
-  160                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-                      KK = KK + (N-J+1)
-  170             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 200 J = 1,N
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*AP(KK)
-                          DO 180 K = KK + 1,KK + N - J
-                              IX = IX + INCX
-                              TEMP = TEMP + AP(K)*X(IX)
-  180                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(AP(KK))
-                          DO 190 K = KK + 1,KK + N - J
-                              IX = IX + INCX
-                              TEMP = TEMP + DCONJG(AP(K))*X(IX)
-  190                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      KK = KK + (N-J+1)
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZTPMV .
-*
-      END
diff --git a/superlu/BLAS/ztpsv.f b/superlu/BLAS/ztpsv.f
deleted file mode 100644
index 5874fdc4..00000000
--- a/superlu/BLAS/ztpsv.f
+++ /dev/null
@@ -1,390 +0,0 @@
-*> \brief \b ZTPSV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZTPSV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 AP(*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZTPSV  solves one of the systems of equations
-*>
-*>    A*x = b,   or   A**T*x = b,   or   A**H*x = b,
-*>
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular matrix, supplied in packed form.
-*>
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>
-*>              TRANS = 'C' or 'c'   A**H*x = b.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] AP
-*> \verbatim
-*>          AP is COMPLEX*16 array of DIMENSION at least
-*>           ( ( n*( n + 1 ) )/2 ).
-*>           Before entry with  UPLO = 'U' or 'u', the array AP must
-*>           contain the upper triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 1, 2 ) and a( 2, 2 )
-*>           respectively, and so on.
-*>           Before entry with UPLO = 'L' or 'l', the array AP must
-*>           contain the lower triangular matrix packed sequentially,
-*>           column by column, so that AP( 1 ) contains a( 1, 1 ),
-*>           AP( 2 ) and AP( 3 ) contain a( 2, 1 ) and a( 3, 1 )
-*>           respectively, and so on.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZTPSV(UPLO,TRANS,DIAG,N,AP,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 AP(*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-      INTEGER I,INFO,IX,J,JX,K,KK,KX
-      LOGICAL NOCONJ,NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DCONJG
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 7
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZTPSV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOCONJ = LSAME(TRANS,'T')
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of AP are
-*     accessed sequentially with one pass through AP.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x := inv( A )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/AP(KK)
-                          TEMP = X(J)
-                          K = KK - 1
-                          DO 10 I = J - 1,1,-1
-                              X(I) = X(I) - TEMP*AP(K)
-                              K = K - 1
-   10                     CONTINUE
-                      END IF
-                      KK = KK - J
-   20             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 40 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/AP(KK)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 30 K = KK - 1,KK - J + 1,-1
-                              IX = IX - INCX
-                              X(IX) = X(IX) - TEMP*AP(K)
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-                      KK = KK - J
-   40             CONTINUE
-              END IF
-          ELSE
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/AP(KK)
-                          TEMP = X(J)
-                          K = KK + 1
-                          DO 50 I = J + 1,N
-                              X(I) = X(I) - TEMP*AP(K)
-                              K = K + 1
-   50                     CONTINUE
-                      END IF
-                      KK = KK + (N-J+1)
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/AP(KK)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 70 K = KK + 1,KK + N - J
-                              IX = IX + INCX
-                              X(IX) = X(IX) - TEMP*AP(K)
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-                      KK = KK + (N-J+1)
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := inv( A**T )*x  or  x := inv( A**H )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              KK = 1
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = 1,N
-                      TEMP = X(J)
-                      K = KK
-                      IF (NOCONJ) THEN
-                          DO 90 I = 1,J - 1
-                              TEMP = TEMP - AP(K)*X(I)
-                              K = K + 1
-   90                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/AP(KK+J-1)
-                      ELSE
-                          DO 100 I = 1,J - 1
-                              TEMP = TEMP - DCONJG(AP(K))*X(I)
-                              K = K + 1
-  100                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(AP(KK+J-1))
-                      END IF
-                      X(J) = TEMP
-                      KK = KK + J
-  110             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 140 J = 1,N
-                      TEMP = X(JX)
-                      IX = KX
-                      IF (NOCONJ) THEN
-                          DO 120 K = KK,KK + J - 2
-                              TEMP = TEMP - AP(K)*X(IX)
-                              IX = IX + INCX
-  120                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/AP(KK+J-1)
-                      ELSE
-                          DO 130 K = KK,KK + J - 2
-                              TEMP = TEMP - DCONJG(AP(K))*X(IX)
-                              IX = IX + INCX
-  130                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(AP(KK+J-1))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-                      KK = KK + J
-  140             CONTINUE
-              END IF
-          ELSE
-              KK = (N* (N+1))/2
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = N,1,-1
-                      TEMP = X(J)
-                      K = KK
-                      IF (NOCONJ) THEN
-                          DO 150 I = N,J + 1,-1
-                              TEMP = TEMP - AP(K)*X(I)
-                              K = K - 1
-  150                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/AP(KK-N+J)
-                      ELSE
-                          DO 160 I = N,J + 1,-1
-                              TEMP = TEMP - DCONJG(AP(K))*X(I)
-                              K = K - 1
-  160                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(AP(KK-N+J))
-                      END IF
-                      X(J) = TEMP
-                      KK = KK - (N-J+1)
-  170             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 200 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = KX
-                      IF (NOCONJ) THEN
-                          DO 180 K = KK,KK - (N- (J+1)),-1
-                              TEMP = TEMP - AP(K)*X(IX)
-                              IX = IX - INCX
-  180                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/AP(KK-N+J)
-                      ELSE
-                          DO 190 K = KK,KK - (N- (J+1)),-1
-                              TEMP = TEMP - DCONJG(AP(K))*X(IX)
-                              IX = IX - INCX
-  190                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(AP(KK-N+J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-                      KK = KK - (N-J+1)
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZTPSV .
-*
-      END
diff --git a/superlu/BLAS/ztrmm.f b/superlu/BLAS/ztrmm.f
deleted file mode 100644
index 229f3322..00000000
--- a/superlu/BLAS/ztrmm.f
+++ /dev/null
@@ -1,452 +0,0 @@
-*> \brief \b ZTRMM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA
-*       INTEGER LDA,LDB,M,N
-*       CHARACTER DIAG,SIDE,TRANSA,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),B(LDB,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZTRMM  performs one of the matrix-matrix operations
-*>
-*>    B := alpha*op( A )*B,   or   B := alpha*B*op( A )
-*>
-*> where  alpha  is a scalar,  B  is an m by n matrix,  A  is a unit, or
-*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
-*>
-*>    op( A ) = A   or   op( A ) = A**T   or   op( A ) = A**H.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry,  SIDE specifies whether  op( A ) multiplies B from
-*>           the left or right as follows:
-*>
-*>              SIDE = 'L' or 'l'   B := alpha*op( A )*B.
-*>
-*>              SIDE = 'R' or 'r'   B := alpha*B*op( A ).
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix A is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSA = 'N' or 'n'   op( A ) = A.
-*>
-*>              TRANSA = 'T' or 't'   op( A ) = A**T.
-*>
-*>              TRANSA = 'C' or 'c'   op( A ) = A**H.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit triangular
-*>           as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of B. M must be at
-*>           least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of B.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
-*>           zero then  A is not referenced and  B need not be set before
-*>           entry.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, k ), where k is m
-*>           when  SIDE = 'L' or 'l'  and is  n  when  SIDE = 'R' or 'r'.
-*>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
-*>           upper triangular part of the array  A must contain the upper
-*>           triangular matrix  and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
-*>           lower triangular part of the array  A must contain the lower
-*>           triangular matrix  and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
-*>           A  are not referenced either,  but are assumed to be  unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
-*>           then LDA must be at least max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in] B
-*> \verbatim
-*>          B is (input/output) COMPLEX*16 array of DIMENSION ( LDB, n ).
-*>           Before entry,  the leading  m by n part of the array  B must
-*>           contain the matrix  B,  and  on exit  is overwritten  by the
-*>           transformed matrix.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZTRMM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ALPHA
-      INTEGER LDA,LDB,M,N
-      CHARACTER DIAG,SIDE,TRANSA,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),B(LDB,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DCONJG,MAX
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-      INTEGER I,INFO,J,K,NROWA
-      LOGICAL LSIDE,NOCONJ,NOUNIT,UPPER
-*     ..
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*
-*     Test the input parameters.
-*
-      LSIDE = LSAME(SIDE,'L')
-      IF (LSIDE) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      NOCONJ = LSAME(TRANSA,'T')
-      NOUNIT = LSAME(DIAG,'N')
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.LSIDE) .AND. (.NOT.LSAME(SIDE,'R'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 2
-      ELSE IF ((.NOT.LSAME(TRANSA,'N')) .AND.
-     +         (.NOT.LSAME(TRANSA,'T')) .AND.
-     +         (.NOT.LSAME(TRANSA,'C'))) THEN
-          INFO = 3
-      ELSE IF ((.NOT.LSAME(DIAG,'U')) .AND. (.NOT.LSAME(DIAG,'N'))) THEN
-          INFO = 4
-      ELSE IF (M.LT.0) THEN
-          INFO = 5
-      ELSE IF (N.LT.0) THEN
-          INFO = 6
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 9
-      ELSE IF (LDB.LT.MAX(1,M)) THEN
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZTRMM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (M.EQ.0 .OR. N.EQ.0) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          DO 20 J = 1,N
-              DO 10 I = 1,M
-                  B(I,J) = ZERO
-   10         CONTINUE
-   20     CONTINUE
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSIDE) THEN
-          IF (LSAME(TRANSA,'N')) THEN
-*
-*           Form  B := alpha*A*B.
-*
-              IF (UPPER) THEN
-                  DO 50 J = 1,N
-                      DO 40 K = 1,M
-                          IF (B(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*B(K,J)
-                              DO 30 I = 1,K - 1
-                                  B(I,J) = B(I,J) + TEMP*A(I,K)
-   30                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP*A(K,K)
-                              B(K,J) = TEMP
-                          END IF
-   40                 CONTINUE
-   50             CONTINUE
-              ELSE
-                  DO 80 J = 1,N
-                      DO 70 K = M,1,-1
-                          IF (B(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*B(K,J)
-                              B(K,J) = TEMP
-                              IF (NOUNIT) B(K,J) = B(K,J)*A(K,K)
-                              DO 60 I = K + 1,M
-                                  B(I,J) = B(I,J) + TEMP*A(I,K)
-   60                         CONTINUE
-                          END IF
-   70                 CONTINUE
-   80             CONTINUE
-              END IF
-          ELSE
-*
-*           Form  B := alpha*A**T*B   or   B := alpha*A**H*B.
-*
-              IF (UPPER) THEN
-                  DO 120 J = 1,N
-                      DO 110 I = M,1,-1
-                          TEMP = B(I,J)
-                          IF (NOCONJ) THEN
-                              IF (NOUNIT) TEMP = TEMP*A(I,I)
-                              DO 90 K = 1,I - 1
-                                  TEMP = TEMP + A(K,I)*B(K,J)
-   90                         CONTINUE
-                          ELSE
-                              IF (NOUNIT) TEMP = TEMP*DCONJG(A(I,I))
-                              DO 100 K = 1,I - 1
-                                  TEMP = TEMP + DCONJG(A(K,I))*B(K,J)
-  100                         CONTINUE
-                          END IF
-                          B(I,J) = ALPHA*TEMP
-  110                 CONTINUE
-  120             CONTINUE
-              ELSE
-                  DO 160 J = 1,N
-                      DO 150 I = 1,M
-                          TEMP = B(I,J)
-                          IF (NOCONJ) THEN
-                              IF (NOUNIT) TEMP = TEMP*A(I,I)
-                              DO 130 K = I + 1,M
-                                  TEMP = TEMP + A(K,I)*B(K,J)
-  130                         CONTINUE
-                          ELSE
-                              IF (NOUNIT) TEMP = TEMP*DCONJG(A(I,I))
-                              DO 140 K = I + 1,M
-                                  TEMP = TEMP + DCONJG(A(K,I))*B(K,J)
-  140                         CONTINUE
-                          END IF
-                          B(I,J) = ALPHA*TEMP
-  150                 CONTINUE
-  160             CONTINUE
-              END IF
-          END IF
-      ELSE
-          IF (LSAME(TRANSA,'N')) THEN
-*
-*           Form  B := alpha*B*A.
-*
-              IF (UPPER) THEN
-                  DO 200 J = N,1,-1
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 170 I = 1,M
-                          B(I,J) = TEMP*B(I,J)
-  170                 CONTINUE
-                      DO 190 K = 1,J - 1
-                          IF (A(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*A(K,J)
-                              DO 180 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  180                         CONTINUE
-                          END IF
-  190                 CONTINUE
-  200             CONTINUE
-              ELSE
-                  DO 240 J = 1,N
-                      TEMP = ALPHA
-                      IF (NOUNIT) TEMP = TEMP*A(J,J)
-                      DO 210 I = 1,M
-                          B(I,J) = TEMP*B(I,J)
-  210                 CONTINUE
-                      DO 230 K = J + 1,N
-                          IF (A(K,J).NE.ZERO) THEN
-                              TEMP = ALPHA*A(K,J)
-                              DO 220 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  220                         CONTINUE
-                          END IF
-  230                 CONTINUE
-  240             CONTINUE
-              END IF
-          ELSE
-*
-*           Form  B := alpha*B*A**T   or   B := alpha*B*A**H.
-*
-              IF (UPPER) THEN
-                  DO 280 K = 1,N
-                      DO 260 J = 1,K - 1
-                          IF (A(J,K).NE.ZERO) THEN
-                              IF (NOCONJ) THEN
-                                  TEMP = ALPHA*A(J,K)
-                              ELSE
-                                  TEMP = ALPHA*DCONJG(A(J,K))
-                              END IF
-                              DO 250 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  250                         CONTINUE
-                          END IF
-  260                 CONTINUE
-                      TEMP = ALPHA
-                      IF (NOUNIT) THEN
-                          IF (NOCONJ) THEN
-                              TEMP = TEMP*A(K,K)
-                          ELSE
-                              TEMP = TEMP*DCONJG(A(K,K))
-                          END IF
-                      END IF
-                      IF (TEMP.NE.ONE) THEN
-                          DO 270 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  270                     CONTINUE
-                      END IF
-  280             CONTINUE
-              ELSE
-                  DO 320 K = N,1,-1
-                      DO 300 J = K + 1,N
-                          IF (A(J,K).NE.ZERO) THEN
-                              IF (NOCONJ) THEN
-                                  TEMP = ALPHA*A(J,K)
-                              ELSE
-                                  TEMP = ALPHA*DCONJG(A(J,K))
-                              END IF
-                              DO 290 I = 1,M
-                                  B(I,J) = B(I,J) + TEMP*B(I,K)
-  290                         CONTINUE
-                          END IF
-  300                 CONTINUE
-                      TEMP = ALPHA
-                      IF (NOUNIT) THEN
-                          IF (NOCONJ) THEN
-                              TEMP = TEMP*A(K,K)
-                          ELSE
-                              TEMP = TEMP*DCONJG(A(K,K))
-                          END IF
-                      END IF
-                      IF (TEMP.NE.ONE) THEN
-                          DO 310 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  310                     CONTINUE
-                      END IF
-  320             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZTRMM .
-*
-      END
diff --git a/superlu/BLAS/ztrmv.f b/superlu/BLAS/ztrmv.f
deleted file mode 100644
index ab9065cf..00000000
--- a/superlu/BLAS/ztrmv.f
+++ /dev/null
@@ -1,373 +0,0 @@
-*> \brief \b ZTRMV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,LDA,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZTRMV  performs one of the matrix-vector operations
-*>
-*>    x := A*x,   or   x := A**T*x,   or   x := A**H*x,
-*>
-*> where x is an n element vector and  A is an n by n unit, or non-unit,
-*> upper or lower triangular matrix.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the operation to be performed as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   x := A*x.
-*>
-*>              TRANS = 'T' or 't'   x := A**T*x.
-*>
-*>              TRANS = 'C' or 'c'   x := A**H*x.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular matrix and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular matrix and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced either, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in] X
-*> \verbatim
-*>          X is (input/output) COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element vector x. On exit, X is overwritten with the
-*>           transformed vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>  The vector and matrix arguments are not referenced when N = 0, or M = 0
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZTRMV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,LDA,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-      INTEGER I,INFO,IX,J,JX,KX
-      LOGICAL NOCONJ,NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DCONJG,MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 6
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 8
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZTRMV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOCONJ = LSAME(TRANS,'T')
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x := A*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          DO 10 I = 1,J - 1
-                              X(I) = X(I) + TEMP*A(I,J)
-   10                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(J,J)
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 40 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 30 I = 1,J - 1
-                              X(IX) = X(IX) + TEMP*A(I,J)
-                              IX = IX + INCX
-   30                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(J,J)
-                      END IF
-                      JX = JX + INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          TEMP = X(J)
-                          DO 50 I = N,J + 1,-1
-                              X(I) = X(I) + TEMP*A(I,J)
-   50                     CONTINUE
-                          IF (NOUNIT) X(J) = X(J)*A(J,J)
-                      END IF
-   60             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 80 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          TEMP = X(JX)
-                          IX = KX
-                          DO 70 I = N,J + 1,-1
-                              X(IX) = X(IX) + TEMP*A(I,J)
-                              IX = IX - INCX
-   70                     CONTINUE
-                          IF (NOUNIT) X(JX) = X(JX)*A(J,J)
-                      END IF
-                      JX = JX - INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := A**T*x  or  x := A**H*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = N,1,-1
-                      TEMP = X(J)
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(J,J)
-                          DO 90 I = J - 1,1,-1
-                              TEMP = TEMP + A(I,J)*X(I)
-   90                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(A(J,J))
-                          DO 100 I = J - 1,1,-1
-                              TEMP = TEMP + DCONJG(A(I,J))*X(I)
-  100                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-  110             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 140 J = N,1,-1
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(J,J)
-                          DO 120 I = J - 1,1,-1
-                              IX = IX - INCX
-                              TEMP = TEMP + A(I,J)*X(IX)
-  120                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(A(J,J))
-                          DO 130 I = J - 1,1,-1
-                              IX = IX - INCX
-                              TEMP = TEMP + DCONJG(A(I,J))*X(IX)
-  130                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  140             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = 1,N
-                      TEMP = X(J)
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(J,J)
-                          DO 150 I = J + 1,N
-                              TEMP = TEMP + A(I,J)*X(I)
-  150                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(A(J,J))
-                          DO 160 I = J + 1,N
-                              TEMP = TEMP + DCONJG(A(I,J))*X(I)
-  160                     CONTINUE
-                      END IF
-                      X(J) = TEMP
-  170             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 200 J = 1,N
-                      TEMP = X(JX)
-                      IX = JX
-                      IF (NOCONJ) THEN
-                          IF (NOUNIT) TEMP = TEMP*A(J,J)
-                          DO 180 I = J + 1,N
-                              IX = IX + INCX
-                              TEMP = TEMP + A(I,J)*X(IX)
-  180                     CONTINUE
-                      ELSE
-                          IF (NOUNIT) TEMP = TEMP*DCONJG(A(J,J))
-                          DO 190 I = J + 1,N
-                              IX = IX + INCX
-                              TEMP = TEMP + DCONJG(A(I,J))*X(IX)
-  190                     CONTINUE
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZTRMV .
-*
-      END
diff --git a/superlu/BLAS/ztrsm.f b/superlu/BLAS/ztrsm.f
deleted file mode 100644
index cc1af763..00000000
--- a/superlu/BLAS/ztrsm.f
+++ /dev/null
@@ -1,477 +0,0 @@
-*> \brief \b ZTRSM
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZTRSM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-*
-*       .. Scalar Arguments ..
-*       COMPLEX*16 ALPHA
-*       INTEGER LDA,LDB,M,N
-*       CHARACTER DIAG,SIDE,TRANSA,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),B(LDB,*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZTRSM  solves one of the matrix equations
-*>
-*>    op( A )*X = alpha*B,   or   X*op( A ) = alpha*B,
-*>
-*> where alpha is a scalar, X and B are m by n matrices, A is a unit, or
-*> non-unit,  upper or lower triangular matrix  and  op( A )  is one  of
-*>
-*>    op( A ) = A   or   op( A ) = A**T   or   op( A ) = A**H.
-*>
-*> The matrix X is overwritten on B.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] SIDE
-*> \verbatim
-*>          SIDE is CHARACTER*1
-*>           On entry, SIDE specifies whether op( A ) appears on the left
-*>           or right of X as follows:
-*>
-*>              SIDE = 'L' or 'l'   op( A )*X = alpha*B.
-*>
-*>              SIDE = 'R' or 'r'   X*op( A ) = alpha*B.
-*> \endverbatim
-*>
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix A is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANSA
-*> \verbatim
-*>          TRANSA is CHARACTER*1
-*>           On entry, TRANSA specifies the form of op( A ) to be used in
-*>           the matrix multiplication as follows:
-*>
-*>              TRANSA = 'N' or 'n'   op( A ) = A.
-*>
-*>              TRANSA = 'T' or 't'   op( A ) = A**T.
-*>
-*>              TRANSA = 'C' or 'c'   op( A ) = A**H.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit triangular
-*>           as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] M
-*> \verbatim
-*>          M is INTEGER
-*>           On entry, M specifies the number of rows of B. M must be at
-*>           least zero.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the number of columns of B.  N must be
-*>           at least zero.
-*> \endverbatim
-*>
-*> \param[in] ALPHA
-*> \verbatim
-*>          ALPHA is COMPLEX*16
-*>           On entry,  ALPHA specifies the scalar  alpha. When  alpha is
-*>           zero then  A is not referenced and  B need not be set before
-*>           entry.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, k ),
-*>           where k is m when SIDE = 'L' or 'l'
-*>             and k is n when SIDE = 'R' or 'r'.
-*>           Before entry  with  UPLO = 'U' or 'u',  the  leading  k by k
-*>           upper triangular part of the array  A must contain the upper
-*>           triangular matrix  and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry  with  UPLO = 'L' or 'l',  the  leading  k by k
-*>           lower triangular part of the array  A must contain the lower
-*>           triangular matrix  and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u',  the diagonal elements of
-*>           A  are not referenced either,  but are assumed to be  unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program.  When  SIDE = 'L' or 'l'  then
-*>           LDA  must be at least  max( 1, m ),  when  SIDE = 'R' or 'r'
-*>           then LDA must be at least max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in,out] B
-*> \verbatim
-*>          B is COMPLEX*16 array of DIMENSION ( LDB, n ).
-*>           Before entry,  the leading  m by n part of the array  B must
-*>           contain  the  right-hand  side  matrix  B,  and  on exit  is
-*>           overwritten by the solution matrix  X.
-*> \endverbatim
-*>
-*> \param[in] LDB
-*> \verbatim
-*>          LDB is INTEGER
-*>           On entry, LDB specifies the first dimension of B as declared
-*>           in  the  calling  (sub)  program.   LDB  must  be  at  least
-*>           max( 1, m ).
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level3
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 3 Blas routine.
-*>
-*>  -- Written on 8-February-1989.
-*>     Jack Dongarra, Argonne National Laboratory.
-*>     Iain Duff, AERE Harwell.
-*>     Jeremy Du Croz, Numerical Algorithms Group Ltd.
-*>     Sven Hammarling, Numerical Algorithms Group Ltd.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZTRSM(SIDE,UPLO,TRANSA,DIAG,M,N,ALPHA,A,LDA,B,LDB)
-*
-*  -- Reference BLAS level3 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      COMPLEX*16 ALPHA
-      INTEGER LDA,LDB,M,N
-      CHARACTER DIAG,SIDE,TRANSA,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),B(LDB,*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DCONJG,MAX
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-      INTEGER I,INFO,J,K,NROWA
-      LOGICAL LSIDE,NOCONJ,NOUNIT,UPPER
-*     ..
-*     .. Parameters ..
-      COMPLEX*16 ONE
-      PARAMETER (ONE= (1.0D+0,0.0D+0))
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*
-*     Test the input parameters.
-*
-      LSIDE = LSAME(SIDE,'L')
-      IF (LSIDE) THEN
-          NROWA = M
-      ELSE
-          NROWA = N
-      END IF
-      NOCONJ = LSAME(TRANSA,'T')
-      NOUNIT = LSAME(DIAG,'N')
-      UPPER = LSAME(UPLO,'U')
-*
-      INFO = 0
-      IF ((.NOT.LSIDE) .AND. (.NOT.LSAME(SIDE,'R'))) THEN
-          INFO = 1
-      ELSE IF ((.NOT.UPPER) .AND. (.NOT.LSAME(UPLO,'L'))) THEN
-          INFO = 2
-      ELSE IF ((.NOT.LSAME(TRANSA,'N')) .AND.
-     +         (.NOT.LSAME(TRANSA,'T')) .AND.
-     +         (.NOT.LSAME(TRANSA,'C'))) THEN
-          INFO = 3
-      ELSE IF ((.NOT.LSAME(DIAG,'U')) .AND. (.NOT.LSAME(DIAG,'N'))) THEN
-          INFO = 4
-      ELSE IF (M.LT.0) THEN
-          INFO = 5
-      ELSE IF (N.LT.0) THEN
-          INFO = 6
-      ELSE IF (LDA.LT.MAX(1,NROWA)) THEN
-          INFO = 9
-      ELSE IF (LDB.LT.MAX(1,M)) THEN
-          INFO = 11
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZTRSM ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (M.EQ.0 .OR. N.EQ.0) RETURN
-*
-*     And when  alpha.eq.zero.
-*
-      IF (ALPHA.EQ.ZERO) THEN
-          DO 20 J = 1,N
-              DO 10 I = 1,M
-                  B(I,J) = ZERO
-   10         CONTINUE
-   20     CONTINUE
-          RETURN
-      END IF
-*
-*     Start the operations.
-*
-      IF (LSIDE) THEN
-          IF (LSAME(TRANSA,'N')) THEN
-*
-*           Form  B := alpha*inv( A )*B.
-*
-              IF (UPPER) THEN
-                  DO 60 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 30 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-   30                     CONTINUE
-                      END IF
-                      DO 50 K = M,1,-1
-                          IF (B(K,J).NE.ZERO) THEN
-                              IF (NOUNIT) B(K,J) = B(K,J)/A(K,K)
-                              DO 40 I = 1,K - 1
-                                  B(I,J) = B(I,J) - B(K,J)*A(I,K)
-   40                         CONTINUE
-                          END IF
-   50                 CONTINUE
-   60             CONTINUE
-              ELSE
-                  DO 100 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 70 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-   70                     CONTINUE
-                      END IF
-                      DO 90 K = 1,M
-                          IF (B(K,J).NE.ZERO) THEN
-                              IF (NOUNIT) B(K,J) = B(K,J)/A(K,K)
-                              DO 80 I = K + 1,M
-                                  B(I,J) = B(I,J) - B(K,J)*A(I,K)
-   80                         CONTINUE
-                          END IF
-   90                 CONTINUE
-  100             CONTINUE
-              END IF
-          ELSE
-*
-*           Form  B := alpha*inv( A**T )*B
-*           or    B := alpha*inv( A**H )*B.
-*
-              IF (UPPER) THEN
-                  DO 140 J = 1,N
-                      DO 130 I = 1,M
-                          TEMP = ALPHA*B(I,J)
-                          IF (NOCONJ) THEN
-                              DO 110 K = 1,I - 1
-                                  TEMP = TEMP - A(K,I)*B(K,J)
-  110                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP/A(I,I)
-                          ELSE
-                              DO 120 K = 1,I - 1
-                                  TEMP = TEMP - DCONJG(A(K,I))*B(K,J)
-  120                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP/DCONJG(A(I,I))
-                          END IF
-                          B(I,J) = TEMP
-  130                 CONTINUE
-  140             CONTINUE
-              ELSE
-                  DO 180 J = 1,N
-                      DO 170 I = M,1,-1
-                          TEMP = ALPHA*B(I,J)
-                          IF (NOCONJ) THEN
-                              DO 150 K = I + 1,M
-                                  TEMP = TEMP - A(K,I)*B(K,J)
-  150                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP/A(I,I)
-                          ELSE
-                              DO 160 K = I + 1,M
-                                  TEMP = TEMP - DCONJG(A(K,I))*B(K,J)
-  160                         CONTINUE
-                              IF (NOUNIT) TEMP = TEMP/DCONJG(A(I,I))
-                          END IF
-                          B(I,J) = TEMP
-  170                 CONTINUE
-  180             CONTINUE
-              END IF
-          END IF
-      ELSE
-          IF (LSAME(TRANSA,'N')) THEN
-*
-*           Form  B := alpha*B*inv( A ).
-*
-              IF (UPPER) THEN
-                  DO 230 J = 1,N
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 190 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-  190                     CONTINUE
-                      END IF
-                      DO 210 K = 1,J - 1
-                          IF (A(K,J).NE.ZERO) THEN
-                              DO 200 I = 1,M
-                                  B(I,J) = B(I,J) - A(K,J)*B(I,K)
-  200                         CONTINUE
-                          END IF
-  210                 CONTINUE
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(J,J)
-                          DO 220 I = 1,M
-                              B(I,J) = TEMP*B(I,J)
-  220                     CONTINUE
-                      END IF
-  230             CONTINUE
-              ELSE
-                  DO 280 J = N,1,-1
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 240 I = 1,M
-                              B(I,J) = ALPHA*B(I,J)
-  240                     CONTINUE
-                      END IF
-                      DO 260 K = J + 1,N
-                          IF (A(K,J).NE.ZERO) THEN
-                              DO 250 I = 1,M
-                                  B(I,J) = B(I,J) - A(K,J)*B(I,K)
-  250                         CONTINUE
-                          END IF
-  260                 CONTINUE
-                      IF (NOUNIT) THEN
-                          TEMP = ONE/A(J,J)
-                          DO 270 I = 1,M
-                              B(I,J) = TEMP*B(I,J)
-  270                     CONTINUE
-                      END IF
-  280             CONTINUE
-              END IF
-          ELSE
-*
-*           Form  B := alpha*B*inv( A**T )
-*           or    B := alpha*B*inv( A**H ).
-*
-              IF (UPPER) THEN
-                  DO 330 K = N,1,-1
-                      IF (NOUNIT) THEN
-                          IF (NOCONJ) THEN
-                              TEMP = ONE/A(K,K)
-                          ELSE
-                              TEMP = ONE/DCONJG(A(K,K))
-                          END IF
-                          DO 290 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  290                     CONTINUE
-                      END IF
-                      DO 310 J = 1,K - 1
-                          IF (A(J,K).NE.ZERO) THEN
-                              IF (NOCONJ) THEN
-                                  TEMP = A(J,K)
-                              ELSE
-                                  TEMP = DCONJG(A(J,K))
-                              END IF
-                              DO 300 I = 1,M
-                                  B(I,J) = B(I,J) - TEMP*B(I,K)
-  300                         CONTINUE
-                          END IF
-  310                 CONTINUE
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 320 I = 1,M
-                              B(I,K) = ALPHA*B(I,K)
-  320                     CONTINUE
-                      END IF
-  330             CONTINUE
-              ELSE
-                  DO 380 K = 1,N
-                      IF (NOUNIT) THEN
-                          IF (NOCONJ) THEN
-                              TEMP = ONE/A(K,K)
-                          ELSE
-                              TEMP = ONE/DCONJG(A(K,K))
-                          END IF
-                          DO 340 I = 1,M
-                              B(I,K) = TEMP*B(I,K)
-  340                     CONTINUE
-                      END IF
-                      DO 360 J = K + 1,N
-                          IF (A(J,K).NE.ZERO) THEN
-                              IF (NOCONJ) THEN
-                                  TEMP = A(J,K)
-                              ELSE
-                                  TEMP = DCONJG(A(J,K))
-                              END IF
-                              DO 350 I = 1,M
-                                  B(I,J) = B(I,J) - TEMP*B(I,K)
-  350                         CONTINUE
-                          END IF
-  360                 CONTINUE
-                      IF (ALPHA.NE.ONE) THEN
-                          DO 370 I = 1,M
-                              B(I,K) = ALPHA*B(I,K)
-  370                     CONTINUE
-                      END IF
-  380             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZTRSM .
-*
-      END
diff --git a/superlu/BLAS/ztrsv.f b/superlu/BLAS/ztrsv.f
deleted file mode 100644
index 577b5cae..00000000
--- a/superlu/BLAS/ztrsv.f
+++ /dev/null
@@ -1,375 +0,0 @@
-*> \brief \b ZTRSV
-*
-*  =========== DOCUMENTATION ===========
-*
-* Online html documentation available at
-*            http://www.netlib.org/lapack/explore-html/
-*
-*  Definition:
-*  ===========
-*
-*       SUBROUTINE ZTRSV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-*
-*       .. Scalar Arguments ..
-*       INTEGER INCX,LDA,N
-*       CHARACTER DIAG,TRANS,UPLO
-*       ..
-*       .. Array Arguments ..
-*       COMPLEX*16 A(LDA,*),X(*)
-*       ..
-*
-*
-*> \par Purpose:
-*  =============
-*>
-*> \verbatim
-*>
-*> ZTRSV  solves one of the systems of equations
-*>
-*>    A*x = b,   or   A**T*x = b,   or   A**H*x = b,
-*>
-*> where b and x are n element vectors and A is an n by n unit, or
-*> non-unit, upper or lower triangular matrix.
-*>
-*> No test for singularity or near-singularity is included in this
-*> routine. Such tests must be performed before calling this routine.
-*> \endverbatim
-*
-*  Arguments:
-*  ==========
-*
-*> \param[in] UPLO
-*> \verbatim
-*>          UPLO is CHARACTER*1
-*>           On entry, UPLO specifies whether the matrix is an upper or
-*>           lower triangular matrix as follows:
-*>
-*>              UPLO = 'U' or 'u'   A is an upper triangular matrix.
-*>
-*>              UPLO = 'L' or 'l'   A is a lower triangular matrix.
-*> \endverbatim
-*>
-*> \param[in] TRANS
-*> \verbatim
-*>          TRANS is CHARACTER*1
-*>           On entry, TRANS specifies the equations to be solved as
-*>           follows:
-*>
-*>              TRANS = 'N' or 'n'   A*x = b.
-*>
-*>              TRANS = 'T' or 't'   A**T*x = b.
-*>
-*>              TRANS = 'C' or 'c'   A**H*x = b.
-*> \endverbatim
-*>
-*> \param[in] DIAG
-*> \verbatim
-*>          DIAG is CHARACTER*1
-*>           On entry, DIAG specifies whether or not A is unit
-*>           triangular as follows:
-*>
-*>              DIAG = 'U' or 'u'   A is assumed to be unit triangular.
-*>
-*>              DIAG = 'N' or 'n'   A is not assumed to be unit
-*>                                  triangular.
-*> \endverbatim
-*>
-*> \param[in] N
-*> \verbatim
-*>          N is INTEGER
-*>           On entry, N specifies the order of the matrix A.
-*>           N must be at least zero.
-*> \endverbatim
-*>
-*> \param[in] A
-*> \verbatim
-*>          A is COMPLEX*16 array of DIMENSION ( LDA, n ).
-*>           Before entry with  UPLO = 'U' or 'u', the leading n by n
-*>           upper triangular part of the array A must contain the upper
-*>           triangular matrix and the strictly lower triangular part of
-*>           A is not referenced.
-*>           Before entry with UPLO = 'L' or 'l', the leading n by n
-*>           lower triangular part of the array A must contain the lower
-*>           triangular matrix and the strictly upper triangular part of
-*>           A is not referenced.
-*>           Note that when  DIAG = 'U' or 'u', the diagonal elements of
-*>           A are not referenced either, but are assumed to be unity.
-*> \endverbatim
-*>
-*> \param[in] LDA
-*> \verbatim
-*>          LDA is INTEGER
-*>           On entry, LDA specifies the first dimension of A as declared
-*>           in the calling (sub) program. LDA must be at least
-*>           max( 1, n ).
-*> \endverbatim
-*>
-*> \param[in,out] X
-*> \verbatim
-*>          X is COMPLEX*16 array of dimension at least
-*>           ( 1 + ( n - 1 )*abs( INCX ) ).
-*>           Before entry, the incremented array X must contain the n
-*>           element right-hand side vector b. On exit, X is overwritten
-*>           with the solution vector x.
-*> \endverbatim
-*>
-*> \param[in] INCX
-*> \verbatim
-*>          INCX is INTEGER
-*>           On entry, INCX specifies the increment for the elements of
-*>           X. INCX must not be zero.
-*> \endverbatim
-*
-*  Authors:
-*  ========
-*
-*> \author Univ. of Tennessee
-*> \author Univ. of California Berkeley
-*> \author Univ. of Colorado Denver
-*> \author NAG Ltd.
-*
-*> \date December 2016
-*
-*> \ingroup complex16_blas_level2
-*
-*> \par Further Details:
-*  =====================
-*>
-*> \verbatim
-*>
-*>  Level 2 Blas routine.
-*>
-*>  -- Written on 22-October-1986.
-*>     Jack Dongarra, Argonne National Lab.
-*>     Jeremy Du Croz, Nag Central Office.
-*>     Sven Hammarling, Nag Central Office.
-*>     Richard Hanson, Sandia National Labs.
-*> \endverbatim
-*>
-*  =====================================================================
-      SUBROUTINE ZTRSV(UPLO,TRANS,DIAG,N,A,LDA,X,INCX)
-*
-*  -- Reference BLAS level2 routine (version 3.7.0) --
-*  -- Reference BLAS is a software package provided by Univ. of Tennessee,    
--
-*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
-*     December 2016
-*
-*     .. Scalar Arguments ..
-      INTEGER INCX,LDA,N
-      CHARACTER DIAG,TRANS,UPLO
-*     ..
-*     .. Array Arguments ..
-      COMPLEX*16 A(LDA,*),X(*)
-*     ..
-*
-*  =====================================================================
-*
-*     .. Parameters ..
-      COMPLEX*16 ZERO
-      PARAMETER (ZERO= (0.0D+0,0.0D+0))
-*     ..
-*     .. Local Scalars ..
-      COMPLEX*16 TEMP
-      INTEGER I,INFO,IX,J,JX,KX
-      LOGICAL NOCONJ,NOUNIT
-*     ..
-*     .. External Functions ..
-      LOGICAL LSAME
-      EXTERNAL LSAME
-*     ..
-*     .. External Subroutines ..
-      EXTERNAL XERBLA
-*     ..
-*     .. Intrinsic Functions ..
-      INTRINSIC DCONJG,MAX
-*     ..
-*
-*     Test the input parameters.
-*
-      INFO = 0
-      IF (.NOT.LSAME(UPLO,'U') .AND. .NOT.LSAME(UPLO,'L')) THEN
-          INFO = 1
-      ELSE IF (.NOT.LSAME(TRANS,'N') .AND. .NOT.LSAME(TRANS,'T') .AND.
-     +         .NOT.LSAME(TRANS,'C')) THEN
-          INFO = 2
-      ELSE IF (.NOT.LSAME(DIAG,'U') .AND. .NOT.LSAME(DIAG,'N')) THEN
-          INFO = 3
-      ELSE IF (N.LT.0) THEN
-          INFO = 4
-      ELSE IF (LDA.LT.MAX(1,N)) THEN
-          INFO = 6
-      ELSE IF (INCX.EQ.0) THEN
-          INFO = 8
-      END IF
-      IF (INFO.NE.0) THEN
-          CALL XERBLA('ZTRSV ',INFO)
-          RETURN
-      END IF
-*
-*     Quick return if possible.
-*
-      IF (N.EQ.0) RETURN
-*
-      NOCONJ = LSAME(TRANS,'T')
-      NOUNIT = LSAME(DIAG,'N')
-*
-*     Set up the start point in X if the increment is not unity. This
-*     will be  ( N - 1 )*INCX  too small for descending loops.
-*
-      IF (INCX.LE.0) THEN
-          KX = 1 - (N-1)*INCX
-      ELSE IF (INCX.NE.1) THEN
-          KX = 1
-      END IF
-*
-*     Start the operations. In this version the elements of A are
-*     accessed sequentially with one pass through A.
-*
-      IF (LSAME(TRANS,'N')) THEN
-*
-*        Form  x := inv( A )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 20 J = N,1,-1
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/A(J,J)
-                          TEMP = X(J)
-                          DO 10 I = J - 1,1,-1
-                              X(I) = X(I) - TEMP*A(I,J)
-   10                     CONTINUE
-                      END IF
-   20             CONTINUE
-              ELSE
-                  JX = KX + (N-1)*INCX
-                  DO 40 J = N,1,-1
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/A(J,J)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 30 I = J - 1,1,-1
-                              IX = IX - INCX
-                              X(IX) = X(IX) - TEMP*A(I,J)
-   30                     CONTINUE
-                      END IF
-                      JX = JX - INCX
-   40             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 60 J = 1,N
-                      IF (X(J).NE.ZERO) THEN
-                          IF (NOUNIT) X(J) = X(J)/A(J,J)
-                          TEMP = X(J)
-                          DO 50 I = J + 1,N
-                              X(I) = X(I) - TEMP*A(I,J)
-   50                     CONTINUE
-                      END IF
-   60             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 80 J = 1,N
-                      IF (X(JX).NE.ZERO) THEN
-                          IF (NOUNIT) X(JX) = X(JX)/A(J,J)
-                          TEMP = X(JX)
-                          IX = JX
-                          DO 70 I = J + 1,N
-                              IX = IX + INCX
-                              X(IX) = X(IX) - TEMP*A(I,J)
-   70                     CONTINUE
-                      END IF
-                      JX = JX + INCX
-   80             CONTINUE
-              END IF
-          END IF
-      ELSE
-*
-*        Form  x := inv( A**T )*x  or  x := inv( A**H )*x.
-*
-          IF (LSAME(UPLO,'U')) THEN
-              IF (INCX.EQ.1) THEN
-                  DO 110 J = 1,N
-                      TEMP = X(J)
-                      IF (NOCONJ) THEN
-                          DO 90 I = 1,J - 1
-                              TEMP = TEMP - A(I,J)*X(I)
-   90                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      ELSE
-                          DO 100 I = 1,J - 1
-                              TEMP = TEMP - DCONJG(A(I,J))*X(I)
-  100                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(A(J,J))
-                      END IF
-                      X(J) = TEMP
-  110             CONTINUE
-              ELSE
-                  JX = KX
-                  DO 140 J = 1,N
-                      IX = KX
-                      TEMP = X(JX)
-                      IF (NOCONJ) THEN
-                          DO 120 I = 1,J - 1
-                              TEMP = TEMP - A(I,J)*X(IX)
-                              IX = IX + INCX
-  120                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      ELSE
-                          DO 130 I = 1,J - 1
-                              TEMP = TEMP - DCONJG(A(I,J))*X(IX)
-                              IX = IX + INCX
-  130                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(A(J,J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX + INCX
-  140             CONTINUE
-              END IF
-          ELSE
-              IF (INCX.EQ.1) THEN
-                  DO 170 J = N,1,-1
-                      TEMP = X(J)
-                      IF (NOCONJ) THEN
-                          DO 150 I = N,J + 1,-1
-                              TEMP = TEMP - A(I,J)*X(I)
-  150                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      ELSE
-                          DO 160 I = N,J + 1,-1
-                              TEMP = TEMP - DCONJG(A(I,J))*X(I)
-  160                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(A(J,J))
-                      END IF
-                      X(J) = TEMP
-  170             CONTINUE
-              ELSE
-                  KX = KX + (N-1)*INCX
-                  JX = KX
-                  DO 200 J = N,1,-1
-                      IX = KX
-                      TEMP = X(JX)
-                      IF (NOCONJ) THEN
-                          DO 180 I = N,J + 1,-1
-                              TEMP = TEMP - A(I,J)*X(IX)
-                              IX = IX - INCX
-  180                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/A(J,J)
-                      ELSE
-                          DO 190 I = N,J + 1,-1
-                              TEMP = TEMP - DCONJG(A(I,J))*X(IX)
-                              IX = IX - INCX
-  190                     CONTINUE
-                          IF (NOUNIT) TEMP = TEMP/DCONJG(A(J,J))
-                      END IF
-                      X(JX) = TEMP
-                      JX = JX - INCX
-  200             CONTINUE
-              END IF
-          END IF
-      END IF
-*
-      RETURN
-*
-*     End of ZTRSV .
-*
-      END
diff --git a/superlu/BLAS_f2c.h b/superlu/BLAS_f2c.h
deleted file mode 100644
index ad4552a4..00000000
--- a/superlu/BLAS_f2c.h
+++ /dev/null
@@ -1,236 +0,0 @@
-/* f2c.h  --  Standard Fortran to C header file */
-
-/*
-// Copyright (C) 2004 
-// Christian Stimming <stimming@tuhh.de>
- 
-// This library is free software; you can redistribute it and/or
-// modify it under the terms of the GNU Lesser General Public License as
-// published by the Free Software Foundation; either version 2, or (at
-// your option) any later version.
- 
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU Lesser General Public License for more details.
- 
-// You should have received a copy of the GNU Lesser General Public License 
along
-// with this library; see the file COPYING.  If not, write to the Free
-// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, 
USA.
-*/
-
-/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
-
-       - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
-
-#ifndef F2C_INCLUDE
-#define F2C_INCLUDE
-
-typedef int integer;
-typedef char *address;
-typedef short int shortint;
-typedef float real;
-typedef double doublereal;
-typedef struct { real r, i; } complex;
-typedef struct { doublereal r, i; } doublecomplex;
-typedef int logical;
-typedef short int shortlogical;
-typedef char logical1;
-typedef char integer1;
-
-#define TRUE_ (1)
-#define FALSE_ (0)
-
-/* Extern is for use with -E */
-#ifndef Extern
-#define Extern extern
-#endif
-
-/* I/O stuff */
-
-#ifdef f2c_i2
-/* for -i2 */
-typedef short flag;
-typedef short ftnlen;
-typedef short ftnint;
-#else
-typedef int flag;
-typedef int ftnlen;
-typedef int ftnint;
-#endif
-
-/*external read, write*/
-typedef struct
-{      flag cierr;
-       ftnint ciunit;
-       flag ciend;
-       char *cifmt;
-       ftnint cirec;
-} cilist;
-
-/*internal read, write*/
-typedef struct
-{      flag icierr;
-       char *iciunit;
-       flag iciend;
-       char *icifmt;
-       ftnint icirlen;
-       ftnint icirnum;
-} icilist;
-
-/*open*/
-typedef struct
-{      flag oerr;
-       ftnint ounit;
-       char *ofnm;
-       ftnlen ofnmlen;
-       char *osta;
-       char *oacc;
-       char *ofm;
-       ftnint orl;
-       char *oblnk;
-} olist;
-
-/*close*/
-typedef struct
-{      flag cerr;
-       ftnint cunit;
-       char *csta;
-} cllist;
-
-/*rewind, backspace, endfile*/
-typedef struct
-{      flag aerr;
-       ftnint aunit;
-} alist;
-
-/* inquire */
-typedef struct
-{      flag inerr;
-       ftnint inunit;
-       char *infile;
-       ftnlen infilen;
-       ftnint  *inex;  /*parameters in standard's order*/
-       ftnint  *inopen;
-       ftnint  *innum;
-       ftnint  *innamed;
-       char    *inname;
-       ftnlen  innamlen;
-       char    *inacc;
-       ftnlen  inacclen;
-       char    *inseq;
-       ftnlen  inseqlen;
-       char    *indir;
-       ftnlen  indirlen;
-       char    *infmt;
-       ftnlen  infmtlen;
-       char    *inform;
-       ftnint  informlen;
-       char    *inunf;
-       ftnlen  inunflen;
-       ftnint  *inrecl;
-       ftnint  *innrec;
-       char    *inblank;
-       ftnlen  inblanklen;
-} inlist;
-
-#define VOID void
-
-union Multitype {      /* for multiple entry points */
-       shortint h;
-       integer i;
-       real r;
-       doublereal d;
-       complex c;
-       doublecomplex z;
-       };
-
-typedef union Multitype Multitype;
-
-typedef long Long;     /* No longer used; formerly in Namelist */
-
-struct Vardesc {       /* for Namelist */
-       char *name;
-       char *addr;
-       ftnlen *dims;
-       int  type;
-       };
-typedef struct Vardesc Vardesc;
-
-struct Namelist {
-       char *name;
-       Vardesc **vars;
-       int nvars;
-       };
-typedef struct Namelist Namelist;
-
-#ifndef abs
-#define abs(x) ((x) >= 0 ? (x) : -(x))
-#endif
-#define dabs(x) (doublereal)abs(x)
-#ifndef min
-#define min(a,b) ((a) <= (b) ? (a) : (b))
-#endif
-#ifndef max
-#define max(a,b) ((a) >= (b) ? (a) : (b))
-#endif
-#define dmin(a,b) (doublereal)min(a,b)
-#define dmax(a,b) (doublereal)max(a,b)
-
-/* procedure parameter types for -A and -C++ */
-
-#define F2C_proc_par_types 1
-#ifdef __cplusplus
-typedef int /* Unknown procedure type */ (*U_fp)(...);
-typedef shortint (*J_fp)(...);
-typedef integer (*I_fp)(...);
-typedef real (*R_fp)(...);
-typedef doublereal (*D_fp)(...), (*E_fp)(...);
-typedef /* Complex */ VOID (*C_fp)(...);
-typedef /* Double Complex */ VOID (*Z_fp)(...);
-typedef logical (*L_fp)(...);
-typedef shortlogical (*K_fp)(...);
-typedef /* Character */ VOID (*H_fp)(...);
-typedef /* Subroutine */ int (*S_fp)(...);
-#else
-typedef int /* Unknown procedure type */ (*U_fp)();
-typedef shortint (*J_fp)();
-typedef integer (*I_fp)();
-typedef real (*R_fp)();
-typedef doublereal (*D_fp)(), (*E_fp)();
-typedef /* Complex */ VOID (*C_fp)();
-typedef /* Double Complex */ VOID (*Z_fp)();
-typedef logical (*L_fp)();
-typedef shortlogical (*K_fp)();
-typedef /* Character */ VOID (*H_fp)();
-typedef /* Subroutine */ int (*S_fp)();
-#endif
-/* E_fp is for real functions when -R is not specified */
-typedef VOID C_f;      /* complex function */
-typedef VOID H_f;      /* character function */
-typedef VOID Z_f;      /* double complex function */
-typedef doublereal E_f;        /* real function with -R not specified */
-
-/* undef any lower-case symbols that your C compiler predefines, e.g.: */
-
-#ifndef Skip_f2c_Undefs
-#undef cray
-#undef gcos
-#undef mc68010
-#undef mc68020
-#undef mips
-#undef pdp11
-#undef sgi
-#undef sparc
-#undef sun
-#undef sun2
-#undef sun3
-#undef sun4
-#undef u370
-#undef u3b
-#undef u3b2
-#undef u3b5
-#undef unix
-#undef vax
-#endif
-#endif
diff --git a/superlu/License.txt b/superlu/License.txt
deleted file mode 100644
index b5b5b0c6..00000000
--- a/superlu/License.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-Redistribution and use in source and binary forms, with or without 
modification,
-are permitted provided that the following conditions are met: 
-
-(1) Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer. 
-(2) Redistributions in binary form must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation
-and/or other materials provided with the distribution. 
-(3) Neither the name of Lawrence Berkeley National Laboratory, U.S. Dept. of
-Energy nor the names of its contributors may be used to endorse or promote
-products derived from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-  
diff --git a/superlu/Makefile.am b/superlu/Makefile.am
deleted file mode 100644
index 198642f1..00000000
--- a/superlu/Makefile.am
+++ /dev/null
@@ -1,329 +0,0 @@
-#
-# Copyright (c) 2003, The Regents of the University of California, through
-# Lawrence Berkeley National Laboratory (subject to receipt of any required 
-# approvals from U.S. Dept. of Energy) 
-#
-# All rights reserved. 
-#
-# The source code is distributed under BSD license, see the file License.txt
-#
-
-
-#  Copyright (C) 2004-2020 Yves Renard
-#
-#  This file is a part of GetFEM++
-#
-#  GetFEM++  is  free software;  you  can  redistribute  it  and/or modify it
-#  under  the  terms  of the  GNU  Lesser General Public License as published
-#  by  the  Free Software Foundation;  either version 3 of the License,  or
-#  (at your option) any later version along with the GCC Runtime Library
-#  Exception either version 3.1 or (at your option) any later version.
-#  This program  is  distributed  in  the  hope  that it will be useful,  but
-#  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-#  or  FITNESS  FOR  A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-#  License and GCC Runtime Library Exception for more details.
-#  You  should  have received a copy of the GNU Lesser General Public License
-#  along  with  this program;  if not, write to the Free Software Foundation,
-#  Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301, USA.
-
-noinst_HEADERS=\
-       slu_Cnames.h slu_dcomplex.h slu_scomplex.h slu_util.h\
-       supermatrix.h colamd.h slu_cdefs.h slu_ddefs.h\
-       slu_sdefs.h slu_zdefs.h
-
-
-
-if USEBLASLITE
-BLASLITEFILES=BLAS.c BLAS_f2c.h f2c_lite.c
-else
-BLASLITEFILES=
-endif
-
-SRC = ccolumn_bmod.c \
-       ccolumn_dfs.c \
-       ccopy_to_ucol.c \
-       cgscon.c \
-       cgsequ.c \
-       cgsrfs.c \
-       cgssv.c \
-       cgssvx.c \
-       cgstrf.c \
-       cgstrs.c \
-       clacon.c \
-       clangs.c \
-       claqgs.c \
-       cmemory.c \
-       cmyblas2.c \
-       colamd.c \
-       cpanel_bmod.c \
-       cpanel_dfs.c \
-cpivotgrowth.c \
-cpivotL.c \
-cpruneL.c \
-creadhb.c \
-csnode_bmod.c \
-csnode_dfs.c \
-csp_blas2.c \
-csp_blas3.c \
-cutil.c \
-dcolumn_bmod.c \
-dcolumn_dfs.c \
-dcomplex.c \
-dcopy_to_ucol.c \
-dgscon.c \
-dgsequ.c \
-dgsrfs.c \
-dgssv.c \
-dgssvx.c \
-dgstrf.c \
-dgstrs.c \
-dlacon.c \
-dlamch.c \
-dlangs.c \
-dlaqgs.c \
-dmemory.c \
-dmyblas2.c \
-dpanel_bmod.c \
-dpanel_dfs.c \
-dpivotgrowth.c \
-dpivotL.c \
-dpruneL.c \
-dreadhb.c \
-dsnode_bmod.c \
-dsnode_dfs.c \
-dsp_blas2.c \
-dsp_blas3.c \
-dutil.c \
-dzsum1.c \
-get_perm_c.c \
-heap_relax_snode.c \
-icmax1.c \
-izmax1.c \
-lsame.c \
-memory.c \
-mmd.c \
-relax_snode.c \
-scolumn_bmod.c \
-scolumn_dfs.c \
-scomplex.c \
-scopy_to_ucol.c \
-scsum1.c \
-sgscon.c \
-sgsequ.c \
-sgsrfs.c \
-sgssv.c \
-sgssvx.c \
-sgstrf.c \
-sgstrs.c \
-slacon.c \
-slamch.c \
-slangs.c \
-slaqgs.c \
-smemory.c \
-smyblas2.c \
-spanel_bmod.c \
-spanel_dfs.c \
-sp_coletree.c \
-sp_ienv.c \
-spivotgrowth.c \
-spivotL.c \
-sp_preorder.c \
-spruneL.c \
-sreadhb.c \
-ssnode_bmod.c \
-ssnode_dfs.c \
-ssp_blas2.c \
-ssp_blas3.c \
-superlu_timer.c \
-sutil.c \
-util.c \
-zcolumn_bmod.c \
-zcolumn_dfs.c \
-zcopy_to_ucol.c \
-zgscon.c \
-zgsequ.c \
-zgsrfs.c \
-zgssv.c \
-zgssvx.c \
-zgstrf.c \
-zgstrs.c \
-zlacon.c \
-zlangs.c \
-zlaqgs.c \
-zmemory.c \
-zmyblas2.c \
-zpanel_bmod.c \
-zpanel_dfs.c \
-zpivotgrowth.c \
-zpivotL.c \
-zpruneL.c \
-zreadhb.c \
-zsnode_bmod.c \
-zsnode_dfs.c \
-zsp_blas2.c \
-zsp_blas3.c \
-zutil.c $(BLASLITEFILES)
-
-#vire: xerbla.c 
-
-
-noinst_LTLIBRARIES = libsuperlu.la
-libsuperlu_la_SOURCES = $(SRC)
-#libsuperlu_la_LDFLAGS = ${LIBTOOL_VERSION_INFO}
-libsuperlu_la_CPPFLAGS = @SUPERLU_CPPFLAGS@
-
-CLEANFILES = ii_files/* *.o.d
-
-EXTRA_DIST=License.txt \
-BLAS/License.txt \
-BLAS/caxpy.f \
-BLAS/ccopy.f \
-BLAS/cdotc.f \
-BLAS/cdotu.f \
-BLAS/cgbmv.f \
-BLAS/cgemm.f \
-BLAS/cgemv.f \
-BLAS/cgerc.f \
-BLAS/cgeru.f \
-BLAS/chbmv.f \
-BLAS/chemm.f \
-BLAS/chemv.f \
-BLAS/cher2.f \
-BLAS/cher2k.f \
-BLAS/cher.f \
-BLAS/cherk.f \
-BLAS/chpmv.f \
-BLAS/chpr2.f \
-BLAS/chpr.f \
-BLAS/crotg.f \
-BLAS/cscal.f \
-BLAS/csrot.f \
-BLAS/csscal.f \
-BLAS/cswap.f \
-BLAS/csymm.f \
-BLAS/csyr2k.f \
-BLAS/csyrk.f \
-BLAS/ctbmv.f \
-BLAS/ctbsv.f \
-BLAS/ctpmv.f \
-BLAS/ctpsv.f \
-BLAS/ctrmm.f \
-BLAS/ctrmv.f \
-BLAS/ctrsm.f \
-BLAS/ctrsv.f \
-BLAS/dasum.f \
-BLAS/daxpy.f \
-BLAS/dcabs1.f \
-BLAS/dcopy.f \
-BLAS/ddot.f \
-BLAS/dgbmv.f \
-BLAS/dgemm.f \
-BLAS/dgemv.f \
-BLAS/dger.f \
-BLAS/dnrm2.f \
-BLAS/drot.f \
-BLAS/drotg.f \
-BLAS/drotm.f \
-BLAS/drotmg.f \
-BLAS/dsbmv.f \
-BLAS/dscal.f \
-BLAS/dsdot.f \
-BLAS/dspmv.f \
-BLAS/dspr2.f \
-BLAS/dspr.f \
-BLAS/dswap.f \
-BLAS/dsymm.f \
-BLAS/dsymv.f \
-BLAS/dsyr2.f \
-BLAS/dsyr2k.f \
-BLAS/dsyr.f \
-BLAS/dsyrk.f \
-BLAS/dtbmv.f \
-BLAS/dtbsv.f \
-BLAS/dtpmv.f \
-BLAS/dtpsv.f \
-BLAS/dtrmm.f \
-BLAS/dtrmv.f \
-BLAS/dtrsm.f \
-BLAS/dtrsv.f \
-BLAS/dzasum.f \
-BLAS/dznrm2.f \
-BLAS/icamax.f \
-BLAS/idamax.f \
-BLAS/isamax.f \
-BLAS/izamax.f \
-BLAS/lsame.f \
-BLAS/sasum.f \
-BLAS/saxpy.f \
-BLAS/scabs1.f \
-BLAS/scasum.f \
-BLAS/scnrm2.f \
-BLAS/scopy.f \
-BLAS/sdot.f \
-BLAS/sdsdot.f \
-BLAS/sgbmv.f \
-BLAS/sgemm.f \
-BLAS/sgemv.f \
-BLAS/sger.f \
-BLAS/snrm2.f \
-BLAS/srot.f \
-BLAS/srotg.f \
-BLAS/srotm.f \
-BLAS/srotmg.f \
-BLAS/ssbmv.f \
-BLAS/sscal.f \
-BLAS/sspmv.f \
-BLAS/sspr2.f \
-BLAS/sspr.f \
-BLAS/sswap.f \
-BLAS/ssymm.f \
-BLAS/ssymv.f \
-BLAS/ssyr2.f \
-BLAS/ssyr2k.f \
-BLAS/ssyr.f \
-BLAS/ssyrk.f \
-BLAS/stbmv.f \
-BLAS/stbsv.f \
-BLAS/stpmv.f \
-BLAS/stpsv.f \
-BLAS/strmm.f \
-BLAS/strmv.f \
-BLAS/strsm.f \
-BLAS/strsv.f \
-BLAS/xerbla_array.f \
-BLAS/xerbla.f \
-BLAS/zaxpy.f \
-BLAS/zcopy.f \
-BLAS/zdotc.f \
-BLAS/zdotu.f \
-BLAS/zdrot.f \
-BLAS/zdscal.f \
-BLAS/zgbmv.f \
-BLAS/zgemm.f \
-BLAS/zgemv.f \
-BLAS/zgerc.f \
-BLAS/zgeru.f \
-BLAS/zhbmv.f \
-BLAS/zhemm.f \
-BLAS/zhemv.f \
-BLAS/zher2.f \
-BLAS/zher2k.f \
-BLAS/zher.f \
-BLAS/zherk.f \
-BLAS/zhpmv.f \
-BLAS/zhpr2.f \
-BLAS/zhpr.f \
-BLAS/zrotg.f \
-BLAS/zscal.f \
-BLAS/zswap.f \
-BLAS/zsymm.f \
-BLAS/zsyr2k.f \
-BLAS/zsyrk.f \
-BLAS/ztbmv.f \
-BLAS/ztbsv.f \
-BLAS/ztpmv.f \
-BLAS/ztpsv.f \
-BLAS/ztrmm.f \
-BLAS/ztrmv.f \
-BLAS/ztrsm.f \
-BLAS/ztrsv.f
diff --git a/superlu/ccolumn_bmod.c b/superlu/ccolumn_bmod.c
deleted file mode 100644
index d6341af1..00000000
--- a/superlu/ccolumn_bmod.c
+++ /dev/null
@@ -1,362 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_cdefs.h"
-extern void ctrsv_();
-extern void cgemv_();
-
-
-/* 
- * Function prototypes 
- */
-void cusolve(int, int, complex*, complex*);
-void clsolve(int, int, complex*, complex*);
-void cmatvec(int, int, int, complex*, complex*, complex*);
-
-
-
-/* Return value:   0 - successful return
- *               > 0 - number of bytes allocated when run out of space
- */
-int
-ccolumn_bmod (
-            const int  jcol,     /* in */
-            const int  nseg,     /* in */
-            complex     *dense,          /* in */
-            complex     *tempv,          /* working array */
-            int        *segrep,  /* in */
-            int        *repfnz,  /* in */
-            int        fpanelc,  /* in -- first column in the current panel */
-            GlobalLU_t *Glu,     /* modified */
-            SuperLUStat_t *stat  /* output */
-            )
-{
-/*
- * Purpose:
- * ========
- *    Performs numeric block updates (sup-col) in topological order.
- *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
- *    Special processing on the supernodal portion of L\U[*,j]
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-         ftcs2 = _cptofcd("N", strlen("N")),
-         ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    int         incx = 1, incy = 1;
-    complex      alpha, beta;
-    
-    /* krep = representative of current k-th supernode
-     * fsupc = first supernodal column
-     * nsupc = no of columns in supernode
-     * nsupr = no of rows in supernode (used as leading dimension)
-     * luptr = location of supernodal LU-block in storage
-     * kfnz = first nonz in the k-th supernodal segment
-     * no_zeros = no of leading zeros in a supernodal U-segment
-     */
-    complex       ukj, ukj1, ukj2;
-    int          luptr, luptr1, luptr2;
-    int          fsupc, nsupc, nsupr, segsze;
-    int          nrow;   /* No of rows in the matrix of matrix-vector */
-    int          jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
-    register int lptr, kfnz, isub, irow, i;
-    register int no_zeros, new_next; 
-    int          ufirst, nextlu;
-    int          fst_col; /* First column within small LU update */
-    int          d_fsupc; /* Distance between the first column of the current
-                            panel and the first column of the current snode. */
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    complex       *lusup;
-    int          *xlusup;
-    int          nzlumax;
-    complex       *tempv1;
-    complex      zero = {0.0, 0.0};
-    complex      one = {1.0, 0.0};
-    complex      none = {-1.0, 0.0};
-    complex     comp_temp, comp_temp1;
-    int          mem_error;
-    flops_t      *ops = stat->ops;
-
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    nzlumax = Glu->nzlumax;
-    jcolp1 = jcol + 1;
-    jsupno = supno[jcol];
-    
-    /* 
-     * For each nonz supernode segment of U[*,j] in topological order 
-     */
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-
-       krep = segrep[k];
-       k--;
-       ksupno = supno[krep];
-       if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
-
-           fsupc = xsup[ksupno];
-           fst_col = SUPERLU_MAX ( fsupc, fpanelc );
-
-           /* Distance from the current supernode to the current panel; 
-              d_fsupc=0 if fsupc > fpanelc. */
-           d_fsupc = fst_col - fsupc; 
-
-           luptr = xlusup[fst_col] + d_fsupc;
-           lptr = xlsub[fsupc] + d_fsupc;
-
-           kfnz = repfnz[krep];
-           kfnz = SUPERLU_MAX ( kfnz, fpanelc );
-
-           segsze = krep - kfnz + 1;
-           nsupc = krep - fst_col + 1;
-           nsupr = xlsub[fsupc+1] - xlsub[fsupc];      /* Leading dimension */
-           nrow = nsupr - d_fsupc - nsupc;
-           krep_ind = lptr + nsupc - 1;
-
-
-
-
-           /* 
-            * Case 1: Update U-segment of size 1 -- col-col update 
-            */
-           if ( segsze == 1 ) {
-               ukj = dense[lsub[krep_ind]];
-               luptr += nsupr*(nsupc-1) + nsupc;
-
-               for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                   irow = lsub[i];
-                   cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                   c_sub(&dense[irow], &dense[irow], &comp_temp);
-                   luptr++;
-               }
-
-           } else if ( segsze <= 3 ) {
-               ukj = dense[lsub[krep_ind]];
-               luptr += nsupr*(nsupc-1) + nsupc-1;
-               ukj1 = dense[lsub[krep_ind - 1]];
-               luptr1 = luptr - nsupr;
-
-               if ( segsze == 2 ) { /* Case 2: 2cols-col update */
-                   cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                   c_sub(&ukj, &ukj, &comp_temp);
-                   dense[lsub[krep_ind]] = ukj;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                       irow = lsub[i];
-                       luptr++;
-                       luptr1++;
-                       cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                       cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                       c_add(&comp_temp, &comp_temp, &comp_temp1);
-                       c_sub(&dense[irow], &dense[irow], &comp_temp);
-                   }
-               } else { /* Case 3: 3cols-col update */
-                   ukj2 = dense[lsub[krep_ind - 2]];
-                   luptr2 = luptr1 - nsupr;
-                   cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
-                   c_sub(&ukj1, &ukj1, &comp_temp);
-
-                   cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                   cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                   c_add(&comp_temp, &comp_temp, &comp_temp1);
-                   c_sub(&ukj, &ukj, &comp_temp);
-
-                   dense[lsub[krep_ind]] = ukj;
-                   dense[lsub[krep_ind-1]] = ukj1;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                       irow = lsub[i];
-                       luptr++;
-                       luptr1++;
-                       luptr2++;
-                       cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                       cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                       c_add(&comp_temp, &comp_temp, &comp_temp1);
-                       cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                       c_add(&comp_temp, &comp_temp, &comp_temp1);
-                       c_sub(&dense[irow], &dense[irow], &comp_temp);
-                   }
-               }
-
-
-           } else {
-               /*
-                * Case: sup-col update
-                * Perform a triangular solve and block update,
-                * then scatter the result of sup-col update to dense
-                */
-
-               no_zeros = kfnz - fst_col;
-
-               /* Copy U[*,j] segment from dense[*] to tempv[*] */
-               isub = lptr + no_zeros;
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   tempv[i] = dense[irow];
-                   ++isub; 
-               }
-
-               /* Dense triangular solve -- start effective triangle */
-               luptr += nsupr * no_zeros + no_zeros; 
-               
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-               CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                      &nsupr, tempv, &incx );
-#else          
-               ctrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                      &nsupr, tempv, &incx );
-#endif         
-               luptr += segsze;  /* Dense matrix-vector */
-               tempv1 = &tempv[segsze];
-                alpha = one;
-                beta = zero;
-#ifdef _CRAY
-               CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
-                      &nsupr, tempv, &incx, &beta, tempv1, &incy );
-#else
-               cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
-                      &nsupr, tempv, &incx, &beta, tempv1, &incy );
-#endif
-#else
-               clsolve ( nsupr, segsze, &lusup[luptr], tempv );
-
-               luptr += segsze;  /* Dense matrix-vector */
-               tempv1 = &tempv[segsze];
-               cmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
-#endif
-               
-               
-                /* Scatter tempv[] into SPA dense[] as a temporary storage */
-                isub = lptr + no_zeros;
-                for (i = 0; i < segsze; i++) {
-                    irow = lsub[isub];
-                    dense[irow] = tempv[i];
-                    tempv[i] = zero;
-                    ++isub;
-                }
-
-               /* Scatter tempv1[] into SPA dense[] */
-               for (i = 0; i < nrow; i++) {
-                   irow = lsub[isub];
-                   c_sub(&dense[irow], &dense[irow], &tempv1[i]);
-                   tempv1[i] = zero;
-                   ++isub;
-               }
-           }
-           
-       } /* if jsupno ... */
-
-    } /* for each segment... */
-
-    /*
-     * Process the supernodal portion of L\U[*,j]
-     */
-    nextlu = xlusup[jcol];
-    fsupc = xsup[jsupno];
-
-    /* Copy the SPA dense into L\U[*,j] */
-    new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
-    while ( new_next > nzlumax ) {
-       if (mem_error = cLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu))
-           return (mem_error);
-       lusup = Glu->lusup;
-       lsub = Glu->lsub;
-    }
-
-    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
-       irow = lsub[isub];
-       lusup[nextlu] = dense[irow];
-        dense[irow] = zero;
-       ++nextlu;
-    }
-
-    xlusup[jcolp1] = nextlu;   /* Close L\U[*,jcol] */
-
-    /* For more updates within the panel (also within the current supernode), 
-     * should start from the first column of the panel, or the first column 
-     * of the supernode, whichever is bigger. There are 2 cases:
-     *    1) fsupc < fpanelc, then fst_col := fpanelc
-     *    2) fsupc >= fpanelc, then fst_col := fsupc
-     */
-    fst_col = SUPERLU_MAX ( fsupc, fpanelc );
-
-    if ( fst_col < jcol ) {
-
-       /* Distance between the current supernode and the current panel.
-          d_fsupc=0 if fsupc >= fpanelc. */
-       d_fsupc = fst_col - fsupc;
-
-       lptr = xlsub[fsupc] + d_fsupc;
-       luptr = xlusup[fst_col] + d_fsupc;
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];  /* Leading dimension */
-       nsupc = jcol - fst_col; /* Excluding jcol */
-       nrow = nsupr - d_fsupc - nsupc;
-
-       /* Points to the beginning of jcol in snode L\U(jsupno) */
-       ufirst = xlusup[jcol] + d_fsupc;        
-
-       ops[TRSV] += 4 * nsupc * (nsupc - 1);
-       ops[GEMV] += 8 * nrow * nsupc;
-       
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-       CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
-              &nsupr, &lusup[ufirst], &incx );
-#else
-       ctrsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
-              &nsupr, &lusup[ufirst], &incx );
-#endif
-       
-       alpha = none; beta = one; /* y := beta*y + alpha*A*x */
-
-#ifdef _CRAY
-       CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
-              &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-#else
-       cgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
-              &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-#endif
-#else
-       clsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
-
-       cmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
-               &lusup[ufirst], tempv );
-       
-        /* Copy updates from tempv[*] into lusup[*] */
-       isub = ufirst + nsupc;
-       for (i = 0; i < nrow; i++) {
-           c_sub(&lusup[isub], &lusup[isub], &tempv[i]);
-           tempv[i] = zero;
-           ++isub;
-       }
-
-#endif
-       
-       
-    } /* if fst_col < jcol ... */ 
-
-    return 0;
-}
diff --git a/superlu/ccolumn_dfs.c b/superlu/ccolumn_dfs.c
deleted file mode 100644
index e60ba5af..00000000
--- a/superlu/ccolumn_dfs.c
+++ /dev/null
@@ -1,266 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include "slu_cdefs.h"
-
-/* What type of supernodes we want */
-#define T2_SUPER
-
-int
-ccolumn_dfs(
-          const int  m,         /* in - number of rows in the matrix */
-          const int  jcol,      /* in */
-          int        *perm_r,   /* in */
-          int        *nseg,     /* modified - with new segments appended */
-          int        *lsub_col, /* in - defines the RHS vector to start the 
dfs */
-          int        *segrep,   /* modified - with new segments appended */
-          int        *repfnz,   /* modified */
-          int        *xprune,   /* modified */
-          int        *marker,   /* modified */
-          int        *parent,   /* working array */
-          int        *xplore,   /* working array */
-          GlobalLU_t *Glu       /* modified */
-          )
-{
-/* 
- * Purpose
- * =======
- *   "column_dfs" performs a symbolic factorization on column jcol, and
- *   decide the supernode boundary.
- *
- *   This routine does not use numeric values, but only use the RHS 
- *   row indices to start the dfs.
- *
- *   A supernode representative is the last column of a supernode.
- *   The nonzeros in U[*,j] are segments that end at supernodal
- *   representatives. The routine returns a list of such supernodal 
- *   representatives in topological order of the dfs that generates them.
- *   The location of the first nonzero in each such supernodal segment
- *   (supernodal entry location) is also returned.
- *
- * Local parameters
- * ================
- *   nseg: no of segments in current U[*,j]
- *   jsuper: jsuper=EMPTY if column j does not belong to the same
- *     supernode as j-1. Otherwise, jsuper=nsuper.
- *
- *   marker2: A-row --> A-row/col (0/1)
- *   repfnz: SuperA-col --> PA-row
- *   parent: SuperA-col --> SuperA-col
- *   xplore: SuperA-col --> index to L-structure
- *
- * Return value
- * ============
- *     0  success;
- *   > 0  number of bytes allocated when run out of space.
- *
- */
-    int     jcolp1, jcolm1, jsuper, nsuper, nextl;
-    int     k, krep, krow, kmark, kperm;
-    int     *marker2;           /* Used for small panel LU */
-    int            fsupc;              /* First column of a snode */
-    int     myfnz;             /* First nonz column of a U-segment */
-    int            chperm, chmark, chrep, kchild;
-    int     xdfs, maxdfs, kpar, oldrep;
-    int     jptr, jm1ptr;
-    int     ito, ifrom, istop; /* Used to compress row subscripts */
-    int     mem_error;
-    int     *xsup, *supno, *lsub, *xlsub;
-    int     nzlmax;
-    static  int  first = 1, maxsuper;
-    
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    nzlmax  = Glu->nzlmax;
-
-    if ( first ) {
-       maxsuper = sp_ienv(3);
-       first = 0;
-    }
-    jcolp1  = jcol + 1;
-    jcolm1  = jcol - 1;
-    nsuper  = supno[jcol];
-    jsuper  = nsuper;
-    nextl   = xlsub[jcol];
-    marker2 = &marker[2*m];
-
-
-    /* For each nonzero in A[*,jcol] do dfs */
-    for (k = 0; lsub_col[k] != EMPTY; k++) {
-
-       krow = lsub_col[k];
-       lsub_col[k] = EMPTY;
-       kmark = marker2[krow];          
-
-       /* krow was visited before, go to the next nonz */
-        if ( kmark == jcol ) continue; 
-
-       /* For each unmarked nbr krow of jcol
-        *      krow is in L: place it in structure of L[*,jcol]
-        */
-       marker2[krow] = jcol;
-       kperm = perm_r[krow];
-
-       if ( kperm == EMPTY ) {
-           lsub[nextl++] = krow;       /* krow is indexed into A */
-           if ( nextl >= nzlmax ) {
-               if ( mem_error = cLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-                   return (mem_error);
-               lsub = Glu->lsub;
-           }
-            if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing 
*/
-       } else {
-           /*  krow is in U: if its supernode-rep krep
-            *  has been explored, update repfnz[*]
-            */
-           krep = xsup[supno[kperm]+1] - 1;
-           myfnz = repfnz[krep];
-
-           if ( myfnz != EMPTY ) {     /* Visited before */
-               if ( myfnz > kperm ) repfnz[krep] = kperm;
-               /* continue; */
-           }
-           else {
-               /* Otherwise, perform dfs starting at krep */
-               oldrep = EMPTY;
-               parent[krep] = oldrep;
-               repfnz[krep] = kperm;
-               xdfs = xlsub[krep];
-               maxdfs = xprune[krep];
-
-               do {
-                   /* 
-                    * For each unmarked kchild of krep 
-                    */
-                   while ( xdfs < maxdfs ) {
-
-                       kchild = lsub[xdfs];
-                       xdfs++;
-                       chmark = marker2[kchild];
-
-                       if ( chmark != jcol ) { /* Not reached yet */
-                           marker2[kchild] = jcol;
-                           chperm = perm_r[kchild];
-
-                           /* Case kchild is in L: place it in L[*,k] */
-                           if ( chperm == EMPTY ) {
-                               lsub[nextl++] = kchild;
-                               if ( nextl >= nzlmax ) {
-                                   if ( mem_error =
-                                        
cLUMemXpand(jcol,nextl,LSUB,&nzlmax,Glu) )
-                                       return (mem_error);
-                                   lsub = Glu->lsub;
-                               }
-                               if ( chmark != jcolm1 ) jsuper = EMPTY;
-                           } else {
-                               /* Case kchild is in U: 
-                                *   chrep = its supernode-rep. If its rep has 
-                                *   been explored, update its repfnz[*]
-                                */
-                               chrep = xsup[supno[chperm]+1] - 1;
-                               myfnz = repfnz[chrep];
-                               if ( myfnz != EMPTY ) { /* Visited before */
-                                   if ( myfnz > chperm )
-                                       repfnz[chrep] = chperm;
-                               } else {
-                                   /* Continue dfs at super-rep of kchild */
-                                   xplore[krep] = xdfs;        
-                                   oldrep = krep;
-                                   krep = chrep; /* Go deeper down G(L^t) */
-                                   parent[krep] = oldrep;
-                                   repfnz[krep] = chperm;
-                                   xdfs = xlsub[krep];     
-                                   maxdfs = xprune[krep];
-                               } /* else */
-
-                          } /* else */
-
-                       } /* if */
-
-                   } /* while */
-
-                   /* krow has no more unexplored nbrs;
-                    *    place supernode-rep krep in postorder DFS.
-                    *    backtrack dfs to its parent
-                    */
-                   segrep[*nseg] = krep;
-                   ++(*nseg);
-                   kpar = parent[krep]; /* Pop from stack, mimic recursion */
-                   if ( kpar == EMPTY ) break; /* dfs done */
-                   krep = kpar;
-                   xdfs = xplore[krep];
-                   maxdfs = xprune[krep];
-
-               } while ( kpar != EMPTY );      /* Until empty stack */
-
-           } /* else */
-
-       } /* else */
-
-    } /* for each nonzero ... */
-
-    /* Check to see if j belongs in the same supernode as j-1 */
-    if ( jcol == 0 ) { /* Do nothing for column 0 */
-       nsuper = supno[0] = 0;
-    } else {
-       fsupc = xsup[nsuper];
-       jptr = xlsub[jcol];     /* Not compressed yet */
-       jm1ptr = xlsub[jcolm1];
-
-#ifdef T2_SUPER
-       if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY;
-#endif
-       /* Make sure the number of columns in a supernode doesn't
-          exceed threshold. */
-       if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY;
-
-       /* If jcol starts a new supernode, reclaim storage space in
-        * lsub from the previous supernode. Note we only store
-        * the subscript set of the first and last columns of
-        * a supernode. (first for num values, last for pruning)
-        */
-       if ( jsuper == EMPTY ) {        /* starts a new supernode */
-           if ( (fsupc < jcolm1-1) ) { /* >= 3 columns in nsuper */
-#ifdef CHK_COMPRESS
-               printf("  Compress lsub[] at super %d-%d\n", fsupc, jcolm1);
-#endif
-               ito = xlsub[fsupc+1];
-               xlsub[jcolm1] = ito;
-               istop = ito + jptr - jm1ptr;
-               xprune[jcolm1] = istop; /* Initialize xprune[jcol-1] */
-               xlsub[jcol] = istop;
-               for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
-                   lsub[ito] = lsub[ifrom];
-               nextl = ito;            /* = istop + length(jcol) */
-           }
-           nsuper++;
-           supno[jcol] = nsuper;
-       } /* if a new supernode */
-
-    }  /* else: jcol > 0 */ 
-    
-    /* Tidy up the pointers before exit */
-    xsup[nsuper+1] = jcolp1;
-    supno[jcolp1]  = nsuper;
-    xprune[jcol]   = nextl;    /* Initialize upper bound for pruning */
-    xlsub[jcolp1]  = nextl;
-
-    return 0;
-}
diff --git a/superlu/ccopy_to_ucol.c b/superlu/ccopy_to_ucol.c
deleted file mode 100644
index a0554a96..00000000
--- a/superlu/ccopy_to_ucol.c
+++ /dev/null
@@ -1,112 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_cdefs.h"
-
-int
-ccopy_to_ucol(
-             int        jcol,    /* in */
-             int        nseg,    /* in */
-             int        *segrep,  /* in */
-             int        *repfnz,  /* in */
-             int        *perm_r,  /* in */
-             complex     *dense,   /* modified - reset to zero on return */
-             GlobalLU_t *Glu      /* modified */
-             )
-{
-/* 
- * Gather from SPA dense[*] to global ucol[*].
- */
-    int ksub, krep, ksupno;
-    int i, k, kfnz, segsze;
-    int fsupc, isub, irow;
-    int jsupno, nextu;
-    int new_next, mem_error;
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-    complex    *ucol;
-    int       *usub, *xusub;
-    int       nzumax;
-
-    complex zero = {0.0, 0.0};
-
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    nzumax  = Glu->nzumax;
-    
-    jsupno = supno[jcol];
-    nextu  = xusub[jcol];
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-       krep = segrep[k--];
-       ksupno = supno[krep];
-
-       if ( ksupno != jsupno ) { /* Should go into ucol[] */
-           kfnz = repfnz[krep];
-           if ( kfnz != EMPTY ) {      /* Nonzero U-segment */
-
-               fsupc = xsup[ksupno];
-               isub = xlsub[fsupc] + kfnz - fsupc;
-               segsze = krep - kfnz + 1;
-
-               new_next = nextu + segsze;
-               while ( new_next > nzumax ) {
-                   if (mem_error = cLUMemXpand(jcol, nextu, UCOL, &nzumax, 
Glu))
-                       return (mem_error);
-                   ucol = Glu->ucol;
-                   if (mem_error = cLUMemXpand(jcol, nextu, USUB, &nzumax, 
Glu))
-                       return (mem_error);
-                   usub = Glu->usub;
-                   lsub = Glu->lsub;
-               }
-               
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   usub[nextu] = perm_r[irow];
-                   ucol[nextu] = dense[irow];
-                   dense[irow] = zero;
-                   nextu++;
-                   isub++;
-               } 
-
-           }
-
-       }
-
-    } /* for each segment... */
-
-    xusub[jcol + 1] = nextu;      /* Close U[*,jcol] */
-    return 0;
-}
diff --git a/superlu/cgscon.c b/superlu/cgscon.c
deleted file mode 100644
index 5bd3c49a..00000000
--- a/superlu/cgscon.c
+++ /dev/null
@@ -1,155 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-  Copyright (c) 2003 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-/*
- * File name:  cgscon.c
- * History:     Modified from lapack routines CGECON.
- */
-#include <math.h>
-#include "slu_cdefs.h"
-
-void
-cgscon(char *norm, SuperMatrix *L, SuperMatrix *U,
-       float anorm, float *rcond, SuperLUStat_t *stat, int *info)
-{
-/*
-    Purpose   
-    =======   
-
-    CGSCON estimates the reciprocal of the condition number of a general 
-    real matrix A, in either the 1-norm or the infinity-norm, using   
-    the LU factorization computed by CGETRF.   
-
-    An estimate is obtained for norm(inv(A)), and the reciprocal of the   
-    condition number is computed as   
-       RCOND = 1 / ( norm(A) * norm(inv(A)) ).   
-
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
- 
-    Arguments   
-    =========   
-
-    NORM    (input) char*
-            Specifies whether the 1-norm condition number or the   
-            infinity-norm condition number is required:   
-            = '1' or 'O':  1-norm;   
-            = 'I':         Infinity-norm.
-           
-    L       (input) SuperMatrix*
-            The factor L from the factorization Pr*A*Pc=L*U as computed by
-            cgstrf(). Use compressed row subscripts storage for supernodes,
-            i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
- 
-    U       (input) SuperMatrix*
-            The factor U from the factorization Pr*A*Pc=L*U as computed by
-            cgstrf(). Use column-wise storage scheme, i.e., U has types:
-            Stype = SLU_NC, Dtype = SLU_C, Mtype = TRU.
-           
-    ANORM   (input) float
-            If NORM = '1' or 'O', the 1-norm of the original matrix A.   
-            If NORM = 'I', the infinity-norm of the original matrix A.
-           
-    RCOND   (output) float*
-            The reciprocal of the condition number of the matrix A,   
-            computed as RCOND = 1/(norm(A) * norm(inv(A))).
-           
-    INFO    (output) int*
-            = 0:  successful exit   
-            < 0:  if INFO = -i, the i-th argument had an illegal value   
-
-    ===================================================================== 
-*/
-
-    /* Local variables */
-    int    kase, kase1, onenrm, i;
-    float ainvnm;
-    complex *work;
-    extern int crscl_(int *, complex *, complex *, int *);
-
-    extern int clacon_(int *, complex *, complex *, float *, int *);
-
-    
-    /* Test the input parameters. */
-    *info = 0;
-    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
-    if (! onenrm && ! lsame_(norm, "I")) *info = -1;
-    else if (L->nrow < 0 || L->nrow != L->ncol ||
-             L->Stype != SLU_SC || L->Dtype != SLU_C || L->Mtype != SLU_TRLU)
-        *info = -2;
-    else if (U->nrow < 0 || U->nrow != U->ncol ||
-             U->Stype != SLU_NC || U->Dtype != SLU_C || U->Mtype != SLU_TRU) 
-       *info = -3;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("cgscon", &i);
-       return;
-    }
-
-    /* Quick return if possible */
-    *rcond = 0.;
-    if ( L->nrow == 0 || U->nrow == 0) {
-       *rcond = 1.;
-       return;
-    }
-
-    work = complexCalloc( 3*L->nrow );
-
-
-    if ( !work )
-       ABORT("Malloc fails for work arrays in cgscon.");
-    
-    /* Estimate the norm of inv(A). */
-    ainvnm = 0.;
-    if ( onenrm ) kase1 = 1;
-    else kase1 = 2;
-    kase = 0;
-
-    do {
-       clacon_(&L->nrow, &work[L->nrow], &work[0], &ainvnm, &kase);
-
-       if (kase == 0) break;
-
-       if (kase == kase1) {
-           /* Multiply by inv(L). */
-           sp_ctrsv("L", "No trans", "Unit", L, U, &work[0], stat, info);
-
-           /* Multiply by inv(U). */
-           sp_ctrsv("U", "No trans", "Non-unit", L, U, &work[0], stat, info);
-           
-       } else {
-
-           /* Multiply by inv(U'). */
-           sp_ctrsv("U", "Transpose", "Non-unit", L, U, &work[0], stat, info);
-
-           /* Multiply by inv(L'). */
-           sp_ctrsv("L", "Transpose", "Unit", L, U, &work[0], stat, info);
-           
-       }
-
-    } while ( kase != 0 );
-
-    /* Compute the estimate of the reciprocal condition number. */
-    if (ainvnm != 0.) *rcond = (1. / ainvnm) / anorm;
-
-    SUPERLU_FREE (work);
-    return;
-
-} /* cgscon */
-
diff --git a/superlu/cgsequ.c b/superlu/cgsequ.c
deleted file mode 100644
index 9dbacef3..00000000
--- a/superlu/cgsequ.c
+++ /dev/null
@@ -1,205 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-/*
- * File name:  cgsequ.c
- * History:     Modified from LAPACK routine CGEEQU
- */
-#include <math.h>
-#include "slu_cdefs.h"
-
-void
-cgsequ(SuperMatrix *A, float *r, float *c, float *rowcnd,
-       float *colcnd, float *amax, int *info)
-{
-/*    
-    Purpose   
-    =======   
-
-    CGSEQU computes row and column scalings intended to equilibrate an   
-    M-by-N sparse matrix A and reduce its condition number. R returns the row
-    scale factors and C the column scale factors, chosen to try to make   
-    the largest element in each row and column of the matrix B with   
-    elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   
-
-    R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
-    number and BIGNUM = largest safe number.  Use of these scaling   
-    factors is not guaranteed to reduce the condition number of A but   
-    works well in practice.   
-
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
- 
-    Arguments   
-    =========   
-
-    A       (input) SuperMatrix*
-            The matrix of dimension (A->nrow, A->ncol) whose equilibration
-            factors are to be computed. The type of A can be:
-            Stype = SLU_NC; Dtype = SLU_C; Mtype = SLU_GE.
-           
-    R       (output) float*, size A->nrow
-            If INFO = 0 or INFO > M, R contains the row scale factors   
-            for A.
-           
-    C       (output) float*, size A->ncol
-            If INFO = 0,  C contains the column scale factors for A.
-           
-    ROWCND  (output) float*
-            If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
-            smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
-            AMAX is neither too large nor too small, it is not worth   
-            scaling by R.
-           
-    COLCND  (output) float*
-            If INFO = 0, COLCND contains the ratio of the smallest   
-            C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
-            worth scaling by C.
-           
-    AMAX    (output) float*
-            Absolute value of largest matrix element.  If AMAX is very   
-            close to overflow or very close to underflow, the matrix   
-            should be scaled.
-           
-    INFO    (output) int*
-            = 0:  successful exit   
-            < 0:  if INFO = -i, the i-th argument had an illegal value   
-            > 0:  if INFO = i,  and i is   
-                  <= A->nrow:  the i-th row of A is exactly zero   
-                  >  A->ncol:  the (i-M)-th column of A is exactly zero   
-
-    ===================================================================== 
-*/
-
-    /* Local variables */
-    NCformat *Astore;
-    complex   *Aval;
-    int i, j, irow;
-    float rcmin, rcmax;
-    float bignum, smlnum;
-    extern double slamch_(char *);
-    
-    /* Test the input parameters. */
-    *info = 0;
-    if ( A->nrow < 0 || A->ncol < 0 ||
-        A->Stype != SLU_NC || A->Dtype != SLU_C || A->Mtype != SLU_GE )
-       *info = -1;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("cgsequ", &i);
-       return;
-    }
-
-    /* Quick return if possible */
-    if ( A->nrow == 0 || A->ncol == 0 ) {
-       *rowcnd = 1.;
-       *colcnd = 1.;
-       *amax = 0.;
-       return;
-    }
-
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    
-    /* Get machine constants. */
-    smlnum = slamch_("S");
-    bignum = 1. / smlnum;
-
-    /* Compute row scale factors. */
-    for (i = 0; i < A->nrow; ++i) r[i] = 0.;
-
-    /* Find the maximum element in each row. */
-    for (j = 0; j < A->ncol; ++j)
-       for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-           irow = Astore->rowind[i];
-           r[irow] = SUPERLU_MAX( r[irow], c_abs1(&Aval[i]) );
-       }
-
-    /* Find the maximum and minimum scale factors. */
-    rcmin = bignum;
-    rcmax = 0.;
-    for (i = 0; i < A->nrow; ++i) {
-       rcmax = SUPERLU_MAX(rcmax, r[i]);
-       rcmin = SUPERLU_MIN(rcmin, r[i]);
-    }
-    *amax = rcmax;
-
-    if (rcmin == 0.) {
-       /* Find the first zero scale factor and return an error code. */
-       for (i = 0; i < A->nrow; ++i)
-           if (r[i] == 0.) {
-               *info = i + 1;
-               return;
-           }
-    } else {
-       /* Invert the scale factors. */
-       for (i = 0; i < A->nrow; ++i)
-           r[i] = 1. / SUPERLU_MIN( SUPERLU_MAX( r[i], smlnum ), bignum );
-       /* Compute ROWCND = min(R(I)) / max(R(I)) */
-       *rowcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
-    }
-
-    /* Compute column scale factors */
-    for (j = 0; j < A->ncol; ++j) c[j] = 0.;
-
-    /* Find the maximum element in each column, assuming the row
-       scalings computed above. */
-    for (j = 0; j < A->ncol; ++j)
-       for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-           irow = Astore->rowind[i];
-           c[j] = SUPERLU_MAX( c[j], c_abs1(&Aval[i]) * r[irow] );
-       }
-
-    /* Find the maximum and minimum scale factors. */
-    rcmin = bignum;
-    rcmax = 0.;
-    for (j = 0; j < A->ncol; ++j) {
-       rcmax = SUPERLU_MAX(rcmax, c[j]);
-       rcmin = SUPERLU_MIN(rcmin, c[j]);
-    }
-
-    if (rcmin == 0.) {
-       /* Find the first zero scale factor and return an error code. */
-       for (j = 0; j < A->ncol; ++j)
-           if ( c[j] == 0. ) {
-               *info = A->nrow + j + 1;
-               return;
-           }
-    } else {
-       /* Invert the scale factors. */
-       for (j = 0; j < A->ncol; ++j)
-           c[j] = 1. / SUPERLU_MIN( SUPERLU_MAX( c[j], smlnum ), bignum);
-       /* Compute COLCND = min(C(J)) / max(C(J)) */
-       *colcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
-    }
-
-    return;
-
-} /* cgsequ */
-
-
diff --git a/superlu/cgsrfs.c b/superlu/cgsrfs.c
deleted file mode 100644
index 7b06feec..00000000
--- a/superlu/cgsrfs.c
+++ /dev/null
@@ -1,457 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-/*
- * File name:  cgsrfs.c
- * History:     Modified from lapack routine CGERFS
- */
-#include <math.h>
-#include "slu_cdefs.h"
-
-void
-cgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
-       int *perm_c, int *perm_r, char *equed, float *R, float *C,
-       SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr,
-       SuperLUStat_t *stat, int *info)
-{
-/*
- *   Purpose   
- *   =======   
- *
- *   CGSRFS improves the computed solution to a system of linear   
- *   equations and provides error bounds and backward error estimates for 
- *   the solution.   
- *
- *   If equilibration was performed, the system becomes:
- *           (diag(R)*A_original*diag(C)) * X = diag(R)*B_original.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- *   Arguments   
- *   =========   
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *   
- *   A       (input) SuperMatrix*
- *           The original matrix A in the system, or the scaled A if
- *           equilibration was done. The type of A can be:
- *           Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_GE.
- *    
- *   L       (input) SuperMatrix*
- *          The factor L from the factorization Pr*A*Pc=L*U. Use
- *           compressed row subscripts storage for supernodes, 
- *           i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = 
SLU_TRLU.
- * 
- *   U       (input) SuperMatrix*
- *           The factor U from the factorization Pr*A*Pc=L*U as computed by
- *           cgstrf(). Use column-wise storage scheme, 
- *           i.e., U has types: Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
- *
- *   perm_c  (input) int*, dimension (A->ncol)
- *          Column permutation vector, which defines the 
- *           permutation matrix Pc; perm_c[i] = j means column i of A is 
- *           in position j in A*Pc.
- *
- *   perm_r  (input) int*, dimension (A->nrow)
- *           Row permutation vector, which defines the permutation matrix Pr;
- *           perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- *   equed   (input) Specifies the form of equilibration that was done.
- *           = 'N': No equilibration.
- *           = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
- *           = 'C': Column equilibration, i.e., A was postmultiplied by
- *                  diag(C).
- *           = 'B': Both row and column equilibration, i.e., A was replaced 
- *                  by diag(R)*A*diag(C).
- *
- *   R       (input) float*, dimension (A->nrow)
- *           The row scale factors for A.
- *           If equed = 'R' or 'B', A is premultiplied by diag(R).
- *           If equed = 'N' or 'C', R is not accessed.
- * 
- *   C       (input) float*, dimension (A->ncol)
- *           The column scale factors for A.
- *           If equed = 'C' or 'B', A is postmultiplied by diag(C).
- *           If equed = 'N' or 'R', C is not accessed.
- *
- *   B       (input) SuperMatrix*
- *           B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
- *           The right hand side matrix B.
- *           if equed = 'R' or 'B', B is premultiplied by diag(R).
- *
- *   X       (input/output) SuperMatrix*
- *           X has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
- *           On entry, the solution matrix X, as computed by cgstrs().
- *           On exit, the improved solution matrix X.
- *           if *equed = 'C' or 'B', X should be premultiplied by diag(C)
- *               in order to obtain the solution to the original system.
- *
- *   FERR    (output) float*, dimension (B->ncol)   
- *           The estimated forward error bound for each solution vector   
- *           X(j) (the j-th column of the solution matrix X).   
- *           If XTRUE is the true solution corresponding to X(j), FERR(j) 
- *           is an estimated upper bound for the magnitude of the largest 
- *           element in (X(j) - XTRUE) divided by the magnitude of the   
- *           largest element in X(j).  The estimate is as reliable as   
- *           the estimate for RCOND, and is almost always a slight   
- *           overestimate of the true error.
- *
- *   BERR    (output) float*, dimension (B->ncol)   
- *           The componentwise relative backward error of each solution   
- *           vector X(j) (i.e., the smallest relative change in   
- *           any element of A or B that makes X(j) an exact solution).
- *
- *   stat     (output) SuperLUStat_t*
- *            Record the statistics on runtime and floating-point operation 
count.
- *            See util.h for the definition of 'SuperLUStat_t'.
- *
- *   info    (output) int*   
- *           = 0:  successful exit   
- *            < 0:  if INFO = -i, the i-th argument had an illegal value   
- *
- *    Internal Parameters   
- *    ===================   
- *
- *    ITMAX is the maximum number of steps of iterative refinement.   
- *
- */  
-
-#define ITMAX 5
-    
-    /* Table of constant values */
-    int    ione = 1;
-    complex ndone = {-1., 0.};
-    complex done = {1., 0.};
-    
-    /* Local variables */
-    NCformat *Astore;
-    complex   *Aval;
-    SuperMatrix Bjcol;
-    DNformat *Bstore, *Xstore, *Bjcol_store;
-    complex   *Bmat, *Xmat, *Bptr, *Xptr;
-    int      kase;
-    float   safe1, safe2;
-    int      i, j, k, irow, nz, count, notran, rowequ, colequ;
-    int      ldb, ldx, nrhs;
-    float   s, xk, lstres, eps, safmin;
-    char     transc[1];
-    trans_t  transt;
-    complex   *work;
-    float   *rwork;
-    int      *iwork;
-    extern double slamch_(char *);
-    extern int clacon_(int *, complex *, complex *, float *, int *);
-#ifdef _CRAY
-    extern int CCOPY(int *, complex *, int *, complex *, int *);
-    extern int CSAXPY(int *, complex *, complex *, int *, complex *, int *);
-#else
-    extern int ccopy_(int *, complex *, int *, complex *, int *);
-    extern int caxpy_(int *, complex *, complex *, int *, complex *, int *);
-#endif
-
-    Astore = A->Store;
-    Aval   = Astore->nzval;
-    Bstore = B->Store;
-    Xstore = X->Store;
-    Bmat   = Bstore->nzval;
-    Xmat   = Xstore->nzval;
-    ldb    = Bstore->lda;
-    ldx    = Xstore->lda;
-    nrhs   = B->ncol;
-    
-    /* Test the input parameters */
-    *info = 0;
-    notran = (trans == NOTRANS);
-    if ( !notran && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-             A->Stype != SLU_NC || A->Dtype != SLU_C || A->Mtype != SLU_GE )
-       *info = -2;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_C || L->Mtype != SLU_TRLU )
-       *info = -3;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-             U->Stype != SLU_NC || U->Dtype != SLU_C || U->Mtype != SLU_TRU )
-       *info = -4;
-    else if ( ldb < SUPERLU_MAX(0, A->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE )
-        *info = -10;
-    else if ( ldx < SUPERLU_MAX(0, A->nrow) ||
-             X->Stype != SLU_DN || X->Dtype != SLU_C || X->Mtype != SLU_GE )
-       *info = -11;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("cgsrfs", &i);
-       return;
-    }
-
-    /* Quick return if possible */
-    if ( A->nrow == 0 || nrhs == 0) {
-       for (j = 0; j < nrhs; ++j) {
-           ferr[j] = 0.;
-           berr[j] = 0.;
-       }
-       return;
-    }
-
-    rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-    colequ = lsame_(equed, "C") || lsame_(equed, "B");
-    
-    /* Allocate working space */
-    work = complexMalloc(2*A->nrow);
-    rwork = (float *) SUPERLU_MALLOC( A->nrow * sizeof(float) );
-    iwork = intMalloc(A->nrow);
-    if ( !work || !rwork || !iwork ) 
-        ABORT("Malloc fails for work/rwork/iwork.");
-    
-    if ( notran ) {
-       *(unsigned char *)transc = 'N';
-        transt = TRANS;
-    } else {
-       *(unsigned char *)transc = 'T';
-       transt = NOTRANS;
-    }
-
-    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
-    nz     = A->ncol + 1;
-    eps    = slamch_("Epsilon");
-    safmin = slamch_("Safe minimum");
-    safe1  = nz * safmin;
-    safe2  = safe1 / eps;
-
-    /* Compute the number of nonzeros in each row (or column) of A */
-    for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
-    if ( notran ) {
-       for (k = 0; k < A->ncol; ++k)
-           for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) 
-               ++iwork[Astore->rowind[i]];
-    } else {
-       for (k = 0; k < A->ncol; ++k)
-           iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
-    }  
-
-    /* Copy one column of RHS B into Bjcol. */
-    Bjcol.Stype = B->Stype;
-    Bjcol.Dtype = B->Dtype;
-    Bjcol.Mtype = B->Mtype;
-    Bjcol.nrow  = B->nrow;
-    Bjcol.ncol  = 1;
-    Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
-    if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
-    Bjcol_store = Bjcol.Store;
-    Bjcol_store->lda = ldb;
-    Bjcol_store->nzval = work; /* address aliasing */
-       
-    /* Do for each right hand side ... */
-    for (j = 0; j < nrhs; ++j) {
-       count = 0;
-       lstres = 3.;
-       Bptr = &Bmat[j*ldb];
-       Xptr = &Xmat[j*ldx];
-
-       while (1) { /* Loop until stopping criterion is satisfied. */
-
-           /* Compute residual R = B - op(A) * X,   
-              where op(A) = A, A**T, or A**H, depending on TRANS. */
-           
-#ifdef _CRAY
-           CCOPY(&A->nrow, Bptr, &ione, work, &ione);
-#else
-           ccopy_(&A->nrow, Bptr, &ione, work, &ione);
-#endif
-           sp_cgemv(transc, ndone, A, Xptr, ione, done, work, ione);
-
-           /* Compute componentwise relative backward error from formula 
-              max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   
-              where abs(Z) is the componentwise absolute value of the matrix
-              or vector Z.  If the i-th component of the denominator is less
-              than SAFE2, then SAFE1 is added to the i-th component of the   
-              numerator and denominator before dividing. */
-
-           for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
-           
-           /* Compute abs(op(A))*abs(X) + abs(B). */
-           if (notran) {
-               for (k = 0; k < A->ncol; ++k) {
-                   xk = c_abs1( &Xptr[k] );
-                   for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
-                       rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
-               }
-           } else {
-               for (k = 0; k < A->ncol; ++k) {
-                   s = 0.;
-                   for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
-                       irow = Astore->rowind[i];
-                       s += c_abs1(&Aval[i]) * c_abs1(&Xptr[irow]);
-                   }
-                   rwork[k] += s;
-               }
-           }
-           s = 0.;
-           for (i = 0; i < A->nrow; ++i) {
-               if (rwork[i] > safe2)
-                   s = SUPERLU_MAX( s, c_abs1(&work[i]) / rwork[i] );
-               else
-                   s = SUPERLU_MAX( s, (c_abs1(&work[i]) + safe1) / 
-                               (rwork[i] + safe1) );
-           }
-           berr[j] = s;
-
-           /* Test stopping criterion. Continue iterating if   
-              1) The residual BERR(J) is larger than machine epsilon, and   
-              2) BERR(J) decreased by at least a factor of 2 during the   
-                 last iteration, and   
-              3) At most ITMAX iterations tried. */
-
-           if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
-               /* Update solution and try again. */
-               cgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               
-#ifdef _CRAY
-               CAXPY(&A->nrow, &done, work, &ione,
-                      &Xmat[j*ldx], &ione);
-#else
-               caxpy_(&A->nrow, &done, work, &ione,
-                      &Xmat[j*ldx], &ione);
-#endif
-               lstres = berr[j];
-               ++count;
-           } else {
-               break;
-           }
-        
-       } /* end while */
-
-       stat->RefineSteps = count;
-
-       /* Bound error from formula:
-          norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*   
-          ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)   
-          where   
-            norm(Z) is the magnitude of the largest component of Z   
-            inv(op(A)) is the inverse of op(A)   
-            abs(Z) is the componentwise absolute value of the matrix or
-              vector Z   
-            NZ is the maximum number of nonzeros in any row of A, plus 1   
-            EPS is machine epsilon   
-
-          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))   
-          is incremented by SAFE1 if the i-th component of   
-          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   
-
-          Use CLACON to estimate the infinity-norm of the matrix   
-             inv(op(A)) * diag(W),   
-          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
-       
-       for (i = 0; i < A->nrow; ++i) rwork[i] = c_abs1( &Bptr[i] );
-       
-       /* Compute abs(op(A))*abs(X) + abs(B). */
-       if ( notran ) {
-           for (k = 0; k < A->ncol; ++k) {
-               xk = c_abs1( &Xptr[k] );
-               for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
-                   rwork[Astore->rowind[i]] += c_abs1(&Aval[i]) * xk;
-           }
-       } else {
-           for (k = 0; k < A->ncol; ++k) {
-               s = 0.;
-               for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
-                   irow = Astore->rowind[i];
-                   xk = c_abs1( &Xptr[irow] );
-                   s += c_abs1(&Aval[i]) * xk;
-               }
-               rwork[k] += s;
-           }
-       }
-       
-       for (i = 0; i < A->nrow; ++i)
-           if (rwork[i] > safe2)
-               rwork[i] = c_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i];
-           else
-               rwork[i] = c_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
-       kase = 0;
-
-       do {
-           clacon_(&A->nrow, &work[A->nrow], work,
-                   &ferr[j], &kase);
-           if (kase == 0) break;
-
-           if (kase == 1) {
-               /* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
-               if ( notran && colequ )
-                   for (i = 0; i < A->ncol; ++i) {
-                       cs_mult(&work[i], &work[i], C[i]);
-                   }
-               else if ( !notran && rowequ )
-                   for (i = 0; i < A->nrow; ++i) {
-                       cs_mult(&work[i], &work[i], R[i]);
-                    }
-
-               cgstrs (transt, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               
-               for (i = 0; i < A->nrow; ++i) {
-                   cs_mult(&work[i], &work[i], rwork[i]);
-               }
-           } else {
-               /* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
-               for (i = 0; i < A->nrow; ++i) {
-                   cs_mult(&work[i], &work[i], rwork[i]);
-               }
-               
-               cgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               
-               if ( notran && colequ )
-                   for (i = 0; i < A->ncol; ++i) {
-                       cs_mult(&work[i], &work[i], C[i]);
-                   }
-               else if ( !notran && rowequ )
-                   for (i = 0; i < A->ncol; ++i) {
-                       cs_mult(&work[i], &work[i], R[i]);  
-                   }
-           }
-           
-       } while ( kase != 0 );
-
-       /* Normalize error. */
-       lstres = 0.;
-       if ( notran && colequ ) {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, C[i] * c_abs1( &Xptr[i]) );
-       } else if ( !notran && rowequ ) {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, R[i] * c_abs1( &Xptr[i]) );
-       } else {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, c_abs1( &Xptr[i]) );
-       }
-       if ( lstres != 0. )
-           ferr[j] /= lstres;
-
-    } /* for each RHS j ... */
-    
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(rwork);
-    SUPERLU_FREE(iwork);
-    SUPERLU_FREE(Bjcol.Store);
-
-    return;
-
-} /* cgsrfs */
diff --git a/superlu/cgssv.c b/superlu/cgssv.c
deleted file mode 100644
index 04f62b8a..00000000
--- a/superlu/cgssv.c
+++ /dev/null
@@ -1,231 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include "slu_cdefs.h"
-
-void
-cgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
-      SuperMatrix *L, SuperMatrix *U, SuperMatrix *B,
-      SuperLUStat_t *stat, int *info )
-{
-/*
- * Purpose
- * =======
- *
- * CGSSV solves the system of linear equations A*X=B, using the
- * LU factorization from CGSTRF. It performs the following steps:
- *
- *   1. If A is stored column-wise (A->Stype = SLU_NC):
- *
- *      1.1. Permute the columns of A, forming A*Pc, where Pc
- *           is a permutation matrix. For more details of this step, 
- *           see sp_preorder.c.
- *
- *      1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined
- *           by Gaussian elimination with partial pivoting.
- *           L is unit lower triangular with offdiagonal entries
- *           bounded by 1 in magnitude, and U is upper triangular.
- *
- *      1.3. Solve the system of equations A*X=B using the factored
- *           form of A.
- *
- *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the
- *      above algorithm to the transpose of A:
- *
- *      2.1. Permute columns of transpose(A) (rows of A),
- *           forming transpose(A)*Pc, where Pc is a permutation matrix. 
- *           For more details of this step, see sp_preorder.c.
- *
- *      2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr
- *           determined by Gaussian elimination with partial pivoting.
- *           L is unit lower triangular with offdiagonal entries
- *           bounded by 1 in magnitude, and U is upper triangular.
- *
- *      2.3. Solve the system of equations A*X=B using the factored
- *           form of A.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- * 
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed and how the
- *         system will be solved.
- *
- * A       (input) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = SLU_NC or SLU_NR; Dtype = SLU_C; Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- * perm_c  (input/output) int*
- *         If A->Stype = SLU_NC, column permutation vector of size A->ncol
- *         which defines the permutation matrix Pc; perm_c[i] = j means 
- *         column i of A is in position j in A*Pc.
- *         If A->Stype = SLU_NR, column permutation vector of size A->nrow
- *         which describes permutation of columns of transpose(A) 
- *         (rows of A) as described above.
- * 
- *         If options->ColPerm = MY_PERMC or options->Fact = SamePattern or
- *            options->Fact = SamePattern_SameRowPerm, it is an input argument.
- *            On exit, perm_c may be overwritten by the product of the input
- *            perm_c and a permutation that postorders the elimination tree
- *            of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
- *            is already in postorder.
- *         Otherwise, it is an output argument.
- * 
- * perm_r  (input/output) int*
- *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
- *         which defines the permutation matrix Pr, and is determined 
- *         by partial pivoting.  perm_r[i] = j means row i of A is in 
- *         position j in Pr*A.
- *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
- *         determines permutation of rows of transpose(A)
- *         (columns of A) as described above.
- *
- *         If options->RowPerm = MY_PERMR or
- *            options->Fact = SamePattern_SameRowPerm, perm_r is an
- *            input argument.
- *         otherwise it is an output argument.
- *
- * L       (output) SuperMatrix*
- *         The factor L from the factorization 
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses compressed row subscripts storage for supernodes, i.e.,
- *         L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
- *         
- * U       (output) SuperMatrix*
- *        The factor U from the factorization 
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *        = 0: successful exit
- *         > 0: if info = i, and i is
- *             <= A->ncol: U(i,i) is exactly zero. The factorization has
- *                been completed, but the factor U is exactly singular,
- *                so the solution could not be computed.
- *             > A->ncol: number of bytes allocated when memory allocation
- *                failure occurred, plus A->ncol.
- *   
- */
-    DNformat *Bstore;
-    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
-    SuperMatrix AC; /* Matrix postmultiplied by Pc */
-    int      lwork = 0, *etree, i;
-    
-    /* Set default values for some parameters */
-    float   drop_tol = 0.;
-    int      panel_size;     /* panel size */
-    int      relax;          /* no of columns in a relaxed snodes */
-    int      permc_spec;
-    trans_t  trans = NOTRANS;
-    double   *utime;
-    double   t;        /* Temporary time */
-
-    /* Test the input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    if ( options->Fact != DOFACT ) *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-        (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
-        A->Dtype != SLU_C || A->Mtype != SLU_GE )
-       *info = -2;
-    else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
-       B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE )
-       *info = -7;
-    if ( *info != 0 ) {
-       i = -(*info);
-       xerbla_("cgssv", &i);
-       return;
-    }
-
-    utime = stat->utime;
-
-    /* Convert A to SLU_NC format when necessary. */
-    if ( A->Stype == SLU_NR ) {
-       NRformat *Astore = A->Store;
-       AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-       cCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
-                              Astore->nzval, Astore->colind, Astore->rowptr,
-                              SLU_NC, A->Dtype, A->Mtype);
-       trans = TRANS;
-    } else {
-        if ( A->Stype == SLU_NC ) AA = A;
-    }
-
-    t = SuperLU_timer_();
-    /*
-     * Get column permutation vector perm_c[], according to permc_spec:
-     *   permc_spec = NATURAL:  natural ordering 
-     *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
-     *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
-     *   permc_spec = COLAMD:   approximate minimum degree column ordering
-     *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
-     */
-    permc_spec = options->ColPerm;
-    if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
-      get_perm_c(permc_spec, AA, perm_c);
-    utime[COLPERM] = SuperLU_timer_() - t;
-
-    etree = intMalloc(A->ncol);
-
-    t = SuperLU_timer_();
-    sp_preorder(options, AA, perm_c, etree, &AC);
-    utime[ETREE] = SuperLU_timer_() - t;
-
-    panel_size = sp_ienv(1);
-    relax = sp_ienv(2);
-
-    /*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
-         relax, panel_size, sp_ienv(3), sp_ienv(4));*/
-    t = SuperLU_timer_(); 
-    /* Compute the LU factorization of A. */
-    cgstrf(options, &AC, drop_tol, relax, panel_size,
-          etree, NULL, lwork, perm_c, perm_r, L, U, stat, info);
-    utime[FACT] = SuperLU_timer_() - t;
-
-    t = SuperLU_timer_();
-    if ( *info == 0 ) {
-        /* Solve the system A*X=B, overwriting B with X. */
-        cgstrs (trans, L, U, perm_c, perm_r, B, stat, info);
-    }
-    utime[SOLVE] = SuperLU_timer_() - t;
-
-    SUPERLU_FREE (etree);
-    Destroy_CompCol_Permuted(&AC);
-    if ( A->Stype == SLU_NR ) {
-       Destroy_SuperMatrix_Store(AA);
-       SUPERLU_FREE(AA);
-    }
-
-}
diff --git a/superlu/cgssvx.c b/superlu/cgssvx.c
deleted file mode 100644
index 905e7c73..00000000
--- a/superlu/cgssvx.c
+++ /dev/null
@@ -1,627 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-
-#include "slu_cdefs.h"
-
-void
-cgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
-       int *etree, char *equed, float *R, float *C,
-       SuperMatrix *L, SuperMatrix *U, void *work, int lwork,
-       SuperMatrix *B, SuperMatrix *X, float *recip_pivot_growth, 
-       float *rcond, float *ferr, float *berr, 
-       mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info )
-{
-/*
- * Purpose
- * =======
- *
- * CGSSVX solves the system of linear equations A*X=B or A'*X=B, using
- * the LU factorization from cgstrf(). Error bounds on the solution and
- * a condition estimate are also provided. It performs the following steps:
- *
- *   1. If A is stored column-wise (A->Stype = SLU_NC):
- *  
- *      1.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A is
- *           overwritten by diag(R)*A*diag(C) and B by diag(R)*B
- *           (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans
- *           = TRANS or CONJ).
- *
- *      1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
- *           matrix that usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      1.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the matrix A (after equilibration if options->Equil = YES)
- *           as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
- *
- *      1.4. Compute the reciprocal pivot growth factor.
- *
- *      1.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form of 
- *           A is used to estimate the condition number of the matrix A. If
- *           the reciprocal of the condition number is less than machine
- *           precision, info = A->ncol+1 is returned as a warning, but the
- *           routine still goes on to solve for X and computes error bounds
- *           as described below.
- *
- *      1.6. The system of equations is solved for X using the factored form
- *           of A.
- *
- *      1.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      1.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R)
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
- *      to the transpose of A:
- *
- *      2.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A' is
- *           overwritten by diag(R)*A'*diag(C) and B by diag(R)*B 
- *           (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
- *
- *      2.2. Permute columns of transpose(A) (rows of A), 
- *           forming transpose(A)*Pc, where Pc is a permutation matrix that 
- *           usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      2.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the transpose(A) (after equilibration if 
- *           options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the
- *           permutation Pr determined by partial pivoting.
- *
- *      2.4. Compute the reciprocal pivot growth factor.
- *
- *      2.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form 
- *           of transpose(A) is used to estimate the condition number of the
- *           matrix A. If the reciprocal of the condition number
- *           is less than machine precision, info = A->nrow+1 is returned as
- *           a warning, but the routine still goes on to solve for X and
- *           computes error bounds as described below.
- *
- *      2.6. The system of equations is solved for X using the factored form
- *           of transpose(A).
- *
- *      2.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      2.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R) 
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed and how the
- *         system will be solved.
- *
- * A       (input/output) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of the linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = SLU_NC or SLU_NR, Dtype = SLU_D, Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- *         On entry, If options->Fact = FACTORED and equed is not 'N', 
- *         then A must have been equilibrated by the scaling factors in
- *         R and/or C.  
- *         On exit, A is not modified if options->Equil = NO, or if 
- *         options->Equil = YES but equed = 'N' on exit.
- *         Otherwise, if options->Equil = YES and equed is not 'N',
- *         A is scaled as follows:
- *         If A->Stype = SLU_NC:
- *           equed = 'R':  A := diag(R) * A
- *           equed = 'C':  A := A * diag(C)
- *           equed = 'B':  A := diag(R) * A * diag(C).
- *         If A->Stype = SLU_NR:
- *           equed = 'R':  transpose(A) := diag(R) * transpose(A)
- *           equed = 'C':  transpose(A) := transpose(A) * diag(C)
- *           equed = 'B':  transpose(A) := diag(R) * transpose(A) * diag(C).
- *
- * perm_c  (input/output) int*
- *        If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
- *         which defines the permutation matrix Pc; perm_c[i] = j means
- *         column i of A is in position j in A*Pc.
- *         On exit, perm_c may be overwritten by the product of the input
- *         perm_c and a permutation that postorders the elimination tree
- *         of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
- *         is already in postorder.
- *
- *         If A->Stype = SLU_NR, column permutation vector of size A->nrow,
- *         which describes permutation of columns of transpose(A) 
- *         (rows of A) as described above.
- * 
- * perm_r  (input/output) int*
- *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
- *         which defines the permutation matrix Pr, and is determined
- *         by partial pivoting.  perm_r[i] = j means row i of A is in 
- *         position j in Pr*A.
- *
- *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
- *         determines permutation of rows of transpose(A)
- *         (columns of A) as described above.
- *
- *         If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *         will try to use the input perm_r, unless a certain threshold
- *         criterion is violated. In that case, perm_r is overwritten by a
- *         new permutation determined by partial pivoting or diagonal
- *         threshold pivoting.
- *         Otherwise, perm_r is output argument.
- * 
- * etree   (input/output) int*,  dimension (A->ncol)
- *         Elimination tree of Pc'*A'*A*Pc.
- *         If options->Fact != FACTORED and options->Fact != DOFACT,
- *         etree is an input argument, otherwise it is an output argument.
- *         Note: etree is a vector of parent pointers for a forest whose
- *         vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *
- * equed   (input/output) char*
- *         Specifies the form of equilibration that was done.
- *         = 'N': No equilibration.
- *         = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
- *         = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
- *         = 'B': Both row and column equilibration, i.e., A was replaced 
- *                by diag(R)*A*diag(C).
- *         If options->Fact = FACTORED, equed is an input argument,
- *         otherwise it is an output argument.
- *
- * R       (input/output) float*, dimension (A->nrow)
- *         The row scale factors for A or transpose(A).
- *         If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the left by diag(R).
- *         If equed = 'N' or 'C', R is not accessed.
- *         If options->Fact = FACTORED, R is an input argument,
- *             otherwise, R is output.
- *         If options->zFact = FACTORED and equed = 'R' or 'B', each element
- *             of R must be positive.
- * 
- * C       (input/output) float*, dimension (A->ncol)
- *         The column scale factors for A or transpose(A).
- *         If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the right by diag(C).
- *         If equed = 'N' or 'R', C is not accessed.
- *         If options->Fact = FACTORED, C is an input argument,
- *             otherwise, C is output.
- *         If options->Fact = FACTORED and equed = 'C' or 'B', each element
- *             of C must be positive.
- *         
- * L       (output) SuperMatrix*
- *        The factor L from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype SLU_= NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses compressed row subscripts storage for supernodes, i.e.,
- *         L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
- *
- * U       (output) SuperMatrix*
- *        The factor U from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
- *
- * work    (workspace/output) void*, size (lwork) (in bytes)
- *         User supplied workspace, should be large enough
- *         to hold data structures for factors L and U.
- *         On exit, if fact is not 'F', L and U point to this array.
- *
- * lwork   (input) int
- *         Specifies the size of work array in bytes.
- *         = 0:  allocate space internally by system malloc;
- *         > 0:  use user-supplied work array of length lwork in bytes,
- *               returns error if space runs out.
- *         = -1: the routine guesses the amount of space needed without
- *               performing the factorization, and returns it in
- *               mem_usage->total_needed; no other side effects.
- *
- *         See argument 'mem_usage' for memory usage statistics.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         If B->ncol = 0, only LU decomposition is performed, the triangular
- *                         solve is skipped.
- *         On exit,
- *            if equed = 'N', B is not modified; otherwise
- *            if A->Stype = SLU_NC:
- *               if options->Trans = NOTRANS and equed = 'R' or 'B',
- *                  B is overwritten by diag(R)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'C' of 'B',
- *                  B is overwritten by diag(C)*B;
- *            if A->Stype = SLU_NR:
- *               if options->Trans = NOTRANS and equed = 'C' or 'B',
- *                  B is overwritten by diag(C)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'R' of 'B',
- *                  B is overwritten by diag(R)*B.
- *
- * X       (output) SuperMatrix*
- *         X has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE. 
- *         If info = 0 or info = A->ncol+1, X contains the solution matrix
- *         to the original system of equations. Note that A and B are modified
- *         on exit if equed is not 'N', and the solution to the equilibrated
- *         system is inv(diag(C))*X if options->Trans = NOTRANS and
- *         equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C'
- *         and equed = 'R' or 'B'.
- *
- * recip_pivot_growth (output) float*
- *         The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
- *         The infinity norm is used. If recip_pivot_growth is much less
- *         than 1, the stability of the LU factorization could be poor.
- *
- * rcond   (output) float*
- *         The estimate of the reciprocal condition number of the matrix A
- *         after equilibration (if done). If rcond is less than the machine
- *         precision (in particular, if rcond = 0), the matrix is singular
- *         to working precision. This condition is indicated by a return
- *         code of info > 0.
- *
- * FERR    (output) float*, dimension (B->ncol)   
- *         The estimated forward error bound for each solution vector   
- *         X(j) (the j-th column of the solution matrix X).   
- *         If XTRUE is the true solution corresponding to X(j), FERR(j) 
- *         is an estimated upper bound for the magnitude of the largest 
- *         element in (X(j) - XTRUE) divided by the magnitude of the   
- *         largest element in X(j).  The estimate is as reliable as   
- *         the estimate for RCOND, and is almost always a slight   
- *         overestimate of the true error.
- *         If options->IterRefine = NOREFINE, ferr = 1.0.
- *
- * BERR    (output) float*, dimension (B->ncol)
- *         The componentwise relative backward error of each solution   
- *         vector X(j) (i.e., the smallest relative change in   
- *         any element of A or B that makes X(j) an exact solution).
- *         If options->IterRefine = NOREFINE, berr = 1.0.
- *
- * mem_usage (output) mem_usage_t*
- *         Record the memory usage statistics, consisting of following fields:
- *         - for_lu (float)
- *           The amount of space used in bytes for L\U data structures.
- *         - total_needed (float)
- *           The amount of space needed in bytes to perform factorization.
- *         - expansions (int)
- *           The number of memory expansions during the LU factorization.
- *
- * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *         = 0: successful exit   
- *         < 0: if info = -i, the i-th argument had an illegal value   
- *         > 0: if info = i, and i is   
- *              <= A->ncol: U(i,i) is exactly zero. The factorization has   
- *                    been completed, but the factor U is exactly   
- *                    singular, so the solution and error bounds   
- *                    could not be computed.   
- *              = A->ncol+1: U is nonsingular, but RCOND is less than machine
- *                    precision, meaning that the matrix is singular to
- *                    working precision. Nevertheless, the solution and
- *                    error bounds are computed because there are a number
- *                    of situations where the computed solution can be more
- *                    accurate than the value of RCOND would suggest.   
- *              > A->ncol+1: number of bytes allocated when memory allocation
- *                    failure occurred, plus A->ncol.
- *
- */
-
-    DNformat  *Bstore, *Xstore;
-    complex    *Bmat, *Xmat;
-    int       ldb, ldx, nrhs;
-    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
-    SuperMatrix AC; /* Matrix postmultiplied by Pc */
-    int       colequ, equil, nofact, notran, rowequ, permc_spec;
-    trans_t   trant;
-    char      norm[1];
-    int       i, j, info1;
-    float    amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin;
-    int       relax, panel_size;
-    float    diag_pivot_thresh, drop_tol;
-    double    t0;      /* temporary time */
-    double    *utime;
-
-    /* External functions */
-    extern float clangs(char *, SuperMatrix *);
-    extern double slamch_(char *);
-
-    Bstore = B->Store;
-    Xstore = X->Store;
-    Bmat   = Bstore->nzval;
-    Xmat   = Xstore->nzval;
-    ldb    = Bstore->lda;
-    ldx    = Xstore->lda;
-    nrhs   = B->ncol;
-
-    *info = 0;
-    nofact = (options->Fact != FACTORED);
-    equil = (options->Equil == YES);
-    notran = (options->Trans == NOTRANS);
-    if ( nofact ) {
-       *(unsigned char *)equed = 'N';
-       rowequ = FALSE;
-       colequ = FALSE;
-    } else {
-       rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-       colequ = lsame_(equed, "C") || lsame_(equed, "B");
-       smlnum = slamch_("Safe minimum");
-       bignum = 1. / smlnum;
-    }
-
-#if 0
-printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
-       options->Fact, options->Trans, *equed);
-#endif
-
-    /* Test the input parameters */
-    if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern &&
-       options->Fact != SamePattern_SameRowPerm &&
-       !notran && options->Trans != TRANS && options->Trans != CONJ &&
-       !equil && options->Equil != NO)
-       *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-             (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
-             A->Dtype != SLU_C || A->Mtype != SLU_GE )
-       *info = -2;
-    else if (options->Fact == FACTORED &&
-            !(rowequ || colequ || lsame_(equed, "N")))
-       *info = -6;
-    else {
-       if (rowequ) {
-           rcmin = bignum;
-           rcmax = 0.;
-           for (j = 0; j < A->nrow; ++j) {
-               rcmin = SUPERLU_MIN(rcmin, R[j]);
-               rcmax = SUPERLU_MAX(rcmax, R[j]);
-           }
-           if (rcmin <= 0.) *info = -7;
-           else if ( A->nrow > 0)
-               rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-           else rowcnd = 1.;
-       }
-       if (colequ && *info == 0) {
-           rcmin = bignum;
-           rcmax = 0.;
-           for (j = 0; j < A->nrow; ++j) {
-               rcmin = SUPERLU_MIN(rcmin, C[j]);
-               rcmax = SUPERLU_MAX(rcmax, C[j]);
-           }
-           if (rcmin <= 0.) *info = -8;
-           else if (A->nrow > 0)
-               colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-           else colcnd = 1.;
-       }
-       if (*info == 0) {
-           if ( lwork < -1 ) *info = -12;
-           else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
-                     B->Stype != SLU_DN || B->Dtype != SLU_C || 
-                     B->Mtype != SLU_GE )
-               *info = -13;
-           else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
-                     (B->ncol != 0 && B->ncol != X->ncol) ||
-                      X->Stype != SLU_DN ||
-                     X->Dtype != SLU_C || X->Mtype != SLU_GE )
-               *info = -14;
-       }
-    }
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("cgssvx", &i);
-       return;
-    }
-    
-    /* Initialization for factor parameters */
-    panel_size = sp_ienv(1);
-    relax      = sp_ienv(2);
-    diag_pivot_thresh = options->DiagPivotThresh;
-    drop_tol   = 0.0;
-
-    utime = stat->utime;
-    
-    /* Convert A to SLU_NC format when necessary. */
-    if ( A->Stype == SLU_NR ) {
-       NRformat *Astore = A->Store;
-       AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-       cCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
-                              Astore->nzval, Astore->colind, Astore->rowptr,
-                              SLU_NC, A->Dtype, A->Mtype);
-       if ( notran ) { /* Reverse the transpose argument. */
-           trant = TRANS;
-           notran = 0;
-       } else {
-           trant = NOTRANS;
-           notran = 1;
-       }
-    } else { /* A->Stype == SLU_NC */
-       trant = options->Trans;
-       AA = A;
-    }
-
-    if ( nofact && equil ) {
-       t0 = SuperLU_timer_();
-       /* Compute row and column scalings to equilibrate the matrix A. */
-       cgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);
-       
-       if ( info1 == 0 ) {
-           /* Equilibrate matrix A. */
-           claqgs(AA, R, C, rowcnd, colcnd, amax, equed);
-           rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-           colequ = lsame_(equed, "C") || lsame_(equed, "B");
-       }
-       utime[EQUIL] = SuperLU_timer_() - t0;
-    }
-
-    if ( nrhs > 0 ) {
-        /* Scale the right hand side if equilibration was performed. */
-        if ( notran ) {
-           if ( rowequ ) {
-               for (j = 0; j < nrhs; ++j)
-                   for (i = 0; i < A->nrow; ++i) {
-                        cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], R[i]);
-                   }
-           }
-        } else if ( colequ ) {
-           for (j = 0; j < nrhs; ++j)
-               for (i = 0; i < A->nrow; ++i) {
-                    cs_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], C[i]);
-               }
-        }
-    }
-
-    if ( nofact ) {
-       
-        t0 = SuperLU_timer_();
-       /*
-        * Gnet column permutation vector perm_c[], according to permc_spec:
-        *   permc_spec = NATURAL:  natural ordering 
-        *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
-        *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
-        *   permc_spec = COLAMD:   approximate minimum degree column ordering
-        *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
-        */
-       permc_spec = options->ColPerm;
-       if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
-            get_perm_c(permc_spec, AA, perm_c);
-       utime[COLPERM] = SuperLU_timer_() - t0;
-
-       t0 = SuperLU_timer_();
-       sp_preorder(options, AA, perm_c, etree, &AC);
-       utime[ETREE] = SuperLU_timer_() - t0;
-    
-/*     printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
-              relax, panel_size, sp_ienv(3), sp_ienv(4));
-       fflush(stdout); */
-       
-       /* Compute the LU factorization of A*Pc. */
-       t0 = SuperLU_timer_();
-       cgstrf(options, &AC, drop_tol, relax, panel_size,
-              etree, work, lwork, perm_c, perm_r, L, U, stat, info);
-       utime[FACT] = SuperLU_timer_() - t0;
-       
-       if ( lwork == -1 ) {
-           mem_usage->total_needed = *info - A->ncol;
-           return;
-       }
-    }
-
-    if ( options->PivotGrowth ) {
-        if ( *info > 0 ) {
-           if ( *info <= A->ncol ) {
-               /* Compute the reciprocal pivot growth factor of the leading
-                  rank-deficient *info columns of A. */
-               *recip_pivot_growth = cPivotGrowth(*info, AA, perm_c, L, U);
-           }
-           return;
-        }
-
-        /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */
-        *recip_pivot_growth = cPivotGrowth(A->ncol, AA, perm_c, L, U);
-    }
-
-    if ( options->ConditionNumber ) {
-      if (*info == 0) {
-        /* Estimate the reciprocal of the condition number of A. */
-        t0 = SuperLU_timer_();
-        if ( notran ) {
-           *(unsigned char *)norm = '1';
-        } else {
-           *(unsigned char *)norm = 'I';
-        }
-        anorm = clangs(norm, AA);
-        cgscon(norm, L, U, anorm, rcond, stat, info);
-        utime[RCOND] = SuperLU_timer_() - t0;
-      } else *rcond = 0;
-    }
-    
-    if ( *info == 0 && nrhs > 0 ) {
-        /* Compute the solution matrix X. */
-        for (j = 0; j < nrhs; j++)  /* Save a copy of the right hand sides */
-            for (i = 0; i < B->nrow; i++)
-               Xmat[i + j*ldx] = Bmat[i + j*ldb];
-    
-        t0 = SuperLU_timer_();
-        cgstrs (trant, L, U, perm_c, perm_r, X, stat, info);
-        utime[SOLVE] = SuperLU_timer_() - t0;
-    
-        /* Use iterative refinement to improve the computed solution and 
compute
-           error bounds and backward error estimates for it. */
-        t0 = SuperLU_timer_();
-        if ( options->IterRefine != NOREFINE ) {
-            cgsrfs(trant, AA, L, U, perm_c, perm_r, equed, R, C, B,
-                   X, ferr, berr, stat, info);
-        } else {
-            for (j = 0; j < nrhs; ++j) ferr[j] = berr[j] = 1.0;
-        }
-        utime[REFINE] = SuperLU_timer_() - t0;
-
-        /* Transform the solution matrix X to a solution of the original 
system. */
-        if ( notran ) {
-           if ( colequ ) {
-               for (j = 0; j < nrhs; ++j)
-                   for (i = 0; i < A->nrow; ++i) {
-                        cs_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], C[i]);
-                   }
-           }
-        } else if ( rowequ ) {
-           for (j = 0; j < nrhs; ++j)
-               for (i = 0; i < A->nrow; ++i) {
-                    cs_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], R[i]);
-                }
-        }
-    } /* end if nrhs > 0 */
-
-    if ( *info == 0 && options->ConditionNumber ) {
-        /* Set INFO = A->ncol+1 if the matrix is singular to working 
precision. */
-        if ( *rcond < slamch_("E") ) *info = A->ncol + 1;
-    }
-
-    if ( *info != -10000000 && nofact ) {
-        cQuerySpace(L, U, mem_usage);
-        Destroy_CompCol_Permuted(&AC);
-    }
-    if ( A->Stype == SLU_NR ) {
-       Destroy_SuperMatrix_Store(AA);
-       SUPERLU_FREE(AA);
-    }
-
-}
diff --git a/superlu/cgstrf.c b/superlu/cgstrf.c
deleted file mode 100644
index bd3cee09..00000000
--- a/superlu/cgstrf.c
+++ /dev/null
@@ -1,444 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-
-#include "slu_cdefs.h"
-
-extern void countnz();
-extern void fixupL();
-
-void
-cgstrf (superlu_options_t *options, SuperMatrix *A, float drop_tol,
-        int relax, int panel_size, int *etree, void *work, int lwork,
-        int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
-        SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * CGSTRF computes an LU factorization of a general sparse m-by-n
- * matrix A using partial pivoting with row interchanges.
- * The factorization has the form
- *     Pr * A = L * U
- * where Pr is a row permutation matrix, L is lower triangular with unit
- * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper 
- * triangular (upper trapezoidal if A->nrow < A->ncol).
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed.
- *
- * A        (input) SuperMatrix*
- *         Original matrix A, permuted by columns, of dimension
- *          (A->nrow, A->ncol). The type of A can be:
- *          Stype = SLU_NCP; Dtype = SLU_C; Mtype = SLU_GE.
- *
- * drop_tol (input) float (NOT IMPLEMENTED)
- *         Drop tolerance parameter. At step j of the Gaussian elimination,
- *          if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
- *          0 <= drop_tol <= 1. The default value of drop_tol is 0.
- *
- * relax    (input) int
- *          To control degree of relaxing supernodes. If the number
- *          of nodes (columns) in a subtree of the elimination tree is less
- *          than relax, this subtree is considered as one supernode,
- *          regardless of the row structures of those columns.
- *
- * panel_size (input) int
- *          A panel consists of at most panel_size consecutive columns.
- *
- * etree    (input) int*, dimension (A->ncol)
- *          Elimination tree of A'*A.
- *          Note: etree is a vector of parent pointers for a forest whose
- *          vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *          On input, the columns of A should be permuted so that the
- *          etree is in a certain postorder.
- *
- * work     (input/output) void*, size (lwork) (in bytes)
- *          User-supplied work space and space for the output data structures.
- *          Not referenced if lwork = 0;
- *
- * lwork   (input) int
- *         Specifies the size of work array in bytes.
- *         = 0:  allocate space internally by system malloc;
- *         > 0:  use user-supplied work array of length lwork in bytes,
- *               returns error if space runs out.
- *         = -1: the routine guesses the amount of space needed without
- *               performing the factorization, and returns it in
- *               *info; no other side effects.
- *
- * perm_c   (input) int*, dimension (A->ncol)
- *         Column permutation vector, which defines the 
- *          permutation matrix Pc; perm_c[i] = j means column i of A is 
- *          in position j in A*Pc.
- *          When searching for diagonal, perm_c[*] is applied to the
- *          row subscripts of A, so that diagonal threshold pivoting
- *          can find the diagonal of A, rather than that of A*Pc.
- *
- * perm_r   (input/output) int*, dimension (A->nrow)
- *          Row permutation vector which defines the permutation matrix Pr,
- *          perm_r[i] = j means row i of A is in position j in Pr*A.
- *          If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *             will try to use the input perm_r, unless a certain threshold
- *             criterion is violated. In that case, perm_r is overwritten by
- *             a new permutation determined by partial pivoting or diagonal
- *             threshold pivoting.
- *          Otherwise, perm_r is output argument;
- *
- * L        (output) SuperMatrix*
- *          The factor L from the factorization Pr*A=L*U; use compressed row 
- *          subscripts storage for supernodes, i.e., L has type: 
- *          Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
- *
- * U        (output) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *          storage scheme, i.e., U has types: Stype = SLU_NC, 
- *          Dtype = SLU_C, Mtype = SLU_TRU.
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation 
count.
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info     (output) int*
- *          = 0: successful exit
- *          < 0: if info = -i, the i-th argument had an illegal value
- *          > 0: if info = i, and i is
- *             <= A->ncol: U(i,i) is exactly zero. The factorization has
- *                been completed, but the factor U is exactly singular,
- *                and division by zero will occur if it is used to solve a
- *                system of equations.
- *             > A->ncol: number of bytes allocated when memory allocation
- *                failure occurred, plus A->ncol. If lwork = -1, it is
- *                the estimated amount of space needed, plus A->ncol.
- *
- * ======================================================================
- *
- * Local Working Arrays: 
- * ======================
- *   m = number of rows in the matrix
- *   n = number of columns in the matrix
- *
- *   xprune[0:n-1]: xprune[*] points to locations in subscript 
- *     vector lsub[*]. For column i, xprune[i] denotes the point where 
- *     structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need 
- *     to be traversed for symbolic factorization.
- *
- *   marker[0:3*m-1]: marker[i] = j means that node i has been 
- *     reached when working on column j.
- *     Storage: relative to original row subscripts
- *     NOTE: There are 3 of them: marker/marker1 are used for panel dfs, 
- *           see cpanel_dfs.c; marker2 is used for inner-factorization,
- *            see ccolumn_dfs.c.
- *
- *   parent[0:m-1]: parent vector used during dfs
- *      Storage: relative to new row subscripts
- *
- *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs) 
- *     unexplored neighbor of i in lsub[*]
- *
- *   segrep[0:nseg-1]: contains the list of supernodal representatives
- *     in topological order of the dfs. A supernode representative is the 
- *     last column of a supernode.
- *      The maximum size of segrep[] is n.
- *
- *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a 
- *     supernodal representative r, repfnz[r] is the location of the first 
- *     nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
- *     indicates the supernode r has been explored.
- *     NOTE: There are W of them, each used for one column of a panel. 
- *
- *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below 
- *      the panel diagonal. These are filled in during cpanel_dfs(), and are
- *      used later in the inner LU factorization within the panel.
- *     panel_lsub[]/dense[] pair forms the SPA data structure.
- *     NOTE: There are W of them.
- *
- *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
- *                NOTE: there are W of them.
- *
- *   tempv[0:*]: real temporary used for dense numeric kernels;
- *     The size of this array is defined by NUM_TEMPV() in csp_defs.h.
- *
- */
-    /* Local working arrays */
-    NCPformat *Astore;
-    int       *iperm_r = NULL; /* inverse of perm_r; used when 
-                                  options->Fact == SamePattern_SameRowPerm */
-    int       *iperm_c; /* inverse of perm_c */
-    int       *iwork;
-    complex    *cwork;
-    int              *segrep, *repfnz, *parent, *xplore;
-    int              *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide 
SPA */
-    int              *xprune;
-    int              *marker;
-    complex    *dense, *tempv;
-    int       *relax_end;
-    complex    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int       *xsup, *supno;
-    int       *xlsub, *xlusup, *xusub;
-    int       nzlumax;
-    static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
-
-    /* Local scalars */
-    fact_t    fact = options->Fact;
-    double    diag_pivot_thresh = options->DiagPivotThresh;
-    int       pivrow;   /* pivotal row number in the original matrix A */
-    int       nseg1;   /* no of segments in U-column above panel row jcol */
-    int       nseg;    /* no of segments in each U-column */
-    register int jcol; 
-    register int kcol; /* end column of a relaxed snode */
-    register int icol;
-    register int i, k, jj, new_next, iinfo;
-    int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
-    int       w_def;   /* upper bound on panel width */
-    int       usepr, iperm_r_allocated = 0;
-    int       nnzL, nnzU;
-    int       *panel_histo = stat->panel_histo;
-    flops_t   *ops = stat->ops;
-
-    iinfo    = 0;
-    m        = A->nrow;
-    n        = A->ncol;
-    min_mn   = SUPERLU_MIN(m, n);
-    Astore   = A->Store;
-    a        = Astore->nzval;
-    asub     = Astore->rowind;
-    xa_begin = Astore->colbeg;
-    xa_end   = Astore->colend;
-
-    /* Allocate storage common to the factor routines */
-    *info = cLUMemInit(fact, work, lwork, m, n, Astore->nnz,
-                       panel_size, L, U, &Glu, &iwork, &cwork);
-    if ( *info ) return;
-    
-    xsup    = Glu.xsup;
-    supno   = Glu.supno;
-    xlsub   = Glu.xlsub;
-    xlusup  = Glu.xlusup;
-    xusub   = Glu.xusub;
-    
-    SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
-            &repfnz, &panel_lsub, &xprune, &marker);
-    cSetRWork(m, panel_size, cwork, &dense, &tempv);
-    
-    usepr = (fact == SamePattern_SameRowPerm);
-    if ( usepr ) {
-       /* Compute the inverse of perm_r */
-       iperm_r = (int *) intMalloc(m);
-       for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
-       iperm_r_allocated = 1;
-    }
-    iperm_c = (int *) intMalloc(n);
-    for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
-
-    /* Identify relaxed snodes */
-    relax_end = (int *) intMalloc(n);
-    if ( options->SymmetricMode == YES ) {
-        heap_relax_snode(n, etree, relax, marker, relax_end); 
-    } else {
-        relax_snode(n, etree, relax, marker, relax_end); 
-    }
-    
-    ifill (perm_r, m, EMPTY);
-    ifill (marker, m * NO_MARKER, EMPTY);
-    supno[0] = -1;
-    xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
-    w_def    = panel_size;
-
-    /* 
-     * Work on one "panel" at a time. A panel is one of the following: 
-     *    (a) a relaxed supernode at the bottom of the etree, or
-     *    (b) panel_size contiguous columns, defined by the user
-     */
-    for (jcol = 0; jcol < min_mn; ) {
-
-      if (handle_getfem_callback() != 0) {
-       iinfo = *info = -333333333; goto HOUSTON_WE_HAVE_A_PROBLEM; 
-       break;
-      }
-
-       if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
-           kcol = relax_end[jcol];       /* end of the relaxed snode */
-           panel_histo[kcol-jcol+1]++;
-
-           /* --------------------------------------
-            * Factorize the relaxed supernode(jcol:kcol) 
-            * -------------------------------------- */
-           /* Determine the union of the row structure of the snode */
-           if ( (*info = csnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
-                                   xprune, marker, &Glu)) != 0 )
-               return;
-
-            nextu    = xusub[jcol];
-           nextlu   = xlusup[jcol];
-           jsupno   = supno[jcol];
-           fsupc    = xsup[jsupno];
-           new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
-           nzlumax = Glu.nzlumax;
-           while ( new_next > nzlumax ) {
-               if ( (*info = cLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) 
)
-                   return;
-           }
-    
-           for (icol = jcol; icol<= kcol; icol++) {
-               xusub[icol+1] = nextu;
-               
-               /* Scatter into SPA dense[*] */
-               for (k = xa_begin[icol]; k < xa_end[icol]; k++)
-                   dense[asub[k]] = a[k];
-
-               /* Numeric update within the snode */
-               csnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);
-
-               if ( (*info = cpivotL(icol, diag_pivot_thresh, &usepr, perm_r,
-                                     iperm_r, iperm_c, &pivrow, &Glu, stat)) )
-                   if ( iinfo == 0 ) iinfo = *info;
-               
-#ifdef DEBUG
-               cprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu);
-#endif
-
-           }
-
-           jcol = icol;
-
-       } else { /* Work on one panel of panel_size columns */
-           
-           /* Adjust panel_size so that a panel won't overlap with the next 
-            * relaxed snode.
-            */
-           panel_size = w_def;
-           for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) 
-               if ( relax_end[k] != EMPTY ) {
-                   panel_size = k - jcol;
-                   break;
-               }
-           if ( k == min_mn ) panel_size = min_mn - jcol;
-           panel_histo[panel_size]++;
-
-           /* symbolic factor on a panel of columns */
-           cpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
-                     dense, panel_lsub, segrep, repfnz, xprune,
-                     marker, parent, xplore, &Glu);
-           
-           /* numeric sup-panel updates in topological order */
-           cpanel_bmod(m, panel_size, jcol, nseg1, dense,
-                       tempv, segrep, repfnz, &Glu, stat);
-           
-           /* Sparse LU within the panel, and below panel diagonal */
-           for ( jj = jcol; jj < jcol + panel_size; jj++) {
-               k = (jj - jcol) * m; /* column index for w-wide arrays */
-
-               nseg = nseg1;   /* Begin after all the panel segments */
-
-               if ((*info = ccolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
-                                       segrep, &repfnz[k], xprune, marker,
-                                       parent, xplore, &Glu)) != 0) 
-                 goto HOUSTON_WE_HAVE_A_PROBLEM;
-
-               /* Numeric updates */
-               if ((*info = ccolumn_bmod(jj, (nseg - nseg1), &dense[k],
-                                        tempv, &segrep[nseg1], &repfnz[k],
-                                        jcol, &Glu, stat)) != 0) 
-                 goto HOUSTON_WE_HAVE_A_PROBLEM;
-               
-               /* Copy the U-segments to ucol[*] */
-               if ((*info = ccopy_to_ucol(jj, nseg, segrep, &repfnz[k],
-                                         perm_r, &dense[k], &Glu)) != 0)
-                 goto HOUSTON_WE_HAVE_A_PROBLEM;
-
-               if ( (*info = cpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
-                                     iperm_r, iperm_c, &pivrow, &Glu, stat)) )
-                   if ( iinfo == 0 ) iinfo = *info;
-
-               /* Prune columns (0:jj-1) using column jj */
-               cpruneL(jj, perm_r, pivrow, nseg, segrep,
-                        &repfnz[k], xprune, &Glu);
-
-               /* Reset repfnz[] for this column */
-               resetrep_col (nseg, segrep, &repfnz[k]);
-               
-#ifdef DEBUG
-               cprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu);
-#endif
-
-           }
-
-           jcol += panel_size; /* Move to the next panel */
-
-       } /* else */
-
-    } /* for */
-
-    *info = iinfo;
-    
- HOUSTON_WE_HAVE_A_PROBLEM:
-    if ( m > n ) {
-       k = 0;
-        for (i = 0; i < m; ++i) 
-            if ( perm_r[i] == EMPTY ) {
-               perm_r[i] = n + k;
-               ++k;
-           }
-    }
-
-    if (*info == 0) {
-      countnz(min_mn, xprune, &nnzL, &nnzU, &Glu);
-      fixupL(min_mn, perm_r, &Glu);
-    }
-
-    cLUWorkFree(iwork, cwork, &Glu); /* Free work space and compress storage */
-
-    if ( fact == SamePattern_SameRowPerm ) {
-        /* L and U structures may have changed due to possibly different
-          pivoting, even though the storage is available.
-          There could also be memory expansions, so the array locations
-           may have changed, */
-        ((SCformat *)L->Store)->nnz = nnzL;
-       ((SCformat *)L->Store)->nsuper = Glu.supno[n];
-       ((SCformat *)L->Store)->nzval = Glu.lusup;
-       ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
-       ((SCformat *)L->Store)->rowind = Glu.lsub;
-       ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
-       ((NCformat *)U->Store)->nnz = nnzU;
-       ((NCformat *)U->Store)->nzval = Glu.ucol;
-       ((NCformat *)U->Store)->rowind = Glu.usub;
-       ((NCformat *)U->Store)->colptr = Glu.xusub;
-    } else {
-        cCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, 
-                                Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
-                                Glu.xsup, SLU_SC, SLU_C, SLU_TRLU);
-       cCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, 
-                              Glu.usub, Glu.xusub, SLU_NC, SLU_C, SLU_TRU);
-    }
-    
-    ops[FACT] += ops[TRSV] + ops[GEMV];        
-    
-    if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
-    SUPERLU_FREE (iperm_c);
-    SUPERLU_FREE (relax_end);
-
-}
diff --git a/superlu/cgstrs.c b/superlu/cgstrs.c
deleted file mode 100644
index b2d19970..00000000
--- a/superlu/cgstrs.c
+++ /dev/null
@@ -1,344 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-
-#include "slu_cdefs.h"
-extern void ctrsm_();
-extern void cgemm_();
-
-/* 
- * Function prototypes 
- */
-void cusolve(int, int, complex*, complex*);
-void clsolve(int, int, complex*, complex*);
-void cmatvec(int, int, int, complex*, complex*, complex*);
-
-
-void
-cgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
-        int *perm_c, int *perm_r, SuperMatrix *B,
-        SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * CGSTRS solves a system of linear equations A*X=B or A'*X=B
- * with A sparse and B dense, using the LU factorization computed by
- * CGSTRF.
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *
- * L       (input) SuperMatrix*
- *         The factor L from the factorization Pr*A*Pc=L*U as computed by
- *         cgstrf(). Use compressed row subscripts storage for supernodes,
- *         i.e., L has types: Stype = SLU_SC, Dtype = SLU_C, Mtype = SLU_TRLU.
- *
- * U       (input) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U as computed by
- *         cgstrf(). Use column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_C, Mtype = SLU_TRU.
- *
- * perm_c  (input) int*, dimension (L->ncol)
- *        Column permutation vector, which defines the 
- *         permutation matrix Pc; perm_c[i] = j means column i of A is 
- *         in position j in A*Pc.
- *
- * perm_r  (input) int*, dimension (L->nrow)
- *         Row permutation vector, which defines the permutation matrix Pr; 
- *         perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_C, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation 
count.
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *        = 0: successful exit
- *        < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-#endif
-    int      incx = 1, incy = 1;
-#ifdef USE_VENDOR_BLAS
-    complex   alpha = {1.0, 0.0}, beta = {1.0, 0.0};
-    complex   *work_col;
-#endif
-    complex   temp_comp;
-    DNformat *Bstore;
-    complex   *Bmat;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    complex   *Lval, *Uval;
-    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
-    int      i, j, k, iptr, jcol, n, ldb, nrhs;
-    complex   *work, *rhs_work, *soln;
-    flops_t  solve_ops;
-    void cprint_soln();
-
-    /* Test input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    ldb = Bstore->lda;
-    nrhs = B->ncol;
-    if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_C || L->Mtype != SLU_TRLU )
-       *info = -2;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-             U->Stype != SLU_NC || U->Dtype != SLU_C || U->Mtype != SLU_TRU )
-       *info = -3;
-    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_C || B->Mtype != SLU_GE )
-       *info = -6;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("cgstrs", &i);
-       return;
-    }
-
-    n = L->nrow;
-    work = complexCalloc(n * nrhs);
-    if ( !work ) ABORT("Malloc fails for local work[].");
-    soln = complexMalloc(n);
-    if ( !soln ) ABORT("Malloc fails for local soln[].");
-
-    Bmat = Bstore->nzval;
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-    
-    if ( trans == NOTRANS ) {
-       /* Permute right hand sides to form Pr*B */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       
-       /* Forward solve PLy=Pb. */
-       for (k = 0; k <= Lstore->nsuper; k++) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           nrow = nsupr - nsupc;
-
-           solve_ops += 4 * nsupc * (nsupc - 1) * nrhs;
-           solve_ops += 8 * nrow * nsupc * nrhs;
-           
-           if ( nsupc == 1 ) {
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   luptr = L_NZ_START(fsupc);
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       cc_mult(&temp_comp, &rhs_work[fsupc], &Lval[luptr]);
-                       c_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
-                   }
-               }
-           } else {
-               luptr = L_NZ_START(fsupc);
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("N", strlen("N"));
-               ftcs3 = _cptofcd("U", strlen("U"));
-               CTRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               
-               CGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-#else
-               ctrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               
-               cgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-#endif
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   work_col = &work[j*n];
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       c_sub(&rhs_work[irow], &rhs_work[irow], &work_col[i]);
-                       work_col[i].r = 0.0;
-                       work_col[i].i = 0.0;
-                       iptr++;
-                   }
-               }
-#else          
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   clsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
-                   cmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
-                           &rhs_work[fsupc], &work[0] );
-
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       c_sub(&rhs_work[irow], &rhs_work[irow], &work[i]);
-                       work[i].r = 0.;
-                       work[i].i = 0.;
-                       iptr++;
-                   }
-               }
-#endif             
-           } /* else ... */
-       } /* for L-solve */
-
-#ifdef DEBUG
-       printf("After L-solve: y=\n");
-       cprint_soln(n, nrhs, Bmat);
-#endif
-
-       /*
-        * Back solve Ux=y.
-        */
-       for (k = Lstore->nsuper; k >= 0; k--) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           luptr = L_NZ_START(fsupc);
-
-           solve_ops += 4 * nsupc * (nsupc + 1) * nrhs;
-
-           if ( nsupc == 1 ) {
-               rhs_work = &Bmat[0];
-               for (j = 0; j < nrhs; j++) {
-                   c_div(&rhs_work[fsupc], &rhs_work[fsupc], &Lval[luptr]);
-                   rhs_work += ldb;
-               }
-           } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("U", strlen("U"));
-               ftcs3 = _cptofcd("N", strlen("N"));
-               CTRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#else
-               ctrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#endif
-#else          
-               for (j = 0; j < nrhs; j++)
-                   cusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
-#endif         
-           }
-
-           for (j = 0; j < nrhs; ++j) {
-               rhs_work = &Bmat[j*ldb];
-               for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
-                   solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
-                       irow = U_SUB(i);
-                       cc_mult(&temp_comp, &rhs_work[jcol], &Uval[i]);
-                       c_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
-                   }
-               }
-           }
-           
-       } /* for U-solve */
-
-#ifdef DEBUG
-       printf("After U-solve: x=\n");
-       cprint_soln(n, nrhs, Bmat);
-#endif
-
-       /* Compute the final solution X := Pc*X. */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       
-        stat->ops[SOLVE] = solve_ops;
-
-    } else { /* Solve A'*X=B or CONJ(A)*X=B */
-       /* Permute right hand sides to form Pc'*B. */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-
-       stat->ops[SOLVE] = 0;
-        if (trans == TRANS) {
-           for (k = 0; k < nrhs; ++k) {
-               /* Multiply by inv(U'). */
-               sp_ctrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
-           
-               /* Multiply by inv(L'). */
-               sp_ctrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
-           }
-         } else { /* trans == CONJ */
-            for (k = 0; k < nrhs; ++k) {                
-                /* Multiply by conj(inv(U')). */
-                sp_ctrsv("U", "C", "N", L, U, &Bmat[k*ldb], stat, info);
-                
-                /* Multiply by conj(inv(L')). */
-                sp_ctrsv("L", "C", "U", L, U, &Bmat[k*ldb], stat, info);
-           }
-         }
-       /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-
-    }
-
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(soln);
-}
-
-/*
- * Diagnostic print of the solution vector 
- */
-void
-cprint_soln(int n, int nrhs, complex *soln)
-{
-    int i;
-
-    for (i = 0; i < n; i++) 
-       printf("\t%d: %.4f\n", i, soln[i].r);
-}
diff --git a/superlu/clacon.c b/superlu/clacon.c
deleted file mode 100644
index 6e332f67..00000000
--- a/superlu/clacon.c
+++ /dev/null
@@ -1,236 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include <math.h>
-#include "slu_Cnames.h"
-#include "slu_scomplex.h"
-extern void ccopy_();
-
-int
-clacon_(int *n, complex *v, complex *x, float *est, int *kase)
-
-{
-/*
-    Purpose   
-    =======   
-
-    CLACON estimates the 1-norm of a square matrix A.   
-    Reverse communication is used for evaluating matrix-vector products. 
-  
-
-    Arguments   
-    =========   
-
-    N      (input) INT
-           The order of the matrix.  N >= 1.   
-
-    V      (workspace) COMPLEX PRECISION array, dimension (N)   
-           On the final return, V = A*W,  where  EST = norm(V)/norm(W)   
-           (W is not returned).   
-
-    X      (input/output) COMPLEX PRECISION array, dimension (N)   
-           On an intermediate return, X should be overwritten by   
-                 A * X,   if KASE=1,   
-                 A' * X,  if KASE=2,
-           where A' is the conjugate transpose of A,
-           and CLACON must be re-called with all the other parameters   
-           unchanged.   
-
-
-    EST    (output) FLOAT PRECISION   
-           An estimate (a lower bound) for norm(A).   
-
-    KASE   (input/output) INT
-           On the initial call to CLACON, KASE should be 0.   
-           On an intermediate return, KASE will be 1 or 2, indicating   
-           whether X should be overwritten by A * X  or A' * X.   
-           On the final return from CLACON, KASE will again be 0.   
-
-    Further Details   
-    ======= =======   
-
-    Contributed by Nick Higham, University of Manchester.   
-    Originally named CONEST, dated March 16, 1988.   
-
-    Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of 
-    a real or complex matrix, with applications to condition estimation", 
-    ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.   
-    ===================================================================== 
-*/
-
-    /* Table of constant values */
-    int c__1 = 1;
-    complex      zero = {0.0, 0.0};
-    complex      one = {1.0, 0.0};
-
-    /* System generated locals */
-    float d__1;
-    
-    /* Local variables */
-    static int iter;
-    static int jump, jlast;
-    static float altsgn, estold;
-    static int i, j;
-    float temp;
-    float safmin;
-    extern double slamch_(char *);
-    extern int icmax1_(int *, complex *, int *);
-    extern double scsum1_(int *, complex *, int *);
-
-    safmin = slamch_("Safe minimum");
-    if ( *kase == 0 ) {
-       for (i = 0; i < *n; ++i) {
-           x[i].r = 1. / (float) (*n);
-           x[i].i = 0.;
-       }
-       *kase = 1;
-       jump = 1;
-       return 0;
-    }
-
-    switch (jump) {
-       case 1:  goto L20;
-       case 2:  goto L40;
-       case 3:  goto L70;
-       case 4:  goto L110;
-       case 5:  goto L140;
-    }
-
-    /*     ................ ENTRY   (JUMP = 1)   
-          FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X. */
-  L20:
-    if (*n == 1) {
-       v[0] = x[0];
-       *est = c_abs(&v[0]);
-       /*        ... QUIT */
-       goto L150;
-    }
-    *est = scsum1_(n, x, &c__1);
-
-    for (i = 0; i < *n; ++i) {
-       d__1 = c_abs(&x[i]);
-       if (d__1 > safmin) {
-           d__1 = 1 / d__1;
-           x[i].r *= d__1;
-           x[i].i *= d__1;
-       } else {
-           x[i] = one;
-       }
-    }
-    *kase = 2;
-    jump = 2;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 2)   
-          FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */
-L40:
-    j = icmax1_(n, &x[0], &c__1);
-    --j;
-    iter = 2;
-
-    /*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */
-L50:
-    for (i = 0; i < *n; ++i) x[i] = zero;
-    x[j] = one;
-    *kase = 1;
-    jump = 3;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 3)   
-          X HAS BEEN OVERWRITTEN BY A*X. */
-L70:
-#ifdef _CRAY
-    CCOPY(n, x, &c__1, v, &c__1);
-#else
-    ccopy_(n, x, &c__1, v, &c__1);
-#endif
-    estold = *est;
-    *est = scsum1_(n, v, &c__1);
-
-
-L90:
-    /*     TEST FOR CYCLING. */
-    if (*est <= estold) goto L120;
-
-    for (i = 0; i < *n; ++i) {
-       d__1 = c_abs(&x[i]);
-       if (d__1 > safmin) {
-           d__1 = 1 / d__1;
-           x[i].r *= d__1;
-           x[i].i *= d__1;
-       } else {
-           x[i] = one;
-       }
-    }
-    *kase = 2;
-    jump = 4;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 4)   
-          X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X. */
-L110:
-    jlast = j;
-    j = icmax1_(n, &x[0], &c__1);
-    --j;
-    if (x[jlast].r != (d__1 = x[j].r, fabs(d__1)) && iter < 5) {
-       ++iter;
-       goto L50;
-    }
-
-    /*     ITERATION COMPLETE.  FINAL STAGE. */
-L120:
-    altsgn = 1.;
-    for (i = 1; i <= *n; ++i) {
-       x[i-1].r = altsgn * ((float)(i - 1) / (float)(*n - 1) + 1.);
-       x[i-1].i = 0.;
-       altsgn = -altsgn;
-    }
-    *kase = 1;
-    jump = 5;
-    return 0;
-    
-    /*     ................ ENTRY   (JUMP = 5)   
-          X HAS BEEN OVERWRITTEN BY A*X. */
-L140:
-    temp = scsum1_(n, x, &c__1) / (float)(*n * 3) * 2.;
-    if (temp > *est) {
-#ifdef _CRAY
-       CCOPY(n, &x[0], &c__1, &v[0], &c__1);
-#else
-       ccopy_(n, &x[0], &c__1, &v[0], &c__1);
-#endif
-       *est = temp;
-    }
-
-L150:
-    *kase = 0;
-    return 0;
-
-} /* clacon_ */
diff --git a/superlu/clangs.c b/superlu/clangs.c
deleted file mode 100644
index f24c3808..00000000
--- a/superlu/clangs.c
+++ /dev/null
@@ -1,132 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-/*
- * File name:  clangs.c
- * History:     Modified from lapack routine CLANGE
- */
-#include <math.h>
-#include "slu_cdefs.h"
-
-float clangs(char *norm, SuperMatrix *A)
-{
-/* 
-    Purpose   
-    =======   
-
-    CLANGS returns the value of the one norm, or the Frobenius norm, or 
-    the infinity norm, or the element of largest absolute value of a 
-    real matrix A.   
-
-    Description   
-    ===========   
-
-    CLANGE returns the value   
-
-       CLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
-                (   
-                ( norm1(A),         NORM = '1', 'O' or 'o'   
-                (   
-                ( normI(A),         NORM = 'I' or 'i'   
-                (   
-                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'   
-
-    where  norm1  denotes the  one norm of a matrix (maximum column sum), 
-    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
-    normF  denotes the  Frobenius norm of a matrix (square root of sum of 
-    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   
-
-    Arguments   
-    =========   
-
-    NORM    (input) CHARACTER*1   
-            Specifies the value to be returned in CLANGE as described above.   
-    A       (input) SuperMatrix*
-            The M by N sparse matrix A. 
-
-   ===================================================================== 
-*/
-    
-    /* Local variables */
-    NCformat *Astore;
-    complex   *Aval;
-    int      i, j, irow;
-    float   value, sum;
-    float   *rwork;
-
-    Astore = A->Store;
-    Aval   = Astore->nzval;
-    
-    if ( SUPERLU_MIN(A->nrow, A->ncol) == 0) {
-       value = 0.;
-       
-    } else if (lsame_(norm, "M")) {
-       /* Find max(abs(A(i,j))). */
-       value = 0.;
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++)
-               value = SUPERLU_MAX( value, c_abs( &Aval[i]) );
-       
-    } else if (lsame_(norm, "O") || *(unsigned char *)norm == '1') {
-       /* Find norm1(A). */
-       value = 0.;
-       for (j = 0; j < A->ncol; ++j) {
-           sum = 0.;
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) 
-               sum += c_abs( &Aval[i] );
-           value = SUPERLU_MAX(value,sum);
-       }
-       
-    } else if (lsame_(norm, "I")) {
-       /* Find normI(A). */
-       if ( !(rwork = (float *) SUPERLU_MALLOC(A->nrow * sizeof(float))) )
-           ABORT("SUPERLU_MALLOC fails for rwork.");
-       for (i = 0; i < A->nrow; ++i) rwork[i] = 0.;
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) {
-               irow = Astore->rowind[i];
-               rwork[irow] += c_abs( &Aval[i] );
-           }
-       value = 0.;
-       for (i = 0; i < A->nrow; ++i)
-           value = SUPERLU_MAX(value, rwork[i]);
-       
-       SUPERLU_FREE (rwork);
-       
-    } else if (lsame_(norm, "F") || lsame_(norm, "E")) {
-       /* Find normF(A). */
-       ABORT("Not implemented.");
-    } else
-       ABORT("Illegal norm specified.");
-
-    return (value);
-
-} /* clangs */
-
diff --git a/superlu/claqgs.c b/superlu/claqgs.c
deleted file mode 100644
index cfecc376..00000000
--- a/superlu/claqgs.c
+++ /dev/null
@@ -1,160 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-/*
- * File name:  claqgs.c
- * History:     Modified from LAPACK routine CLAQGE
- */
-#include <math.h>
-#include "slu_cdefs.h"
-
-void
-claqgs(SuperMatrix *A, float *r, float *c, 
-       float rowcnd, float colcnd, float amax, char *equed)
-{
-/*
-    Purpose   
-    =======   
-
-    CLAQGS equilibrates a general sparse M by N matrix A using the row and   
-    scaling factors in the vectors R and C.   
-
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
-
-    Arguments   
-    =========   
-
-    A       (input/output) SuperMatrix*
-            On exit, the equilibrated matrix.  See EQUED for the form of 
-            the equilibrated matrix. The type of A can be:
-           Stype = NC; Dtype = SLU_C; Mtype = GE.
-           
-    R       (input) float*, dimension (A->nrow)
-            The row scale factors for A.
-           
-    C       (input) float*, dimension (A->ncol)
-            The column scale factors for A.
-           
-    ROWCND  (input) float
-            Ratio of the smallest R(i) to the largest R(i).
-           
-    COLCND  (input) float
-            Ratio of the smallest C(i) to the largest C(i).
-           
-    AMAX    (input) float
-            Absolute value of largest matrix entry.
-           
-    EQUED   (output) char*
-            Specifies the form of equilibration that was done.   
-            = 'N':  No equilibration   
-            = 'R':  Row equilibration, i.e., A has been premultiplied by  
-                    diag(R).   
-            = 'C':  Column equilibration, i.e., A has been postmultiplied  
-                    by diag(C).   
-            = 'B':  Both row and column equilibration, i.e., A has been
-                    replaced by diag(R) * A * diag(C).   
-
-    Internal Parameters   
-    ===================   
-
-    THRESH is a threshold value used to decide if row or column scaling   
-    should be done based on the ratio of the row or column scaling   
-    factors.  If ROWCND < THRESH, row scaling is done, and if   
-    COLCND < THRESH, column scaling is done.   
-
-    LARGE and SMALL are threshold values used to decide if row scaling   
-    should be done based on the absolute size of the largest matrix   
-    element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.   
-
-    ===================================================================== 
-*/
-
-#define THRESH    (0.1)
-    
-    /* Local variables */
-    NCformat *Astore;
-    complex   *Aval;
-    int i, j, irow;
-    float large, small, cj;
-    extern double slamch_(char *);
-    float temp;
-
-
-    /* Quick return if possible */
-    if (A->nrow <= 0 || A->ncol <= 0) {
-       *(unsigned char *)equed = 'N';
-       return;
-    }
-
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    
-    /* Initialize LARGE and SMALL. */
-    small = slamch_("Safe minimum") / slamch_("Precision");
-    large = 1. / small;
-
-    if (rowcnd >= THRESH && amax >= small && amax <= large) {
-       if (colcnd >= THRESH)
-           *(unsigned char *)equed = 'N';
-       else {
-           /* Column scaling */
-           for (j = 0; j < A->ncol; ++j) {
-               cj = c[j];
-               for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                   cs_mult(&Aval[i], &Aval[i], cj);
-                }
-           }
-           *(unsigned char *)equed = 'C';
-       }
-    } else if (colcnd >= THRESH) {
-       /* Row scaling, no column scaling */
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-               irow = Astore->rowind[i];
-               cs_mult(&Aval[i], &Aval[i], r[irow]);
-           }
-       *(unsigned char *)equed = 'R';
-    } else {
-       /* Row and column scaling */
-       for (j = 0; j < A->ncol; ++j) {
-           cj = c[j];
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-               irow = Astore->rowind[i];
-               temp = cj * r[irow];
-               cs_mult(&Aval[i], &Aval[i], temp);
-           }
-       }
-       *(unsigned char *)equed = 'B';
-    }
-
-    return;
-
-} /* claqgs */
-
diff --git a/superlu/cmemory.c b/superlu/cmemory.c
deleted file mode 100644
index 3f364b46..00000000
--- a/superlu/cmemory.c
+++ /dev/null
@@ -1,691 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-
-#include "slu_cdefs.h"
-
-/* Constants */
-#define NO_MEMTYPE  4      /* 0: lusup;
-                             1: ucol;
-                             2: lsub;
-                             3: usub */
-#define GluIntArray(n)   (5 * (n) + 5)
-
-/* Internal prototypes */
-void  *cexpand (int *, MemType,int, int, GlobalLU_t *);
-int   cLUWorkInit (int, int, int, int **, complex **, LU_space_t);
-void  copy_mem_complex (int, void *, void *);
-void  cStackCompress (GlobalLU_t *);
-void  cSetupSpace (void *, int, LU_space_t *);
-void  *cuser_malloc (int, int);
-void  cuser_free (int, int);
-
-/* External prototypes (in memory.c - prec-indep) */
-extern void    copy_mem_int    (int, void *, void *);
-extern void    user_bcopy      (char *, char *, int);
-
-/* Headers for 4 types of dynamatically managed memory */
-typedef struct e_node {
-    int size;      /* length of the memory that has been used */
-    void *mem;     /* pointer to the new malloc'd store */
-} ExpHeader;
-
-typedef struct {
-    int  size;
-    int  used;
-    int  top1;  /* grow upward, relative to &array[0] */
-    int  top2;  /* grow downward */
-    void *array;
-} LU_stack_t;
-
-/* Variables local to this file */
-static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */
-static LU_stack_t stack;
-static int no_expand;
-
-/* Macros to manipulate stack */
-#define StackFull(x)         ( x + stack.used >= stack.size )
-#define NotDoubleAlign(addr) ( (long int)addr & 7 )
-#define DoubleAlign(addr)    ( ((long int)addr + 7) & ~7L )
-#define TempSpace(m, w)      ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
-                             (w + 1) * m * sizeof(complex) )
-#define Reduce(alpha)        ((alpha + 1) / 2)  /* i.e. (alpha-1)/2 + 1 */
-
-
-
-
-/*
- * Setup the memory model to be used for factorization.
- *    lwork = 0: use system malloc;
- *    lwork > 0: use user-supplied work[] space.
- */
-void cSetupSpace(void *work, int lwork, LU_space_t *MemModel)
-{
-    if ( lwork == 0 ) {
-       *MemModel = SYSTEM; /* malloc/free */
-    } else if ( lwork > 0 ) {
-       *MemModel = USER;   /* user provided space */
-       stack.used = 0;
-       stack.top1 = 0;
-       stack.top2 = (lwork/4)*4; /* must be word addressable */
-       stack.size = stack.top2;
-       stack.array = (void *) work;
-    }
-}
-
-
-
-void *cuser_malloc(int bytes, int which_end)
-{
-    void *buf;
-    
-    if ( StackFull(bytes) ) return (NULL);
-
-    if ( which_end == HEAD ) {
-       buf = (char*) stack.array + stack.top1;
-       stack.top1 += bytes;
-    } else {
-       stack.top2 -= bytes;
-       buf = (char*) stack.array + stack.top2;
-    }
-    
-    stack.used += bytes;
-    return buf;
-}
-
-
-void cuser_free(int bytes, int which_end)
-{
-    if ( which_end == HEAD ) {
-       stack.top1 -= bytes;
-    } else {
-       stack.top2 += bytes;
-    }
-    stack.used -= bytes;
-}
-
-
-
-/*
- * mem_usage consists of the following fields:
- *    - for_lu (float)
- *      The amount of space used in bytes for the L\U data structures.
- *    - total_needed (float)
- *      The amount of space needed in bytes to perform factorization.
- *    - expansions (int)
- *      Number of memory expansions during the LU factorization.
- */
-int cQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
-{
-    SCformat *Lstore;
-    NCformat *Ustore;
-    register int n, iword, dword, panel_size = sp_ienv(1);
-
-    Lstore = L->Store;
-    Ustore = U->Store;
-    n = L->ncol;
-    iword = sizeof(int);
-    dword = sizeof(complex);
-
-    /* For LU factors */
-    mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
-                                dword + Lstore->rowind_colptr[n] * iword );
-    mem_usage->for_lu += (float)( (n + 1) * iword +
-                                Ustore->colptr[n] * (dword + iword) );
-
-    /* Working storage to support factorization */
-    mem_usage->total_needed = mem_usage->for_lu +
-       (float)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
-               (panel_size + 1) * n * dword );
-
-    mem_usage->expansions = --no_expand;
-
-    return 0;
-} /* cQuerySpace */
-
-/*
- * Allocate storage for the data structures common to all factor routines.
- * For those unpredictable size, make a guess as FILL * nnz(A).
- * Return value:
- *     If lwork = -1, return the estimated amount of space required, plus n;
- *     otherwise, return the amount of space actually allocated when
- *     memory allocation failure occurred.
- */
-int
-cLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
-         int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu,
-         int **iwork, complex **dwork)
-{
-    int      info, iword, dword;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    int      *xsup, *supno;
-    int      *lsub, *xlsub;
-    complex   *lusup;
-    int      *xlusup;
-    complex   *ucol;
-    int      *usub, *xusub;
-    int      nzlmax, nzumax, nzlumax;
-    int      FILL = sp_ienv(6);
-    
-    Glu->n    = n;
-    no_expand = 0;
-    iword     = sizeof(int);
-    dword     = sizeof(complex);
-
-    if ( !expanders )  
-        expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader));
-    if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
-    
-    if ( fact != SamePattern_SameRowPerm ) {
-       /* Guess for L\U factors */
-       nzumax = nzlumax = FILL * annz;
-       nzlmax = SUPERLU_MAX(1, FILL/4.) * annz;
-
-       if ( lwork == -1 ) {
-           return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
-                   + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
-        } else {
-           cSetupSpace(work, lwork, &Glu->MemModel);
-       }
-       
-#if ( PRNTlevel >= 1 )
-       printf("cLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n", 
-              FILL, nzlmax, nzumax);
-       fflush(stdout);
-#endif 
-       
-       /* Integer pointers for L\U factors */
-       if ( Glu->MemModel == SYSTEM ) {
-           xsup   = intMalloc(n+1);
-           supno  = intMalloc(n+1);
-           xlsub  = intMalloc(n+1);
-           xlusup = intMalloc(n+1);
-           xusub  = intMalloc(n+1);
-       } else {
-           xsup   = (int *)cuser_malloc((n+1) * iword, HEAD);
-           supno  = (int *)cuser_malloc((n+1) * iword, HEAD);
-           xlsub  = (int *)cuser_malloc((n+1) * iword, HEAD);
-           xlusup = (int *)cuser_malloc((n+1) * iword, HEAD);
-           xusub  = (int *)cuser_malloc((n+1) * iword, HEAD);
-       }
-
-       lusup = (complex *) cexpand( &nzlumax, LUSUP, 0, 0, Glu );
-       ucol  = (complex *) cexpand( &nzumax, UCOL, 0, 0, Glu );
-       lsub  = (int *)    cexpand( &nzlmax, LSUB, 0, 0, Glu );
-       usub  = (int *)    cexpand( &nzumax, USUB, 0, 1, Glu );
-
-       while ( !lusup || !ucol || !lsub || !usub ) {
-           if ( Glu->MemModel == SYSTEM ) {
-               SUPERLU_FREE(lusup); 
-               SUPERLU_FREE(ucol); 
-               SUPERLU_FREE(lsub); 
-               SUPERLU_FREE(usub);
-           } else {
-               cuser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD);
-           }
-           nzlumax /= 2;
-           nzumax /= 2;
-           nzlmax /= 2;
-           if ( nzlumax < annz ) {
-               printf("Not enough memory to perform factorization.\n");
-               return (cmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
-           }
-#if ( PRNTlevel >= 1)
-           printf("cLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", 
-                  nzlmax, nzumax);
-           fflush(stdout);
-#endif
-           lusup = (complex *) cexpand( &nzlumax, LUSUP, 0, 0, Glu );
-           ucol  = (complex *) cexpand( &nzumax, UCOL, 0, 0, Glu );
-           lsub  = (int *)    cexpand( &nzlmax, LSUB, 0, 0, Glu );
-           usub  = (int *)    cexpand( &nzumax, USUB, 0, 1, Glu );
-       }
-       
-    } else {
-       /* fact == SamePattern_SameRowPerm */
-       Lstore   = L->Store;
-       Ustore   = U->Store;
-       xsup     = Lstore->sup_to_col;
-       supno    = Lstore->col_to_sup;
-       xlsub    = Lstore->rowind_colptr;
-       xlusup   = Lstore->nzval_colptr;
-       xusub    = Ustore->colptr;
-       nzlmax   = Glu->nzlmax;    /* max from previous factorization */
-       nzumax   = Glu->nzumax;
-       nzlumax  = Glu->nzlumax;
-       
-       if ( lwork == -1 ) {
-           return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
-                   + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
-        } else if ( lwork == 0 ) {
-           Glu->MemModel = SYSTEM;
-       } else {
-           Glu->MemModel = USER;
-           stack.top2 = (lwork/4)*4; /* must be word-addressable */
-           stack.size = stack.top2;
-       }
-       
-       lsub  = expanders[LSUB].mem  = Lstore->rowind;
-       lusup = expanders[LUSUP].mem = Lstore->nzval;
-       usub  = expanders[USUB].mem  = Ustore->rowind;
-       ucol  = expanders[UCOL].mem  = Ustore->nzval;;
-       expanders[LSUB].size         = nzlmax;
-       expanders[LUSUP].size        = nzlumax;
-       expanders[USUB].size         = nzumax;
-       expanders[UCOL].size         = nzumax;  
-    }
-
-    Glu->xsup    = xsup;
-    Glu->supno   = supno;
-    Glu->lsub    = lsub;
-    Glu->xlsub   = xlsub;
-    Glu->lusup   = lusup;
-    Glu->xlusup  = xlusup;
-    Glu->ucol    = ucol;
-    Glu->usub    = usub;
-    Glu->xusub   = xusub;
-    Glu->nzlmax  = nzlmax;
-    Glu->nzumax  = nzumax;
-    Glu->nzlumax = nzlumax;
-    
-    info = cLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel);
-    if ( info )
-       return ( info + cmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
-    
-    ++no_expand;
-    return 0;
-    
-} /* cLUMemInit */
-
-/* Allocate known working storage. Returns 0 if success, otherwise
-   returns the number of bytes allocated so far when failure occurred. */
-int
-cLUWorkInit(int m, int n, int panel_size, int **iworkptr, 
-            complex **dworkptr, LU_space_t MemModel)
-{
-    int    isize, dsize, extra;
-    complex *old_ptr;
-    int    maxsuper = sp_ienv(3),
-           rowblk   = sp_ienv(4);
-
-    isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
-    dsize = (m * panel_size +
-            NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(complex);
-    
-    if ( MemModel == SYSTEM ) 
-       *iworkptr = (int *) intCalloc(isize/sizeof(int));
-    else
-       *iworkptr = (int *) cuser_malloc(isize, TAIL);
-    if ( ! *iworkptr ) {
-       fprintf(stderr, "cLUWorkInit: malloc fails for local iworkptr[]\n");
-       return (isize + n);
-    }
-
-    if ( MemModel == SYSTEM )
-       *dworkptr = (complex *) SUPERLU_MALLOC(dsize);
-    else {
-       *dworkptr = (complex *) cuser_malloc(dsize, TAIL);
-       if ( NotDoubleAlign(*dworkptr) ) {
-           old_ptr = *dworkptr;
-           *dworkptr = (complex*) DoubleAlign(*dworkptr);
-           *dworkptr = (complex*) ((double*)*dworkptr - 1);
-           extra = (char*)old_ptr - (char*)*dworkptr;
-#ifdef DEBUG       
-           printf("cLUWorkInit: not aligned, extra %d\n", extra);
-#endif     
-           stack.top2 -= extra;
-           stack.used += extra;
-       }
-    }
-    if ( ! *dworkptr ) {
-       fprintf(stderr, "malloc fails for local dworkptr[].");
-       return (isize + dsize + n);
-    }
-       
-    return 0;
-}
-
-
-/*
- * Set up pointers for real working arrays.
- */
-void
-cSetRWork(int m, int panel_size, complex *dworkptr,
-        complex **dense, complex **tempv)
-{
-    complex zero = {0.0, 0.0};
-
-    int maxsuper = sp_ienv(3),
-        rowblk   = sp_ienv(4);
-    *dense = dworkptr;
-    *tempv = *dense + panel_size*m;
-    cfill (*dense, m * panel_size, zero);
-    cfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);     
-}
-       
-/*
- * Free the working storage used by factor routines.
- */
-void cLUWorkFree(int *iwork, complex *dwork, GlobalLU_t *Glu)
-{
-    if ( Glu->MemModel == SYSTEM ) {
-       SUPERLU_FREE (iwork);
-       SUPERLU_FREE (dwork);
-    } else {
-       stack.used -= (stack.size - stack.top2);
-       stack.top2 = stack.size;
-/*     cStackCompress(Glu);  */
-    }
-    
-    SUPERLU_FREE (expanders);  
-    expanders = 0;
-}
-
-/* Expand the data structures for L and U during the factorization.
- * Return value:   0 - successful return
- *               > 0 - number of bytes allocated when run out of space
- */
-int
-cLUMemXpand(int jcol,
-          int next,          /* number of elements currently in the factors */
-          MemType mem_type,  /* which type of memory to expand  */
-          int *maxlen,       /* modified - maximum length of a data structure 
*/
-          GlobalLU_t *Glu    /* modified - global LU data structures */
-          )
-{
-    void   *new_mem;
-    
-#ifdef DEBUG    
-    printf("cLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n",
-          jcol, next, *maxlen, mem_type);
-#endif    
-
-    if (mem_type == USUB) 
-       new_mem = cexpand(maxlen, mem_type, next, 1, Glu);
-    else
-       new_mem = cexpand(maxlen, mem_type, next, 0, Glu);
-    
-    if ( !new_mem ) {
-       int    nzlmax  = Glu->nzlmax;
-       int    nzumax  = Glu->nzumax;
-       int    nzlumax = Glu->nzlumax;
-       fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
-       return (cmemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
-    }
-
-    switch ( mem_type ) {
-      case LUSUP:
-       Glu->lusup   = (complex *) new_mem;
-       Glu->nzlumax = *maxlen;
-       break;
-      case UCOL:
-       Glu->ucol   = (complex *) new_mem;
-       Glu->nzumax = *maxlen;
-       break;
-      case LSUB:
-       Glu->lsub   = (int *) new_mem;
-       Glu->nzlmax = *maxlen;
-       break;
-      case USUB:
-       Glu->usub   = (int *) new_mem;
-       Glu->nzumax = *maxlen;
-       break;
-    }
-    
-    return 0;
-    
-}
-
-
-
-void
-copy_mem_complex(int howmany, void *old, void *new)
-{
-    register int i;
-    complex *dold = old;
-    complex *dnew = new;
-    for (i = 0; i < howmany; i++) dnew[i] = dold[i];
-}
-
-/*
- * Expand the existing storage to accommodate more fill-ins.
- */
-void
-*cexpand (
-        int *prev_len,   /* length used from previous call */
-        MemType type,    /* which part of the memory to expand */
-        int len_to_copy, /* size of the memory to be copied to new store */
-        int keep_prev,   /* = 1: use prev_len;
-                            = 0: compute new_len to expand */
-        GlobalLU_t *Glu  /* modified - global LU data structures */
-       )
-{
-    float    EXPAND = 1.5;
-    float    alpha;
-    void     *new_mem, *old_mem;
-    int      new_len, tries, lword, extra, bytes_to_copy;
-
-    alpha = EXPAND;
-
-    if ( no_expand == 0 || keep_prev ) /* First time allocate requested */
-        new_len = *prev_len;
-    else {
-       new_len = alpha * *prev_len;
-    }
-    
-    if ( type == LSUB || type == USUB ) lword = sizeof(int);
-    else lword = sizeof(complex);
-
-    if ( Glu->MemModel == SYSTEM ) {
-       new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
-       if ( no_expand != 0 ) {
-           tries = 0;
-           if ( keep_prev ) {
-               if ( !new_mem ) return (NULL);
-           } else {
-               while ( !new_mem ) {
-                   if ( ++tries > 10 ) return (NULL);
-                   alpha = Reduce(alpha);
-                   new_len = alpha * *prev_len;
-                   new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
-               }
-           }
-           if ( type == LSUB || type == USUB ) {
-               copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
-           } else {
-               copy_mem_complex(len_to_copy, expanders[type].mem, new_mem);
-           }
-           SUPERLU_FREE (expanders[type].mem);
-       }
-       expanders[type].mem = (void *) new_mem;
-       
-    } else { /* MemModel == USER */
-       if ( no_expand == 0 ) {
-           new_mem = cuser_malloc(new_len * lword, HEAD);
-           if ( NotDoubleAlign(new_mem) &&
-               (type == LUSUP || type == UCOL) ) {
-               old_mem = new_mem;
-               new_mem = (void *)DoubleAlign(new_mem);
-               extra = (char*)new_mem - (char*)old_mem;
-#ifdef DEBUG           
-               printf("expand(): not aligned, extra %d\n", extra);
-#endif         
-               stack.top1 += extra;
-               stack.used += extra;
-           }
-           expanders[type].mem = (void *) new_mem;
-       }
-       else {
-           tries = 0;
-           extra = (new_len - *prev_len) * lword;
-           if ( keep_prev ) {
-               if ( StackFull(extra) ) return (NULL);
-           } else {
-               while ( StackFull(extra) ) {
-                   if ( ++tries > 10 ) return (NULL);
-                   alpha = Reduce(alpha);
-                   new_len = alpha * *prev_len;
-                   extra = (new_len - *prev_len) * lword;          
-               }
-           }
-
-           if ( type != USUB ) {
-               new_mem = (void*)((char*)expanders[type + 1].mem + extra);
-               bytes_to_copy = (char*)stack.array + stack.top1
-                   - (char*)expanders[type + 1].mem;
-               user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
-
-               if ( type < USUB ) {
-                   Glu->usub = expanders[USUB].mem =
-                       (void*)((char*)expanders[USUB].mem + extra);
-               }
-               if ( type < LSUB ) {
-                   Glu->lsub = expanders[LSUB].mem =
-                       (void*)((char*)expanders[LSUB].mem + extra);
-               }
-               if ( type < UCOL ) {
-                   Glu->ucol = expanders[UCOL].mem =
-                       (void*)((char*)expanders[UCOL].mem + extra);
-               }
-               stack.top1 += extra;
-               stack.used += extra;
-               if ( type == UCOL ) {
-                   stack.top1 += extra;   /* Add same amount for USUB */
-                   stack.used += extra;
-               }
-               
-           } /* if ... */
-
-       } /* else ... */
-    }
-
-    expanders[type].size = new_len;
-    *prev_len = new_len;
-    if ( no_expand ) ++no_expand;
-    
-    return (void *) expanders[type].mem;
-    
-} /* cexpand */
-
-
-/*
- * Compress the work[] array to remove fragmentation.
- */
-void
-cStackCompress(GlobalLU_t *Glu)
-{
-    register int iword, dword, ndim;
-    char    *last, *fragment;
-    int      *ifrom, *ito;
-    complex   *dfrom, *dto;
-    int      *xlsub, *lsub, *xusub, *usub, *xlusup;
-    complex   *ucol, *lusup;
-    
-    iword = sizeof(int);
-    dword = sizeof(complex);
-    ndim = Glu->n;
-
-    xlsub  = Glu->xlsub;
-    lsub   = Glu->lsub;
-    xusub  = Glu->xusub;
-    usub   = Glu->usub;
-    xlusup = Glu->xlusup;
-    ucol   = Glu->ucol;
-    lusup  = Glu->lusup;
-    
-    dfrom = ucol;
-    dto = (complex *)((char*)lusup + xlusup[ndim] * dword);
-    copy_mem_complex(xusub[ndim], dfrom, dto);
-    ucol = dto;
-
-    ifrom = lsub;
-    ito = (int *) ((char*)ucol + xusub[ndim] * iword);
-    copy_mem_int(xlsub[ndim], ifrom, ito);
-    lsub = ito;
-    
-    ifrom = usub;
-    ito = (int *) ((char*)lsub + xlsub[ndim] * iword);
-    copy_mem_int(xusub[ndim], ifrom, ito);
-    usub = ito;
-    
-    last = (char*)usub + xusub[ndim] * iword;
-    fragment = (char*) (((char*)stack.array + stack.top1) - last);
-    stack.used -= (long int) fragment;
-    stack.top1 -= (long int) fragment;
-
-    Glu->ucol = ucol;
-    Glu->lsub = lsub;
-    Glu->usub = usub;
-    
-#ifdef DEBUG
-    printf("cStackCompress: fragment %d\n", fragment);
-    /* for (last = 0; last < ndim; ++last)
-       print_lu_col("After compress:", last, 0);*/
-#endif    
-    
-}
-
-/*
- * Allocate storage for original matrix A
- */
-void
-callocateA(int n, int nnz, complex **a, int **asub, int **xa)
-{
-    *a    = (complex *) complexMalloc(nnz);
-    *asub = (int *) intMalloc(nnz);
-    *xa   = (int *) intMalloc(n+1);
-}
-
-
-complex *complexMalloc(int n)
-{
-    complex *buf;
-    buf = (complex *) SUPERLU_MALLOC((size_t)n * sizeof(complex)); 
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC failed for buf in complexMalloc()\n");
-    }
-    return (buf);
-}
-
-complex *complexCalloc(int n)
-{
-    complex *buf;
-    register int i;
-    complex zero = {0.0, 0.0};
-    buf = (complex *) SUPERLU_MALLOC((size_t)n * sizeof(complex));
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC failed for buf in complexCalloc()\n");
-    }
-    for (i = 0; i < n; ++i) buf[i] = zero;
-    return (buf);
-}
-
-
-int cmemory_usage(const int nzlmax, const int nzumax, 
-                 const int nzlumax, const int n)
-{
-    register int iword, dword;
-
-    iword   = sizeof(int);
-    dword   = sizeof(complex);
-    
-    return (10 * n * iword +
-           nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
-
-}
diff --git a/superlu/cmyblas2.c b/superlu/cmyblas2.c
deleted file mode 100644
index 7717baef..00000000
--- a/superlu/cmyblas2.c
+++ /dev/null
@@ -1,204 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-/*
- * File name:          cmyblas2.c
- * Purpose:
- *     Level 2 BLAS operations: solves and matvec, written in C.
- * Note:
- *     This is only used when the system lacks an efficient BLAS library.
- */
-#include "slu_scomplex.h"
-
-/*
- * Solves a dense UNIT lower triangular system. The unit lower 
- * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 
- * The solution will be returned in the rhs vector.
- */
-void clsolve ( int ldm, int ncol, complex *M, complex *rhs )
-{
-    int k;
-    complex x0, x1, x2, x3, temp;
-    complex *M0;
-    complex *Mki0, *Mki1, *Mki2, *Mki3;
-    register int firstcol = 0;
-
-    M0 = &M[0];
-
-
-    while ( firstcol < ncol - 3 ) { /* Do 4 columns */
-       Mki0 = M0 + 1;
-       Mki1 = Mki0 + ldm + 1;
-       Mki2 = Mki1 + ldm + 1;
-       Mki3 = Mki2 + ldm + 1;
-
-       x0 = rhs[firstcol];
-       cc_mult(&temp, &x0, Mki0); Mki0++;
-       c_sub(&x1, &rhs[firstcol+1], &temp);
-       cc_mult(&temp, &x0, Mki0); Mki0++;
-       c_sub(&x2, &rhs[firstcol+2], &temp);
-       cc_mult(&temp, &x1, Mki1); Mki1++;
-       c_sub(&x2, &x2, &temp);
-       cc_mult(&temp, &x0, Mki0); Mki0++;
-       c_sub(&x3, &rhs[firstcol+3], &temp);
-       cc_mult(&temp, &x1, Mki1); Mki1++;
-       c_sub(&x3, &x3, &temp);
-       cc_mult(&temp, &x2, Mki2); Mki2++;
-       c_sub(&x3, &x3, &temp);
-
-       rhs[++firstcol] = x1;
-       rhs[++firstcol] = x2;
-       rhs[++firstcol] = x3;
-       ++firstcol;
-    
-       for (k = firstcol; k < ncol; k++) {
-           cc_mult(&temp, &x0, Mki0); Mki0++;
-           c_sub(&rhs[k], &rhs[k], &temp);
-           cc_mult(&temp, &x1, Mki1); Mki1++;
-           c_sub(&rhs[k], &rhs[k], &temp);
-           cc_mult(&temp, &x2, Mki2); Mki2++;
-           c_sub(&rhs[k], &rhs[k], &temp);
-           cc_mult(&temp, &x3, Mki3); Mki3++;
-           c_sub(&rhs[k], &rhs[k], &temp);
-       }
-
-        M0 += 4 * ldm + 4;
-    }
-
-    if ( firstcol < ncol - 1 ) { /* Do 2 columns */
-        Mki0 = M0 + 1;
-        Mki1 = Mki0 + ldm + 1;
-
-        x0 = rhs[firstcol];
-       cc_mult(&temp, &x0, Mki0); Mki0++;
-       c_sub(&x1, &rhs[firstcol+1], &temp);
-
-       rhs[++firstcol] = x1;
-       ++firstcol;
-    
-       for (k = firstcol; k < ncol; k++) {
-           cc_mult(&temp, &x0, Mki0); Mki0++;
-           c_sub(&rhs[k], &rhs[k], &temp);
-           cc_mult(&temp, &x1, Mki1); Mki1++;
-           c_sub(&rhs[k], &rhs[k], &temp);
-       } 
-    }
-    
-}
-
-/*
- * Solves a dense upper triangular system. The upper triangular matrix is
- * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
- * in the rhs vector.
- */
-void
-cusolve ( ldm, ncol, M, rhs )
-int ldm;       /* in */
-int ncol;      /* in */
-complex *M;    /* in */
-complex *rhs;  /* modified */
-{
-    complex xj, temp;
-    int jcol, j, irow;
-
-    jcol = ncol - 1;
-
-    for (j = 0; j < ncol; j++) {
-
-       c_div(&xj, &rhs[jcol], &M[jcol + jcol*ldm]); /* M(jcol, jcol) */
-       rhs[jcol] = xj;
-       
-       for (irow = 0; irow < jcol; irow++) {
-           cc_mult(&temp, &xj, &M[irow+jcol*ldm]); /* M(irow, jcol) */
-           c_sub(&rhs[irow], &rhs[irow], &temp);
-       }
-
-       jcol--;
-
-    }
-}
-
-
-/*
- * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
- * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
- */
-void cmatvec ( ldm, nrow, ncol, M, vec, Mxvec )
-int ldm;       /* in -- leading dimension of M */
-int nrow;      /* in */ 
-int ncol;      /* in */
-complex *M;    /* in */
-complex *vec;  /* in */
-complex *Mxvec;        /* in/out */
-{
-    complex vi0, vi1, vi2, vi3;
-    complex *M0, temp;
-    complex *Mki0, *Mki1, *Mki2, *Mki3;
-    register int firstcol = 0;
-    int k;
-
-    M0 = &M[0];
-
-    while ( firstcol < ncol - 3 ) {    /* Do 4 columns */
-       Mki0 = M0;
-       Mki1 = Mki0 + ldm;
-       Mki2 = Mki1 + ldm;
-       Mki3 = Mki2 + ldm;
-
-       vi0 = vec[firstcol++];
-       vi1 = vec[firstcol++];
-       vi2 = vec[firstcol++];
-       vi3 = vec[firstcol++];  
-       for (k = 0; k < nrow; k++) {
-           cc_mult(&temp, &vi0, Mki0); Mki0++;
-           c_add(&Mxvec[k], &Mxvec[k], &temp);
-           cc_mult(&temp, &vi1, Mki1); Mki1++;
-           c_add(&Mxvec[k], &Mxvec[k], &temp);
-           cc_mult(&temp, &vi2, Mki2); Mki2++;
-           c_add(&Mxvec[k], &Mxvec[k], &temp);
-           cc_mult(&temp, &vi3, Mki3); Mki3++;
-           c_add(&Mxvec[k], &Mxvec[k], &temp);
-       }
-
-       M0 += 4 * ldm;
-    }
-
-    while ( firstcol < ncol ) {                /* Do 1 column */
-       Mki0 = M0;
-       vi0 = vec[firstcol++];
-       for (k = 0; k < nrow; k++) {
-           cc_mult(&temp, &vi0, Mki0); Mki0++;
-           c_add(&Mxvec[k], &Mxvec[k], &temp);
-       }
-       M0 += ldm;
-    }
-       
-}
-
diff --git a/superlu/colamd.c b/superlu/colamd.c
deleted file mode 100644
index dc531f04..00000000
--- a/superlu/colamd.c
+++ /dev/null
@@ -1,3412 +0,0 @@
-/* ========================================================================== 
*/
-/* === colamd/symamd - a sparse matrix column ordering algorithm ============ 
*/
-/* ========================================================================== 
*/
-
-/*
-    colamd:  an approximate minimum degree column ordering algorithm,
-       for LU factorization of symmetric or unsymmetric matrices,
-       QR factorization, least squares, interior point methods for
-       linear programming problems, and other related problems.
-
-    symamd:  an approximate minimum degree ordering algorithm for Cholesky
-       factorization of symmetric matrices.
-
-    Purpose:
-
-       Colamd computes a permutation Q such that the Cholesky factorization of
-       (AQ)'(AQ) has less fill-in and requires fewer floating point operations
-       than A'A.  This also provides a good ordering for sparse partial
-       pivoting methods, P(AQ) = LU, where Q is computed prior to numerical
-       factorization, and P is computed during numerical factorization via
-       conventional partial pivoting with row interchanges.  Colamd is the
-       column ordering method used in SuperLU, part of the ScaLAPACK library.
-       It is also available as built-in function in MATLAB Version 6,
-       available from MathWorks, Inc. (http://www.mathworks.com).  This
-       routine can be used in place of colmmd in MATLAB.
-
-       Symamd computes a permutation P of a symmetric matrix A such that the
-       Cholesky factorization of PAP' has less fill-in and requires fewer
-       floating point operations than A.  Symamd constructs a matrix M such
-       that M'M has the same nonzero pattern of A, and then orders the columns
-       of M using colmmd.  The column ordering of M is then returned as the
-       row and column ordering P of A. 
-
-    Authors:
-
-       The authors of the code itself are Stefan I. Larimore and Timothy A.
-       Davis (davis@cise.ufl.edu), University of Florida.  The algorithm was
-       developed in collaboration with John Gilbert, Xerox PARC, and Esmond
-       Ng, Oak Ridge National Laboratory.
-
-    Date:
-
-       September 8, 2003.  Version 2.3.
-
-    Acknowledgements:
-
-       This work was supported by the National Science Foundation, under
-       grants DMS-9504974 and DMS-9803599.
-
-    Copyright and License:
-
-       Copyright (c) 1998-2003 by the University of Florida.
-       All Rights Reserved.
-
-       THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-       EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
-
-       Permission is hereby granted to use, copy, modify, and/or distribute
-       this program, provided that the Copyright, this License, and the
-       Availability of the original version is retained on all copies and made
-       accessible to the end-user of any code or package that includes COLAMD
-       or any modified version of COLAMD. 
-
-    Availability:
-
-       The colamd/symamd library is available at
-
-           http://www.cise.ufl.edu/research/sparse/colamd/
-
-       This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.c
-       file.  It requires the colamd.h file.  It is required by the colamdmex.c
-       and symamdmex.c files, for the MATLAB interface to colamd and symamd.
-
-    See the ChangeLog file for changes since Version 1.0.
-
-*/
-
-/* ========================================================================== 
*/
-/* === Description of user-callable routines ================================ 
*/
-/* ========================================================================== 
*/
-
-/*
-    
----------------------------------------------------------------------------
-    colamd_recommended:
-    
----------------------------------------------------------------------------
-
-       C syntax:
-
-           #include "colamd.h"
-           int colamd_recommended (int nnz, int n_row, int n_col) ;
-
-           or as a C macro
-
-           #include "colamd.h"
-           Alen = COLAMD_RECOMMENDED (int nnz, int n_row, int n_col) ;
-
-       Purpose:
-
-           Returns recommended value of Alen for use by colamd.  Returns -1
-           if any input argument is negative.  The use of this routine
-           or macro is optional.  Note that the macro uses its arguments
-           more than once, so be careful for side effects, if you pass
-           expressions as arguments to COLAMD_RECOMMENDED.  Not needed for
-           symamd, which dynamically allocates its own memory.
-
-       Arguments (all input arguments):
-
-           int nnz ;           Number of nonzeros in the matrix A.  This must
-                               be the same value as p [n_col] in the call to
-                               colamd - otherwise you will get a wrong value
-                               of the recommended memory to use.
-
-           int n_row ;         Number of rows in the matrix A.
-
-           int n_col ;         Number of columns in the matrix A.
-
-    
----------------------------------------------------------------------------
-    colamd_set_defaults:
-    
----------------------------------------------------------------------------
-
-       C syntax:
-
-           #include "colamd.h"
-           colamd_set_defaults (double knobs [COLAMD_KNOBS]) ;
-
-       Purpose:
-
-           Sets the default parameters.  The use of this routine is optional.
-
-       Arguments:
-
-           double knobs [COLAMD_KNOBS] ;       Output only.
-
-               Colamd: rows with more than (knobs [COLAMD_DENSE_ROW] * n_col)
-               entries are removed prior to ordering.  Columns with more than
-               (knobs [COLAMD_DENSE_COL] * n_row) entries are removed prior to
-               ordering, and placed last in the output column ordering. 
-
-               Symamd: uses only knobs [COLAMD_DENSE_ROW], which is knobs [0].
-               Rows and columns with more than (knobs [COLAMD_DENSE_ROW] * n)
-               entries are removed prior to ordering, and placed last in the
-               output ordering.
-
-               COLAMD_DENSE_ROW and COLAMD_DENSE_COL are defined as 0 and 1,
-               respectively, in colamd.h.  Default values of these two knobs
-               are both 0.5.  Currently, only knobs [0] and knobs [1] are
-               used, but future versions may use more knobs.  If so, they will
-               be properly set to their defaults by the future version of
-               colamd_set_defaults, so that the code that calls colamd will
-               not need to change, assuming that you either use
-               colamd_set_defaults, or pass a (double *) NULL pointer as the
-               knobs array to colamd or symamd.
-
-    
----------------------------------------------------------------------------
-    colamd:
-    
----------------------------------------------------------------------------
-
-       C syntax:
-
-           #include "colamd.h"
-           int colamd (int n_row, int n_col, int Alen, int *A, int *p,
-               double knobs [COLAMD_KNOBS], int stats [COLAMD_STATS]) ;
-
-       Purpose:
-
-           Computes a column ordering (Q) of A such that P(AQ)=LU or
-           (AQ)'AQ=LL' have less fill-in and require fewer floating point
-           operations than factorizing the unpermuted matrix A or A'A,
-           respectively.
-           
-       Returns:
-
-           TRUE (1) if successful, FALSE (0) otherwise.
-
-       Arguments:
-
-           int n_row ;         Input argument.
-
-               Number of rows in the matrix A.
-               Restriction:  n_row >= 0.
-               Colamd returns FALSE if n_row is negative.
-
-           int n_col ;         Input argument.
-
-               Number of columns in the matrix A.
-               Restriction:  n_col >= 0.
-               Colamd returns FALSE if n_col is negative.
-
-           int Alen ;          Input argument.
-
-               Restriction (see note):
-               Alen >= 2*nnz + 6*(n_col+1) + 4*(n_row+1) + n_col
-               Colamd returns FALSE if these conditions are not met.
-
-               Note:  this restriction makes an modest assumption regarding
-               the size of the two typedef's structures in colamd.h.
-               We do, however, guarantee that
-
-                       Alen >= colamd_recommended (nnz, n_row, n_col)
-               
-               or equivalently as a C preprocessor macro: 
-
-                       Alen >= COLAMD_RECOMMENDED (nnz, n_row, n_col)
-
-               will be sufficient.
-
-           int A [Alen] ;      Input argument, undefined on output.
-
-               A is an integer array of size Alen.  Alen must be at least as
-               large as the bare minimum value given above, but this is very
-               low, and can result in excessive run time.  For best
-               performance, we recommend that Alen be greater than or equal to
-               colamd_recommended (nnz, n_row, n_col), which adds
-               nnz/5 to the bare minimum value given above.
-
-               On input, the row indices of the entries in column c of the
-               matrix are held in A [(p [c]) ... (p [c+1]-1)].  The row indices
-               in a given column c need not be in ascending order, and
-               duplicate row indices may be be present.  However, colamd will
-               work a little faster if both of these conditions are met
-               (Colamd puts the matrix into this format, if it finds that the
-               the conditions are not met).
-
-               The matrix is 0-based.  That is, rows are in the range 0 to
-               n_row-1, and columns are in the range 0 to n_col-1.  Colamd
-               returns FALSE if any row index is out of range.
-
-               The contents of A are modified during ordering, and are
-               undefined on output.
-
-           int p [n_col+1] ;   Both input and output argument.
-
-               p is an integer array of size n_col+1.  On input, it holds the
-               "pointers" for the column form of the matrix A.  Column c of
-               the matrix A is held in A [(p [c]) ... (p [c+1]-1)].  The first
-               entry, p [0], must be zero, and p [c] <= p [c+1] must hold
-               for all c in the range 0 to n_col-1.  The value p [n_col] is
-               thus the total number of entries in the pattern of the matrix A.
-               Colamd returns FALSE if these conditions are not met.
-
-               On output, if colamd returns TRUE, the array p holds the column
-               permutation (Q, for P(AQ)=LU or (AQ)'(AQ)=LL'), where p [0] is
-               the first column index in the new ordering, and p [n_col-1] is
-               the last.  That is, p [k] = j means that column j of A is the
-               kth pivot column, in AQ, where k is in the range 0 to n_col-1
-               (p [0] = j means that column j of A is the first column in AQ).
-
-               If colamd returns FALSE, then no permutation is returned, and
-               p is undefined on output.
-
-           double knobs [COLAMD_KNOBS] ;       Input argument.
-
-               See colamd_set_defaults for a description.
-
-           int stats [COLAMD_STATS] ;          Output argument.
-
-               Statistics on the ordering, and error status.
-               See colamd.h for related definitions.
-               Colamd returns FALSE if stats is not present.
-
-               stats [0]:  number of dense or empty rows ignored.
-
-               stats [1]:  number of dense or empty columns ignored (and
-                               ordered last in the output permutation p)
-                               Note that a row can become "empty" if it
-                               contains only "dense" and/or "empty" columns,
-                               and similarly a column can become "empty" if it
-                               only contains "dense" and/or "empty" rows.
-
-               stats [2]:  number of garbage collections performed.
-                               This can be excessively high if Alen is close
-                               to the minimum required value.
-
-               stats [3]:  status code.  < 0 is an error code.
-                           > 1 is a warning or notice.
-
-                       0       OK.  Each column of the input matrix contained
-                               row indices in increasing order, with no
-                               duplicates.
-
-                       1       OK, but columns of input matrix were jumbled
-                               (unsorted columns or duplicate entries).  Colamd
-                               had to do some extra work to sort the matrix
-                               first and remove duplicate entries, but it
-                               still was able to return a valid permutation
-                               (return value of colamd was TRUE).
-
-                                       stats [4]: highest numbered column that
-                                               is unsorted or has duplicate
-                                               entries.
-                                       stats [5]: last seen duplicate or
-                                               unsorted row index.
-                                       stats [6]: number of duplicate or
-                                               unsorted row indices.
-
-                       -1      A is a null pointer
-
-                       -2      p is a null pointer
-
-                       -3      n_row is negative
-
-                                       stats [4]: n_row
-
-                       -4      n_col is negative
-
-                                       stats [4]: n_col
-
-                       -5      number of nonzeros in matrix is negative
-
-                                       stats [4]: number of nonzeros, p [n_col]
-
-                       -6      p [0] is nonzero
-
-                                       stats [4]: p [0]
-
-                       -7      A is too small
-
-                                       stats [4]: required size
-                                       stats [5]: actual size (Alen)
-
-                       -8      a column has a negative number of entries
-
-                                       stats [4]: column with < 0 entries
-                                       stats [5]: number of entries in col
-
-                       -9      a row index is out of bounds
-
-                                       stats [4]: column with bad row index
-                                       stats [5]: bad row index
-                                       stats [6]: n_row, # of rows of matrx
-
-                       -10     (unused; see symamd.c)
-
-                       -999    (unused; see symamd.c)
-
-               Future versions may return more statistics in the stats array.
-
-       Example:
-       
-           See http://www.cise.ufl.edu/research/sparse/colamd/example.c
-           for a complete example.
-
-           To order the columns of a 5-by-4 matrix with 11 nonzero entries in
-           the following nonzero pattern
-
-               x 0 x 0
-               x 0 x x
-               0 x x 0
-               0 0 x x
-               x x 0 0
-
-           with default knobs and no output statistics, do the following:
-
-               #include "colamd.h"
-               #define ALEN COLAMD_RECOMMENDED (11, 5, 4)
-               int A [ALEN] = {1, 2, 5, 3, 5, 1, 2, 3, 4, 2, 4} ;
-               int p [ ] = {0, 3, 5, 9, 11} ;
-               int stats [COLAMD_STATS] ;
-               colamd (5, 4, ALEN, A, p, (double *) NULL, stats) ;
-
-           The permutation is returned in the array p, and A is destroyed.
-
-    
----------------------------------------------------------------------------
-    symamd:
-    
----------------------------------------------------------------------------
-
-       C syntax:
-
-           #include "colamd.h"
-           int symamd (int n, int *A, int *p, int *perm,
-               double knobs [COLAMD_KNOBS], int stats [COLAMD_STATS],
-               void (*allocate) (size_t, size_t), void (*release) (void *)) ;
-
-       Purpose:
-
-           The symamd routine computes an ordering P of a symmetric sparse
-           matrix A such that the Cholesky factorization PAP' = LL' remains
-           sparse.  It is based on a column ordering of a matrix M constructed
-           so that the nonzero pattern of M'M is the same as A.  The matrix A
-           is assumed to be symmetric; only the strictly lower triangular part
-           is accessed.  You must pass your selected memory allocator (usually
-           calloc/free or mxCalloc/mxFree) to symamd, for it to allocate
-           memory for the temporary matrix M.
-
-       Returns:
-
-           TRUE (1) if successful, FALSE (0) otherwise.
-
-       Arguments:
-
-           int n ;             Input argument.
-
-               Number of rows and columns in the symmetrix matrix A.
-               Restriction:  n >= 0.
-               Symamd returns FALSE if n is negative.
-
-           int A [nnz] ;       Input argument.
-
-               A is an integer array of size nnz, where nnz = p [n].
-               
-               The row indices of the entries in column c of the matrix are
-               held in A [(p [c]) ... (p [c+1]-1)].  The row indices in a
-               given column c need not be in ascending order, and duplicate
-               row indices may be present.  However, symamd will run faster
-               if the columns are in sorted order with no duplicate entries. 
-
-               The matrix is 0-based.  That is, rows are in the range 0 to
-               n-1, and columns are in the range 0 to n-1.  Symamd
-               returns FALSE if any row index is out of range.
-
-               The contents of A are not modified.
-
-           int p [n+1] ;       Input argument.
-
-               p is an integer array of size n+1.  On input, it holds the
-               "pointers" for the column form of the matrix A.  Column c of
-               the matrix A is held in A [(p [c]) ... (p [c+1]-1)].  The first
-               entry, p [0], must be zero, and p [c] <= p [c+1] must hold
-               for all c in the range 0 to n-1.  The value p [n] is
-               thus the total number of entries in the pattern of the matrix A.
-               Symamd returns FALSE if these conditions are not met.
-
-               The contents of p are not modified.
-
-           int perm [n+1] ;    Output argument.
-
-               On output, if symamd returns TRUE, the array perm holds the
-               permutation P, where perm [0] is the first index in the new
-               ordering, and perm [n-1] is the last.  That is, perm [k] = j
-               means that row and column j of A is the kth column in PAP',
-               where k is in the range 0 to n-1 (perm [0] = j means
-               that row and column j of A are the first row and column in
-               PAP').  The array is used as a workspace during the ordering,
-               which is why it must be of length n+1, not just n.
-
-           double knobs [COLAMD_KNOBS] ;       Input argument.
-
-               See colamd_set_defaults for a description.
-
-           int stats [COLAMD_STATS] ;          Output argument.
-
-               Statistics on the ordering, and error status.
-               See colamd.h for related definitions.
-               Symamd returns FALSE if stats is not present.
-
-               stats [0]:  number of dense or empty row and columns ignored
-                               (and ordered last in the output permutation 
-                               perm).  Note that a row/column can become
-                               "empty" if it contains only "dense" and/or
-                               "empty" columns/rows.
-
-               stats [1]:  (same as stats [0])
-
-               stats [2]:  number of garbage collections performed.
-
-               stats [3]:  status code.  < 0 is an error code.
-                           > 1 is a warning or notice.
-
-                       0       OK.  Each column of the input matrix contained
-                               row indices in increasing order, with no
-                               duplicates.
-
-                       1       OK, but columns of input matrix were jumbled
-                               (unsorted columns or duplicate entries).  Symamd
-                               had to do some extra work to sort the matrix
-                               first and remove duplicate entries, but it
-                               still was able to return a valid permutation
-                               (return value of symamd was TRUE).
-
-                                       stats [4]: highest numbered column that
-                                               is unsorted or has duplicate
-                                               entries.
-                                       stats [5]: last seen duplicate or
-                                               unsorted row index.
-                                       stats [6]: number of duplicate or
-                                               unsorted row indices.
-
-                       -1      A is a null pointer
-
-                       -2      p is a null pointer
-
-                       -3      (unused, see colamd.c)
-
-                       -4      n is negative
-
-                                       stats [4]: n
-
-                       -5      number of nonzeros in matrix is negative
-
-                                       stats [4]: # of nonzeros (p [n]).
-
-                       -6      p [0] is nonzero
-
-                                       stats [4]: p [0]
-
-                       -7      (unused)
-
-                       -8      a column has a negative number of entries
-
-                                       stats [4]: column with < 0 entries
-                                       stats [5]: number of entries in col
-
-                       -9      a row index is out of bounds
-
-                                       stats [4]: column with bad row index
-                                       stats [5]: bad row index
-                                       stats [6]: n_row, # of rows of matrx
-
-                       -10     out of memory (unable to allocate temporary
-                               workspace for M or count arrays using the
-                               "allocate" routine passed into symamd).
-
-                       -999    internal error.  colamd failed to order the
-                               matrix M, when it should have succeeded.  This
-                               indicates a bug.  If this (and *only* this)
-                               error code occurs, please contact the authors.
-                               Don't contact the authors if you get any other
-                               error code.
-
-               Future versions may return more statistics in the stats array.
-
-           void * (*allocate) (size_t, size_t)
-
-               A pointer to a function providing memory allocation.  The
-               allocated memory must be returned initialized to zero.  For a
-               C application, this argument should normally be a pointer to
-               calloc.  For a MATLAB mexFunction, the routine mxCalloc is
-               passed instead.
-
-           void (*release) (size_t, size_t)
-
-               A pointer to a function that frees memory allocated by the
-               memory allocation routine above.  For a C application, this
-               argument should normally be a pointer to free.  For a MATLAB
-               mexFunction, the routine mxFree is passed instead.
-
-
-    
----------------------------------------------------------------------------
-    colamd_report:
-    
----------------------------------------------------------------------------
-
-       C syntax:
-
-           #include "colamd.h"
-           colamd_report (int stats [COLAMD_STATS]) ;
-
-       Purpose:
-
-           Prints the error status and statistics recorded in the stats
-           array on the standard error output (for a standard C routine)
-           or on the MATLAB output (for a mexFunction).
-
-       Arguments:
-
-           int stats [COLAMD_STATS] ;  Input only.  Statistics from colamd.
-
-
-    
----------------------------------------------------------------------------
-    symamd_report:
-    
----------------------------------------------------------------------------
-
-       C syntax:
-
-           #include "colamd.h"
-           symamd_report (int stats [COLAMD_STATS]) ;
-
-       Purpose:
-
-           Prints the error status and statistics recorded in the stats
-           array on the standard error output (for a standard C routine)
-           or on the MATLAB output (for a mexFunction).
-
-       Arguments:
-
-           int stats [COLAMD_STATS] ;  Input only.  Statistics from symamd.
-
-
-*/
-
-/* ========================================================================== 
*/
-/* === Scaffolding code definitions  ======================================== 
*/
-/* ========================================================================== 
*/
-
-/* Ensure that debugging is turned off: */
-#ifndef NDEBUG
-#define NDEBUG
-#endif /* NDEBUG */
-
-/*
-   Our "scaffolding code" philosophy:  In our opinion, well-written library
-   code should keep its "debugging" code, and just normally have it turned off
-   by the compiler so as not to interfere with performance.  This serves
-   several purposes:
-
-   (1) assertions act as comments to the reader, telling you what the code
-       expects at that point.  All assertions will always be true (unless
-       there really is a bug, of course).
-
-   (2) leaving in the scaffolding code assists anyone who would like to modify
-       the code, or understand the algorithm (by reading the debugging output,
-       one can get a glimpse into what the code is doing).
-
-   (3) (gasp!) for actually finding bugs.  This code has been heavily tested
-       and "should" be fully functional and bug-free ... but you never know...
-
-    To enable debugging, comment out the "#define NDEBUG" above.  For a MATLAB
-    mexFunction, you will also need to modify mexopts.sh to remove the -DNDEBUG
-    definition.  The code will become outrageously slow when debugging is
-    enabled.  To control the level of debugging output, set an environment
-    variable D to 0 (little), 1 (some), 2, 3, or 4 (lots).  When debugging,
-    you should see the following message on the standard output:
-
-       colamd: debug version, D = 1 (THIS WILL BE SLOW!)
-
-    or a similar message for symamd.  If you don't, then debugging has not
-    been enabled.
-
-*/
-
-/* ========================================================================== 
*/
-/* === Include files ======================================================== 
*/
-/* ========================================================================== 
*/
-
-#include "colamd.h"
-#include <limits.h>
-
-#ifdef MATLAB_MEX_FILE
-#include "mex.h"
-#include "matrix.h"
-#else
-#include <stdio.h>
-#include <assert.h>
-#endif /* MATLAB_MEX_FILE */
-
-/* ========================================================================== 
*/
-/* === Definitions ========================================================== 
*/
-/* ========================================================================== 
*/
-
-/* Routines are either PUBLIC (user-callable) or PRIVATE (not user-callable) */
-#define PUBLIC
-#define PRIVATE static
-
-#define MAX(a,b) (((a) > (b)) ? (a) : (b))
-#define MIN(a,b) (((a) < (b)) ? (a) : (b))
-
-#define ONES_COMPLEMENT(r) (-(r)-1)
-
-/* -------------------------------------------------------------------------- 
*/
-/* Change for version 2.1:  define TRUE and FALSE only if not yet defined */  
-/* -------------------------------------------------------------------------- 
*/
-
-#ifndef TRUE
-#define TRUE (1)
-#endif
-
-#ifndef FALSE
-#define FALSE (0)
-#endif
-
-/* -------------------------------------------------------------------------- 
*/
-
-#define EMPTY  (-1)
-
-/* Row and column status */
-#define ALIVE  (0)
-#define DEAD   (-1)
-
-/* Column status */
-#define DEAD_PRINCIPAL         (-1)
-#define DEAD_NON_PRINCIPAL     (-2)
-
-/* Macros for row and column status update and checking. */
-#define ROW_IS_DEAD(r)                 ROW_IS_MARKED_DEAD (Row[r].shared2.mark)
-#define ROW_IS_MARKED_DEAD(row_mark)   (row_mark < ALIVE)
-#define ROW_IS_ALIVE(r)                        (Row [r].shared2.mark >= ALIVE)
-#define COL_IS_DEAD(c)                 (Col [c].start < ALIVE)
-#define COL_IS_ALIVE(c)                        (Col [c].start >= ALIVE)
-#define COL_IS_DEAD_PRINCIPAL(c)       (Col [c].start == DEAD_PRINCIPAL)
-#define KILL_ROW(r)                    { Row [r].shared2.mark = DEAD ; }
-#define KILL_PRINCIPAL_COL(c)          { Col [c].start = DEAD_PRINCIPAL ; }
-#define KILL_NON_PRINCIPAL_COL(c)      { Col [c].start = DEAD_NON_PRINCIPAL ; }
-
-/* ========================================================================== 
*/
-/* === Colamd reporting mechanism =========================================== 
*/
-/* ========================================================================== 
*/
-
-#ifdef MATLAB_MEX_FILE
-
-/* use mexPrintf in a MATLAB mexFunction, for debugging and statistics output 
*/
-#define PRINTF mexPrintf
-
-/* In MATLAB, matrices are 1-based to the user, but 0-based internally */
-#define INDEX(i) ((i)+1)
-
-#else
-
-/* Use printf in standard C environment, for debugging and statistics output. 
*/
-/* Output is generated only if debugging is enabled at compile time, or if */
-/* the caller explicitly calls colamd_report or symamd_report. */
-#define PRINTF printf
-
-/* In C, matrices are 0-based and indices are reported as such in *_report */
-#define INDEX(i) (i)
-
-#endif /* MATLAB_MEX_FILE */
-
-/* ========================================================================== 
*/
-/* === Prototypes of PRIVATE routines ======================================= 
*/
-/* ========================================================================== 
*/
-
-PRIVATE int init_rows_cols
-(
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int A [],
-    int p [],
-    int stats [COLAMD_STATS]
-) ;
-
-PRIVATE void init_scoring
-(
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int A [],
-    int head [],
-    double knobs [COLAMD_KNOBS],
-    int *p_n_row2,
-    int *p_n_col2,
-    int *p_max_deg
-) ;
-
-PRIVATE int find_ordering
-(
-    int n_row,
-    int n_col,
-    int Alen,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int A [],
-    int head [],
-    int n_col2,
-    int max_deg,
-    int pfree
-) ;
-
-PRIVATE void order_children
-(
-    int n_col,
-    Colamd_Col Col [],
-    int p []
-) ;
-
-PRIVATE void detect_super_cols
-(
-
-#ifndef NDEBUG
-    int n_col,
-    Colamd_Row Row [],
-#endif /* NDEBUG */
-
-    Colamd_Col Col [],
-    int A [],
-    int head [],
-    int row_start,
-    int row_length
-) ;
-
-PRIVATE int garbage_collection
-(
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int A [],
-    int *pfree
-) ;
-
-PRIVATE int clear_mark
-(
-    int n_row,
-    Colamd_Row Row []
-) ;
-
-PRIVATE void print_report
-(
-    char *method,
-    int stats [COLAMD_STATS]
-) ;
-
-/* ========================================================================== 
*/
-/* === Debugging prototypes and definitions ================================= 
*/
-/* ========================================================================== 
*/
-
-#ifndef NDEBUG
-
-/* colamd_debug is the *ONLY* global variable, and is only */
-/* present when debugging */
-
-PRIVATE int colamd_debug ;     /* debug print level */
-
-#define DEBUG0(params) { (void) PRINTF params ; }
-#define DEBUG1(params) { if (colamd_debug >= 1) (void) PRINTF params ; }
-#define DEBUG2(params) { if (colamd_debug >= 2) (void) PRINTF params ; }
-#define DEBUG3(params) { if (colamd_debug >= 3) (void) PRINTF params ; }
-#define DEBUG4(params) { if (colamd_debug >= 4) (void) PRINTF params ; }
-
-#ifdef MATLAB_MEX_FILE
-#define ASSERT(expression) (mxAssert ((expression), ""))
-#else
-#define ASSERT(expression) (assert (expression))
-#endif /* MATLAB_MEX_FILE */
-
-PRIVATE void colamd_get_debug  /* gets the debug print level from getenv */
-(
-    char *method
-) ;
-
-PRIVATE void debug_deg_lists
-(
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int head [],
-    int min_score,
-    int should,
-    int max_deg
-) ;
-
-PRIVATE void debug_mark
-(
-    int n_row,
-    Colamd_Row Row [],
-    int tag_mark,
-    int max_mark
-) ;
-
-PRIVATE void debug_matrix
-(
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int A []
-) ;
-
-PRIVATE void debug_structures
-(
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int A [],
-    int n_col2
-) ;
-
-#else /* NDEBUG */
-
-/* === No debugging ========================================================= 
*/
-
-#define DEBUG0(params) ;
-#define DEBUG1(params) ;
-#define DEBUG2(params) ;
-#define DEBUG3(params) ;
-#define DEBUG4(params) ;
-
-#define ASSERT(expression) ((void) 0)
-
-#endif /* NDEBUG */
-
-/* ========================================================================== 
*/
-
-
-
-/* ========================================================================== 
*/
-/* === USER-CALLABLE ROUTINES: ============================================== 
*/
-/* ========================================================================== 
*/
-
-
-/* ========================================================================== 
*/
-/* === colamd_recommended =================================================== 
*/
-/* ========================================================================== 
*/
-
-/*
-    The colamd_recommended routine returns the suggested size for Alen.  This
-    value has been determined to provide good balance between the number of
-    garbage collections and the memory requirements for colamd.  If any
-    argument is negative, a -1 is returned as an error condition.  This
-    function is also available as a macro defined in colamd.h, so that you
-    can use it for a statically-allocated array size.
-*/
-
-PUBLIC int colamd_recommended  /* returns recommended value of Alen. */
-(
-    /* === Parameters ======================================================= 
*/
-
-    int nnz,                   /* number of nonzeros in A */
-    int n_row,                 /* number of rows in A */
-    int n_col                  /* number of columns in A */
-)
-{
-    return (COLAMD_RECOMMENDED (nnz, n_row, n_col)) ; 
-}
-
-
-/* ========================================================================== 
*/
-/* === colamd_set_defaults ================================================== 
*/
-/* ========================================================================== 
*/
-
-/*
-    The colamd_set_defaults routine sets the default values of the user-
-    controllable parameters for colamd:
-
-       knobs [0]       rows with knobs[0]*n_col entries or more are removed
-                       prior to ordering in colamd.  Rows and columns with
-                       knobs[0]*n_col entries or more are removed prior to
-                       ordering in symamd and placed last in the output
-                       ordering.
-
-       knobs [1]       columns with knobs[1]*n_row entries or more are removed
-                       prior to ordering in colamd, and placed last in the
-                       column permutation.  Symamd ignores this knob.
-
-       knobs [2..19]   unused, but future versions might use this
-*/
-
-PUBLIC void colamd_set_defaults
-(
-    /* === Parameters ======================================================= 
*/
-
-    double knobs [COLAMD_KNOBS]                /* knob array */
-)
-{
-    /* === Local variables ================================================== 
*/
-
-    int i ;
-
-    if (!knobs)
-    {
-       return ;                        /* no knobs to initialize */
-    }
-    for (i = 0 ; i < COLAMD_KNOBS ; i++)
-    {
-       knobs [i] = 0 ;
-    }
-    knobs [COLAMD_DENSE_ROW] = 0.5 ;   /* ignore rows over 50% dense */
-    knobs [COLAMD_DENSE_COL] = 0.5 ;   /* ignore columns over 50% dense */
-}
-
-
-/* ========================================================================== 
*/
-/* === symamd =============================================================== 
*/
-/* ========================================================================== 
*/
-
-PUBLIC int symamd                      /* return TRUE if OK, FALSE otherwise */
-(
-    /* === Parameters ======================================================= 
*/
-
-    int n,                             /* number of rows and columns of A */
-    int A [],                          /* row indices of A */
-    int p [],                          /* column pointers of A */
-    int perm [],                       /* output permutation, size n+1 */
-    double knobs [COLAMD_KNOBS],       /* parameters (uses defaults if NULL) */
-    int stats [COLAMD_STATS],          /* output statistics and error codes */
-    void * (*allocate) (size_t, size_t),
-                                       /* pointer to calloc (ANSI C) or */
-                                       /* mxCalloc (for MATLAB mexFunction) */
-    void (*release) (void *)
-                                       /* pointer to free (ANSI C) or */
-                                       /* mxFree (for MATLAB mexFunction) */
-)
-{
-    /* === Local variables ================================================== 
*/
-
-    int *count ;               /* length of each column of M, and col pointer*/
-    int *mark ;                        /* mark array for finding duplicate 
entries */
-    int *M ;                   /* row indices of matrix M */
-    int Mlen ;                 /* length of M */
-    int n_row ;                        /* number of rows in M */
-    int nnz ;                  /* number of entries in A */
-    int i ;                    /* row index of A */
-    int j ;                    /* column index of A */
-    int k ;                    /* row index of M */ 
-    int mnz ;                  /* number of nonzeros in M */
-    int pp ;                   /* index into a column of A */
-    int last_row ;             /* last row seen in the current column */
-    int length ;               /* number of nonzeros in a column */
-
-    double cknobs [COLAMD_KNOBS] ;             /* knobs for colamd */
-    double default_knobs [COLAMD_KNOBS] ;      /* default knobs for colamd */
-    int cstats [COLAMD_STATS] ;                        /* colamd stats */
-
-#ifndef NDEBUG
-    colamd_get_debug ("symamd") ;
-#endif /* NDEBUG */
-
-    /* === Check the input arguments ======================================== 
*/
-
-    if (!stats)
-    {
-       DEBUG0 (("symamd: stats not present\n")) ;
-       return (FALSE) ;
-    }
-    for (i = 0 ; i < COLAMD_STATS ; i++)
-    {
-       stats [i] = 0 ;
-    }
-    stats [COLAMD_STATUS] = COLAMD_OK ;
-    stats [COLAMD_INFO1] = -1 ;
-    stats [COLAMD_INFO2] = -1 ;
-
-    if (!A)
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ;
-       DEBUG0 (("symamd: A not present\n")) ;
-       return (FALSE) ;
-    }
-
-    if (!p)            /* p is not present */
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ;
-       DEBUG0 (("symamd: p not present\n")) ;
-       return (FALSE) ;
-    }
-
-    if (n < 0)         /* n must be >= 0 */
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ;
-       stats [COLAMD_INFO1] = n ;
-       DEBUG0 (("symamd: n negative %d\n", n)) ;
-       return (FALSE) ;
-    }
-
-    nnz = p [n] ;
-    if (nnz < 0)       /* nnz must be >= 0 */
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ;
-       stats [COLAMD_INFO1] = nnz ;
-       DEBUG0 (("symamd: number of entries negative %d\n", nnz)) ;
-       return (FALSE) ;
-    }
-
-    if (p [0] != 0)
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ;
-       stats [COLAMD_INFO1] = p [0] ;
-       DEBUG0 (("symamd: p[0] not zero %d\n", p [0])) ;
-       return (FALSE) ;
-    }
-
-    /* === If no knobs, set default knobs =================================== 
*/
-
-    if (!knobs)
-    {
-       colamd_set_defaults (default_knobs) ;
-       knobs = default_knobs ;
-    }
-
-    /* === Allocate count and mark ========================================== 
*/
-
-    count = (int *) ((*allocate) (n+1, sizeof (int))) ;
-    if (!count)
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ;
-       DEBUG0 (("symamd: allocate count (size %d) failed\n", n+1)) ;
-       return (FALSE) ;
-    }
-
-    mark = (int *) ((*allocate) (n+1, sizeof (int))) ;
-    if (!mark)
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ;
-       (*release) ((void *) count) ;
-       DEBUG0 (("symamd: allocate mark (size %d) failed\n", n+1)) ;
-       return (FALSE) ;
-    }
-
-    /* === Compute column counts of M, check if A is valid ================== 
*/
-
-    stats [COLAMD_INFO3] = 0 ;  /* number of duplicate or unsorted row 
indices*/
-
-    for (i = 0 ; i < n ; i++)
-    {
-       mark [i] = -1 ;
-    }
-
-    for (j = 0 ; j < n ; j++)
-    {
-       last_row = -1 ;
-
-       length = p [j+1] - p [j] ;
-       if (length < 0)
-       {
-           /* column pointers must be non-decreasing */
-           stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ;
-           stats [COLAMD_INFO1] = j ;
-           stats [COLAMD_INFO2] = length ;
-           (*release) ((void *) count) ;
-           (*release) ((void *) mark) ;
-           DEBUG0 (("symamd: col %d negative length %d\n", j, length)) ;
-           return (FALSE) ;
-       }
-
-       for (pp = p [j] ; pp < p [j+1] ; pp++)
-       {
-           i = A [pp] ;
-           if (i < 0 || i >= n)
-           {
-               /* row index i, in column j, is out of bounds */
-               stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ;
-               stats [COLAMD_INFO1] = j ;
-               stats [COLAMD_INFO2] = i ;
-               stats [COLAMD_INFO3] = n ;
-               (*release) ((void *) count) ;
-               (*release) ((void *) mark) ;
-               DEBUG0 (("symamd: row %d col %d out of bounds\n", i, j)) ;
-               return (FALSE) ;
-           }
-
-           if (i <= last_row || mark [i] == j)
-           {
-               /* row index is unsorted or repeated (or both), thus col */
-               /* is jumbled.  This is a notice, not an error condition. */
-               stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ;
-               stats [COLAMD_INFO1] = j ;
-               stats [COLAMD_INFO2] = i ;
-               (stats [COLAMD_INFO3]) ++ ;
-               DEBUG1 (("symamd: row %d col %d unsorted/duplicate\n", i, j)) ;
-           }
-
-           if (i > j && mark [i] != j)
-           {
-               /* row k of M will contain column indices i and j */
-               count [i]++ ;
-               count [j]++ ;
-           }
-
-           /* mark the row as having been seen in this column */
-           mark [i] = j ;
-
-           last_row = i ;
-       }
-    }
-
-    if (stats [COLAMD_STATUS] == COLAMD_OK)
-    {
-       /* if there are no duplicate entries, then mark is no longer needed */
-       (*release) ((void *) mark) ;
-    }
-
-    /* === Compute column pointers of M ===================================== 
*/
-
-    /* use output permutation, perm, for column pointers of M */
-    perm [0] = 0 ;
-    for (j = 1 ; j <= n ; j++)
-    {
-       perm [j] = perm [j-1] + count [j-1] ;
-    }
-    for (j = 0 ; j < n ; j++)
-    {
-       count [j] = perm [j] ;
-    }
-
-    /* === Construct M ====================================================== 
*/
-
-    mnz = perm [n] ;
-    n_row = mnz / 2 ;
-    Mlen = colamd_recommended (mnz, n_row, n) ;
-    M = (int *) ((*allocate) (Mlen, sizeof (int))) ;
-    DEBUG0 (("symamd: M is %d-by-%d with %d entries, Mlen = %d\n",
-       n_row, n, mnz, Mlen)) ;
-
-    if (!M)
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_out_of_memory ;
-       (*release) ((void *) count) ;
-       (*release) ((void *) mark) ;
-       DEBUG0 (("symamd: allocate M (size %d) failed\n", Mlen)) ;
-       return (FALSE) ;
-    }
-
-    k = 0 ;
-
-    if (stats [COLAMD_STATUS] == COLAMD_OK)
-    {
-       /* Matrix is OK */
-       for (j = 0 ; j < n ; j++)
-       {
-           ASSERT (p [j+1] - p [j] >= 0) ;
-           for (pp = p [j] ; pp < p [j+1] ; pp++)
-           {
-               i = A [pp] ;
-               ASSERT (i >= 0 && i < n) ;
-               if (i > j)
-               {
-                   /* row k of M contains column indices i and j */
-                   M [count [i]++] = k ;
-                   M [count [j]++] = k ;
-                   k++ ;
-               }
-           }
-       }
-    }
-    else
-    {
-       /* Matrix is jumbled.  Do not add duplicates to M.  Unsorted cols OK. */
-       DEBUG0 (("symamd: Duplicates in A.\n")) ;
-       for (i = 0 ; i < n ; i++)
-       {
-           mark [i] = -1 ;
-       }
-       for (j = 0 ; j < n ; j++)
-       {
-           ASSERT (p [j+1] - p [j] >= 0) ;
-           for (pp = p [j] ; pp < p [j+1] ; pp++)
-           {
-               i = A [pp] ;
-               ASSERT (i >= 0 && i < n) ;
-               if (i > j && mark [i] != j)
-               {
-                   /* row k of M contains column indices i and j */
-                   M [count [i]++] = k ;
-                   M [count [j]++] = k ;
-                   k++ ;
-                   mark [i] = j ;
-               }
-           }
-       }
-       (*release) ((void *) mark) ;
-    }
-
-    /* count and mark no longer needed */
-    (*release) ((void *) count) ;
-    ASSERT (k == n_row) ;
-
-    /* === Adjust the knobs for M =========================================== 
*/
-
-    for (i = 0 ; i < COLAMD_KNOBS ; i++)
-    {
-       cknobs [i] = knobs [i] ;
-    }
-
-    /* there are no dense rows in M */
-    cknobs [COLAMD_DENSE_ROW] = 1.0 ;
-
-    if (n_row != 0 && n < n_row)
-    {
-       /* On input, the knob is a fraction of 1..n, the number of rows of A. */
-       /* Convert it to a fraction of 1..n_row, of the number of rows of M. */
-       cknobs [COLAMD_DENSE_COL] = (knobs [COLAMD_DENSE_ROW] * n) / n_row ;
-    }
-    else
-    {
-       /* no dense columns in M */
-       cknobs [COLAMD_DENSE_COL] = 1.0 ;
-    }
-
-    DEBUG0 (("symamd: dense col knob for M: %g\n", cknobs [COLAMD_DENSE_COL])) 
;
-
-    /* === Order the columns of M =========================================== 
*/
-
-    if (!colamd (n_row, n, Mlen, M, perm, cknobs, cstats))
-    {
-       /* This "cannot" happen, unless there is a bug in the code. */
-       stats [COLAMD_STATUS] = COLAMD_ERROR_internal_error ;
-       (*release) ((void *) M) ;
-       DEBUG0 (("symamd: internal error!\n")) ;
-       return (FALSE) ;
-    }
-
-    /* Note that the output permutation is now in perm */
-
-    /* === get the statistics for symamd from colamd ======================== 
*/
-
-    /* note that a dense column in colamd means a dense row and col in symamd 
*/
-    stats [COLAMD_DENSE_ROW]    = cstats [COLAMD_DENSE_COL] ;
-    stats [COLAMD_DENSE_COL]    = cstats [COLAMD_DENSE_COL] ;
-    stats [COLAMD_DEFRAG_COUNT] = cstats [COLAMD_DEFRAG_COUNT] ;
-
-    /* === Free M =========================================================== 
*/
-
-    (*release) ((void *) M) ;
-    DEBUG0 (("symamd: done.\n")) ;
-    return (TRUE) ;
-
-}
-
-/* ========================================================================== 
*/
-/* === colamd =============================================================== 
*/
-/* ========================================================================== 
*/
-
-/*
-    The colamd routine computes a column ordering Q of a sparse matrix
-    A such that the LU factorization P(AQ) = LU remains sparse, where P is
-    selected via partial pivoting.   The routine can also be viewed as
-    providing a permutation Q such that the Cholesky factorization
-    (AQ)'(AQ) = LL' remains sparse.
-*/
-
-PUBLIC int colamd              /* returns TRUE if successful, FALSE otherwise*/
-(
-    /* === Parameters ======================================================= 
*/
-
-    int n_row,                 /* number of rows in A */
-    int n_col,                 /* number of columns in A */
-    int Alen,                  /* length of A */
-    int A [],                  /* row indices of A */
-    int p [],                  /* pointers to columns in A */
-    double knobs [COLAMD_KNOBS],/* parameters (uses defaults if NULL) */
-    int stats [COLAMD_STATS]   /* output statistics and error codes */
-)
-{
-    /* === Local variables ================================================== 
*/
-
-    int i ;                    /* loop index */
-    int nnz ;                  /* nonzeros in A */
-    int Row_size ;             /* size of Row [], in integers */
-    int Col_size ;             /* size of Col [], in integers */
-    int need ;                 /* minimum required length of A */
-    Colamd_Row *Row ;          /* pointer into A of Row [0..n_row] array */
-    Colamd_Col *Col ;          /* pointer into A of Col [0..n_col] array */
-    int n_col2 ;               /* number of non-dense, non-empty columns */
-    int n_row2 ;               /* number of non-dense, non-empty rows */
-    int ngarbage ;             /* number of garbage collections performed */
-    int max_deg ;              /* maximum row degree */
-    double default_knobs [COLAMD_KNOBS] ;      /* default knobs array */
-
-#ifndef NDEBUG
-    colamd_get_debug ("colamd") ;
-#endif /* NDEBUG */
-
-    /* === Check the input arguments ======================================== 
*/
-
-    if (!stats)
-    {
-       DEBUG0 (("colamd: stats not present\n")) ;
-       return (FALSE) ;
-    }
-    for (i = 0 ; i < COLAMD_STATS ; i++)
-    {
-       stats [i] = 0 ;
-    }
-    stats [COLAMD_STATUS] = COLAMD_OK ;
-    stats [COLAMD_INFO1] = -1 ;
-    stats [COLAMD_INFO2] = -1 ;
-
-    if (!A)            /* A is not present */
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_A_not_present ;
-       DEBUG0 (("colamd: A not present\n")) ;
-       return (FALSE) ;
-    }
-
-    if (!p)            /* p is not present */
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_p_not_present ;
-       DEBUG0 (("colamd: p not present\n")) ;
-       return (FALSE) ;
-    }
-
-    if (n_row < 0)     /* n_row must be >= 0 */
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_nrow_negative ;
-       stats [COLAMD_INFO1] = n_row ;
-       DEBUG0 (("colamd: nrow negative %d\n", n_row)) ;
-       return (FALSE) ;
-    }
-
-    if (n_col < 0)     /* n_col must be >= 0 */
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_ncol_negative ;
-       stats [COLAMD_INFO1] = n_col ;
-       DEBUG0 (("colamd: ncol negative %d\n", n_col)) ;
-       return (FALSE) ;
-    }
-
-    nnz = p [n_col] ;
-    if (nnz < 0)       /* nnz must be >= 0 */
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_nnz_negative ;
-       stats [COLAMD_INFO1] = nnz ;
-       DEBUG0 (("colamd: number of entries negative %d\n", nnz)) ;
-       return (FALSE) ;
-    }
-
-    if (p [0] != 0)
-    {
-       stats [COLAMD_STATUS] = COLAMD_ERROR_p0_nonzero ;
-       stats [COLAMD_INFO1] = p [0] ;
-       DEBUG0 (("colamd: p[0] not zero %d\n", p [0])) ;
-       return (FALSE) ;
-    }
-
-    /* === If no knobs, set default knobs =================================== 
*/
-
-    if (!knobs)
-    {
-       colamd_set_defaults (default_knobs) ;
-       knobs = default_knobs ;
-    }
-
-    /* === Allocate the Row and Col arrays from array A ===================== 
*/
-
-    Col_size = COLAMD_C (n_col) ;
-    Row_size = COLAMD_R (n_row) ;
-    need = 2*nnz + n_col + Col_size + Row_size ;
-
-    if (need > Alen)
-    {
-       /* not enough space in array A to perform the ordering */
-       stats [COLAMD_STATUS] = COLAMD_ERROR_A_too_small ;
-       stats [COLAMD_INFO1] = need ;
-       stats [COLAMD_INFO2] = Alen ;
-       DEBUG0 (("colamd: Need Alen >= %d, given only Alen = %d\n", need,Alen));
-       return (FALSE) ;
-    }
-
-    Alen -= Col_size + Row_size ;
-    Col = (Colamd_Col *) &A [Alen] ;
-    Row = (Colamd_Row *) &A [Alen + Col_size] ;
-
-    /* === Construct the row and column data structures ===================== 
*/
-
-    if (!init_rows_cols (n_row, n_col, Row, Col, A, p, stats))
-    {
-       /* input matrix is invalid */
-       DEBUG0 (("colamd: Matrix invalid\n")) ;
-       return (FALSE) ;
-    }
-
-    /* === Initialize scores, kill dense rows/columns ======================= 
*/
-
-    init_scoring (n_row, n_col, Row, Col, A, p, knobs,
-       &n_row2, &n_col2, &max_deg) ;
-
-    /* === Order the supercolumns =========================================== 
*/
-
-    ngarbage = find_ordering (n_row, n_col, Alen, Row, Col, A, p,
-       n_col2, max_deg, 2*nnz) ;
-
-    /* === Order the non-principal columns ================================== 
*/
-
-    order_children (n_col, Col, p) ;
-
-    /* === Return statistics in stats ======================================= 
*/
-
-    stats [COLAMD_DENSE_ROW] = n_row - n_row2 ;
-    stats [COLAMD_DENSE_COL] = n_col - n_col2 ;
-    stats [COLAMD_DEFRAG_COUNT] = ngarbage ;
-    DEBUG0 (("colamd: done.\n")) ; 
-    return (TRUE) ;
-}
-
-
-/* ========================================================================== 
*/
-/* === colamd_report ======================================================== 
*/
-/* ========================================================================== 
*/
-
-PUBLIC void colamd_report
-(
-    int stats [COLAMD_STATS]
-)
-{
-    print_report ("colamd", stats) ;
-}
-
-
-/* ========================================================================== 
*/
-/* === symamd_report ======================================================== 
*/
-/* ========================================================================== 
*/
-
-PUBLIC void symamd_report
-(
-    int stats [COLAMD_STATS]
-)
-{
-    print_report ("symamd", stats) ;
-}
-
-
-
-/* ========================================================================== 
*/
-/* === NON-USER-CALLABLE ROUTINES: ========================================== 
*/
-/* ========================================================================== 
*/
-
-/* There are no user-callable routines beyond this point in the file */
-
-
-/* ========================================================================== 
*/
-/* === init_rows_cols ======================================================= 
*/
-/* ========================================================================== 
*/
-
-/*
-    Takes the column form of the matrix in A and creates the row form of the
-    matrix.  Also, row and column attributes are stored in the Col and Row
-    structs.  If the columns are un-sorted or contain duplicate row indices,
-    this routine will also sort and remove duplicate row indices from the
-    column form of the matrix.  Returns FALSE if the matrix is invalid,
-    TRUE otherwise.  Not user-callable.
-*/
-
-PRIVATE int init_rows_cols     /* returns TRUE if OK, or FALSE otherwise */
-(
-    /* === Parameters ======================================================= 
*/
-
-    int n_row,                 /* number of rows of A */
-    int n_col,                 /* number of columns of A */
-    Colamd_Row Row [],         /* of size n_row+1 */
-    Colamd_Col Col [],         /* of size n_col+1 */
-    int A [],                  /* row indices of A, of size Alen */
-    int p [],                  /* pointers to columns in A, of size n_col+1 */
-    int stats [COLAMD_STATS]   /* colamd statistics */ 
-)
-{
-    /* === Local variables ================================================== 
*/
-
-    int col ;                  /* a column index */
-    int row ;                  /* a row index */
-    int *cp ;                  /* a column pointer */
-    int *cp_end ;              /* a pointer to the end of a column */
-    int *rp ;                  /* a row pointer */
-    int *rp_end ;              /* a pointer to the end of a row */
-    int last_row ;             /* previous row */
-
-    /* === Initialize columns, and check column pointers ==================== 
*/
-
-    for (col = 0 ; col < n_col ; col++)
-    {
-       Col [col].start = p [col] ;
-       Col [col].length = p [col+1] - p [col] ;
-
-       if (Col [col].length < 0)
-       {
-           /* column pointers must be non-decreasing */
-           stats [COLAMD_STATUS] = COLAMD_ERROR_col_length_negative ;
-           stats [COLAMD_INFO1] = col ;
-           stats [COLAMD_INFO2] = Col [col].length ;
-           DEBUG0 (("colamd: col %d length %d < 0\n", col, Col [col].length)) ;
-           return (FALSE) ;
-       }
-
-       Col [col].shared1.thickness = 1 ;
-       Col [col].shared2.score = 0 ;
-       Col [col].shared3.prev = EMPTY ;
-       Col [col].shared4.degree_next = EMPTY ;
-    }
-
-    /* p [0..n_col] no longer needed, used as "head" in subsequent routines */
-
-    /* === Scan columns, compute row degrees, and check row indices ========= 
*/
-
-    stats [COLAMD_INFO3] = 0 ; /* number of duplicate or unsorted row indices*/
-
-    for (row = 0 ; row < n_row ; row++)
-    {
-       Row [row].length = 0 ;
-       Row [row].shared2.mark = -1 ;
-    }
-
-    for (col = 0 ; col < n_col ; col++)
-    {
-       last_row = -1 ;
-
-       cp = &A [p [col]] ;
-       cp_end = &A [p [col+1]] ;
-
-       while (cp < cp_end)
-       {
-           row = *cp++ ;
-
-           /* make sure row indices within range */
-           if (row < 0 || row >= n_row)
-           {
-               stats [COLAMD_STATUS] = COLAMD_ERROR_row_index_out_of_bounds ;
-               stats [COLAMD_INFO1] = col ;
-               stats [COLAMD_INFO2] = row ;
-               stats [COLAMD_INFO3] = n_row ;
-               DEBUG0 (("colamd: row %d col %d out of bounds\n", row, col)) ;
-               return (FALSE) ;
-           }
-
-           if (row <= last_row || Row [row].shared2.mark == col)
-           {
-               /* row index are unsorted or repeated (or both), thus col */
-               /* is jumbled.  This is a notice, not an error condition. */
-               stats [COLAMD_STATUS] = COLAMD_OK_BUT_JUMBLED ;
-               stats [COLAMD_INFO1] = col ;
-               stats [COLAMD_INFO2] = row ;
-               (stats [COLAMD_INFO3]) ++ ;
-               DEBUG1 (("colamd: row %d col %d unsorted/duplicate\n",row,col));
-           }
-
-           if (Row [row].shared2.mark != col)
-           {
-               Row [row].length++ ;
-           }
-           else
-           {
-               /* this is a repeated entry in the column, */
-               /* it will be removed */
-               Col [col].length-- ;
-           }
-
-           /* mark the row as having been seen in this column */
-           Row [row].shared2.mark = col ;
-
-           last_row = row ;
-       }
-    }
-
-    /* === Compute row pointers ============================================= 
*/
-
-    /* row form of the matrix starts directly after the column */
-    /* form of matrix in A */
-    Row [0].start = p [n_col] ;
-    Row [0].shared1.p = Row [0].start ;
-    Row [0].shared2.mark = -1 ;
-    for (row = 1 ; row < n_row ; row++)
-    {
-       Row [row].start = Row [row-1].start + Row [row-1].length ;
-       Row [row].shared1.p = Row [row].start ;
-       Row [row].shared2.mark = -1 ;
-    }
-
-    /* === Create row form ================================================== 
*/
-
-    if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED)
-    {
-       /* if cols jumbled, watch for repeated row indices */
-       for (col = 0 ; col < n_col ; col++)
-       {
-           cp = &A [p [col]] ;
-           cp_end = &A [p [col+1]] ;
-           while (cp < cp_end)
-           {
-               row = *cp++ ;
-               if (Row [row].shared2.mark != col)
-               {
-                   A [(Row [row].shared1.p)++] = col ;
-                   Row [row].shared2.mark = col ;
-               }
-           }
-       }
-    }
-    else
-    {
-       /* if cols not jumbled, we don't need the mark (this is faster) */
-       for (col = 0 ; col < n_col ; col++)
-       {
-           cp = &A [p [col]] ;
-           cp_end = &A [p [col+1]] ;
-           while (cp < cp_end)
-           {
-               A [(Row [*cp++].shared1.p)++] = col ;
-           }
-       }
-    }
-
-    /* === Clear the row marks and set row degrees ========================== 
*/
-
-    for (row = 0 ; row < n_row ; row++)
-    {
-       Row [row].shared2.mark = 0 ;
-       Row [row].shared1.degree = Row [row].length ;
-    }
-
-    /* === See if we need to re-create columns ============================== 
*/
-
-    if (stats [COLAMD_STATUS] == COLAMD_OK_BUT_JUMBLED)
-    {
-       DEBUG0 (("colamd: reconstructing column form, matrix jumbled\n")) ;
-
-#ifndef NDEBUG
-       /* make sure column lengths are correct */
-       for (col = 0 ; col < n_col ; col++)
-       {
-           p [col] = Col [col].length ;
-       }
-       for (row = 0 ; row < n_row ; row++)
-       {
-           rp = &A [Row [row].start] ;
-           rp_end = rp + Row [row].length ;
-           while (rp < rp_end)
-           {
-               p [*rp++]-- ;
-           }
-       }
-       for (col = 0 ; col < n_col ; col++)
-       {
-           ASSERT (p [col] == 0) ;
-       }
-       /* now p is all zero (different than when debugging is turned off) */
-#endif /* NDEBUG */
-
-       /* === Compute col pointers ========================================= */
-
-       /* col form of the matrix starts at A [0]. */
-       /* Note, we may have a gap between the col form and the row */
-       /* form if there were duplicate entries, if so, it will be */
-       /* removed upon the first garbage collection */
-       Col [0].start = 0 ;
-       p [0] = Col [0].start ;
-       for (col = 1 ; col < n_col ; col++)
-       {
-           /* note that the lengths here are for pruned columns, i.e. */
-           /* no duplicate row indices will exist for these columns */
-           Col [col].start = Col [col-1].start + Col [col-1].length ;
-           p [col] = Col [col].start ;
-       }
-
-       /* === Re-create col form =========================================== */
-
-       for (row = 0 ; row < n_row ; row++)
-       {
-           rp = &A [Row [row].start] ;
-           rp_end = rp + Row [row].length ;
-           while (rp < rp_end)
-           {
-               A [(p [*rp++])++] = row ;
-           }
-       }
-    }
-
-    /* === Done.  Matrix is not (or no longer) jumbled ====================== 
*/
-
-    return (TRUE) ;
-}
-
-
-/* ========================================================================== 
*/
-/* === init_scoring ========================================================= 
*/
-/* ========================================================================== 
*/
-
-/*
-    Kills dense or empty columns and rows, calculates an initial score for
-    each column, and places all columns in the degree lists.  Not 
user-callable.
-*/
-
-PRIVATE void init_scoring
-(
-    /* === Parameters ======================================================= 
*/
-
-    int n_row,                 /* number of rows of A */
-    int n_col,                 /* number of columns of A */
-    Colamd_Row Row [],         /* of size n_row+1 */
-    Colamd_Col Col [],         /* of size n_col+1 */
-    int A [],                  /* column form and row form of A */
-    int head [],               /* of size n_col+1 */
-    double knobs [COLAMD_KNOBS],/* parameters */
-    int *p_n_row2,             /* number of non-dense, non-empty rows */
-    int *p_n_col2,             /* number of non-dense, non-empty columns */
-    int *p_max_deg             /* maximum row degree */
-)
-{
-    /* === Local variables ================================================== 
*/
-
-    int c ;                    /* a column index */
-    int r, row ;               /* a row index */
-    int *cp ;                  /* a column pointer */
-    int deg ;                  /* degree of a row or column */
-    int *cp_end ;              /* a pointer to the end of a column */
-    int *new_cp ;              /* new column pointer */
-    int col_length ;           /* length of pruned column */
-    int score ;                        /* current column score */
-    int n_col2 ;               /* number of non-dense, non-empty columns */
-    int n_row2 ;               /* number of non-dense, non-empty rows */
-    int dense_row_count ;      /* remove rows with more entries than this */
-    int dense_col_count ;      /* remove cols with more entries than this */
-    int min_score ;            /* smallest column score */
-    int max_deg ;              /* maximum row degree */
-    int next_col ;             /* Used to add to degree list.*/
-
-#ifndef NDEBUG
-    int debug_count ;          /* debug only. */
-#endif /* NDEBUG */
-
-    /* === Extract knobs ==================================================== 
*/
-
-    dense_row_count = MAX (0, MIN (knobs [COLAMD_DENSE_ROW] * n_col, n_col)) ;
-    dense_col_count = MAX (0, MIN (knobs [COLAMD_DENSE_COL] * n_row, n_row)) ;
-    DEBUG1 (("colamd: densecount: %d %d\n", dense_row_count, dense_col_count)) 
;
-    max_deg = 0 ;
-    n_col2 = n_col ;
-    n_row2 = n_row ;
-
-    /* === Kill empty columns =============================================== 
*/
-
-    /* Put the empty columns at the end in their natural order, so that LU */
-    /* factorization can proceed as far as possible. */
-    for (c = n_col-1 ; c >= 0 ; c--)
-    {
-       deg = Col [c].length ;
-       if (deg == 0)
-       {
-           /* this is a empty column, kill and order it last */
-           Col [c].shared2.order = --n_col2 ;
-           KILL_PRINCIPAL_COL (c) ;
-       }
-    }
-    DEBUG1 (("colamd: null columns killed: %d\n", n_col - n_col2)) ;
-
-    /* === Kill dense columns =============================================== 
*/
-
-    /* Put the dense columns at the end, in their natural order */
-    for (c = n_col-1 ; c >= 0 ; c--)
-    {
-       /* skip any dead columns */
-       if (COL_IS_DEAD (c))
-       {
-           continue ;
-       }
-       deg = Col [c].length ;
-       if (deg > dense_col_count)
-       {
-           /* this is a dense column, kill and order it last */
-           Col [c].shared2.order = --n_col2 ;
-           /* decrement the row degrees */
-           cp = &A [Col [c].start] ;
-           cp_end = cp + Col [c].length ;
-           while (cp < cp_end)
-           {
-               Row [*cp++].shared1.degree-- ;
-           }
-           KILL_PRINCIPAL_COL (c) ;
-       }
-    }
-    DEBUG1 (("colamd: Dense and null columns killed: %d\n", n_col - n_col2)) ;
-
-    /* === Kill dense and empty rows ======================================== 
*/
-
-    for (r = 0 ; r < n_row ; r++)
-    {
-       deg = Row [r].shared1.degree ;
-       ASSERT (deg >= 0 && deg <= n_col) ;
-       if (deg > dense_row_count || deg == 0)
-       {
-           /* kill a dense or empty row */
-           KILL_ROW (r) ;
-           --n_row2 ;
-       }
-       else
-       {
-           /* keep track of max degree of remaining rows */
-           max_deg = MAX (max_deg, deg) ;
-       }
-    }
-    DEBUG1 (("colamd: Dense and null rows killed: %d\n", n_row - n_row2)) ;
-
-    /* === Compute initial column scores ==================================== 
*/
-
-    /* At this point the row degrees are accurate.  They reflect the number */
-    /* of "live" (non-dense) columns in each row.  No empty rows exist. */
-    /* Some "live" columns may contain only dead rows, however.  These are */
-    /* pruned in the code below. */
-
-    /* now find the initial matlab score for each column */
-    for (c = n_col-1 ; c >= 0 ; c--)
-    {
-       /* skip dead column */
-       if (COL_IS_DEAD (c))
-       {
-           continue ;
-       }
-       score = 0 ;
-       cp = &A [Col [c].start] ;
-       new_cp = cp ;
-       cp_end = cp + Col [c].length ;
-       while (cp < cp_end)
-       {
-           /* get a row */
-           row = *cp++ ;
-           /* skip if dead */
-           if (ROW_IS_DEAD (row))
-           {
-               continue ;
-           }
-           /* compact the column */
-           *new_cp++ = row ;
-           /* add row's external degree */
-           score += Row [row].shared1.degree - 1 ;
-           /* guard against integer overflow */
-           score = MIN (score, n_col) ;
-       }
-       /* determine pruned column length */
-       col_length = (int) (new_cp - &A [Col [c].start]) ;
-       if (col_length == 0)
-       {
-           /* a newly-made null column (all rows in this col are "dense" */
-           /* and have already been killed) */
-           DEBUG2 (("Newly null killed: %d\n", c)) ;
-           Col [c].shared2.order = --n_col2 ;
-           KILL_PRINCIPAL_COL (c) ;
-       }
-       else
-       {
-           /* set column length and set score */
-           ASSERT (score >= 0) ;
-           ASSERT (score <= n_col) ;
-           Col [c].length = col_length ;
-           Col [c].shared2.score = score ;
-       }
-    }
-    DEBUG1 (("colamd: Dense, null, and newly-null columns killed: %d\n",
-       n_col-n_col2)) ;
-
-    /* At this point, all empty rows and columns are dead.  All live columns */
-    /* are "clean" (containing no dead rows) and simplicial (no supercolumns */
-    /* yet).  Rows may contain dead columns, but all live rows contain at */
-    /* least one live column. */
-
-#ifndef NDEBUG
-    debug_structures (n_row, n_col, Row, Col, A, n_col2) ;
-#endif /* NDEBUG */
-
-    /* === Initialize degree lists ========================================== 
*/
-
-#ifndef NDEBUG
-    debug_count = 0 ;
-#endif /* NDEBUG */
-
-    /* clear the hash buckets */
-    for (c = 0 ; c <= n_col ; c++)
-    {
-       head [c] = EMPTY ;
-    }
-    min_score = n_col ;
-    /* place in reverse order, so low column indices are at the front */
-    /* of the lists.  This is to encourage natural tie-breaking */
-    for (c = n_col-1 ; c >= 0 ; c--)
-    {
-       /* only add principal columns to degree lists */
-       if (COL_IS_ALIVE (c))
-       {
-           DEBUG4 (("place %d score %d minscore %d ncol %d\n",
-               c, Col [c].shared2.score, min_score, n_col)) ;
-
-           /* === Add columns score to DList =============================== */
-
-           score = Col [c].shared2.score ;
-
-           ASSERT (min_score >= 0) ;
-           ASSERT (min_score <= n_col) ;
-           ASSERT (score >= 0) ;
-           ASSERT (score <= n_col) ;
-           ASSERT (head [score] >= EMPTY) ;
-
-           /* now add this column to dList at proper score location */
-           next_col = head [score] ;
-           Col [c].shared3.prev = EMPTY ;
-           Col [c].shared4.degree_next = next_col ;
-
-           /* if there already was a column with the same score, set its */
-           /* previous pointer to this new column */
-           if (next_col != EMPTY)
-           {
-               Col [next_col].shared3.prev = c ;
-           }
-           head [score] = c ;
-
-           /* see if this score is less than current min */
-           min_score = MIN (min_score, score) ;
-
-#ifndef NDEBUG
-           debug_count++ ;
-#endif /* NDEBUG */
-
-       }
-    }
-
-#ifndef NDEBUG
-    DEBUG1 (("colamd: Live cols %d out of %d, non-princ: %d\n",
-       debug_count, n_col, n_col-debug_count)) ;
-    ASSERT (debug_count == n_col2) ;
-    debug_deg_lists (n_row, n_col, Row, Col, head, min_score, n_col2, max_deg) 
;
-#endif /* NDEBUG */
-
-    /* === Return number of remaining columns, and max row degree =========== 
*/
-
-    *p_n_col2 = n_col2 ;
-    *p_n_row2 = n_row2 ;
-    *p_max_deg = max_deg ;
-}
-
-
-/* ========================================================================== 
*/
-/* === find_ordering ======================================================== 
*/
-/* ========================================================================== 
*/
-
-/*
-    Order the principal columns of the supercolumn form of the matrix
-    (no supercolumns on input).  Uses a minimum approximate column minimum
-    degree ordering method.  Not user-callable.
-*/
-
-PRIVATE int find_ordering      /* return the number of garbage collections */
-(
-    /* === Parameters ======================================================= 
*/
-
-    int n_row,                 /* number of rows of A */
-    int n_col,                 /* number of columns of A */
-    int Alen,                  /* size of A, 2*nnz + n_col or larger */
-    Colamd_Row Row [],         /* of size n_row+1 */
-    Colamd_Col Col [],         /* of size n_col+1 */
-    int A [],                  /* column form and row form of A */
-    int head [],               /* of size n_col+1 */
-    int n_col2,                        /* Remaining columns to order */
-    int max_deg,               /* Maximum row degree */
-    int pfree                  /* index of first free slot (2*nnz on entry) */
-)
-{
-    /* === Local variables ================================================== 
*/
-
-    int k ;                    /* current pivot ordering step */
-    int pivot_col ;            /* current pivot column */
-    int *cp ;                  /* a column pointer */
-    int *rp ;                  /* a row pointer */
-    int pivot_row ;            /* current pivot row */
-    int *new_cp ;              /* modified column pointer */
-    int *new_rp ;              /* modified row pointer */
-    int pivot_row_start ;      /* pointer to start of pivot row */
-    int pivot_row_degree ;     /* number of columns in pivot row */
-    int pivot_row_length ;     /* number of supercolumns in pivot row */
-    int pivot_col_score ;      /* score of pivot column */
-    int needed_memory ;                /* free space needed for pivot row */
-    int *cp_end ;              /* pointer to the end of a column */
-    int *rp_end ;              /* pointer to the end of a row */
-    int row ;                  /* a row index */
-    int col ;                  /* a column index */
-    int max_score ;            /* maximum possible score */
-    int cur_score ;            /* score of current column */
-    unsigned int hash ;                /* hash value for supernode detection */
-    int head_column ;          /* head of hash bucket */
-    int first_col ;            /* first column in hash bucket */
-    int tag_mark ;             /* marker value for mark array */
-    int row_mark ;             /* Row [row].shared2.mark */
-    int set_difference ;       /* set difference size of row with pivot row */
-    int min_score ;            /* smallest column score */
-    int col_thickness ;                /* "thickness" (no. of columns in a 
supercol) */
-    int max_mark ;             /* maximum value of tag_mark */
-    int pivot_col_thickness ;  /* number of columns represented by pivot col */
-    int prev_col ;             /* Used by Dlist operations. */
-    int next_col ;             /* Used by Dlist operations. */
-    int ngarbage ;             /* number of garbage collections performed */
-
-#ifndef NDEBUG
-    int debug_d ;              /* debug loop counter */
-    int debug_step = 0 ;       /* debug loop counter */
-#endif /* NDEBUG */
-
-    /* === Initialization and clear mark ==================================== 
*/
-
-    max_mark = INT_MAX - n_col ;       /* INT_MAX defined in <limits.h> */
-    tag_mark = clear_mark (n_row, Row) ;
-    min_score = 0 ;
-    ngarbage = 0 ;
-    DEBUG1 (("colamd: Ordering, n_col2=%d\n", n_col2)) ;
-
-    /* === Order the columns ================================================ 
*/
-
-    for (k = 0 ; k < n_col2 ; /* 'k' is incremented below */)
-    {
-
-#ifndef NDEBUG
-       if (debug_step % 100 == 0)
-       {
-           DEBUG2 (("\n...       Step k: %d out of n_col2: %d\n", k, n_col2)) ;
-       }
-       else
-       {
-           DEBUG3 (("\n----------Step k: %d out of n_col2: %d\n", k, n_col2)) ;
-       }
-       debug_step++ ;
-       debug_deg_lists (n_row, n_col, Row, Col, head,
-               min_score, n_col2-k, max_deg) ;
-       debug_matrix (n_row, n_col, Row, Col, A) ;
-#endif /* NDEBUG */
-
-       /* === Select pivot column, and order it ============================ */
-
-       /* make sure degree list isn't empty */
-       ASSERT (min_score >= 0) ;
-       ASSERT (min_score <= n_col) ;
-       ASSERT (head [min_score] >= EMPTY) ;
-
-#ifndef NDEBUG
-       for (debug_d = 0 ; debug_d < min_score ; debug_d++)
-       {
-           ASSERT (head [debug_d] == EMPTY) ;
-       }
-#endif /* NDEBUG */
-
-       /* get pivot column from head of minimum degree list */
-       while (head [min_score] == EMPTY && min_score < n_col)
-       {
-           min_score++ ;
-       }
-       pivot_col = head [min_score] ;
-       ASSERT (pivot_col >= 0 && pivot_col <= n_col) ;
-       next_col = Col [pivot_col].shared4.degree_next ;
-       head [min_score] = next_col ;
-       if (next_col != EMPTY)
-       {
-           Col [next_col].shared3.prev = EMPTY ;
-       }
-
-       ASSERT (COL_IS_ALIVE (pivot_col)) ;
-       DEBUG3 (("Pivot col: %d\n", pivot_col)) ;
-
-       /* remember score for defrag check */
-       pivot_col_score = Col [pivot_col].shared2.score ;
-
-       /* the pivot column is the kth column in the pivot order */
-       Col [pivot_col].shared2.order = k ;
-
-       /* increment order count by column thickness */
-       pivot_col_thickness = Col [pivot_col].shared1.thickness ;
-       k += pivot_col_thickness ;
-       ASSERT (pivot_col_thickness > 0) ;
-
-       /* === Garbage_collection, if necessary ============================= */
-
-       needed_memory = MIN (pivot_col_score, n_col - k) ;
-       if (pfree + needed_memory >= Alen)
-       {
-           pfree = garbage_collection (n_row, n_col, Row, Col, A, &A [pfree]) ;
-           ngarbage++ ;
-           /* after garbage collection we will have enough */
-           ASSERT (pfree + needed_memory < Alen) ;
-           /* garbage collection has wiped out the Row[].shared2.mark array */
-           tag_mark = clear_mark (n_row, Row) ;
-
-#ifndef NDEBUG
-           debug_matrix (n_row, n_col, Row, Col, A) ;
-#endif /* NDEBUG */
-       }
-
-       /* === Compute pivot row pattern ==================================== */
-
-       /* get starting location for this new merged row */
-       pivot_row_start = pfree ;
-
-       /* initialize new row counts to zero */
-       pivot_row_degree = 0 ;
-
-       /* tag pivot column as having been visited so it isn't included */
-       /* in merged pivot row */
-       Col [pivot_col].shared1.thickness = -pivot_col_thickness ;
-
-       /* pivot row is the union of all rows in the pivot column pattern */
-       cp = &A [Col [pivot_col].start] ;
-       cp_end = cp + Col [pivot_col].length ;
-       while (cp < cp_end)
-       {
-           /* get a row */
-           row = *cp++ ;
-           DEBUG4 (("Pivot col pattern %d %d\n", ROW_IS_ALIVE (row), row)) ;
-           /* skip if row is dead */
-           if (ROW_IS_DEAD (row))
-           {
-               continue ;
-           }
-           rp = &A [Row [row].start] ;
-           rp_end = rp + Row [row].length ;
-           while (rp < rp_end)
-           {
-               /* get a column */
-               col = *rp++ ;
-               /* add the column, if alive and untagged */
-               col_thickness = Col [col].shared1.thickness ;
-               if (col_thickness > 0 && COL_IS_ALIVE (col))
-               {
-                   /* tag column in pivot row */
-                   Col [col].shared1.thickness = -col_thickness ;
-                   ASSERT (pfree < Alen) ;
-                   /* place column in pivot row */
-                   A [pfree++] = col ;
-                   pivot_row_degree += col_thickness ;
-               }
-           }
-       }
-
-       /* clear tag on pivot column */
-       Col [pivot_col].shared1.thickness = pivot_col_thickness ;
-       max_deg = MAX (max_deg, pivot_row_degree) ;
-
-#ifndef NDEBUG
-       DEBUG3 (("check2\n")) ;
-       debug_mark (n_row, Row, tag_mark, max_mark) ;
-#endif /* NDEBUG */
-
-       /* === Kill all rows used to construct pivot row ==================== */
-
-       /* also kill pivot row, temporarily */
-       cp = &A [Col [pivot_col].start] ;
-       cp_end = cp + Col [pivot_col].length ;
-       while (cp < cp_end)
-       {
-           /* may be killing an already dead row */
-           row = *cp++ ;
-           DEBUG3 (("Kill row in pivot col: %d\n", row)) ;
-           KILL_ROW (row) ;
-       }
-
-       /* === Select a row index to use as the new pivot row =============== */
-
-       pivot_row_length = pfree - pivot_row_start ;
-       if (pivot_row_length > 0)
-       {
-           /* pick the "pivot" row arbitrarily (first row in col) */
-           pivot_row = A [Col [pivot_col].start] ;
-           DEBUG3 (("Pivotal row is %d\n", pivot_row)) ;
-       }
-       else
-       {
-           /* there is no pivot row, since it is of zero length */
-           pivot_row = EMPTY ;
-           ASSERT (pivot_row_length == 0) ;
-       }
-       ASSERT (Col [pivot_col].length > 0 || pivot_row_length == 0) ;
-
-       /* === Approximate degree computation =============================== */
-
-       /* Here begins the computation of the approximate degree.  The column */
-       /* score is the sum of the pivot row "length", plus the size of the */
-       /* set differences of each row in the column minus the pattern of the */
-       /* pivot row itself.  The column ("thickness") itself is also */
-       /* excluded from the column score (we thus use an approximate */
-       /* external degree). */
-
-       /* The time taken by the following code (compute set differences, and */
-       /* add them up) is proportional to the size of the data structure */
-       /* being scanned - that is, the sum of the sizes of each column in */
-       /* the pivot row.  Thus, the amortized time to compute a column score */
-       /* is proportional to the size of that column (where size, in this */
-       /* context, is the column "length", or the number of row indices */
-       /* in that column).  The number of row indices in a column is */
-       /* monotonically non-decreasing, from the length of the original */
-       /* column on input to colamd. */
-
-       /* === Compute set differences ====================================== */
-
-       DEBUG3 (("** Computing set differences phase. **\n")) ;
-
-       /* pivot row is currently dead - it will be revived later. */
-
-       DEBUG3 (("Pivot row: ")) ;
-       /* for each column in pivot row */
-       rp = &A [pivot_row_start] ;
-       rp_end = rp + pivot_row_length ;
-       while (rp < rp_end)
-       {
-           col = *rp++ ;
-           ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ;
-           DEBUG3 (("Col: %d\n", col)) ;
-
-           /* clear tags used to construct pivot row pattern */
-           col_thickness = -Col [col].shared1.thickness ;
-           ASSERT (col_thickness > 0) ;
-           Col [col].shared1.thickness = col_thickness ;
-
-           /* === Remove column from degree list =========================== */
-
-           cur_score = Col [col].shared2.score ;
-           prev_col = Col [col].shared3.prev ;
-           next_col = Col [col].shared4.degree_next ;
-           ASSERT (cur_score >= 0) ;
-           ASSERT (cur_score <= n_col) ;
-           ASSERT (cur_score >= EMPTY) ;
-           if (prev_col == EMPTY)
-           {
-               head [cur_score] = next_col ;
-           }
-           else
-           {
-               Col [prev_col].shared4.degree_next = next_col ;
-           }
-           if (next_col != EMPTY)
-           {
-               Col [next_col].shared3.prev = prev_col ;
-           }
-
-           /* === Scan the column ========================================== */
-
-           cp = &A [Col [col].start] ;
-           cp_end = cp + Col [col].length ;
-           while (cp < cp_end)
-           {
-               /* get a row */
-               row = *cp++ ;
-               row_mark = Row [row].shared2.mark ;
-               /* skip if dead */
-               if (ROW_IS_MARKED_DEAD (row_mark))
-               {
-                   continue ;
-               }
-               ASSERT (row != pivot_row) ;
-               set_difference = row_mark - tag_mark ;
-               /* check if the row has been seen yet */
-               if (set_difference < 0)
-               {
-                   ASSERT (Row [row].shared1.degree <= max_deg) ;
-                   set_difference = Row [row].shared1.degree ;
-               }
-               /* subtract column thickness from this row's set difference */
-               set_difference -= col_thickness ;
-               ASSERT (set_difference >= 0) ;
-               /* absorb this row if the set difference becomes zero */
-               if (set_difference == 0)
-               {
-                   DEBUG3 (("aggressive absorption. Row: %d\n", row)) ;
-                   KILL_ROW (row) ;
-               }
-               else
-               {
-                   /* save the new mark */
-                   Row [row].shared2.mark = set_difference + tag_mark ;
-               }
-           }
-       }
-
-#ifndef NDEBUG
-       debug_deg_lists (n_row, n_col, Row, Col, head,
-               min_score, n_col2-k-pivot_row_degree, max_deg) ;
-#endif /* NDEBUG */
-
-       /* === Add up set differences for each column ======================= */
-
-       DEBUG3 (("** Adding set differences phase. **\n")) ;
-
-       /* for each column in pivot row */
-       rp = &A [pivot_row_start] ;
-       rp_end = rp + pivot_row_length ;
-       while (rp < rp_end)
-       {
-           /* get a column */
-           col = *rp++ ;
-           ASSERT (COL_IS_ALIVE (col) && col != pivot_col) ;
-           hash = 0 ;
-           cur_score = 0 ;
-           cp = &A [Col [col].start] ;
-           /* compact the column */
-           new_cp = cp ;
-           cp_end = cp + Col [col].length ;
-
-           DEBUG4 (("Adding set diffs for Col: %d.\n", col)) ;
-
-           while (cp < cp_end)
-           {
-               /* get a row */
-               row = *cp++ ;
-               ASSERT(row >= 0 && row < n_row) ;
-               row_mark = Row [row].shared2.mark ;
-               /* skip if dead */
-               if (ROW_IS_MARKED_DEAD (row_mark))
-               {
-                   continue ;
-               }
-               ASSERT (row_mark > tag_mark) ;
-               /* compact the column */
-               *new_cp++ = row ;
-               /* compute hash function */
-               hash += row ;
-               /* add set difference */
-               cur_score += row_mark - tag_mark ;
-               /* integer overflow... */
-               cur_score = MIN (cur_score, n_col) ;
-           }
-
-           /* recompute the column's length */
-           Col [col].length = (int) (new_cp - &A [Col [col].start]) ;
-
-           /* === Further mass elimination ================================= */
-
-           if (Col [col].length == 0)
-           {
-               DEBUG4 (("further mass elimination. Col: %d\n", col)) ;
-               /* nothing left but the pivot row in this column */
-               KILL_PRINCIPAL_COL (col) ;
-               pivot_row_degree -= Col [col].shared1.thickness ;
-               ASSERT (pivot_row_degree >= 0) ;
-               /* order it */
-               Col [col].shared2.order = k ;
-               /* increment order count by column thickness */
-               k += Col [col].shared1.thickness ;
-           }
-           else
-           {
-               /* === Prepare for supercolumn detection ==================== */
-
-               DEBUG4 (("Preparing supercol detection for Col: %d.\n", col)) ;
-
-               /* save score so far */
-               Col [col].shared2.score = cur_score ;
-
-               /* add column to hash table, for supercolumn detection */
-               hash %= n_col + 1 ;
-
-               DEBUG4 ((" Hash = %d, n_col = %d.\n", hash, n_col)) ;
-               ASSERT (hash <= n_col) ;
-
-               head_column = head [hash] ;
-               if (head_column > EMPTY)
-               {
-                   /* degree list "hash" is non-empty, use prev (shared3) of */
-                   /* first column in degree list as head of hash bucket */
-                   first_col = Col [head_column].shared3.headhash ;
-                   Col [head_column].shared3.headhash = col ;
-               }
-               else
-               {
-                   /* degree list "hash" is empty, use head as hash bucket */
-                   first_col = - (head_column + 2) ;
-                   head [hash] = - (col + 2) ;
-               }
-               Col [col].shared4.hash_next = first_col ;
-
-               /* save hash function in Col [col].shared3.hash */
-               Col [col].shared3.hash = (int) hash ;
-               ASSERT (COL_IS_ALIVE (col)) ;
-           }
-       }
-
-       /* The approximate external column degree is now computed.  */
-
-       /* === Supercolumn detection ======================================== */
-
-       DEBUG3 (("** Supercolumn detection phase. **\n")) ;
-
-       detect_super_cols (
-
-#ifndef NDEBUG
-               n_col, Row,
-#endif /* NDEBUG */
-
-               Col, A, head, pivot_row_start, pivot_row_length) ;
-
-       /* === Kill the pivotal column ====================================== */
-
-       KILL_PRINCIPAL_COL (pivot_col) ;
-
-       /* === Clear mark =================================================== */
-
-       tag_mark += (max_deg + 1) ;
-       if (tag_mark >= max_mark)
-       {
-           DEBUG2 (("clearing tag_mark\n")) ;
-           tag_mark = clear_mark (n_row, Row) ;
-       }
-
-#ifndef NDEBUG
-       DEBUG3 (("check3\n")) ;
-       debug_mark (n_row, Row, tag_mark, max_mark) ;
-#endif /* NDEBUG */
-
-       /* === Finalize the new pivot row, and column scores ================ */
-
-       DEBUG3 (("** Finalize scores phase. **\n")) ;
-
-       /* for each column in pivot row */
-       rp = &A [pivot_row_start] ;
-       /* compact the pivot row */
-       new_rp = rp ;
-       rp_end = rp + pivot_row_length ;
-       while (rp < rp_end)
-       {
-           col = *rp++ ;
-           /* skip dead columns */
-           if (COL_IS_DEAD (col))
-           {
-               continue ;
-           }
-           *new_rp++ = col ;
-           /* add new pivot row to column */
-           A [Col [col].start + (Col [col].length++)] = pivot_row ;
-
-           /* retrieve score so far and add on pivot row's degree. */
-           /* (we wait until here for this in case the pivot */
-           /* row's degree was reduced due to mass elimination). */
-           cur_score = Col [col].shared2.score + pivot_row_degree ;
-
-           /* calculate the max possible score as the number of */
-           /* external columns minus the 'k' value minus the */
-           /* columns thickness */
-           max_score = n_col - k - Col [col].shared1.thickness ;
-
-           /* make the score the external degree of the union-of-rows */
-           cur_score -= Col [col].shared1.thickness ;
-
-           /* make sure score is less or equal than the max score */
-           cur_score = MIN (cur_score, max_score) ;
-           ASSERT (cur_score >= 0) ;
-
-           /* store updated score */
-           Col [col].shared2.score = cur_score ;
-
-           /* === Place column back in degree list ========================= */
-
-           ASSERT (min_score >= 0) ;
-           ASSERT (min_score <= n_col) ;
-           ASSERT (cur_score >= 0) ;
-           ASSERT (cur_score <= n_col) ;
-           ASSERT (head [cur_score] >= EMPTY) ;
-           next_col = head [cur_score] ;
-           Col [col].shared4.degree_next = next_col ;
-           Col [col].shared3.prev = EMPTY ;
-           if (next_col != EMPTY)
-           {
-               Col [next_col].shared3.prev = col ;
-           }
-           head [cur_score] = col ;
-
-           /* see if this score is less than current min */
-           min_score = MIN (min_score, cur_score) ;
-
-       }
-
-#ifndef NDEBUG
-       debug_deg_lists (n_row, n_col, Row, Col, head,
-               min_score, n_col2-k, max_deg) ;
-#endif /* NDEBUG */
-
-       /* === Resurrect the new pivot row ================================== */
-
-       if (pivot_row_degree > 0)
-       {
-           /* update pivot row length to reflect any cols that were killed */
-           /* during super-col detection and mass elimination */
-           Row [pivot_row].start  = pivot_row_start ;
-           Row [pivot_row].length = (int) (new_rp - &A[pivot_row_start]) ;
-           Row [pivot_row].shared1.degree = pivot_row_degree ;
-           Row [pivot_row].shared2.mark = 0 ;
-           /* pivot row is no longer dead */
-       }
-    }
-
-    /* === All principal columns have now been ordered ====================== 
*/
-
-    return (ngarbage) ;
-}
-
-
-/* ========================================================================== 
*/
-/* === order_children ======================================================= 
*/
-/* ========================================================================== 
*/
-
-/*
-    The find_ordering routine has ordered all of the principal columns (the
-    representatives of the supercolumns).  The non-principal columns have not
-    yet been ordered.  This routine orders those columns by walking up the
-    parent tree (a column is a child of the column which absorbed it).  The
-    final permutation vector is then placed in p [0 ... n_col-1], with p [0]
-    being the first column, and p [n_col-1] being the last.  It doesn't look
-    like it at first glance, but be assured that this routine takes time linear
-    in the number of columns.  Although not immediately obvious, the time
-    taken by this routine is O (n_col), that is, linear in the number of
-    columns.  Not user-callable.
-*/
-
-PRIVATE void order_children
-(
-    /* === Parameters ======================================================= 
*/
-
-    int n_col,                 /* number of columns of A */
-    Colamd_Col Col [],         /* of size n_col+1 */
-    int p []                   /* p [0 ... n_col-1] is the column permutation*/
-)
-{
-    /* === Local variables ================================================== 
*/
-
-    int i ;                    /* loop counter for all columns */
-    int c ;                    /* column index */
-    int parent ;               /* index of column's parent */
-    int order ;                        /* column's order */
-
-    /* === Order each non-principal column ================================== 
*/
-
-    for (i = 0 ; i < n_col ; i++)
-    {
-       /* find an un-ordered non-principal column */
-       ASSERT (COL_IS_DEAD (i)) ;
-       if (!COL_IS_DEAD_PRINCIPAL (i) && Col [i].shared2.order == EMPTY)
-       {
-           parent = i ;
-           /* once found, find its principal parent */
-           do
-           {
-               parent = Col [parent].shared1.parent ;
-           } while (!COL_IS_DEAD_PRINCIPAL (parent)) ;
-
-           /* now, order all un-ordered non-principal columns along path */
-           /* to this parent.  collapse tree at the same time */
-           c = i ;
-           /* get order of parent */
-           order = Col [parent].shared2.order ;
-
-           do
-           {
-               ASSERT (Col [c].shared2.order == EMPTY) ;
-
-               /* order this column */
-               Col [c].shared2.order = order++ ;
-               /* collaps tree */
-               Col [c].shared1.parent = parent ;
-
-               /* get immediate parent of this column */
-               c = Col [c].shared1.parent ;
-
-               /* continue until we hit an ordered column.  There are */
-               /* guarranteed not to be anymore unordered columns */
-               /* above an ordered column */
-           } while (Col [c].shared2.order == EMPTY) ;
-
-           /* re-order the super_col parent to largest order for this group */
-           Col [parent].shared2.order = order ;
-       }
-    }
-
-    /* === Generate the permutation ========================================= 
*/
-
-    for (c = 0 ; c < n_col ; c++)
-    {
-       p [Col [c].shared2.order] = c ;
-    }
-}
-
-
-/* ========================================================================== 
*/
-/* === detect_super_cols ==================================================== 
*/
-/* ========================================================================== 
*/
-
-/*
-    Detects supercolumns by finding matches between columns in the hash 
buckets.
-    Check amongst columns in the set A [row_start ... row_start + 
row_length-1].
-    The columns under consideration are currently *not* in the degree lists,
-    and have already been placed in the hash buckets.
-
-    The hash bucket for columns whose hash function is equal to h is stored
-    as follows:
-
-       if head [h] is >= 0, then head [h] contains a degree list, so:
-
-               head [h] is the first column in degree bucket h.
-               Col [head [h]].headhash gives the first column in hash bucket h.
-
-       otherwise, the degree list is empty, and:
-
-               -(head [h] + 2) is the first column in hash bucket h.
-
-    For a column c in a hash bucket, Col [c].shared3.prev is NOT a "previous
-    column" pointer.  Col [c].shared3.hash is used instead as the hash number
-    for that column.  The value of Col [c].shared4.hash_next is the next column
-    in the same hash bucket.
-
-    Assuming no, or "few" hash collisions, the time taken by this routine is
-    linear in the sum of the sizes (lengths) of each column whose score has
-    just been computed in the approximate degree computation.
-    Not user-callable.
-*/
-
-PRIVATE void detect_super_cols
-(
-    /* === Parameters ======================================================= 
*/
-
-#ifndef NDEBUG
-    /* these two parameters are only needed when debugging is enabled: */
-    int n_col,                 /* number of columns of A */
-    Colamd_Row Row [],         /* of size n_row+1 */
-#endif /* NDEBUG */
-
-    Colamd_Col Col [],         /* of size n_col+1 */
-    int A [],                  /* row indices of A */
-    int head [],               /* head of degree lists and hash buckets */
-    int row_start,             /* pointer to set of columns to check */
-    int row_length             /* number of columns to check */
-)
-{
-    /* === Local variables ================================================== 
*/
-
-    int hash ;                 /* hash value for a column */
-    int *rp ;                  /* pointer to a row */
-    int c ;                    /* a column index */
-    int super_c ;              /* column index of the column to absorb into */
-    int *cp1 ;                 /* column pointer for column super_c */
-    int *cp2 ;                 /* column pointer for column c */
-    int length ;               /* length of column super_c */
-    int prev_c ;               /* column preceding c in hash bucket */
-    int i ;                    /* loop counter */
-    int *rp_end ;              /* pointer to the end of the row */
-    int col ;                  /* a column index in the row to check */
-    int head_column ;          /* first column in hash bucket or degree list */
-    int first_col ;            /* first column in hash bucket */
-
-    /* === Consider each column in the row ================================== 
*/
-
-    rp = &A [row_start] ;
-    rp_end = rp + row_length ;
-    while (rp < rp_end)
-    {
-       col = *rp++ ;
-       if (COL_IS_DEAD (col))
-       {
-           continue ;
-       }
-
-       /* get hash number for this column */
-       hash = Col [col].shared3.hash ;
-       ASSERT (hash <= n_col) ;
-
-       /* === Get the first column in this hash bucket ===================== */
-
-       head_column = head [hash] ;
-       if (head_column > EMPTY)
-       {
-           first_col = Col [head_column].shared3.headhash ;
-       }
-       else
-       {
-           first_col = - (head_column + 2) ;
-       }
-
-       /* === Consider each column in the hash bucket ====================== */
-
-       for (super_c = first_col ; super_c != EMPTY ;
-           super_c = Col [super_c].shared4.hash_next)
-       {
-           ASSERT (COL_IS_ALIVE (super_c)) ;
-           ASSERT (Col [super_c].shared3.hash == hash) ;
-           length = Col [super_c].length ;
-
-           /* prev_c is the column preceding column c in the hash bucket */
-           prev_c = super_c ;
-
-           /* === Compare super_c with all columns after it ================ */
-
-           for (c = Col [super_c].shared4.hash_next ;
-                c != EMPTY ; c = Col [c].shared4.hash_next)
-           {
-               ASSERT (c != super_c) ;
-               ASSERT (COL_IS_ALIVE (c)) ;
-               ASSERT (Col [c].shared3.hash == hash) ;
-
-               /* not identical if lengths or scores are different */
-               if (Col [c].length != length ||
-                   Col [c].shared2.score != Col [super_c].shared2.score)
-               {
-                   prev_c = c ;
-                   continue ;
-               }
-
-               /* compare the two columns */
-               cp1 = &A [Col [super_c].start] ;
-               cp2 = &A [Col [c].start] ;
-
-               for (i = 0 ; i < length ; i++)
-               {
-                   /* the columns are "clean" (no dead rows) */
-                   ASSERT (ROW_IS_ALIVE (*cp1))  ;
-                   ASSERT (ROW_IS_ALIVE (*cp2))  ;
-                   /* row indices will same order for both supercols, */
-                   /* no gather scatter nessasary */
-                   if (*cp1++ != *cp2++)
-                   {
-                       break ;
-                   }
-               }
-
-               /* the two columns are different if the for-loop "broke" */
-               if (i != length)
-               {
-                   prev_c = c ;
-                   continue ;
-               }
-
-               /* === Got it!  two columns are identical =================== */
-
-               ASSERT (Col [c].shared2.score == Col [super_c].shared2.score) ;
-
-               Col [super_c].shared1.thickness += Col [c].shared1.thickness ;
-               Col [c].shared1.parent = super_c ;
-               KILL_NON_PRINCIPAL_COL (c) ;
-               /* order c later, in order_children() */
-               Col [c].shared2.order = EMPTY ;
-               /* remove c from hash bucket */
-               Col [prev_c].shared4.hash_next = Col [c].shared4.hash_next ;
-           }
-       }
-
-       /* === Empty this hash bucket ======================================= */
-
-       if (head_column > EMPTY)
-       {
-           /* corresponding degree list "hash" is not empty */
-           Col [head_column].shared3.headhash = EMPTY ;
-       }
-       else
-       {
-           /* corresponding degree list "hash" is empty */
-           head [hash] = EMPTY ;
-       }
-    }
-}
-
-
-/* ========================================================================== 
*/
-/* === garbage_collection =================================================== 
*/
-/* ========================================================================== 
*/
-
-/*
-    Defragments and compacts columns and rows in the workspace A.  Used when
-    all avaliable memory has been used while performing row merging.  Returns
-    the index of the first free position in A, after garbage collection.  The
-    time taken by this routine is linear is the size of the array A, which is
-    itself linear in the number of nonzeros in the input matrix.
-    Not user-callable.
-*/
-
-PRIVATE int garbage_collection  /* returns the new value of pfree */
-(
-    /* === Parameters ======================================================= 
*/
-
-    int n_row,                 /* number of rows */
-    int n_col,                 /* number of columns */
-    Colamd_Row Row [],         /* row info */
-    Colamd_Col Col [],         /* column info */
-    int A [],                  /* A [0 ... Alen-1] holds the matrix */
-    int *pfree                 /* &A [0] ... pfree is in use */
-)
-{
-    /* === Local variables ================================================== 
*/
-
-    int *psrc ;                        /* source pointer */
-    int *pdest ;               /* destination pointer */
-    int j ;                    /* counter */
-    int r ;                    /* a row index */
-    int c ;                    /* a column index */
-    int length ;               /* length of a row or column */
-
-#ifndef NDEBUG
-    int debug_rows ;
-    DEBUG2 (("Defrag..\n")) ;
-    for (psrc = &A[0] ; psrc < pfree ; psrc++) ASSERT (*psrc >= 0) ;
-    debug_rows = 0 ;
-#endif /* NDEBUG */
-
-    /* === Defragment the columns =========================================== 
*/
-
-    pdest = &A[0] ;
-    for (c = 0 ; c < n_col ; c++)
-    {
-       if (COL_IS_ALIVE (c))
-       {
-           psrc = &A [Col [c].start] ;
-
-           /* move and compact the column */
-           ASSERT (pdest <= psrc) ;
-           Col [c].start = (int) (pdest - &A [0]) ;
-           length = Col [c].length ;
-           for (j = 0 ; j < length ; j++)
-           {
-               r = *psrc++ ;
-               if (ROW_IS_ALIVE (r))
-               {
-                   *pdest++ = r ;
-               }
-           }
-           Col [c].length = (int) (pdest - &A [Col [c].start]) ;
-       }
-    }
-
-    /* === Prepare to defragment the rows =================================== 
*/
-
-    for (r = 0 ; r < n_row ; r++)
-    {
-       if (ROW_IS_ALIVE (r))
-       {
-           if (Row [r].length == 0)
-           {
-               /* this row is of zero length.  cannot compact it, so kill it */
-               DEBUG3 (("Defrag row kill\n")) ;
-               KILL_ROW (r) ;
-           }
-           else
-           {
-               /* save first column index in Row [r].shared2.first_column */
-               psrc = &A [Row [r].start] ;
-               Row [r].shared2.first_column = *psrc ;
-               ASSERT (ROW_IS_ALIVE (r)) ;
-               /* flag the start of the row with the one's complement of row */
-               *psrc = ONES_COMPLEMENT (r) ;
-
-#ifndef NDEBUG
-               debug_rows++ ;
-#endif /* NDEBUG */
-
-           }
-       }
-    }
-
-    /* === Defragment the rows ============================================== 
*/
-
-    psrc = pdest ;
-    while (psrc < pfree)
-    {
-       /* find a negative number ... the start of a row */
-       if (*psrc++ < 0)
-       {
-           psrc-- ;
-           /* get the row index */
-           r = ONES_COMPLEMENT (*psrc) ;
-           ASSERT (r >= 0 && r < n_row) ;
-           /* restore first column index */
-           *psrc = Row [r].shared2.first_column ;
-           ASSERT (ROW_IS_ALIVE (r)) ;
-
-           /* move and compact the row */
-           ASSERT (pdest <= psrc) ;
-           Row [r].start = (int) (pdest - &A [0]) ;
-           length = Row [r].length ;
-           for (j = 0 ; j < length ; j++)
-           {
-               c = *psrc++ ;
-               if (COL_IS_ALIVE (c))
-               {
-                   *pdest++ = c ;
-               }
-           }
-           Row [r].length = (int) (pdest - &A [Row [r].start]) ;
-
-#ifndef NDEBUG
-           debug_rows-- ;
-#endif /* NDEBUG */
-
-       }
-    }
-    /* ensure we found all the rows */
-    ASSERT (debug_rows == 0) ;
-
-    /* === Return the new value of pfree ==================================== 
*/
-
-    return ((int) (pdest - &A [0])) ;
-}
-
-
-/* ========================================================================== 
*/
-/* === clear_mark =========================================================== 
*/
-/* ========================================================================== 
*/
-
-/*
-    Clears the Row [].shared2.mark array, and returns the new tag_mark.
-    Return value is the new tag_mark.  Not user-callable.
-*/
-
-PRIVATE int clear_mark /* return the new value for tag_mark */
-(
-    /* === Parameters ======================================================= 
*/
-
-    int n_row,         /* number of rows in A */
-    Colamd_Row Row []  /* Row [0 ... n_row-1].shared2.mark is set to zero */
-)
-{
-    /* === Local variables ================================================== 
*/
-
-    int r ;
-
-    for (r = 0 ; r < n_row ; r++)
-    {
-       if (ROW_IS_ALIVE (r))
-       {
-           Row [r].shared2.mark = 0 ;
-       }
-    }
-    return (1) ;
-}
-
-
-/* ========================================================================== 
*/
-/* === print_report ========================================================= 
*/
-/* ========================================================================== 
*/
-
-PRIVATE void print_report
-(
-    char *method,
-    int stats [COLAMD_STATS]
-)
-{
-
-    int i1, i2, i3 ;
-
-    if (!stats)
-    {
-       PRINTF ("%s: No statistics available.\n", method) ;
-       return ;
-    }
-
-    i1 = stats [COLAMD_INFO1] ;
-    i2 = stats [COLAMD_INFO2] ;
-    i3 = stats [COLAMD_INFO3] ;
-
-    if (stats [COLAMD_STATUS] >= 0)
-    {
-       PRINTF ("%s: OK.  ", method) ;
-    }
-    else
-    {
-       PRINTF ("%s: ERROR.  ", method) ;
-    }
-
-    switch (stats [COLAMD_STATUS])
-    {
-
-       case COLAMD_OK_BUT_JUMBLED:
-
-           PRINTF ("Matrix has unsorted or duplicate row indices.\n") ;
-
-           PRINTF ("%s: number of duplicate or out-of-order row indices: %d\n",
-           method, i3) ;
-
-           PRINTF ("%s: last seen duplicate or out-of-order row index:   %d\n",
-           method, INDEX (i2)) ;
-
-           PRINTF ("%s: last seen in column:                             %d",
-           method, INDEX (i1)) ;
-
-           /* no break - fall through to next case instead */
-
-       case COLAMD_OK:
-
-           PRINTF ("\n") ;
-
-           PRINTF ("%s: number of dense or empty rows ignored:           %d\n",
-           method, stats [COLAMD_DENSE_ROW]) ;
-
-           PRINTF ("%s: number of dense or empty columns ignored:        %d\n",
-           method, stats [COLAMD_DENSE_COL]) ;
-
-           PRINTF ("%s: number of garbage collections performed:         %d\n",
-           method, stats [COLAMD_DEFRAG_COUNT]) ;
-           break ;
-
-       case COLAMD_ERROR_A_not_present:
-
-           PRINTF ("Array A (row indices of matrix) not present.\n") ;
-           break ;
-
-       case COLAMD_ERROR_p_not_present:
-
-           PRINTF ("Array p (column pointers for matrix) not present.\n") ;
-           break ;
-
-       case COLAMD_ERROR_nrow_negative:
-
-           PRINTF ("Invalid number of rows (%d).\n", i1) ;
-           break ;
-
-       case COLAMD_ERROR_ncol_negative:
-
-           PRINTF ("Invalid number of columns (%d).\n", i1) ;
-           break ;
-
-       case COLAMD_ERROR_nnz_negative:
-
-           PRINTF ("Invalid number of nonzero entries (%d).\n", i1) ;
-           break ;
-
-       case COLAMD_ERROR_p0_nonzero:
-
-           PRINTF ("Invalid column pointer, p [0] = %d, must be zero.\n", i1) ;
-           break ;
-
-       case COLAMD_ERROR_A_too_small:
-
-           PRINTF ("Array A too small.\n") ;
-           PRINTF ("        Need Alen >= %d, but given only Alen = %d.\n",
-           i1, i2) ;
-           break ;
-
-       case COLAMD_ERROR_col_length_negative:
-
-           PRINTF
-           ("Column %d has a negative number of nonzero entries (%d).\n",
-           INDEX (i1), i2) ;
-           break ;
-
-       case COLAMD_ERROR_row_index_out_of_bounds:
-
-           PRINTF
-           ("Row index (row %d) out of bounds (%d to %d) in column %d.\n",
-           INDEX (i2), INDEX (0), INDEX (i3-1), INDEX (i1)) ;
-           break ;
-
-       case COLAMD_ERROR_out_of_memory:
-
-           PRINTF ("Out of memory.\n") ;
-           break ;
-
-       case COLAMD_ERROR_internal_error:
-
-           /* if this happens, there is a bug in the code */
-           PRINTF
-           ("Internal error! Please contact authors (davis@cise.ufl.edu).\n") ;
-           break ;
-    }
-}
-
-
-
-
-/* ========================================================================== 
*/
-/* === colamd debugging routines ============================================ 
*/
-/* ========================================================================== 
*/
-
-/* When debugging is disabled, the remainder of this file is ignored. */
-
-#ifndef NDEBUG
-
-
-/* ========================================================================== 
*/
-/* === debug_structures ===================================================== 
*/
-/* ========================================================================== 
*/
-
-/*
-    At this point, all empty rows and columns are dead.  All live columns
-    are "clean" (containing no dead rows) and simplicial (no supercolumns
-    yet).  Rows may contain dead columns, but all live rows contain at
-    least one live column.
-*/
-
-PRIVATE void debug_structures
-(
-    /* === Parameters ======================================================= 
*/
-
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int A [],
-    int n_col2
-)
-{
-    /* === Local variables ================================================== 
*/
-
-    int i ;
-    int c ;
-    int *cp ;
-    int *cp_end ;
-    int len ;
-    int score ;
-    int r ;
-    int *rp ;
-    int *rp_end ;
-    int deg ;
-
-    /* === Check A, Row, and Col ============================================ 
*/
-
-    for (c = 0 ; c < n_col ; c++)
-    {
-       if (COL_IS_ALIVE (c))
-       {
-           len = Col [c].length ;
-           score = Col [c].shared2.score ;
-           DEBUG4 (("initial live col %5d %5d %5d\n", c, len, score)) ;
-           ASSERT (len > 0) ;
-           ASSERT (score >= 0) ;
-           ASSERT (Col [c].shared1.thickness == 1) ;
-           cp = &A [Col [c].start] ;
-           cp_end = cp + len ;
-           while (cp < cp_end)
-           {
-               r = *cp++ ;
-               ASSERT (ROW_IS_ALIVE (r)) ;
-           }
-       }
-       else
-       {
-           i = Col [c].shared2.order ;
-           ASSERT (i >= n_col2 && i < n_col) ;
-       }
-    }
-
-    for (r = 0 ; r < n_row ; r++)
-    {
-       if (ROW_IS_ALIVE (r))
-       {
-           i = 0 ;
-           len = Row [r].length ;
-           deg = Row [r].shared1.degree ;
-           ASSERT (len > 0) ;
-           ASSERT (deg > 0) ;
-           rp = &A [Row [r].start] ;
-           rp_end = rp + len ;
-           while (rp < rp_end)
-           {
-               c = *rp++ ;
-               if (COL_IS_ALIVE (c))
-               {
-                   i++ ;
-               }
-           }
-           ASSERT (i > 0) ;
-       }
-    }
-}
-
-
-/* ========================================================================== 
*/
-/* === debug_deg_lists ====================================================== 
*/
-/* ========================================================================== 
*/
-
-/*
-    Prints the contents of the degree lists.  Counts the number of columns
-    in the degree list and compares it to the total it should have.  Also
-    checks the row degrees.
-*/
-
-PRIVATE void debug_deg_lists
-(
-    /* === Parameters ======================================================= 
*/
-
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int head [],
-    int min_score,
-    int should,
-    int max_deg
-)
-{
-    /* === Local variables ================================================== 
*/
-
-    int deg ;
-    int col ;
-    int have ;
-    int row ;
-
-    /* === Check the degree lists =========================================== 
*/
-
-    if (n_col > 10000 && colamd_debug <= 0)
-    {
-       return ;
-    }
-    have = 0 ;
-    DEBUG4 (("Degree lists: %d\n", min_score)) ;
-    for (deg = 0 ; deg <= n_col ; deg++)
-    {
-       col = head [deg] ;
-       if (col == EMPTY)
-       {
-           continue ;
-       }
-       DEBUG4 (("%d:", deg)) ;
-       while (col != EMPTY)
-       {
-           DEBUG4 ((" %d", col)) ;
-           have += Col [col].shared1.thickness ;
-           ASSERT (COL_IS_ALIVE (col)) ;
-           col = Col [col].shared4.degree_next ;
-       }
-       DEBUG4 (("\n")) ;
-    }
-    DEBUG4 (("should %d have %d\n", should, have)) ;
-    ASSERT (should == have) ;
-
-    /* === Check the row degrees ============================================ 
*/
-
-    if (n_row > 10000 && colamd_debug <= 0)
-    {
-       return ;
-    }
-    for (row = 0 ; row < n_row ; row++)
-    {
-       if (ROW_IS_ALIVE (row))
-       {
-           ASSERT (Row [row].shared1.degree <= max_deg) ;
-       }
-    }
-}
-
-
-/* ========================================================================== 
*/
-/* === debug_mark =========================================================== 
*/
-/* ========================================================================== 
*/
-
-/*
-    Ensures that the tag_mark is less that the maximum and also ensures that
-    each entry in the mark array is less than the tag mark.
-*/
-
-PRIVATE void debug_mark
-(
-    /* === Parameters ======================================================= 
*/
-
-    int n_row,
-    Colamd_Row Row [],
-    int tag_mark,
-    int max_mark
-)
-{
-    /* === Local variables ================================================== 
*/
-
-    int r ;
-
-    /* === Check the Row marks ============================================== 
*/
-
-    ASSERT (tag_mark > 0 && tag_mark <= max_mark) ;
-    if (n_row > 10000 && colamd_debug <= 0)
-    {
-       return ;
-    }
-    for (r = 0 ; r < n_row ; r++)
-    {
-       ASSERT (Row [r].shared2.mark < tag_mark) ;
-    }
-}
-
-
-/* ========================================================================== 
*/
-/* === debug_matrix ========================================================= 
*/
-/* ========================================================================== 
*/
-
-/*
-    Prints out the contents of the columns and the rows.
-*/
-
-PRIVATE void debug_matrix
-(
-    /* === Parameters ======================================================= 
*/
-
-    int n_row,
-    int n_col,
-    Colamd_Row Row [],
-    Colamd_Col Col [],
-    int A []
-)
-{
-    /* === Local variables ================================================== 
*/
-
-    int r ;
-    int c ;
-    int *rp ;
-    int *rp_end ;
-    int *cp ;
-    int *cp_end ;
-
-    /* === Dump the rows and columns of the matrix ========================== 
*/
-
-    if (colamd_debug < 3)
-    {
-       return ;
-    }
-    DEBUG3 (("DUMP MATRIX:\n")) ;
-    for (r = 0 ; r < n_row ; r++)
-    {
-       DEBUG3 (("Row %d alive? %d\n", r, ROW_IS_ALIVE (r))) ;
-       if (ROW_IS_DEAD (r))
-       {
-           continue ;
-       }
-       DEBUG3 (("start %d length %d degree %d\n",
-               Row [r].start, Row [r].length, Row [r].shared1.degree)) ;
-       rp = &A [Row [r].start] ;
-       rp_end = rp + Row [r].length ;
-       while (rp < rp_end)
-       {
-           c = *rp++ ;
-           DEBUG4 (("  %d col %d\n", COL_IS_ALIVE (c), c)) ;
-       }
-    }
-
-    for (c = 0 ; c < n_col ; c++)
-    {
-       DEBUG3 (("Col %d alive? %d\n", c, COL_IS_ALIVE (c))) ;
-       if (COL_IS_DEAD (c))
-       {
-           continue ;
-       }
-       DEBUG3 (("start %d length %d shared1 %d shared2 %d\n",
-               Col [c].start, Col [c].length,
-               Col [c].shared1.thickness, Col [c].shared2.score)) ;
-       cp = &A [Col [c].start] ;
-       cp_end = cp + Col [c].length ;
-       while (cp < cp_end)
-       {
-           r = *cp++ ;
-           DEBUG4 (("  %d row %d\n", ROW_IS_ALIVE (r), r)) ;
-       }
-    }
-}
-
-PRIVATE void colamd_get_debug
-(
-    char *method
-)
-{
-    colamd_debug = 0 ;         /* no debug printing */
-
-    /* get "D" environment variable, which gives the debug printing level */
-    if (getenv ("D"))
-    {
-       colamd_debug = atoi (getenv ("D")) ;
-    }
-
-    DEBUG0 (("%s: debug version, D = %d (THIS WILL BE SLOW!)\n",
-       method, colamd_debug)) ;
-}
-
-#endif /* NDEBUG */
-
diff --git a/superlu/colamd.h b/superlu/colamd.h
deleted file mode 100644
index 6e30662a..00000000
--- a/superlu/colamd.h
+++ /dev/null
@@ -1,246 +0,0 @@
-/* ========================================================================== 
*/
-/* === colamd/symamd prototypes and definitions ============================= 
*/
-/* ========================================================================== 
*/
-
-/*
-    You must include this file (colamd.h) in any routine that uses colamd,
-    symamd, or the related macros and definitions.
-
-    Authors:
-
-       The authors of the code itself are Stefan I. Larimore and Timothy A.
-       Davis (davis@cise.ufl.edu), University of Florida.  The algorithm was
-       developed in collaboration with John Gilbert, Xerox PARC, and Esmond
-       Ng, Oak Ridge National Laboratory.
-
-    Date:
-
-       September 8, 2003.  Version 2.3.
-
-    Acknowledgements:
-
-       This work was supported by the National Science Foundation, under
-       grants DMS-9504974 and DMS-9803599.
-
-    Notice:
-
-       Copyright (c) 1998-2003 by the University of Florida.
-       All Rights Reserved.
-
-       THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-       EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
-
-       Permission is hereby granted to use, copy, modify, and/or distribute
-       this program, provided that the Copyright, this License, and the
-       Availability of the original version is retained on all copies and made
-       accessible to the end-user of any code or package that includes COLAMD
-       or any modified version of COLAMD. 
-
-    Availability:
-
-       The colamd/symamd library is available at
-
-           http://www.cise.ufl.edu/research/sparse/colamd/
-
-       This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.h
-       file.  It is required by the colamd.c, colamdmex.c, and symamdmex.c
-       files, and by any C code that calls the routines whose prototypes are
-       listed below, or that uses the colamd/symamd definitions listed below.
-
-*/
-
-#ifndef COLAMD_H
-#define COLAMD_H
-
-/* ========================================================================== 
*/
-/* === Include files ======================================================== 
*/
-/* ========================================================================== 
*/
-
-#include <stdlib.h>
-
-/* ========================================================================== 
*/
-/* === Knob and statistics definitions ====================================== 
*/
-/* ========================================================================== 
*/
-
-/* size of the knobs [ ] array.  Only knobs [0..1] are currently used. */
-#define COLAMD_KNOBS 20
-
-/* number of output statistics.  Only stats [0..6] are currently used. */
-#define COLAMD_STATS 20
-
-/* knobs [0] and stats [0]: dense row knob and output statistic. */
-#define COLAMD_DENSE_ROW 0
-
-/* knobs [1] and stats [1]: dense column knob and output statistic. */
-#define COLAMD_DENSE_COL 1
-
-/* stats [2]: memory defragmentation count output statistic */
-#define COLAMD_DEFRAG_COUNT 2
-
-/* stats [3]: colamd status:  zero OK, > 0 warning or notice, < 0 error */
-#define COLAMD_STATUS 3
-
-/* stats [4..6]: error info, or info on jumbled columns */ 
-#define COLAMD_INFO1 4
-#define COLAMD_INFO2 5
-#define COLAMD_INFO3 6
-
-/* error codes returned in stats [3]: */
-#define COLAMD_OK                              (0)
-#define COLAMD_OK_BUT_JUMBLED                  (1)
-#define COLAMD_ERROR_A_not_present             (-1)
-#define COLAMD_ERROR_p_not_present             (-2)
-#define COLAMD_ERROR_nrow_negative             (-3)
-#define COLAMD_ERROR_ncol_negative             (-4)
-#define COLAMD_ERROR_nnz_negative              (-5)
-#define COLAMD_ERROR_p0_nonzero                        (-6)
-#define COLAMD_ERROR_A_too_small               (-7)
-#define COLAMD_ERROR_col_length_negative       (-8)
-#define COLAMD_ERROR_row_index_out_of_bounds   (-9)
-#define COLAMD_ERROR_out_of_memory             (-10)
-#define COLAMD_ERROR_internal_error            (-999)
-
-/* ========================================================================== 
*/
-/* === Row and Column structures ============================================ 
*/
-/* ========================================================================== 
*/
-
-/* User code that makes use of the colamd/symamd routines need not directly */
-/* reference these structures.  They are used only for the COLAMD_RECOMMENDED 
*/
-/* macro. */
-
-typedef struct Colamd_Col_struct
-{
-    int start ;                /* index for A of first row in this column, or 
DEAD */
-                       /* if column is dead */
-    int length ;       /* number of rows in this column */
-    union
-    {
-       int thickness ; /* number of original columns represented by this */
-                       /* col, if the column is alive */
-       int parent ;    /* parent in parent tree super-column structure, if */
-                       /* the column is dead */
-    } shared1 ;
-    union
-    {
-       int score ;     /* the score used to maintain heap, if col is alive */
-       int order ;     /* pivot ordering of this column, if col is dead */
-    } shared2 ;
-    union
-    {
-       int headhash ;  /* head of a hash bucket, if col is at the head of */
-                       /* a degree list */
-       int hash ;      /* hash value, if col is not in a degree list */
-       int prev ;      /* previous column in degree list, if col is in a */
-                       /* degree list (but not at the head of a degree list) */
-    } shared3 ;
-    union
-    {
-       int degree_next ;       /* next column, if col is in a degree list */
-       int hash_next ;         /* next column, if col is in a hash list */
-    } shared4 ;
-
-} Colamd_Col ;
-
-typedef struct Colamd_Row_struct
-{
-    int start ;                /* index for A of first col in this row */
-    int length ;       /* number of principal columns in this row */
-    union
-    {
-       int degree ;    /* number of principal & non-principal columns in row */
-       int p ;         /* used as a row pointer in init_rows_cols () */
-    } shared1 ;
-    union
-    {
-       int mark ;      /* for computing set differences and marking dead rows*/
-       int first_column ;/* first column in row (used in garbage collection) */
-    } shared2 ;
-
-} Colamd_Row ;
-
-/* ========================================================================== 
*/
-/* === Colamd recommended memory size ======================================= 
*/
-/* ========================================================================== 
*/
-
-/*
-    The recommended length Alen of the array A passed to colamd is given by
-    the COLAMD_RECOMMENDED (nnz, n_row, n_col) macro.  It returns -1 if any
-    argument is negative.  2*nnz space is required for the row and column
-    indices of the matrix. COLAMD_C (n_col) + COLAMD_R (n_row) space is
-    required for the Col and Row arrays, respectively, which are internal to
-    colamd.  An additional n_col space is the minimal amount of "elbow room",
-    and nnz/5 more space is recommended for run time efficiency.
-
-    This macro is not needed when using symamd.
-
-    Explicit typecast to int added Sept. 23, 2002, COLAMD version 2.2, to avoid
-    gcc -pedantic warning messages.
-*/
-
-#define COLAMD_C(n_col) ((int) (((n_col) + 1) * sizeof (Colamd_Col) / sizeof 
(int)))
-#define COLAMD_R(n_row) ((int) (((n_row) + 1) * sizeof (Colamd_Row) / sizeof 
(int)))
-
-#define COLAMD_RECOMMENDED(nnz, n_row, n_col)                                 \
-(                                                                             \
-((nnz) < 0 || (n_row) < 0 || (n_col) < 0)                                     \
-?                                                                             \
-    (-1)                                                                      \
-:                                                                             \
-    (2 * (nnz) + COLAMD_C (n_col) + COLAMD_R (n_row) + (n_col) + ((nnz) / 5)) \
-)
-
-/* ========================================================================== 
*/
-/* === Prototypes of user-callable routines ================================= 
*/
-/* ========================================================================== 
*/
-
-int colamd_recommended         /* returns recommended value of Alen, */
-                               /* or (-1) if input arguments are erroneous */
-(
-    int nnz,                   /* nonzeros in A */
-    int n_row,                 /* number of rows in A */
-    int n_col                  /* number of columns in A */
-) ;
-
-void colamd_set_defaults       /* sets default parameters */
-(                              /* knobs argument is modified on output */
-    double knobs [COLAMD_KNOBS]        /* parameter settings for colamd */
-) ;
-
-int colamd                     /* returns (1) if successful, (0) otherwise*/
-(                              /* A and p arguments are modified on output */
-    int n_row,                 /* number of rows in A */
-    int n_col,                 /* number of columns in A */
-    int Alen,                  /* size of the array A */
-    int A [],                  /* row indices of A, of size Alen */
-    int p [],                  /* column pointers of A, of size n_col+1 */
-    double knobs [COLAMD_KNOBS],/* parameter settings for colamd */
-    int stats [COLAMD_STATS]   /* colamd output statistics and error codes */
-) ;
-
-int symamd                             /* return (1) if OK, (0) otherwise */
-(
-    int n,                             /* number of rows and columns of A */
-    int A [],                          /* row indices of A */
-    int p [],                          /* column pointers of A */
-    int perm [],                       /* output permutation, size n_col+1 */
-    double knobs [COLAMD_KNOBS],       /* parameters (uses defaults if NULL) */
-    int stats [COLAMD_STATS],          /* output statistics and error codes */
-    void * (*allocate) (size_t, size_t),
-                                       /* pointer to calloc (ANSI C) or */
-                                       /* mxCalloc (for MATLAB mexFunction) */
-    void (*release) (void *)
-                                       /* pointer to free (ANSI C) or */
-                                       /* mxFree (for MATLAB mexFunction) */
-) ;
-
-void colamd_report
-(
-    int stats [COLAMD_STATS]
-) ;
-
-void symamd_report
-(
-    int stats [COLAMD_STATS]
-) ;
-
-#endif /* COLAMD_H */
diff --git a/superlu/cpanel_bmod.c b/superlu/cpanel_bmod.c
deleted file mode 100644
index b899953b..00000000
--- a/superlu/cpanel_bmod.c
+++ /dev/null
@@ -1,478 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_cdefs.h"
-
-extern void ctrsv_();
-extern void cgemv_();
-
-/* 
- * Function prototypes 
- */
-void clsolve(int, int, complex *, complex *);
-void cmatvec(int, int, int, complex *, complex *, complex *);
-extern void ccheck_tempv();
-
-void
-cpanel_bmod (
-           const int  m,          /* in - number of rows in the matrix */
-           const int  w,          /* in */
-           const int  jcol,       /* in */
-           const int  nseg,       /* in */
-           complex     *dense,     /* out, of size n by w */
-           complex     *tempv,     /* working array */
-           int        *segrep,    /* in */
-           int        *repfnz,    /* in, of size n by w */
-           GlobalLU_t *Glu,       /* modified */
-           SuperLUStat_t *stat    /* output */
-           )
-{
-/* 
- * Purpose
- * =======
- *
- *    Performs numeric block updates (sup-panel) in topological order.
- *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
- *    Special processing on the supernodal portion of L\U[*,j]
- *
- *    Before entering this routine, the original nonzeros in the panel 
- *    were already copied into the spa[m,w].
- *
- *    Updated/Output parameters-
- *     dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned 
- *      collectively in the m-by-w vector dense[*]. 
- *
- */
-
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-         ftcs2 = _cptofcd("N", strlen("N")),
-         ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    int          incx = 1, incy = 1;
-    complex       alpha, beta;
-#endif
-
-    register int k, ksub;
-    int          fsupc, nsupc, nsupr, nrow;
-    int          krep, krep_ind;
-    complex       ukj, ukj1, ukj2;
-    int          luptr, luptr1, luptr2;
-    int          segsze;
-    int          block_nrow;  /* no of rows in a block row */
-    register int lptr;       /* Points to the row subscripts of a supernode */
-    int          kfnz, irow, no_zeros; 
-    register int isub, isub1, i;
-    register int jj;         /* Index through each column in the panel */
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    complex       *lusup;
-    int          *xlusup;
-    int          *repfnz_col; /* repfnz[] for a column in the panel */
-    complex       *dense_col;  /* dense[] for a column in the panel */
-    complex       *tempv1;             /* Used in 1-D update */
-    complex       *TriTmp, *MatvecTmp; /* used in 2-D update */
-    complex      zero = {0.0, 0.0};
-    complex      one = {1.0, 0.0};
-    complex      comp_temp, comp_temp1;
-    register int ldaTmp;
-    register int r_ind, r_hi;
-    static   int first = 1, maxsuper, rowblk, colblk;
-    flops_t  *ops = stat->ops;
-    
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    
-    if ( first ) {
-       maxsuper = sp_ienv(3);
-       rowblk   = sp_ienv(4);
-       colblk   = sp_ienv(5);
-       first = 0;
-    }
-    ldaTmp = maxsuper + rowblk;
-
-    /* 
-     * For each nonz supernode segment of U[*,j] in topological order 
-     */
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */
-
-       /* krep = representative of current k-th supernode
-        * fsupc = first supernodal column
-        * nsupc = no of columns in a supernode
-        * nsupr = no of rows in a supernode
-        */
-        krep = segrep[k--];
-       fsupc = xsup[supno[krep]];
-       nsupc = krep - fsupc + 1;
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];
-       nrow = nsupr - nsupc;
-       lptr = xlsub[fsupc];
-       krep_ind = lptr + nsupc - 1;
-
-       repfnz_col = repfnz;
-       dense_col = dense;
-       
-       if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */
-
-           TriTmp = tempv;
-       
-           /* Sequence through each column in panel -- triangular solves */
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {
-
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
-           
-               segsze = krep - kfnz + 1;
-               luptr = xlusup[fsupc];
-
-               ops[TRSV] += 4 * segsze * (segsze - 1);
-               ops[GEMV] += 8 * nrow * segsze;
-       
-               /* Case 1: Update U-segment of size 1 -- col-col update */
-               if ( segsze == 1 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc;
-
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
-                       irow = lsub[i];
-                       cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                       c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
-                       ++luptr;
-                   }
-
-               } else if ( segsze <= 3 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   ukj1 = dense_col[lsub[krep_ind - 1]];
-                   luptr += nsupr*(nsupc-1) + nsupc-1;
-                   luptr1 = luptr - nsupr;
-
-                   if ( segsze == 2 ) {
-                       cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                       c_sub(&ukj, &ukj, &comp_temp);
-                       dense_col[lsub[krep_ind]] = ukj;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           luptr++; luptr1++;
-                           cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                           cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                           c_add(&comp_temp, &comp_temp, &comp_temp1);
-                           c_sub(&dense_col[irow], &dense_col[irow], 
&comp_temp);
-                       }
-                   } else {
-                       ukj2 = dense_col[lsub[krep_ind - 2]];
-                       luptr2 = luptr1 - nsupr;
-                       cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
-                       c_sub(&ukj1, &ukj1, &comp_temp);
-
-                       cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                       cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                       c_add(&comp_temp, &comp_temp, &comp_temp1);
-                       c_sub(&ukj, &ukj, &comp_temp);
-                       dense_col[lsub[krep_ind]] = ukj;
-                       dense_col[lsub[krep_ind-1]] = ukj1;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           luptr++; luptr1++; luptr2++;
-                           cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                           cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                           c_add(&comp_temp, &comp_temp, &comp_temp1);
-                           cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                           c_add(&comp_temp, &comp_temp, &comp_temp1);
-                           c_sub(&dense_col[irow], &dense_col[irow], 
&comp_temp);
-                       }
-                   }
-
-               } else  {       /* segsze >= 4 */
-                   
-                   /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
-                      holds the result of triangular solves.    */
-                   no_zeros = kfnz - fsupc;
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; ++i) {
-                       irow = lsub[isub];
-                       TriTmp[i] = dense_col[irow]; /* Gather */
-                       ++isub;
-                   }
-                   
-                   /* start effective triangle */
-                   luptr += nsupr * no_zeros + no_zeros;
-
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-                   CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                          &nsupr, TriTmp, &incx );
-#else
-                   ctrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                          &nsupr, TriTmp, &incx );
-#endif
-#else          
-                   clsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
-#endif
-                   
-
-               } /* else ... */
-           
-           }  /* for jj ... end tri-solves */
-
-           /* Block row updates; push all the way into dense[*] block */
-           for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {
-               
-               r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
-               block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
-               luptr = xlusup[fsupc] + nsupc + r_ind;
-               isub1 = lptr + nsupc + r_ind;
-               
-               repfnz_col = repfnz;
-               TriTmp = tempv;
-               dense_col = dense;
-               
-               /* Sequence through each column in panel -- matrix-vector */
-               for (jj = jcol; jj < jcol + w; jj++,
-                    repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
-                   
-                   kfnz = repfnz_col[krep];
-                   if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
-                   
-                   segsze = krep - kfnz + 1;
-                   if ( segsze <= 3 ) continue;   /* skip unrolled cases */
-                   
-                   /* Perform a block update, and scatter the result of
-                      matrix-vector to dense[].                 */
-                   no_zeros = kfnz - fsupc;
-                   luptr1 = luptr + nsupr * no_zeros;
-                   MatvecTmp = &TriTmp[maxsuper];
-                   
-#ifdef USE_VENDOR_BLAS
-                   alpha = one; 
-                    beta = zero;
-#ifdef _CRAY
-                   CGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1], 
-                          &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
-#else
-                   cgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1], 
-                          &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
-#endif
-#else
-                   cmatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
-                          TriTmp, MatvecTmp);
-#endif
-                   
-                   /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
-                    * such that MatvecTmp[*] can be re-used for the
-                    * the next blok row update. dense[] will be copied into 
-                    * global store after the whole panel has been finished.
-                    */
-                   isub = isub1;
-                   for (i = 0; i < block_nrow; i++) {
-                       irow = lsub[isub];
-                       c_sub(&dense_col[irow], &dense_col[irow], 
-                              &MatvecTmp[i]);
-                       MatvecTmp[i] = zero;
-                       ++isub;
-                   }
-                   
-               } /* for jj ... */
-               
-           } /* for each block row ... */
-           
-           /* Scatter the triangular solves into SPA dense[*] */
-           repfnz_col = repfnz;
-           TriTmp = tempv;
-           dense_col = dense;
-           
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
-               
-               segsze = krep - kfnz + 1;
-               if ( segsze <= 3 ) continue; /* skip unrolled cases */
-               
-               no_zeros = kfnz - fsupc;                
-               isub = lptr + no_zeros;
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   dense_col[irow] = TriTmp[i];
-                   TriTmp[i] = zero;
-                   ++isub;
-               }
-               
-           } /* for jj ... */
-           
-       } else { /* 1-D block modification */
-           
-           
-           /* Sequence through each column in the panel */
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m) {
-               
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
-               
-               segsze = krep - kfnz + 1;
-               luptr = xlusup[fsupc];
-
-               ops[TRSV] += 4 * segsze * (segsze - 1);
-               ops[GEMV] += 8 * nrow * segsze;
-               
-               /* Case 1: Update U-segment of size 1 -- col-col update */
-               if ( segsze == 1 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc;
-
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
-                       irow = lsub[i];
-                       cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                       c_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
-                       ++luptr;
-                   }
-
-               } else if ( segsze <= 3 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc-1;
-                   ukj1 = dense_col[lsub[krep_ind - 1]];
-                   luptr1 = luptr - nsupr;
-
-                   if ( segsze == 2 ) {
-                       cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                       c_sub(&ukj, &ukj, &comp_temp);
-                       dense_col[lsub[krep_ind]] = ukj;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           ++luptr;  ++luptr1;
-                           cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                           cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                           c_add(&comp_temp, &comp_temp, &comp_temp1);
-                           c_sub(&dense_col[irow], &dense_col[irow], 
&comp_temp);
-                       }
-                   } else {
-                       ukj2 = dense_col[lsub[krep_ind - 2]];
-                       luptr2 = luptr1 - nsupr;
-                       cc_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
-                       c_sub(&ukj1, &ukj1, &comp_temp);
-
-                       cc_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                       cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                       c_add(&comp_temp, &comp_temp, &comp_temp1);
-                       c_sub(&ukj, &ukj, &comp_temp);
-                       dense_col[lsub[krep_ind]] = ukj;
-                       dense_col[lsub[krep_ind-1]] = ukj1;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           ++luptr; ++luptr1; ++luptr2;
-                           cc_mult(&comp_temp, &ukj, &lusup[luptr]);
-                           cc_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                           c_add(&comp_temp, &comp_temp, &comp_temp1);
-                           cc_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                           c_add(&comp_temp, &comp_temp, &comp_temp1);
-                           c_sub(&dense_col[irow], &dense_col[irow], 
&comp_temp);
-                       }
-                   }
-
-               } else  { /* segsze >= 4 */
-                   /* 
-                    * Perform a triangular solve and block update,
-                    * then scatter the result of sup-col update to dense[].
-                    */
-                   no_zeros = kfnz - fsupc;
-                   
-                   /* Copy U[*,j] segment from dense[*] to tempv[*]: 
-                    *    The result of triangular solve is in tempv[*];
-                    *    The result of matrix vector update is in dense_col[*]
-                    */
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; ++i) {
-                       irow = lsub[isub];
-                       tempv[i] = dense_col[irow]; /* Gather */
-                       ++isub;
-                   }
-                   
-                   /* start effective triangle */
-                   luptr += nsupr * no_zeros + no_zeros;
-                   
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-                   CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                          &nsupr, tempv, &incx );
-#else
-                   ctrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                          &nsupr, tempv, &incx );
-#endif
-                   
-                   luptr += segsze;    /* Dense matrix-vector */
-                   tempv1 = &tempv[segsze];
-                    alpha = one;
-                    beta = zero;
-#ifdef _CRAY
-                   CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
-                          &nsupr, tempv, &incx, &beta, tempv1, &incy );
-#else
-                   cgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
-                          &nsupr, tempv, &incx, &beta, tempv1, &incy );
-#endif
-#else
-                   clsolve ( nsupr, segsze, &lusup[luptr], tempv );
-                   
-                   luptr += segsze;        /* Dense matrix-vector */
-                   tempv1 = &tempv[segsze];
-                   cmatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
-#endif
-                   
-                   /* Scatter tempv[*] into SPA dense[*] temporarily, such
-                    * that tempv[*] can be used for the triangular solve of
-                    * the next column of the panel. They will be copied into 
-                    * ucol[*] after the whole panel has been finished.
-                    */
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; i++) {
-                       irow = lsub[isub];
-                       dense_col[irow] = tempv[i];
-                       tempv[i] = zero;
-                       isub++;
-                   }
-                   
-                   /* Scatter the update from tempv1[*] into SPA dense[*] */
-                   /* Start dense rectangular L */
-                   for (i = 0; i < nrow; i++) {
-                       irow = lsub[isub];
-                       c_sub(&dense_col[irow], &dense_col[irow], &tempv1[i]);
-                       tempv1[i] = zero;
-                       ++isub; 
-                   }
-                   
-               } /* else segsze>=4 ... */
-               
-           } /* for each column in the panel... */
-           
-       } /* else 1-D update ... */
-
-    } /* for each updating supernode ... */
-
-}
-
-
-
diff --git a/superlu/cpanel_dfs.c b/superlu/cpanel_dfs.c
deleted file mode 100644
index 8d7b9835..00000000
--- a/superlu/cpanel_dfs.c
+++ /dev/null
@@ -1,256 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_cdefs.h"
-
-void
-cpanel_dfs (
-          const int  m,           /* in - number of rows in the matrix */
-          const int  w,           /* in */
-          const int  jcol,        /* in */
-          SuperMatrix *A,       /* in - original matrix */
-          int        *perm_r,     /* in */
-          int        *nseg,       /* out */
-          complex     *dense,      /* out */
-          int        *panel_lsub, /* out */
-          int        *segrep,     /* out */
-          int        *repfnz,     /* out */
-          int        *xprune,     /* out */
-          int        *marker,     /* out */     
-          int        *parent,     /* working array */
-          int        *xplore,     /* working array */
-          GlobalLU_t *Glu         /* modified */
-          )
-{
-/*
- * Purpose
- * =======
- *
- *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
- *
- *   A supernode representative is the last column of a supernode.
- *   The nonzeros in U[*,j] are segments that end at supernodal
- *   representatives.
- *
- *   The routine returns one list of the supernodal representatives
- *   in topological order of the dfs that generates them. This list is
- *   a superset of the topological order of each individual column within
- *   the panel. 
- *   The location of the first nonzero in each supernodal segment
- *   (supernodal entry location) is also returned. Each column has a 
- *   separate list for this purpose.
- *
- *   Two marker arrays are used for dfs:
- *     marker[i] == jj, if i was visited during dfs of current column jj;
- *     marker1[i] >= jcol, if i was visited by earlier columns in this panel;
- *
- *   marker: A-row --> A-row/col (0/1)
- *   repfnz: SuperA-col --> PA-row
- *   parent: SuperA-col --> SuperA-col
- *   xplore: SuperA-col --> index to L-structure
- *
- */
-    NCPformat *Astore;
-    complex    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int              krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
-    int       k, krow, kmark, kperm;
-    int       xdfs, maxdfs, kpar;
-    int       jj;         /* index through each column in the panel */
-    int       *marker1;           /* marker1[jj] >= jcol if vertex jj was 
visited 
-                             by a previous column within this panel.   */
-    int       *repfnz_col; /* start of each column in the panel */
-    complex    *dense_col;  /* start of each column in the panel */
-    int       nextl_col;   /* next available position in panel_lsub[*,jj] */
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-
-    /* Initialize pointers */
-    Astore     = A->Store;
-    a          = Astore->nzval;
-    asub       = Astore->rowind;
-    xa_begin   = Astore->colbeg;
-    xa_end     = Astore->colend;
-    marker1    = marker + m;
-    repfnz_col = repfnz;
-    dense_col  = dense;
-    *nseg      = 0;
-    xsup       = Glu->xsup;
-    supno      = Glu->supno;
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-
-    /* For each column in the panel */
-    for (jj = jcol; jj < jcol + w; jj++) {
-       nextl_col = (jj - jcol) * m;
-
-#ifdef CHK_DFS
-       printf("\npanel col %d: ", jj);
-#endif
-
-       /* For each nonz in A[*,jj] do dfs */
-       for (k = xa_begin[jj]; k < xa_end[jj]; k++) {
-           krow = asub[k];
-            dense_col[krow] = a[k];
-           kmark = marker[krow];       
-           if ( kmark == jj ) 
-               continue;     /* krow visited before, go to the next nonzero */
-
-           /* For each unmarked nbr krow of jj
-            * krow is in L: place it in structure of L[*,jj]
-            */
-           marker[krow] = jj;
-           kperm = perm_r[krow];
-           
-           if ( kperm == EMPTY ) {
-               panel_lsub[nextl_col++] = krow; /* krow is indexed into A */
-           }
-           /* 
-            * krow is in U: if its supernode-rep krep
-            * has been explored, update repfnz[*]
-            */
-           else {
-               
-               krep = xsup[supno[kperm]+1] - 1;
-               myfnz = repfnz_col[krep];
-               
-#ifdef CHK_DFS
-               printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, 
kperm);
-#endif
-               if ( myfnz != EMPTY ) { /* Representative visited before */
-                   if ( myfnz > kperm ) repfnz_col[krep] = kperm;
-                   /* continue; */
-               }
-               else {
-                   /* Otherwise, perform dfs starting at krep */
-                   oldrep = EMPTY;
-                   parent[krep] = oldrep;
-                   repfnz_col[krep] = kperm;
-                   xdfs = xlsub[krep];
-                   maxdfs = xprune[krep];
-                   
-#ifdef CHK_DFS 
-                   printf("  xdfs %d, maxdfs %d: ", xdfs, maxdfs);
-                   for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                   printf("\n");
-#endif
-                   do {
-                       /* 
-                        * For each unmarked kchild of krep 
-                        */
-                       while ( xdfs < maxdfs ) {
-                           
-                           kchild = lsub[xdfs];
-                           xdfs++;
-                           chmark = marker[kchild];
-                           
-                           if ( chmark != jj ) { /* Not reached yet */
-                               marker[kchild] = jj;
-                               chperm = perm_r[kchild];
-                             
-                               /* Case kchild is in L: place it in L[*,j] */
-                               if ( chperm == EMPTY ) {
-                                   panel_lsub[nextl_col++] = kchild;
-                               } 
-                               /* Case kchild is in U: 
-                                *   chrep = its supernode-rep. If its rep has 
-                                *   been explored, update its repfnz[*]
-                                */
-                               else {
-                                   
-                                   chrep = xsup[supno[chperm]+1] - 1;
-                                   myfnz = repfnz_col[chrep];
-#ifdef CHK_DFS
-                                   printf("chrep %d,myfnz %d,perm_r[%d] 
%d\n",chrep,myfnz,kchild,chperm);
-#endif
-                                   if ( myfnz != EMPTY ) { /* Visited before */
-                                       if ( myfnz > chperm )
-                                           repfnz_col[chrep] = chperm;
-                                   }
-                                   else {
-                                       /* Cont. dfs at snode-rep of kchild */
-                                       xplore[krep] = xdfs;    
-                                       oldrep = krep;
-                                       krep = chrep; /* Go deeper down G(L) */
-                                       parent[krep] = oldrep;
-                                       repfnz_col[krep] = chperm;
-                                       xdfs = xlsub[krep];     
-                                       maxdfs = xprune[krep];
-#ifdef CHK_DFS 
-                                       printf("  xdfs %d, maxdfs %d: ", xdfs, 
maxdfs);
-                                       for (i = xdfs; i < maxdfs; i++) 
printf(" %d", lsub[i]); 
-                                       printf("\n");
-#endif
-                                   } /* else */
-                                 
-                               } /* else */
-                             
-                           } /* if... */
-                           
-                       } /* while xdfs < maxdfs */
-                       
-                       /* krow has no more unexplored nbrs:
-                        *    Place snode-rep krep in postorder DFS, if this 
-                        *    segment is seen for the first time. (Note that
-                        *    "repfnz[krep]" may change later.)
-                        *    Backtrack dfs to its parent.
-                        */
-                       if ( marker1[krep] < jcol ) {
-                           segrep[*nseg] = krep;
-                           ++(*nseg);
-                           marker1[krep] = jj;
-                       }
-                       
-                       kpar = parent[krep]; /* Pop stack, mimic recursion */
-                       if ( kpar == EMPTY ) break; /* dfs done */
-                       krep = kpar;
-                       xdfs = xplore[krep];
-                       maxdfs = xprune[krep];
-                       
-#ifdef CHK_DFS 
-                       printf("  pop stack: krep %d,xdfs %d,maxdfs %d: ", 
krep,xdfs,maxdfs);
-                       for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                       printf("\n");
-#endif
-                   } while ( kpar != EMPTY ); /* do-while - until empty stack 
*/
-                   
-               } /* else */
-               
-           } /* else */
-           
-       } /* for each nonz in A[*,jj] */
-       
-       repfnz_col += m;    /* Move to next column */
-        dense_col += m;
-       
-    } /* for jj ... */
-    
-}
diff --git a/superlu/cpivotL.c b/superlu/cpivotL.c
deleted file mode 100644
index 006be071..00000000
--- a/superlu/cpivotL.c
+++ /dev/null
@@ -1,171 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include "slu_cdefs.h"
-
-#undef DEBUG
-
-int
-cpivotL(
-        const int  jcol,     /* in */
-        const float u,      /* in - diagonal pivoting threshold */
-        int        *usepr,   /* re-use the pivot sequence given by 
perm_r/iperm_r */
-        int        *perm_r,  /* may be modified */
-        int        *iperm_r, /* in - inverse of perm_r */
-        int        *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */
-        int        *pivrow,  /* out */
-        GlobalLU_t *Glu,     /* modified - global LU data structures */
-       SuperLUStat_t *stat  /* output */
-       )
-{
-/*
- * Purpose
- * =======
- *   Performs the numerical pivoting on the current column of L,
- *   and the CDIV operation.
- *
- *   Pivot policy:
- *   (1) Compute thresh = u * max_(i>=j) abs(A_ij);
- *   (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
- *           pivot row = k;
- *       ELSE IF abs(A_jj) >= thresh THEN
- *           pivot row = j;
- *       ELSE
- *           pivot row = m;
- * 
- *   Note: If you absolutely want to use a given pivot order, then set u=0.0.
- *
- *   Return value: 0      success;
- *                 i > 0  U(i,i) is exactly zero.
- *
- */
-    complex one = {1.0, 0.0};
-    int          fsupc;            /* first column in the supernode */
-    int          nsupc;            /* no of columns in the supernode */
-    int          nsupr;     /* no of rows in the supernode */
-    int          lptr;     /* points to the starting subscript of the 
supernode */
-    int          pivptr, old_pivptr, diag, diagind;
-    float       pivmax, rtemp, thresh;
-    complex       temp;
-    complex       *lu_sup_ptr; 
-    complex       *lu_col_ptr;
-    int          *lsub_ptr;
-    int          isub, icol, k, itemp;
-    int          *lsub, *xlsub;
-    complex       *lusup;
-    int          *xlusup;
-    flops_t      *ops = stat->ops;
-
-    /* Initialize pointers */
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    lusup      = Glu->lusup;
-    xlusup     = Glu->xlusup;
-    fsupc      = (Glu->xsup)[(Glu->supno)[jcol]];
-    nsupc      = jcol - fsupc;         /* excluding jcol; nsupc >= 0 */
-    lptr       = xlsub[fsupc];
-    nsupr      = xlsub[fsupc+1] - lptr;
-    lu_sup_ptr = &lusup[xlusup[fsupc]];        /* start of the current 
supernode */
-    lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */
-    lsub_ptr   = &lsub[lptr];  /* start of row indices of the supernode */
-
-#ifdef DEBUG
-if ( jcol == MIN_COL ) {
-    printf("Before cdiv: col %d\n", jcol);
-    for (k = nsupc; k < nsupr; k++) 
-       printf("  lu[%d] %f\n", lsub_ptr[k], lu_col_ptr[k]);
-}
-#endif
-    
-    /* Determine the largest abs numerical value for partial pivoting;
-       Also search for user-specified pivot, and diagonal element. */
-    if ( *usepr ) *pivrow = iperm_r[jcol];
-    diagind = iperm_c[jcol];
-    pivmax = 0.0;
-    pivptr = nsupc;
-    diag = EMPTY;
-    old_pivptr = nsupc;
-    for (isub = nsupc; isub < nsupr; ++isub) {
-        rtemp = c_abs1 (&lu_col_ptr[isub]);
-       if ( rtemp > pivmax ) {
-           pivmax = rtemp;
-           pivptr = isub;
-       }
-       if ( *usepr && lsub_ptr[isub] == *pivrow ) old_pivptr = isub;
-       if ( lsub_ptr[isub] == diagind ) diag = isub;
-    }
-
-    /* Test for singularity */
-    if ( pivmax == 0.0 ) {
-       *pivrow = lsub_ptr[pivptr];
-       perm_r[*pivrow] = jcol;
-       *usepr = 0;
-       return (jcol+1);
-    }
-
-    thresh = u * pivmax;
-    
-    /* Choose appropriate pivotal element by our policy. */
-    if ( *usepr ) {
-        rtemp = c_abs1 (&lu_col_ptr[old_pivptr]);
-       if ( rtemp != 0.0 && rtemp >= thresh )
-           pivptr = old_pivptr;
-       else
-           *usepr = 0;
-    }
-    if ( *usepr == 0 ) {
-       /* Use diagonal pivot? */
-       if ( diag >= 0 ) { /* diagonal exists */
-            rtemp = c_abs1 (&lu_col_ptr[diag]);
-           if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
-        }
-       *pivrow = lsub_ptr[pivptr];
-    }
-    
-    /* Record pivot row */
-    perm_r[*pivrow] = jcol;
-    
-    /* Interchange row subscripts */
-    if ( pivptr != nsupc ) {
-       itemp = lsub_ptr[pivptr];
-       lsub_ptr[pivptr] = lsub_ptr[nsupc];
-       lsub_ptr[nsupc] = itemp;
-
-       /* Interchange numerical values as well, for the whole snode, such 
-        * that L is indexed the same way as A.
-        */
-       for (icol = 0; icol <= nsupc; icol++) {
-           itemp = pivptr + icol * nsupr;
-           temp = lu_sup_ptr[itemp];
-           lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
-           lu_sup_ptr[nsupc + icol*nsupr] = temp;
-       }
-    } /* if */
-
-    /* cdiv operation */
-    ops[FACT] += 10 * (nsupr - nsupc);
-
-    c_div(&temp, &one, &lu_col_ptr[nsupc]);
-    for (k = nsupc+1; k < nsupr; k++) 
-       cc_mult(&lu_col_ptr[k], &lu_col_ptr[k], &temp);
-
-    return 0;
-}
-
diff --git a/superlu/cpivotgrowth.c b/superlu/cpivotgrowth.c
deleted file mode 100644
index c1dd72a8..00000000
--- a/superlu/cpivotgrowth.c
+++ /dev/null
@@ -1,130 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include <math.h>
-#include "slu_cdefs.h"
-
-float
-cPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, 
-             SuperMatrix *L, SuperMatrix *U)
-{
-/*
- * Purpose
- * =======
- *
- * Compute the reciprocal pivot growth factor of the leading ncols columns
- * of the matrix, using the formula:
- *     min_j ( max_i(abs(A_ij)) / max_i(abs(U_ij)) )
- *
- * Arguments
- * =========
- *
- * ncols    (input) int
- *          The number of columns of matrices A, L and U.
- *
- * A        (input) SuperMatrix*
- *         Original matrix A, permuted by columns, of dimension
- *          (A->nrow, A->ncol). The type of A can be:
- *          Stype = NC; Dtype = SLU_C; Mtype = GE.
- *
- * L        (output) SuperMatrix*
- *          The factor L from the factorization Pr*A=L*U; use compressed row 
- *          subscripts storage for supernodes, i.e., L has type: 
- *          Stype = SC; Dtype = SLU_C; Mtype = TRLU.
- *
- * U        (output) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *          storage scheme, i.e., U has types: Stype = NC;
- *          Dtype = SLU_C; Mtype = TRU.
- *
- */
-    NCformat *Astore;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    complex  *Aval, *Lval, *Uval;
-    int      fsupc, nsupr, luptr, nz_in_U;
-    int      i, j, k, oldcol;
-    int      *inv_perm_c;
-    float   rpg, maxaj, maxuj;
-    extern   double slamch_(char *);
-    float   smlnum;
-    complex   *luval;
-    complex   temp_comp;
-   
-    /* Get machine constants. */
-    smlnum = slamch_("S");
-    rpg = 1. / smlnum;
-
-    Astore = A->Store;
-    Lstore = L->Store;
-    Ustore = U->Store;
-    Aval = Astore->nzval;
-    Lval = Lstore->nzval;
-    Uval = Ustore->nzval;
-    
-    inv_perm_c = (int *) SUPERLU_MALLOC(A->ncol*sizeof(int));
-    for (j = 0; j < A->ncol; ++j) inv_perm_c[perm_c[j]] = j;
-
-    for (k = 0; k <= Lstore->nsuper; ++k) {
-       fsupc = L_FST_SUPC(k);
-       nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-       luptr = L_NZ_START(fsupc);
-       luval = &Lval[luptr];
-       nz_in_U = 1;
-       
-       for (j = fsupc; j < L_FST_SUPC(k+1) && j < ncols; ++j) {
-           maxaj = 0.;
-            oldcol = inv_perm_c[j];
-           for (i = Astore->colptr[oldcol]; i < Astore->colptr[oldcol+1]; ++i)
-               maxaj = SUPERLU_MAX( maxaj, c_abs1( &Aval[i]) );
-       
-           maxuj = 0.;
-           for (i = Ustore->colptr[j]; i < Ustore->colptr[j+1]; i++)
-               maxuj = SUPERLU_MAX( maxuj, c_abs1( &Uval[i]) );
-           
-           /* Supernode */
-           for (i = 0; i < nz_in_U; ++i)
-               maxuj = SUPERLU_MAX( maxuj, c_abs1( &luval[i]) );
-
-           ++nz_in_U;
-           luval += nsupr;
-
-           if ( maxuj == 0. )
-               rpg = SUPERLU_MIN( rpg, 1.);
-           else
-               rpg = SUPERLU_MIN( rpg, maxaj / maxuj );
-       }
-       
-       if ( j >= ncols ) break;
-    }
-
-    SUPERLU_FREE(inv_perm_c);
-    return (rpg);
-}
diff --git a/superlu/cpruneL.c b/superlu/cpruneL.c
deleted file mode 100644
index f43617f3..00000000
--- a/superlu/cpruneL.c
+++ /dev/null
@@ -1,156 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_cdefs.h"
-
-void
-cpruneL(
-       const int  jcol,             /* in */
-       const int  *perm_r,   /* in */
-       const int  pivrow,    /* in */
-       const int  nseg,             /* in */
-       const int  *segrep,   /* in */
-       const int  *repfnz,   /* in */
-       int        *xprune,   /* out */
-       GlobalLU_t *Glu       /* modified - global LU data structures */
-       )
-{
-/*
- * Purpose
- * =======
- *   Prunes the L-structure of supernodes whose L-structure
- *   contains the current pivot row "pivrow"
- *
- */
-    complex     utemp;
-    int        jsupno, irep, irep1, kmin, kmax, krow, movnum;
-    int        i, ktemp, minloc, maxloc;
-    int        do_prune; /* logical variable */
-    int        *xsup, *supno;
-    int        *lsub, *xlsub;
-    complex     *lusup;
-    int        *xlusup;
-
-    xsup       = Glu->xsup;
-    supno      = Glu->supno;
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    lusup      = Glu->lusup;
-    xlusup     = Glu->xlusup;
-    
-    /*
-     * For each supernode-rep irep in U[*,j]
-     */
-    jsupno = supno[jcol];
-    for (i = 0; i < nseg; i++) {
-
-       irep = segrep[i];
-       irep1 = irep + 1;
-       do_prune = FALSE;
-
-       /* Don't prune with a zero U-segment */
-       if ( repfnz[irep] == EMPTY )
-               continue;
-
-       /* If a snode overlaps with the next panel, then the U-segment 
-        * is fragmented into two parts -- irep and irep1. We should let
-        * pruning occur at the rep-column in irep1's snode. 
-        */
-       if ( supno[irep] == supno[irep1] )      /* Don't prune */
-               continue;
-
-       /*
-        * If it has not been pruned & it has a nonz in row L[pivrow,i]
-        */
-       if ( supno[irep] != jsupno ) {
-           if ( xprune[irep] >= xlsub[irep1] ) {
-               kmin = xlsub[irep];
-               kmax = xlsub[irep1] - 1;
-               for (krow = kmin; krow <= kmax; krow++) 
-                   if ( lsub[krow] == pivrow ) {
-                       do_prune = TRUE;
-                       break;
-                   }
-           }
-           
-           if ( do_prune ) {
-
-               /* Do a quicksort-type partition
-                * movnum=TRUE means that the num values have to be exchanged.
-                */
-               movnum = FALSE;
-               if ( irep == xsup[supno[irep]] ) /* Snode of size 1 */
-                       movnum = TRUE;
-
-               while ( kmin <= kmax ) {
-
-                   if ( perm_r[lsub[kmax]] == EMPTY ) 
-                       kmax--;
-                   else if ( perm_r[lsub[kmin]] != EMPTY )
-                       kmin++;
-                   else { /* kmin below pivrow, and kmax above pivrow: 
-                           *   interchange the two subscripts
-                           */
-                       ktemp = lsub[kmin];
-                       lsub[kmin] = lsub[kmax];
-                       lsub[kmax] = ktemp;
-
-                       /* If the supernode has only one column, then we
-                        * only keep one set of subscripts. For any subscript 
-                        * interchange performed, similar interchange must be 
-                        * done on the numerical values.
-                        */
-                       if ( movnum ) {
-                           minloc = xlusup[irep] + (kmin - xlsub[irep]);
-                           maxloc = xlusup[irep] + (kmax - xlsub[irep]);
-                           utemp = lusup[minloc];
-                           lusup[minloc] = lusup[maxloc];
-                           lusup[maxloc] = utemp;
-                       }
-
-                       kmin++;
-                       kmax--;
-
-                   }
-
-               } /* while */
-
-               xprune[irep] = kmin;    /* Pruning */
-
-#ifdef CHK_PRUNE
-       printf("    After cpruneL(),using col %d:  xprune[%d] = %d\n", 
-                       jcol, irep, kmin);
-#endif
-           } /* if do_prune */
-
-       } /* if */
-
-    } /* for each U-segment... */
-}
diff --git a/superlu/creadhb.c b/superlu/creadhb.c
deleted file mode 100644
index 47572075..00000000
--- a/superlu/creadhb.c
+++ /dev/null
@@ -1,288 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_cdefs.h"
-
-
-/* Eat up the rest of the current line */
-int cDumpLine(FILE *fp)
-{
-    register int c;
-    while ((c = fgetc(fp)) != '\n') ;
-    return 0;
-}
-
-int cParseIntFormat(char *buf, int *num, int *size)
-{
-    char *tmp;
-
-    tmp = buf;
-    while (*tmp++ != '(') ;
-    sscanf(tmp, "%d", num);
-    while (*tmp != 'I' && *tmp != 'i') ++tmp;
-    ++tmp;
-    sscanf(tmp, "%d", size);
-    return 0;
-}
-
-int cParseFloatFormat(char *buf, int *num, int *size)
-{
-    char *tmp, *period;
-    
-    tmp = buf;
-    while (*tmp++ != '(') ;
-    *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
-    while (*tmp != 'E' && *tmp != 'e' && *tmp != 'D' && *tmp != 'd'
-          && *tmp != 'F' && *tmp != 'f') {
-        /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the
-           num picked up refers to P, which should be skipped. */
-        if (*tmp=='p' || *tmp=='P') {
-           ++tmp;
-           *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
-        } else {
-           ++tmp;
-        }
-    }
-    ++tmp;
-    period = tmp;
-    while (*period != '.' && *period != ')') ++period ;
-    *period = '\0';
-    *size = atoi(tmp); /*sscanf(tmp, "%2d", size);*/
-
-    return 0;
-}
-
-int cReadVector(FILE *fp, int n, int *where, int perline, int persize)
-{
-    register int i, j, item;
-    char tmp, buf[100], *dummy;
-    dummy = 0;
-    i = 0;
-    while (i < n) {
-       dummy = fgets(buf, 100, fp);    /* read a line at a time */
-       for (j=0; j<perline && i<n; j++) {
-           tmp = buf[(j+1)*persize];     /* save the char at that place */
-           buf[(j+1)*persize] = 0;       /* null terminate */
-           item = atoi(&buf[j*persize]); 
-           buf[(j+1)*persize] = tmp;     /* recover the char at that place */
-           where[i++] = item - 1;
-       }
-    }
-
-    return 0;
-}
-
-/* Read complex numbers as pairs of (real, imaginary) */
-int cReadValues(FILE *fp, int n, complex *destination, int perline, int 
persize)
-{
-  register int i, j, k, s, pair;
-  register float realpart;
-  char tmp, buf[100], *dummy;
-  
-  i = pair = 0;
-  while (i < n) {
-    dummy = fgets(buf, 100, fp);    /* read a line at a time */
-    for (j=0; j<perline && i<n; j++) {
-      tmp = buf[(j+1)*persize];     /* save the char at that place */
-      buf[(j+1)*persize] = 0;       /* null terminate */
-      s = j*persize;
-      for (k = 0; k < persize; ++k) /* No D_ format in C */
-        if ( buf[s+k] == 'D' || buf[s+k] == 'd' ) buf[s+k] = 'E';
-      if ( pair == 0 ) {
-        /* The value is real part */
-        realpart = atof(&buf[s]);
-        pair = 1;
-      } else {
-        /* The value is imaginary part */
-        destination[i].r = realpart;
-        destination[i++].i = atof(&buf[s]);
-        pair = 0;
-      }
-      buf[(j+1)*persize] = tmp;     /* recover the char at that place */
-    }
-  }
-  return 0;
-}
-
-
-void
-creadhb(int *nrow, int *ncol, int *nonz,
-       complex **nzval, int **rowind, int **colptr)
-{
-/* 
- * Purpose
- * =======
- * 
- * Read a COMPLEX PRECISION matrix stored in Harwell-Boeing format 
- * as described below.
- * 
- * Line 1 (A72,A8) 
- *     Col. 1 - 72   Title (TITLE) 
- *     Col. 73 - 80  Key (KEY) 
- * 
- * Line 2 (5I14) 
- *     Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
- *     Col. 15 - 28  Number of lines for pointers (PTRCRD) 
- *     Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
- *     Col. 43 - 56  Number of lines for numerical values (VALCRD) 
- *     Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
- *                    (including starting guesses and solution vectors 
- *                    if present) 
- *                   (zero indicates no right-hand side data is present) 
- *
- * Line 3 (A3, 11X, 4I14) 
- *     Col. 1 - 3    Matrix type (see below) (MXTYPE) 
- *     Col. 15 - 28  Number of rows (or variables) (NROW) 
- *     Col. 29 - 42  Number of columns (or elements) (NCOL) 
- *     Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
- *                   (equal to number of entries for assembled matrices) 
- *     Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
- *                   (zero in the case of assembled matrices) 
- * Line 4 (2A16, 2A20) 
- *     Col. 1 - 16   Format for pointers (PTRFMT) 
- *     Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
- *     Col. 33 - 52  Format for numerical values of coefficient matrix 
(VALFMT) 
- *     Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
- *
- * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
- *     Col. 1        Right-hand side type: 
- *                       F for full storage or M for same format as matrix 
- *     Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
- *     Col. 3        X if an exact solution vector(s) is supplied. 
- *     Col. 15 - 28  Number of right-hand sides (NRHS) 
- *     Col. 29 - 42  Number of row indices (NRHSIX) 
- *                   (ignored in case of unassembled matrices) 
- *
- * The three character type field on line 3 describes the matrix type. 
- * The following table lists the permitted values for each of the three 
- * characters. As an example of the type field, RSA denotes that the matrix 
- * is real, symmetric, and assembled. 
- *
- * First Character: 
- *     R Real matrix 
- *     C Complex matrix 
- *     P Pattern only (no numerical values supplied) 
- *
- * Second Character: 
- *     S Symmetric 
- *     U Unsymmetric 
- *     H Hermitian 
- *     Z Skew symmetric 
- *     R Rectangular 
- *
- * Third Character: 
- *     A Assembled 
- *     E Elemental matrices (unassembled) 
- *
- */
-
-    register int i, numer_lines = 0, rhscrd = 0;
-    int tmp, colnum, colsize, rownum, rowsize, valnum, valsize, dummy;
-    char buf[100], type[4], key[10], *dummyc;
-    FILE *fp;
-    
-    dummy = 0;
-
-    fp = stdin;
-
-    /* Line 1 */
-    dummyc = fgets(buf, 100, fp);
-    dummy += fputs(buf, stdout);
-#if 0
-    dummy += fscanf(fp, "%72c", buf); buf[72] = 0;
-    printf("Title: %s", buf);
-    dummy += fscanf(fp, "%8c", key);  key[8] = 0;
-    printf("Key: %s\n", key);
-    cDumpLine(fp);
-#endif
-
-    /* Line 2 */
-    for (i=0; i<5; i++) {
-       dummy += fscanf(fp, "%14c", buf); buf[14] = 0;
-       sscanf(buf, "%d", &tmp);
-       if (i == 3) numer_lines = tmp;
-       if (i == 4 && tmp) rhscrd = tmp;
-    }
-    cDumpLine(fp);
-
-    /* Line 3 */
-    dummy += fscanf(fp, "%3c", type);
-    dummy += fscanf(fp, "%11c", buf); /* pad */
-    type[3] = 0;
-#ifdef DEBUG
-    printf("Matrix type %s\n", type);
-#endif
-    
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", nrow);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", ncol);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", nonz);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", &tmp);
-    
-    if (tmp != 0)
-         printf("This is not an assembled matrix!\n");
-    if (*nrow != *ncol)
-       printf("Matrix is not square.\n");
-    cDumpLine(fp);
-
-    /* Allocate storage for the three arrays ( nzval, rowind, colptr ) */
-    callocateA(*ncol, *nonz, nzval, rowind, colptr);
-
-    /* Line 4: format statement */
-    dummy += fscanf(fp, "%16c", buf);
-    cParseIntFormat(buf, &colnum, &colsize);
-    dummy += fscanf(fp, "%16c", buf);
-    cParseIntFormat(buf, &rownum, &rowsize);
-    dummy += fscanf(fp, "%20c", buf);
-    cParseFloatFormat(buf, &valnum, &valsize);
-    dummy += fscanf(fp, "%20c", buf);
-    cDumpLine(fp);
-
-    /* Line 5: right-hand side */    
-    if ( rhscrd ) cDumpLine(fp); /* skip RHSFMT */
-    
-#ifdef DEBUG
-    printf("%d rows, %d nonzeros\n", *nrow, *nonz);
-    printf("colnum %d, colsize %d\n", colnum, colsize);
-    printf("rownum %d, rowsize %d\n", rownum, rowsize);
-    printf("valnum %d, valsize %d\n", valnum, valsize);
-#endif
-    
-    cReadVector(fp, *ncol+1, *colptr, colnum, colsize);
-    cReadVector(fp, *nonz, *rowind, rownum, rowsize);
-    if ( numer_lines ) {
-        cReadValues(fp, *nonz, *nzval, valnum, valsize);
-    }
-    
-    fclose(fp);
-
-}
-
diff --git a/superlu/csnode_bmod.c b/superlu/csnode_bmod.c
deleted file mode 100644
index 8c4812c6..00000000
--- a/superlu/csnode_bmod.c
+++ /dev/null
@@ -1,117 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-
-#include "slu_cdefs.h"
-extern void ctrsv_();
-extern void cgemv_();
-
-/*
- * Performs numeric block updates within the relaxed snode. 
- */
-int
-csnode_bmod (
-           const int  jcol,      /* in */
-           const int  jsupno,    /* in */
-           const int  fsupc,     /* in */
-           complex     *dense,    /* in */
-           complex     *tempv,    /* working array */
-           GlobalLU_t *Glu,      /* modified */
-           SuperLUStat_t *stat   /* output */
-           )
-{
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-        ftcs2 = _cptofcd("N", strlen("N")),
-        ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    int            incx = 1, incy = 1;
-    complex         alpha = {-1.0, 0.0},  beta = {1.0, 0.0};
-#endif
-
-    complex   comp_zero = {0.0, 0.0};
-    int            luptr, nsupc, nsupr, nrow;
-    int            isub, irow, i, iptr; 
-    register int   ufirst, nextlu;
-    int            *lsub, *xlsub;
-    complex         *lusup;
-    int            *xlusup;
-    flops_t *ops = stat->ops;
-
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-
-    nextlu = xlusup[jcol];
-    
-    /*
-     * Process the supernodal portion of L\U[*,j]
-     */
-    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
-       irow = lsub[isub];
-       lusup[nextlu] = dense[irow];
-        dense[irow] = comp_zero;
-       ++nextlu;
-    }
-
-    xlusup[jcol + 1] = nextlu; /* Initialize xlusup for next column */
-    
-    if ( fsupc < jcol ) {
-
-       luptr = xlusup[fsupc];
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];
-       nsupc = jcol - fsupc;   /* Excluding jcol */
-       ufirst = xlusup[jcol];  /* Points to the beginning of column
-                                  jcol in supernode L\U(jsupno). */
-       nrow = nsupr - nsupc;
-
-       ops[TRSV] += 4 * nsupc * (nsupc - 1);
-       ops[GEMV] += 8 * nrow * nsupc;
-
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-       CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, 
-             &lusup[ufirst], &incx );
-       CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
-               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-#else
-       ctrsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, 
-             &lusup[ufirst], &incx );
-       cgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
-               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-#endif
-#else
-       clsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
-       cmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 
-                       &lusup[ufirst], &tempv[0] );
-
-        /* Scatter tempv[*] into lusup[*] */
-       iptr = ufirst + nsupc;
-       for (i = 0; i < nrow; i++) {
-           c_sub(&lusup[iptr], &lusup[iptr], &tempv[i]);
-            ++iptr;
-           tempv[i] = comp_zero;
-       }
-#endif
-
-    }
-
-    return 0;
-}
diff --git a/superlu/csnode_dfs.c b/superlu/csnode_dfs.c
deleted file mode 100644
index c979aa21..00000000
--- a/superlu/csnode_dfs.c
+++ /dev/null
@@ -1,113 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_cdefs.h"
-
-int
-csnode_dfs (
-          const int  jcol,         /* in - start of the supernode */
-          const int  kcol,         /* in - end of the supernode */
-          const int  *asub,        /* in */
-          const int  *xa_begin,    /* in */
-          const int  *xa_end,      /* in */
-          int        *xprune,      /* out */
-          int        *marker,      /* modified */
-          GlobalLU_t *Glu          /* modified */
-          )
-{
-/* Purpose
- * =======
- *    csnode_dfs() - Determine the union of the row structures of those 
- *    columns within the relaxed snode.
- *    Note: The relaxed snodes are leaves of the supernodal etree, therefore, 
- *    the portion outside the rectangular supernode must be zero.
- *
- * Return value
- * ============
- *     0   success;
- *    >0   number of bytes allocated when run out of memory.
- *
- */
-    register int i, k, ifrom, ito, nextl, new_next;
-    int          nsuper, krow, kmark, mem_error;
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    int          nzlmax;
-    
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    nzlmax  = Glu->nzlmax;
-
-    nsuper = ++supno[jcol];    /* Next available supernode number */
-    nextl = xlsub[jcol];
-
-    for (i = jcol; i <= kcol; i++) {
-       /* For each nonzero in A[*,i] */
-       for (k = xa_begin[i]; k < xa_end[i]; k++) {     
-           krow = asub[k];
-           kmark = marker[krow];
-           if ( kmark != kcol ) { /* First time visit krow */
-               marker[krow] = kcol;
-               lsub[nextl++] = krow;
-               if ( nextl >= nzlmax ) {
-                   if ( mem_error = cLUMemXpand(jcol, nextl, LSUB, &nzlmax, 
Glu) )
-                       return (mem_error);
-                   lsub = Glu->lsub;
-               }
-           }
-       }
-       supno[i] = nsuper;
-    }
-
-    /* Supernode > 1, then make a copy of the subscripts for pruning */
-    if ( jcol < kcol ) {
-       new_next = nextl + (nextl - xlsub[jcol]);
-       while ( new_next > nzlmax ) {
-           if ( mem_error = cLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-               return (mem_error);
-           lsub = Glu->lsub;
-       }
-       ito = nextl;
-       for (ifrom = xlsub[jcol]; ifrom < nextl; )
-           lsub[ito++] = lsub[ifrom++];        
-        for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl;
-       nextl = ito;
-    }
-
-    xsup[nsuper+1] = kcol + 1;
-    supno[kcol+1]  = nsuper;
-    xprune[kcol]   = nextl;
-    xlsub[kcol+1]  = nextl;
-
-    return 0;
-}
-
diff --git a/superlu/csp_blas2.c b/superlu/csp_blas2.c
deleted file mode 100644
index f24fa807..00000000
--- a/superlu/csp_blas2.c
+++ /dev/null
@@ -1,577 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-/*
- * File name:          csp_blas2.c
- * Purpose:            Sparse BLAS 2, using some dense BLAS 2 operations.
- */
-
-#include "slu_cdefs.h"
-extern void ctrsv_();
-extern void cgemv_();
-
-/* 
- * Function prototypes 
- */
-void cusolve(int, int, complex*, complex*);
-void clsolve(int, int, complex*, complex*);
-void cmatvec(int, int, int, complex*, complex*, complex*);
-
-
-int
-sp_ctrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
-         SuperMatrix *U, complex *x, SuperLUStat_t *stat, int *info)
-{
-/*
- *   Purpose
- *   =======
- *
- *   sp_ctrsv() solves one of the systems of equations   
- *       A*x = b,   or   A'*x = b,
- *   where b and x are n element vectors and A is a sparse unit , or   
- *   non-unit, upper or lower triangular matrix.   
- *   No test for singularity or near-singularity is included in this   
- *   routine. Such tests must be performed before calling this routine.   
- *
- *   Parameters   
- *   ==========   
- *
- *   uplo   - (input) char*
- *            On entry, uplo specifies whether the matrix is an upper or   
- *             lower triangular matrix as follows:   
- *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
- *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
- *
- *   trans  - (input) char*
- *             On entry, trans specifies the equations to be solved as   
- *             follows:   
- *                trans = 'N' or 'n'   A*x = b.   
- *                trans = 'T' or 't'   A'*x = b.
- *                trans = 'C' or 'c'   A^H*x = b.   
- *
- *   diag   - (input) char*
- *             On entry, diag specifies whether or not A is unit   
- *             triangular as follows:   
- *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
- *                diag = 'N' or 'n'   A is not assumed to be unit   
- *                                    triangular.   
- *          
- *   L       - (input) SuperMatrix*
- *            The factor L from the factorization Pr*A*Pc=L*U. Use
- *             compressed row subscripts storage for supernodes,
- *             i.e., L has types: Stype = SC, Dtype = SLU_C, Mtype = TRLU.
- *
- *   U       - (input) SuperMatrix*
- *             The factor U from the factorization Pr*A*Pc=L*U.
- *             U has types: Stype = NC, Dtype = SLU_C, Mtype = TRU.
- *    
- *   x       - (input/output) complex*
- *             Before entry, the incremented array X must contain the n   
- *             element right-hand side vector b. On exit, X is overwritten 
- *             with the solution vector x.
- *
- *   info    - (output) int*
- *             If *info = -i, the i-th argument had an illegal value.
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-        ftcs2 = _cptofcd("N", strlen("N")),
-        ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    SCformat *Lstore;
-    NCformat *Ustore;
-    complex   *Lval, *Uval;
-    int incx = 1, incy = 1;
-    complex temp;
-    complex alpha = {1.0, 0.0}, beta = {1.0, 0.0};
-    complex comp_zero = {0.0, 0.0};
-    int nrow;
-    int fsupc, nsupr, nsupc, luptr, istart, irow;
-    int i, k, iptr, jcol;
-    complex *work;
-    flops_t solve_ops;
-
-    /* Test the input parameters */
-    *info = 0;
-    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
-    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && 
-              !lsame_(trans, "C")) *info = -2;
-    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("sp_ctrsv", &i);
-       return 0;
-    }
-
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-
-    if ( !(work = complexCalloc(L->nrow)) )
-       ABORT("Malloc fails for work in sp_ctrsv().");
-    
-    if ( lsame_(trans, "N") ) {        /* Form x := inv(A)*x. */
-       
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := inv(L)*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               nrow = nsupr - nsupc;
-
-                /* 1 c_div costs 10 flops */
-               solve_ops += 4 * nsupc * (nsupc - 1) + 10 * nsupc;
-               solve_ops += 8 * nrow * nsupc;
-
-               if ( nsupc == 1 ) {
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       cc_mult(&comp_zero, &x[fsupc], &Lval[luptr]);
-                       c_sub(&x[irow], &x[irow], &comp_zero);
-                   }
-               } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-                   CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-               
-                   CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-                       &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#else
-                   ctrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-               
-                   cgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-                       &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#endif
-#else
-                   clsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
-               
-                   cmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
-                             &x[fsupc], &work[0] );
-#endif         
-               
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; ++i, ++iptr) {
-                       irow = L_SUB(iptr);
-                       c_sub(&x[irow], &x[irow], &work[i]); /* Scatter */
-                       work[i] = comp_zero;
-
-                   }
-               }
-           } /* for k ... */
-           
-       } else {
-           /* Form x := inv(U)*x */
-           
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = Lstore->nsuper; k >= 0; k--) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               
-                /* 1 c_div costs 10 flops */
-               solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
-
-               if ( nsupc == 1 ) {
-                   c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
-                   for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
-                       irow = U_SUB(i);
-                       cc_mult(&comp_zero, &x[fsupc], &Uval[i]);
-                       c_sub(&x[irow], &x[irow], &comp_zero);
-                   }
-               } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-                   CTRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
-                      &x[fsupc], &incx);
-#else
-                   ctrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#endif
-#else          
-                   cusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
-#endif         
-
-                   for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                       solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                       for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
-                               i++) {
-                           irow = U_SUB(i);
-                       cc_mult(&comp_zero, &x[jcol], &Uval[i]);
-                       c_sub(&x[irow], &x[irow], &comp_zero);
-                       }
-                    }
-               }
-           } /* for k ... */
-           
-       }
-    } else if ( lsame_(trans, "T") ) { /* Form x := inv(A')*x */
-       
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := inv(L')*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = Lstore->nsuper; k >= 0; --k) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-
-               solve_ops += 8 * (nsupr - nsupc) * nsupc;
-
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   iptr = istart + nsupc;
-                   for (i = L_NZ_START(jcol) + nsupc; 
-                               i < L_NZ_START(jcol+1); i++) {
-                       irow = L_SUB(iptr);
-                       cc_mult(&comp_zero, &x[irow], &Lval[i]);
-                       c_sub(&x[jcol], &x[jcol], &comp_zero);
-                       iptr++;
-                   }
-               }
-               
-               if ( nsupc > 1 ) {
-                   solve_ops += 4 * nsupc * (nsupc - 1);
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("L", strlen("L"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("U", strlen("U"));
-                   CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-#else
-                   ctrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-#endif
-               }
-           }
-       } else {
-           /* Form x := inv(U')*x */
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
-                       irow = U_SUB(i);
-                       cc_mult(&comp_zero, &x[irow], &Uval[i]);
-                       c_sub(&x[jcol], &x[jcol], &comp_zero);
-                   }
-               }
-
-                /* 1 c_div costs 10 flops */
-               solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
-
-               if ( nsupc == 1 ) {
-                   c_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
-               } else {
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("U", strlen("U"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("N", strlen("N"));
-                   CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#else
-                   ctrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#endif
-               }
-           } /* for k ... */
-       }
-    } else { /* Form x := conj(inv(A'))*x */
-       
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := conj(inv(L'))*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = Lstore->nsuper; k >= 0; --k) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-
-               solve_ops += 8 * (nsupr - nsupc) * nsupc;
-
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   iptr = istart + nsupc;
-                   for (i = L_NZ_START(jcol) + nsupc; 
-                               i < L_NZ_START(jcol+1); i++) {
-                       irow = L_SUB(iptr);
-                        cc_conj(&temp, &Lval[i]);
-                       cc_mult(&comp_zero, &x[irow], &temp);
-                       c_sub(&x[jcol], &x[jcol], &comp_zero);
-                       iptr++;
-                   }
-               }
-               
-               if ( nsupc > 1 ) {
-                   solve_ops += 4 * nsupc * (nsupc - 1);
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("L", strlen("L"));
-                    ftcs2 = _cptofcd(trans, strlen("T"));
-                    ftcs3 = _cptofcd("U", strlen("U"));
-                   CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-#else
-                    ctrsv_("L", trans, "U", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#endif
-               }
-           }
-       } else {
-           /* Form x := conj(inv(U'))*x */
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
-                       irow = U_SUB(i);
-                        cc_conj(&temp, &Uval[i]);
-                       cc_mult(&comp_zero, &x[irow], &temp);
-                       c_sub(&x[jcol], &x[jcol], &comp_zero);
-                   }
-               }
-
-                /* 1 c_div costs 10 flops */
-               solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
- 
-               if ( nsupc == 1 ) {
-                    cc_conj(&temp, &Lval[luptr]);
-                   c_div(&x[fsupc], &x[fsupc], &temp);
-               } else {
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("U", strlen("U"));
-                    ftcs2 = _cptofcd(trans, strlen("T"));
-                    ftcs3 = _cptofcd("N", strlen("N"));
-                   CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#else
-                    ctrsv_("U", trans, "N", &nsupc, &Lval[luptr], &nsupr,
-                               &x[fsupc], &incx);
-#endif
-               }
-           } /* for k ... */
-       }
-    }
-
-    stat->ops[SOLVE] += solve_ops;
-    SUPERLU_FREE(work);
-    return 0;
-}
-
-
-
-int
-sp_cgemv(char *trans, complex alpha, SuperMatrix *A, complex *x, 
-        int incx, complex beta, complex *y, int incy)
-{
-/*  Purpose   
-    =======   
-
-    sp_cgemv()  performs one of the matrix-vector operations   
-       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
-    where alpha and beta are scalars, x and y are vectors and A is a
-    sparse A->nrow by A->ncol matrix.   
-
-    Parameters   
-    ==========   
-
-    TRANS  - (input) char*
-             On entry, TRANS specifies the operation to be performed as   
-             follows:   
-                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
-                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
-                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
-
-    ALPHA  - (input) complex
-             On entry, ALPHA specifies the scalar alpha.   
-
-    A      - (input) SuperMatrix*
-             Before entry, the leading m by n part of the array A must   
-             contain the matrix of coefficients.   
-
-    X      - (input) complex*, array of DIMENSION at least   
-             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
-             Before entry, the incremented array X must contain the   
-             vector x.   
-
-    INCX   - (input) int
-             On entry, INCX specifies the increment for the elements of   
-             X. INCX must not be zero.   
-
-    BETA   - (input) complex
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then Y need not be set on input.   
-
-    Y      - (output) complex*,  array of DIMENSION at least   
-             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
-             Before entry with BETA non-zero, the incremented array Y   
-             must contain the vector y. On exit, Y is overwritten by the 
-             updated vector y.
-            
-    INCY   - (input) int
-             On entry, INCY specifies the increment for the elements of   
-             Y. INCY must not be zero.   
-
-    ==== Sparse Level 2 Blas routine.   
-*/
-
-    /* Local variables */
-    NCformat *Astore;
-    complex   *Aval;
-    int info;
-    complex temp, temp1;
-    int lenx, leny, i, j, irow;
-    int iy, jx, jy, kx, ky;
-    int notran;
-    complex comp_zero = {0.0, 0.0};
-    complex comp_one = {1.0, 0.0};
-
-    notran = lsame_(trans, "N");
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    
-    /* Test the input parameters */
-    info = 0;
-    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
-    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
-    else if (incx == 0) info = 5;
-    else if (incy == 0)        info = 8;
-    if (info != 0) {
-       xerbla_("sp_cgemv ", &info);
-       return 0;
-    }
-
-    /* Quick return if possible. */
-    if (A->nrow == 0 || A->ncol == 0 || 
-       c_eq(&alpha, &comp_zero) && 
-       c_eq(&beta, &comp_one))
-       return 0;
-
-
-    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
-       up the start points in  X  and  Y. */
-    if (lsame_(trans, "N")) {
-       lenx = A->ncol;
-       leny = A->nrow;
-    } else {
-       lenx = A->nrow;
-       leny = A->ncol;
-    }
-    if (incx > 0) kx = 0;
-    else kx =  - (lenx - 1) * incx;
-    if (incy > 0) ky = 0;
-    else ky =  - (leny - 1) * incy;
-
-    /* Start the operations. In this version the elements of A are   
-       accessed sequentially with one pass through A. */
-    /* First form  y := beta*y. */
-    if ( !c_eq(&beta, &comp_one) ) {
-       if (incy == 1) {
-           if ( c_eq(&beta, &comp_zero) )
-               for (i = 0; i < leny; ++i) y[i] = comp_zero;
-           else
-               for (i = 0; i < leny; ++i) 
-                 cc_mult(&y[i], &beta, &y[i]);
-       } else {
-           iy = ky;
-           if ( c_eq(&beta, &comp_zero) )
-               for (i = 0; i < leny; ++i) {
-                   y[iy] = comp_zero;
-                   iy += incy;
-               }
-           else
-               for (i = 0; i < leny; ++i) {
-                   cc_mult(&y[iy], &beta, &y[iy]);
-                   iy += incy;
-               }
-       }
-    }
-    
-    if ( c_eq(&alpha, &comp_zero) ) return 0;
-
-    if ( notran ) {
-       /* Form  y := alpha*A*x + y. */
-       jx = kx;
-       if (incy == 1) {
-           for (j = 0; j < A->ncol; ++j) {
-               if ( !c_eq(&x[jx], &comp_zero) ) {
-                   cc_mult(&temp, &alpha, &x[jx]);
-                   for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                       irow = Astore->rowind[i];
-                       cc_mult(&temp1, &temp,  &Aval[i]);
-                       c_add(&y[irow], &y[irow], &temp1);
-                   }
-               }
-               jx += incx;
-           }
-       } else {
-           ABORT("Not implemented.");
-       }
-    } else {
-       /* Form  y := alpha*A'*x + y. */
-       jy = ky;
-       if (incx == 1) {
-           for (j = 0; j < A->ncol; ++j) {
-               temp = comp_zero;
-               for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                   irow = Astore->rowind[i];
-                   cc_mult(&temp1, &Aval[i], &x[irow]);
-                   c_add(&temp, &temp, &temp1);
-               }
-               cc_mult(&temp1, &alpha, &temp);
-               c_add(&y[jy], &y[jy], &temp1);
-               jy += incy;
-           }
-       } else {
-           ABORT("Not implemented.");
-       }
-    }
-    return 0;    
-} /* sp_cgemv */
-
diff --git a/superlu/csp_blas3.c b/superlu/csp_blas3.c
deleted file mode 100644
index 1f5e628c..00000000
--- a/superlu/csp_blas3.c
+++ /dev/null
@@ -1,141 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-/*
- * File name:          sp_blas3.c
- * Purpose:            Sparse BLAS3, using some dense BLAS3 operations.
- */
-
-#include "slu_cdefs.h"
-
-int
-sp_cgemm(char *transa, char *transb, int m, int n, int k, 
-         complex alpha, SuperMatrix *A, complex *b, int ldb, 
-         complex beta, complex *c, int ldc)
-{
-/*  Purpose   
-    =======   
-
-    sp_c performs one of the matrix-matrix operations   
-
-       C := alpha*op( A )*op( B ) + beta*C,   
-
-    where  op( X ) is one of 
-
-       op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
-
-    alpha and beta are scalars, and A, B and C are matrices, with op( A ) 
-    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. 
-  
-
-    Parameters   
-    ==========   
-
-    TRANSA - (input) char*
-             On entry, TRANSA specifies the form of op( A ) to be used in 
-             the matrix multiplication as follows:   
-                TRANSA = 'N' or 'n',  op( A ) = A.   
-                TRANSA = 'T' or 't',  op( A ) = A'.   
-                TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).   
-             Unchanged on exit.   
-
-    TRANSB - (input) char*
-             On entry, TRANSB specifies the form of op( B ) to be used in 
-             the matrix multiplication as follows:   
-                TRANSB = 'N' or 'n',  op( B ) = B.   
-                TRANSB = 'T' or 't',  op( B ) = B'.   
-                TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).   
-             Unchanged on exit.   
-
-    M      - (input) int   
-             On entry,  M  specifies  the number of rows of the matrix 
-            op( A ) and of the matrix C.  M must be at least zero. 
-            Unchanged on exit.   
-
-    N      - (input) int
-             On entry,  N specifies the number of columns of the matrix 
-            op( B ) and the number of columns of the matrix C. N must be 
-            at least zero.
-            Unchanged on exit.   
-
-    K      - (input) int
-             On entry, K specifies the number of columns of the matrix 
-            op( A ) and the number of rows of the matrix op( B ). K must 
-            be at least  zero.   
-             Unchanged on exit.
-            
-    ALPHA  - (input) complex
-             On entry, ALPHA specifies the scalar alpha.   
-
-    A      - (input) SuperMatrix*
-             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
-             Currently, the type of A can be:
-                 Stype = NC or NCP; Dtype = SLU_C; Mtype = GE. 
-             In the future, more general A can be handled.
-
-    B      - COMPLEX PRECISION array of DIMENSION ( LDB, kb ), where kb is 
-             n when TRANSB = 'N' or 'n',  and is  k otherwise.   
-             Before entry with  TRANSB = 'N' or 'n',  the leading k by n 
-             part of the array B must contain the matrix B, otherwise 
-             the leading n by k part of the array B must contain the 
-             matrix B.   
-             Unchanged on exit.   
-
-    LDB    - (input) int
-             On entry, LDB specifies the first dimension of B as declared 
-             in the calling (sub) program. LDB must be at least max( 1, n ).  
-             Unchanged on exit.   
-
-    BETA   - (input) complex
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then C need not be set on input.   
-
-    C      - COMPLEX PRECISION array of DIMENSION ( LDC, n ).   
-             Before entry, the leading m by n part of the array C must 
-             contain the matrix C,  except when beta is zero, in which 
-             case C need not be set on entry.   
-             On exit, the array C is overwritten by the m by n matrix 
-            ( alpha*op( A )*B + beta*C ).   
-
-    LDC    - (input) int
-             On entry, LDC specifies the first dimension of C as declared 
-             in the calling (sub)program. LDC must be at least max(1,m).   
-             Unchanged on exit.   
-
-    ==== Sparse Level 3 Blas routine.   
-*/
-    int    incx = 1, incy = 1;
-    int    j;
-
-    for (j = 0; j < n; ++j) {
-       sp_cgemv(transa, alpha, A, &b[ldb*j], incx, beta, &c[ldc*j], incy);
-    }
-    return 0;    
-}
diff --git a/superlu/cutil.c b/superlu/cutil.c
deleted file mode 100644
index 91b45fa7..00000000
--- a/superlu/cutil.c
+++ /dev/null
@@ -1,482 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include <math.h>
-#include "slu_cdefs.h"
-
-void
-cCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-                      complex *nzval, int *rowind, int *colptr,
-                      Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-{
-    NCformat *Astore;
-
-    A->Stype = stype;
-    A->Dtype = dtype;
-    A->Mtype = mtype;
-    A->nrow = m;
-    A->ncol = n;
-    A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) );
-    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
-    Astore = A->Store;
-    Astore->nnz = nnz;
-    Astore->nzval = nzval;
-    Astore->rowind = rowind;
-    Astore->colptr = colptr;
-}
-
-void
-cCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-                      complex *nzval, int *colind, int *rowptr,
-                      Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-{
-    NRformat *Astore;
-
-    A->Stype = stype;
-    A->Dtype = dtype;
-    A->Mtype = mtype;
-    A->nrow = m;
-    A->ncol = n;
-    A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) );
-    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
-    Astore = A->Store;
-    Astore->nnz = nnz;
-    Astore->nzval = nzval;
-    Astore->colind = colind;
-    Astore->rowptr = rowptr;
-}
-
-/* Copy matrix A into matrix B. */
-void
-cCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
-{
-    NCformat *Astore, *Bstore;
-    int      ncol, nnz, i;
-
-    B->Stype = A->Stype;
-    B->Dtype = A->Dtype;
-    B->Mtype = A->Mtype;
-    B->nrow  = A->nrow;;
-    B->ncol  = ncol = A->ncol;
-    Astore   = (NCformat *) A->Store;
-    Bstore   = (NCformat *) B->Store;
-    Bstore->nnz = nnz = Astore->nnz;
-    for (i = 0; i < nnz; ++i)
-       ((complex *)Bstore->nzval)[i] = ((complex *)Astore->nzval)[i];
-    for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
-    for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
-}
-
-
-void
-cCreate_Dense_Matrix(SuperMatrix *X, int m, int n, complex *x, int ldx,
-                   Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-{
-    DNformat    *Xstore;
-    
-    X->Stype = stype;
-    X->Dtype = dtype;
-    X->Mtype = mtype;
-    X->nrow = m;
-    X->ncol = n;
-    X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
-    if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
-    Xstore = (DNformat *) X->Store;
-    Xstore->lda = ldx;
-    Xstore->nzval = (complex *) x;
-}
-
-void
-cCopy_Dense_Matrix(int M, int N, complex *X, int ldx,
-                       complex *Y, int ldy)
-{
-/*
- *
- *  Purpose
- *  =======
- *
- *  Copies a two-dimensional matrix X to another matrix Y.
- */
-    int    i, j;
-    
-    for (j = 0; j < N; ++j)
-        for (i = 0; i < M; ++i)
-            Y[i + j*ldy] = X[i + j*ldx];
-}
-
-void
-cCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz, 
-                       complex *nzval, int *nzval_colptr, int *rowind,
-                       int *rowind_colptr, int *col_to_sup, int *sup_to_col,
-                       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-{
-    SCformat *Lstore;
-
-    L->Stype = stype;
-    L->Dtype = dtype;
-    L->Mtype = mtype;
-    L->nrow = m;
-    L->ncol = n;
-    L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) );
-    if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store");
-    Lstore = L->Store;
-    Lstore->nnz = nnz;
-    Lstore->nsuper = col_to_sup[n];
-    Lstore->nzval = nzval;
-    Lstore->nzval_colptr = nzval_colptr;
-    Lstore->rowind = rowind;
-    Lstore->rowind_colptr = rowind_colptr;
-    Lstore->col_to_sup = col_to_sup;
-    Lstore->sup_to_col = sup_to_col;
-
-}
-
-
-/*
- * Convert a row compressed storage into a column compressed storage.
- */
-void
-cCompRow_to_CompCol(int m, int n, int nnz, 
-                   complex *a, int *colind, int *rowptr,
-                   complex **at, int **rowind, int **colptr)
-{
-    register int i, j, col, relpos;
-    int *marker;
-
-    /* Allocate storage for another copy of the matrix. */
-    *at = (complex *) complexMalloc(nnz);
-    *rowind = (int *) intMalloc(nnz);
-    *colptr = (int *) intMalloc(n+1);
-    marker = (int *) intCalloc(n);
-    
-    /* Get counts of each column of A, and set up column pointers */
-    for (i = 0; i < m; ++i)
-       for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
-    (*colptr)[0] = 0;
-    for (j = 0; j < n; ++j) {
-       (*colptr)[j+1] = (*colptr)[j] + marker[j];
-       marker[j] = (*colptr)[j];
-    }
-
-    /* Transfer the matrix into the compressed column storage. */
-    for (i = 0; i < m; ++i) {
-       for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
-           col = colind[j];
-           relpos = marker[col];
-           (*rowind)[relpos] = i;
-           (*at)[relpos] = a[j];
-           ++marker[col];
-       }
-    }
-
-    SUPERLU_FREE(marker);
-}
-
-
-void
-cPrint_CompCol_Matrix(char *what, SuperMatrix *A)
-{
-    NCformat     *Astore;
-    register int i,n;
-    float       *dp;
-    
-    printf("\nCompCol matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    n = A->ncol;
-    Astore = (NCformat *) A->Store;
-    dp = (float *) Astore->nzval;
-    printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
-    printf("nzval: ");
-    for (i = 0; i < 2*Astore->colptr[n]; ++i) printf("%f  ", dp[i]);
-    printf("\nrowind: ");
-    for (i = 0; i < Astore->colptr[n]; ++i) printf("%d  ", Astore->rowind[i]);
-    printf("\ncolptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->colptr[i]);
-    printf("\n");
-    fflush(stdout);
-}
-
-void
-cPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
-{
-    SCformat     *Astore;
-    register int i, j, k, c, d, n, nsup;
-    float       *dp;
-    int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
-    
-    printf("\nSuperNode matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    n = A->ncol;
-    Astore = (SCformat *) A->Store;
-    dp = (float *) Astore->nzval;
-    col_to_sup = Astore->col_to_sup;
-    sup_to_col = Astore->sup_to_col;
-    rowind_colptr = Astore->rowind_colptr;
-    rowind = Astore->rowind;
-    printf("nrow %d, ncol %d, nnz %d, nsuper %d\n", 
-          A->nrow,A->ncol,Astore->nnz,Astore->nsuper);
-    printf("nzval:\n");
-    for (k = 0; k <= Astore->nsuper; ++k) {
-      c = sup_to_col[k];
-      nsup = sup_to_col[k+1] - c;
-      for (j = c; j < c + nsup; ++j) {
-       d = Astore->nzval_colptr[j];
-       for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
-         printf("%d\t%d\t%e\t%e\n", rowind[i], j, dp[d], dp[d+1]);
-          d += 2;      
-       }
-      }
-    }
-#if 0
-    for (i = 0; i < 2*Astore->nzval_colptr[n]; ++i) printf("%f  ", dp[i]);
-#endif
-    printf("\nnzval_colptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->nzval_colptr[i]);
-    printf("\nrowind: ");
-    for (i = 0; i < Astore->rowind_colptr[n]; ++i) 
-        printf("%d  ", Astore->rowind[i]);
-    printf("\nrowind_colptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->rowind_colptr[i]);
-    printf("\ncol_to_sup: ");
-    for (i = 0; i < n; ++i) printf("%d  ", col_to_sup[i]);
-    printf("\nsup_to_col: ");
-    for (i = 0; i <= Astore->nsuper+1; ++i) 
-        printf("%d  ", sup_to_col[i]);
-    printf("\n");
-    fflush(stdout);
-}
-
-void
-cPrint_Dense_Matrix(char *what, SuperMatrix *A)
-{
-    DNformat     *Astore;
-    register int i, j, lda = Astore->lda;
-    float       *dp;
-    
-    printf("\nDense matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    Astore = (DNformat *) A->Store;
-    dp = (float *) Astore->nzval;
-    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda);
-    printf("\nnzval: ");
-    for (j = 0; j < A->ncol; ++j) {
-        for (i = 0; i < 2*A->nrow; ++i) printf("%f  ", dp[i + j*2*lda]);
-        printf("\n");
-    }
-    printf("\n");
-    fflush(stdout);
-}
-
-/*
- * Diagnostic print of column "jcol" in the U/L factor.
- */
-void
-cprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
-{
-    int     i, k, fsupc;
-    int     *xsup, *supno;
-    int     *xlsub, *lsub;
-    complex  *lusup;
-    int     *xlusup;
-    complex  *ucol;
-    int     *usub, *xusub;
-
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    
-    printf("%s", msg);
-    printf("col %d: pivrow %d, supno %d, xprune %d\n", 
-          jcol, pivrow, supno[jcol], xprune[jcol]);
-    
-    printf("\tU-col:\n");
-    for (i = xusub[jcol]; i < xusub[jcol+1]; i++)
-       printf("\t%d%10.4f, %10.4f\n", usub[i], ucol[i].r, ucol[i].i);
-    printf("\tL-col in rectangular snode:\n");
-    fsupc = xsup[supno[jcol]]; /* first col of the snode */
-    i = xlsub[fsupc];
-    k = xlusup[jcol];
-    while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) {
-       printf("\t%d\t%10.4f, %10.4f\n", lsub[i], lusup[k].r, lusup[k].i);
-       i++; k++;
-    }
-    fflush(stdout);
-}
-
-
-/*
- * Check whether tempv[] == 0. This should be true before and after 
- * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". 
- */
-void ccheck_tempv(int n, complex *tempv)
-{
-    int i;
-       
-    for (i = 0; i < n; i++) {
-       if ((tempv[i].r != 0.0) || (tempv[i].i != 0.0))
-       {
-           fprintf(stderr,"tempv[%d] = {%f, %f}\n", i, tempv[i].r, tempv[i].i);
-           ABORT("ccheck_tempv");
-       }
-    }
-}
-
-
-void
-cGenXtrue(int n, int nrhs, complex *x, int ldx)
-{
-    int  i, j;
-    for (j = 0; j < nrhs; ++j)
-       for (i = 0; i < n; ++i) {
-           x[i + j*ldx].r = 1.0;
-           x[i + j*ldx].i = 0.0;
-       }
-}
-
-/*
- * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
- */
-void
-cFillRHS(trans_t trans, int nrhs, complex *x, int ldx,
-         SuperMatrix *A, SuperMatrix *B)
-{
-    NCformat *Astore;
-    complex   *Aval;
-    DNformat *Bstore;
-    complex   *rhs;
-    complex one = {1.0, 0.0};
-    complex zero = {0.0, 0.0};
-    int      ldc;
-    char transc[1];
-
-    Astore = A->Store;
-    Aval   = (complex *) Astore->nzval;
-    Bstore = B->Store;
-    rhs    = Bstore->nzval;
-    ldc    = Bstore->lda;
-    
-    if ( trans == NOTRANS ) *(unsigned char *)transc = 'N';
-    else *(unsigned char *)transc = 'T';
-
-    sp_cgemm(transc, "N", A->nrow, nrhs, A->ncol, one, A,
-            x, ldx, zero, rhs, ldc);
-
-}
-
-/* 
- * Fills a complex precision array with a given value.
- */
-void 
-cfill(complex *a, int alen, complex dval)
-{
-    register int i;
-    for (i = 0; i < alen; i++) a[i] = dval;
-}
-
-
-
-/* 
- * Check the inf-norm of the error vector 
- */
-void cinf_norm_error(int nrhs, SuperMatrix *X, complex *xtrue)
-{
-    DNformat *Xstore;
-    float err, xnorm;
-    complex *Xmat, *soln_work;
-    complex temp;
-    int i, j;
-
-    Xstore = X->Store;
-    Xmat = Xstore->nzval;
-
-    for (j = 0; j < nrhs; j++) {
-      soln_work = &Xmat[j*Xstore->lda];
-      err = xnorm = 0.0;
-      for (i = 0; i < X->nrow; i++) {
-        c_sub(&temp, &soln_work[i], &xtrue[i]);
-       err = SUPERLU_MAX(err, c_abs(&temp));
-       xnorm = SUPERLU_MAX(xnorm, c_abs(&soln_work[i]));
-      }
-      err = err / xnorm;
-      printf("||X - Xtrue||/||X|| = %e\n", err);
-    }
-}
-
-
-
-/* Print performance of the code. */
-void
-cPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
-           float rpg, float rcond, float *ferr,
-           float *berr, char *equed, SuperLUStat_t *stat)
-{
-    SCformat *Lstore;
-    NCformat *Ustore;
-    double   *utime;
-    flops_t  *ops;
-    
-    utime = stat->utime;
-    ops   = stat->ops;
-    
-    if ( utime[FACT] != 0. )
-       printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
-              ops[FACT]*1e-6/utime[FACT]);
-    printf("Identify relaxed snodes    = %8.2f\n", utime[RELAX]);
-    if ( utime[SOLVE] != 0. )
-       printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE],
-              ops[SOLVE]*1e-6/utime[SOLVE]);
-    
-    Lstore = (SCformat *) L->Store;
-    Ustore = (NCformat *) U->Store;
-    printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz);
-    printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz);
-    printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
-       
-    printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
-          mem_usage->for_lu/1e6, mem_usage->total_needed/1e6,
-          mem_usage->expansions);
-       
-    printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n");
-    printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n",
-          utime[FACT], ops[FACT]*1e-6/utime[FACT],
-          utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE],
-          utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]);
-    
-    printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n");
-    printf("NUM:\t%e\t%e\t%e\t%e\t%s\n",
-          rpg, rcond, ferr[0], berr[0], equed);
-    
-}
-
-
-
-
-int print_complex_vec(char *what, int n, complex *vec)
-{
-    int i;
-    printf("%s: n %d\n", what, n);
-    for (i = 0; i < n; ++i) printf("%d\t%f%f\n", i, vec[i].r, vec[i].i);
-    return 0;
-}
-
diff --git a/superlu/dcolumn_bmod.c b/superlu/dcolumn_bmod.c
deleted file mode 100644
index 0f4e2b3c..00000000
--- a/superlu/dcolumn_bmod.c
+++ /dev/null
@@ -1,354 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_ddefs.h"
-extern void dtrsv_();
-extern void dgemv_();
-
-/* 
- * Function prototypes 
- */
-void dusolve(int, int, double*, double*);
-void dlsolve(int, int, double*, double*);
-void dmatvec(int, int, int, double*, double*, double*);
-
-/* Return value:   0 - successful return
- *               > 0 - number of bytes allocated when run out of space
- */
-int
-dcolumn_bmod (
-            const int  jcol,     /* in */
-            const int  nseg,     /* in */
-            double     *dense,   /* in */
-            double     *tempv,   /* working array */
-            int        *segrep,  /* in */
-            int        *repfnz,  /* in */
-            int        fpanelc,  /* in -- first column in the current panel */
-            GlobalLU_t *Glu,     /* modified */
-            SuperLUStat_t *stat  /* output */
-            )
-{
-/*
- * Purpose:
- * ========
- *    Performs numeric block updates (sup-col) in topological order.
- *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
- *    Special processing on the supernodal portion of L\U[*,j]
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-         ftcs2 = _cptofcd("N", strlen("N")),
-         ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    int         incx = 1, incy = 1;
-    double      alpha, beta;
-    
-    /* krep = representative of current k-th supernode
-     * fsupc = first supernodal column
-     * nsupc = no of columns in supernode
-     * nsupr = no of rows in supernode (used as leading dimension)
-     * luptr = location of supernodal LU-block in storage
-     * kfnz = first nonz in the k-th supernodal segment
-     * no_zeros = no of leading zeros in a supernodal U-segment
-     */
-    double       ukj, ukj1, ukj2;
-    int          luptr, luptr1, luptr2;
-    int          fsupc, nsupc, nsupr, segsze;
-    int          nrow;   /* No of rows in the matrix of matrix-vector */
-    int          jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
-    register int lptr, kfnz, isub, irow, i;
-    register int no_zeros, new_next; 
-    int          ufirst, nextlu;
-    int          fst_col; /* First column within small LU update */
-    int          d_fsupc; /* Distance between the first column of the current
-                            panel and the first column of the current snode. */
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    double       *lusup;
-    int          *xlusup;
-    int          nzlumax;
-    double       *tempv1;
-    double      zero = 0.0;
-    double      one = 1.0;
-    double      none = -1.0;
-    int          mem_error;
-    flops_t      *ops = stat->ops;
-
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    nzlumax = Glu->nzlumax;
-    jcolp1 = jcol + 1;
-    jsupno = supno[jcol];
-
-    /* 
-     * For each nonz supernode segment of U[*,j] in topological order 
-     */
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-
-       krep = segrep[k];
-       k--;
-       ksupno = supno[krep];
-       if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
-
-           fsupc = xsup[ksupno];
-           fst_col = SUPERLU_MAX ( fsupc, fpanelc );
-
-           /* Distance from the current supernode to the current panel; 
-              d_fsupc=0 if fsupc > fpanelc. */
-           d_fsupc = fst_col - fsupc; 
-
-           luptr = xlusup[fst_col] + d_fsupc;
-           lptr = xlsub[fsupc] + d_fsupc;
-
-           kfnz = repfnz[krep];
-           kfnz = SUPERLU_MAX ( kfnz, fpanelc );
-
-           segsze = krep - kfnz + 1;
-           nsupc = krep - fst_col + 1;
-           nsupr = xlsub[fsupc+1] - xlsub[fsupc];      /* Leading dimension */
-           nrow = nsupr - d_fsupc - nsupc;
-           krep_ind = lptr + nsupc - 1;
-
-           ops[TRSV] += segsze * (segsze - 1);
-           ops[GEMV] += 2 * nrow * segsze;
-
-
-           /* 
-            * Case 1: Update U-segment of size 1 -- col-col update 
-            */
-           if ( segsze == 1 ) {
-               ukj = dense[lsub[krep_ind]];
-               luptr += nsupr*(nsupc-1) + nsupc;
-
-               for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                   irow = lsub[i];
-                   dense[irow] -=  ukj*lusup[luptr];
-                   luptr++;
-               }
-
-           } else if ( segsze <= 3 ) {
-               ukj = dense[lsub[krep_ind]];
-               luptr += nsupr*(nsupc-1) + nsupc-1;
-               ukj1 = dense[lsub[krep_ind - 1]];
-               luptr1 = luptr - nsupr;
-
-               if ( segsze == 2 ) { /* Case 2: 2cols-col update */
-                   ukj -= ukj1 * lusup[luptr1];
-                   dense[lsub[krep_ind]] = ukj;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                       irow = lsub[i];
-                       luptr++;
-                       luptr1++;
-                       dense[irow] -= ( ukj*lusup[luptr]
-                                       + ukj1*lusup[luptr1] );
-                   }
-               } else { /* Case 3: 3cols-col update */
-                   ukj2 = dense[lsub[krep_ind - 2]];
-                   luptr2 = luptr1 - nsupr;
-                   ukj1 -= ukj2 * lusup[luptr2-1];
-                   ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
-                   dense[lsub[krep_ind]] = ukj;
-                   dense[lsub[krep_ind-1]] = ukj1;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                       irow = lsub[i];
-                       luptr++;
-                       luptr1++;
-                       luptr2++;
-                       dense[irow] -= ( ukj*lusup[luptr]
-                            + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
-                   }
-               }
-
-
-
-           } else {
-               /*
-                * Case: sup-col update
-                * Perform a triangular solve and block update,
-                * then scatter the result of sup-col update to dense
-                */
-
-               no_zeros = kfnz - fst_col;
-
-               /* Copy U[*,j] segment from dense[*] to tempv[*] */
-               isub = lptr + no_zeros;
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   tempv[i] = dense[irow];
-                   ++isub; 
-               }
-
-               /* Dense triangular solve -- start effective triangle */
-               luptr += nsupr * no_zeros + no_zeros; 
-               
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-               STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                      &nsupr, tempv, &incx );
-#else          
-               if (nsupr < segsze) {
-                 fprintf(stderr, "BAD ARGUMENT for dtrsv: N=%d LDA=%d 
incx=%d\n", segsze, nsupr, incx);
-                 return -10000000;
-               }
-                 
-               dtrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                      &nsupr, tempv, &incx );
-#endif         
-               luptr += segsze;  /* Dense matrix-vector */
-               tempv1 = &tempv[segsze];
-                alpha = one;
-                beta = zero;
-#ifdef _CRAY
-               SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
-                      &nsupr, tempv, &incx, &beta, tempv1, &incy );
-#else
-               dgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
-                      &nsupr, tempv, &incx, &beta, tempv1, &incy );
-#endif
-#else
-               dlsolve ( nsupr, segsze, &lusup[luptr], tempv );
-
-               luptr += segsze;  /* Dense matrix-vector */
-               tempv1 = &tempv[segsze];
-               dmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
-#endif
-               
-               
-                /* Scatter tempv[] into SPA dense[] as a temporary storage */
-                isub = lptr + no_zeros;
-                for (i = 0; i < segsze; i++) {
-                    irow = lsub[isub];
-                    dense[irow] = tempv[i];
-                    tempv[i] = zero;
-                    ++isub;
-                }
-
-               /* Scatter tempv1[] into SPA dense[] */
-               for (i = 0; i < nrow; i++) {
-                   irow = lsub[isub];
-                   dense[irow] -= tempv1[i];
-                   tempv1[i] = zero;
-                   ++isub;
-               }
-           }
-           
-       } /* if jsupno ... */
-
-    } /* for each segment... */
-
-    /*
-     * Process the supernodal portion of L\U[*,j]
-     */
-    nextlu = xlusup[jcol];
-    fsupc = xsup[jsupno];
-
-    /* Copy the SPA dense into L\U[*,j] */
-    new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
-    while ( new_next > nzlumax ) {
-       if (mem_error = dLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu))
-           return (mem_error);
-       lusup = Glu->lusup;
-       lsub = Glu->lsub;
-    }
-
-    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
-       irow = lsub[isub];
-       lusup[nextlu] = dense[irow];
-        dense[irow] = zero;
-       ++nextlu;
-    }
-
-    xlusup[jcolp1] = nextlu;   /* Close L\U[*,jcol] */
-
-    /* For more updates within the panel (also within the current supernode), 
-     * should start from the first column of the panel, or the first column 
-     * of the supernode, whichever is bigger. There are 2 cases:
-     *    1) fsupc < fpanelc, then fst_col := fpanelc
-     *    2) fsupc >= fpanelc, then fst_col := fsupc
-     */
-    fst_col = SUPERLU_MAX ( fsupc, fpanelc );
-
-    if ( fst_col < jcol ) {
-
-       /* Distance between the current supernode and the current panel.
-          d_fsupc=0 if fsupc >= fpanelc. */
-       d_fsupc = fst_col - fsupc;
-
-       lptr = xlsub[fsupc] + d_fsupc;
-       luptr = xlusup[fst_col] + d_fsupc;
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];  /* Leading dimension */
-       nsupc = jcol - fst_col; /* Excluding jcol */
-       nrow = nsupr - d_fsupc - nsupc;
-
-       /* Points to the beginning of jcol in snode L\U(jsupno) */
-       ufirst = xlusup[jcol] + d_fsupc;        
-
-       ops[TRSV] += nsupc * (nsupc - 1);
-       ops[GEMV] += 2 * nrow * nsupc;
-       
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-       STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
-              &nsupr, &lusup[ufirst], &incx );
-#else
-       if (nsupr < nsupc) {
-         fprintf(stderr, "BAD ARGUMENT for dtrsv: N=%d LDA=%d incx=%d\n", 
nsupc, nsupr, incx);
-         return -10000000;
-       }
-       dtrsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
-              &nsupr, &lusup[ufirst], &incx );
-#endif
-       
-       alpha = none; beta = one; /* y := beta*y + alpha*A*x */
-
-#ifdef _CRAY
-       SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
-              &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-#else
-       dgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
-              &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-#endif
-#else
-       dlsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
-
-       dmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
-               &lusup[ufirst], tempv );
-       
-        /* Copy updates from tempv[*] into lusup[*] */
-       isub = ufirst + nsupc;
-       for (i = 0; i < nrow; i++) {
-           lusup[isub] -= tempv[i];
-           tempv[i] = 0.0;
-           ++isub;
-       }
-
-#endif
-       
-       
-    } /* if fst_col < jcol ... */ 
-
-    return 0;
-}
diff --git a/superlu/dcolumn_dfs.c b/superlu/dcolumn_dfs.c
deleted file mode 100644
index f62ab50c..00000000
--- a/superlu/dcolumn_dfs.c
+++ /dev/null
@@ -1,267 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-
-#include "slu_ddefs.h"
-
-/* What type of supernodes we want */
-#define T2_SUPER
-
-int
-dcolumn_dfs(
-          const int  m,         /* in - number of rows in the matrix */
-          const int  jcol,      /* in */
-          int        *perm_r,   /* in */
-          int        *nseg,     /* modified - with new segments appended */
-          int        *lsub_col, /* in - defines the RHS vector to start the 
dfs */
-          int        *segrep,   /* modified - with new segments appended */
-          int        *repfnz,   /* modified */
-          int        *xprune,   /* modified */
-          int        *marker,   /* modified */
-          int        *parent,   /* working array */
-          int        *xplore,   /* working array */
-          GlobalLU_t *Glu       /* modified */
-          )
-{
-/* 
- * Purpose
- * =======
- *   "column_dfs" performs a symbolic factorization on column jcol, and
- *   decide the supernode boundary.
- *
- *   This routine does not use numeric values, but only use the RHS 
- *   row indices to start the dfs.
- *
- *   A supernode representative is the last column of a supernode.
- *   The nonzeros in U[*,j] are segments that end at supernodal
- *   representatives. The routine returns a list of such supernodal 
- *   representatives in topological order of the dfs that generates them.
- *   The location of the first nonzero in each such supernodal segment
- *   (supernodal entry location) is also returned.
- *
- * Local parameters
- * ================
- *   nseg: no of segments in current U[*,j]
- *   jsuper: jsuper=EMPTY if column j does not belong to the same
- *     supernode as j-1. Otherwise, jsuper=nsuper.
- *
- *   marker2: A-row --> A-row/col (0/1)
- *   repfnz: SuperA-col --> PA-row
- *   parent: SuperA-col --> SuperA-col
- *   xplore: SuperA-col --> index to L-structure
- *
- * Return value
- * ============
- *     0  success;
- *   > 0  number of bytes allocated when run out of space.
- *
- */
-    int     jcolp1, jcolm1, jsuper, nsuper, nextl;
-    int     k, krep, krow, kmark, kperm;
-    int     *marker2;           /* Used for small panel LU */
-    int            fsupc;              /* First column of a snode */
-    int     myfnz;             /* First nonz column of a U-segment */
-    int            chperm, chmark, chrep, kchild;
-    int     xdfs, maxdfs, kpar, oldrep;
-    int     jptr, jm1ptr;
-    int     ito, ifrom, istop; /* Used to compress row subscripts */
-    int     mem_error;
-    int     *xsup, *supno, *lsub, *xlsub;
-    int     nzlmax;
-    static  int  first = 1, maxsuper;
-    
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    nzlmax  = Glu->nzlmax;
-
-    if ( first ) {
-       maxsuper = sp_ienv(3);
-       first = 0;
-    }
-    jcolp1  = jcol + 1;
-    jcolm1  = jcol - 1;
-    nsuper  = supno[jcol];
-    jsuper  = nsuper;
-    nextl   = xlsub[jcol];
-    marker2 = &marker[2*m];
-
-
-    /* For each nonzero in A[*,jcol] do dfs */
-    for (k = 0; lsub_col[k] != EMPTY; k++) {
-
-       krow = lsub_col[k];
-       lsub_col[k] = EMPTY;
-       kmark = marker2[krow];          
-
-       /* krow was visited before, go to the next nonz */
-        if ( kmark == jcol ) continue; 
-
-       /* For each unmarked nbr krow of jcol
-        *      krow is in L: place it in structure of L[*,jcol]
-        */
-       marker2[krow] = jcol;
-       kperm = perm_r[krow];
-
-       if ( kperm == EMPTY ) {
-           lsub[nextl++] = krow;       /* krow is indexed into A */
-           if ( nextl >= nzlmax ) {
-               if ( mem_error = dLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-                   return (mem_error);
-               lsub = Glu->lsub;
-           }
-            if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing 
*/
-       } else {
-           /*  krow is in U: if its supernode-rep krep
-            *  has been explored, update repfnz[*]
-            */
-           krep = xsup[supno[kperm]+1] - 1;
-           myfnz = repfnz[krep];
-
-           if ( myfnz != EMPTY ) {     /* Visited before */
-               if ( myfnz > kperm ) repfnz[krep] = kperm;
-               /* continue; */
-           }
-           else {
-               /* Otherwise, perform dfs starting at krep */
-               oldrep = EMPTY;
-               parent[krep] = oldrep;
-               repfnz[krep] = kperm;
-               xdfs = xlsub[krep];
-               maxdfs = xprune[krep];
-
-               do {
-                   /* 
-                    * For each unmarked kchild of krep 
-                    */
-                   while ( xdfs < maxdfs ) {
-
-                       kchild = lsub[xdfs];
-                       xdfs++;
-                       chmark = marker2[kchild];
-
-                       if ( chmark != jcol ) { /* Not reached yet */
-                           marker2[kchild] = jcol;
-                           chperm = perm_r[kchild];
-
-                           /* Case kchild is in L: place it in L[*,k] */
-                           if ( chperm == EMPTY ) {
-                               lsub[nextl++] = kchild;
-                               if ( nextl >= nzlmax ) {
-                                   if ( mem_error =
-                                        
dLUMemXpand(jcol,nextl,LSUB,&nzlmax,Glu) )
-                                       return (mem_error);
-                                   lsub = Glu->lsub;
-                               }
-                               if ( chmark != jcolm1 ) jsuper = EMPTY;
-                           } else {
-                               /* Case kchild is in U: 
-                                *   chrep = its supernode-rep. If its rep has 
-                                *   been explored, update its repfnz[*]
-                                */
-                               chrep = xsup[supno[chperm]+1] - 1;
-                               myfnz = repfnz[chrep];
-                               if ( myfnz != EMPTY ) { /* Visited before */
-                                   if ( myfnz > chperm )
-                                       repfnz[chrep] = chperm;
-                               } else {
-                                   /* Continue dfs at super-rep of kchild */
-                                   xplore[krep] = xdfs;        
-                                   oldrep = krep;
-                                   krep = chrep; /* Go deeper down G(L^t) */
-                                   parent[krep] = oldrep;
-                                   repfnz[krep] = chperm;
-                                   xdfs = xlsub[krep];     
-                                   maxdfs = xprune[krep];
-                               } /* else */
-
-                          } /* else */
-
-                       } /* if */
-
-                   } /* while */
-
-                   /* krow has no more unexplored nbrs;
-                    *    place supernode-rep krep in postorder DFS.
-                    *    backtrack dfs to its parent
-                    */
-                   segrep[*nseg] = krep;
-                   ++(*nseg);
-                   kpar = parent[krep]; /* Pop from stack, mimic recursion */
-                   if ( kpar == EMPTY ) break; /* dfs done */
-                   krep = kpar;
-                   xdfs = xplore[krep];
-                   maxdfs = xprune[krep];
-
-               } while ( kpar != EMPTY );      /* Until empty stack */
-
-           } /* else */
-
-       } /* else */
-
-    } /* for each nonzero ... */
-
-    /* Check to see if j belongs in the same supernode as j-1 */
-    if ( jcol == 0 ) { /* Do nothing for column 0 */
-       nsuper = supno[0] = 0;
-    } else {
-       fsupc = xsup[nsuper];
-       jptr = xlsub[jcol];     /* Not compressed yet */
-       jm1ptr = xlsub[jcolm1];
-
-#ifdef T2_SUPER
-       if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY;
-#endif
-       /* Make sure the number of columns in a supernode doesn't
-          exceed threshold. */
-       if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY;
-
-       /* If jcol starts a new supernode, reclaim storage space in
-        * lsub from the previous supernode. Note we only store
-        * the subscript set of the first and last columns of
-        * a supernode. (first for num values, last for pruning)
-        */
-       if ( jsuper == EMPTY ) {        /* starts a new supernode */
-           if ( (fsupc < jcolm1-1) ) { /* >= 3 columns in nsuper */
-#ifdef CHK_COMPRESS
-               printf("  Compress lsub[] at super %d-%d\n", fsupc, jcolm1);
-#endif
-               ito = xlsub[fsupc+1];
-               xlsub[jcolm1] = ito;
-               istop = ito + jptr - jm1ptr;
-               xprune[jcolm1] = istop; /* Initialize xprune[jcol-1] */
-               xlsub[jcol] = istop;
-               for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
-                   lsub[ito] = lsub[ifrom];
-               nextl = ito;            /* = istop + length(jcol) */
-           }
-           nsuper++;
-           supno[jcol] = nsuper;
-       } /* if a new supernode */
-
-    }  /* else: jcol > 0 */ 
-    
-    /* Tidy up the pointers before exit */
-    xsup[nsuper+1] = jcolp1;
-    supno[jcolp1]  = nsuper;
-    xprune[jcol]   = nextl;    /* Initialize upper bound for pruning */
-    xlsub[jcolp1]  = nextl;
-
-    return 0;
-}
diff --git a/superlu/dcomplex.c b/superlu/dcomplex.c
deleted file mode 100644
index b8e41c7f..00000000
--- a/superlu/dcomplex.c
+++ /dev/null
@@ -1,116 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-/*
- * This file defines common arithmetic operations for complex type.
- */
-#include <math.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "slu_dcomplex.h"
-
-
-/* Complex Division c = a/b */
-void z_div(doublecomplex *c, doublecomplex *a, doublecomplex *b)
-{
-    double ratio, den;
-    double abr, abi, cr, ci;
-  
-    if( (abr = b->r) < 0.)
-       abr = - abr;
-    if( (abi = b->i) < 0.)
-       abi = - abi;
-    if( abr <= abi ) {
-       if (abi == 0) {
-           fprintf(stderr, "z_div.c: division by zero\n");
-            exit(-1);
-       }         
-       ratio = b->r / b->i ;
-       den = b->i * (1 + ratio*ratio);
-       cr = (a->r*ratio + a->i) / den;
-       ci = (a->i*ratio - a->r) / den;
-    } else {
-       ratio = b->i / b->r ;
-       den = b->r * (1 + ratio*ratio);
-       cr = (a->r + a->i*ratio) / den;
-       ci = (a->i - a->r*ratio) / den;
-    }
-    c->r = cr;
-    c->i = ci;
-}
-
-
-/* Returns sqrt(z.r^2 + z.i^2) */
-double z_abs(doublecomplex *z)
-{
-    double temp;
-    double real = z->r;
-    double imag = z->i;
-
-    if (real < 0) real = -real;
-    if (imag < 0) imag = -imag;
-    if (imag > real) {
-       temp = real;
-       real = imag;
-       imag = temp;
-    }
-    if ((real+imag) == real) return(real);
-  
-    temp = imag/real;
-    temp = real*sqrt(1.0 + temp*temp);  /*overflow!!*/
-    return (temp);
-}
-
-
-/* Approximates the abs */
-/* Returns abs(z.r) + abs(z.i) */
-double z_abs1(doublecomplex *z)
-{
-    double real = z->r;
-    double imag = z->i;
-  
-    if (real < 0) real = -real;
-    if (imag < 0) imag = -imag;
-
-    return (real + imag);
-}
-
-/* Return the exponentiation */
-void z_exp(doublecomplex *r, doublecomplex *z)
-{
-    double expx;
-
-    expx = exp(z->r);
-    r->r = expx * cos(z->i);
-    r->i = expx * sin(z->i);
-}
-
-/* Return the complex conjugate */
-void d_cnjg(doublecomplex *r, doublecomplex *z)
-{
-    r->r = z->r;
-    r->i = -z->i;
-}
-
-/* Return the imaginary part */
-double d_imag(doublecomplex *z)
-{
-    return (z->i);
-}
-
-
diff --git a/superlu/dcopy_to_ucol.c b/superlu/dcopy_to_ucol.c
deleted file mode 100644
index 4d2bed09..00000000
--- a/superlu/dcopy_to_ucol.c
+++ /dev/null
@@ -1,112 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_ddefs.h"
-
-int
-dcopy_to_ucol(
-             int        jcol,    /* in */
-             int        nseg,    /* in */
-             int        *segrep,  /* in */
-             int        *repfnz,  /* in */
-             int        *perm_r,  /* in */
-             double     *dense,   /* modified - reset to zero on return */
-             GlobalLU_t *Glu      /* modified */
-             )
-{
-/* 
- * Gather from SPA dense[*] to global ucol[*].
- */
-    int ksub, krep, ksupno;
-    int i, k, kfnz, segsze;
-    int fsupc, isub, irow;
-    int jsupno, nextu;
-    int new_next, mem_error;
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-    double    *ucol;
-    int       *usub, *xusub;
-    int       nzumax;
-
-    double zero = 0.0;
-
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    nzumax  = Glu->nzumax;
-    
-    jsupno = supno[jcol];
-    nextu  = xusub[jcol];
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-       krep = segrep[k--];
-       ksupno = supno[krep];
-
-       if ( ksupno != jsupno ) { /* Should go into ucol[] */
-           kfnz = repfnz[krep];
-           if ( kfnz != EMPTY ) {      /* Nonzero U-segment */
-
-               fsupc = xsup[ksupno];
-               isub = xlsub[fsupc] + kfnz - fsupc;
-               segsze = krep - kfnz + 1;
-
-               new_next = nextu + segsze;
-               while ( new_next > nzumax ) {
-                   if (mem_error = dLUMemXpand(jcol, nextu, UCOL, &nzumax, 
Glu))
-                       return (mem_error);
-                   ucol = Glu->ucol;
-                   if (mem_error = dLUMemXpand(jcol, nextu, USUB, &nzumax, 
Glu))
-                       return (mem_error);
-                   usub = Glu->usub;
-                   lsub = Glu->lsub;
-               }
-               
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   usub[nextu] = perm_r[irow];
-                   ucol[nextu] = dense[irow];
-                   dense[irow] = zero;
-                   nextu++;
-                   isub++;
-               } 
-
-           }
-
-       }
-
-    } /* for each segment... */
-
-    xusub[jcol + 1] = nextu;      /* Close U[*,jcol] */
-    return 0;
-}
diff --git a/superlu/dgscon.c b/superlu/dgscon.c
deleted file mode 100644
index 2e8e81bd..00000000
--- a/superlu/dgscon.c
+++ /dev/null
@@ -1,156 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-/*
- * File name:  dgscon.c
- * History:     Modified from lapack routines DGECON.
- */
-#include <math.h>
-#include "slu_ddefs.h"
-
-void
-dgscon(char *norm, SuperMatrix *L, SuperMatrix *U,
-       double anorm, double *rcond, SuperLUStat_t *stat, int *info)
-{
-/*
-    Purpose   
-    =======   
-
-    DGSCON estimates the reciprocal of the condition number of a general 
-    real matrix A, in either the 1-norm or the infinity-norm, using   
-    the LU factorization computed by DGETRF.   
-
-    An estimate is obtained for norm(inv(A)), and the reciprocal of the   
-    condition number is computed as   
-       RCOND = 1 / ( norm(A) * norm(inv(A)) ).   
-
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
- 
-    Arguments   
-    =========   
-
-    NORM    (input) char*
-            Specifies whether the 1-norm condition number or the   
-            infinity-norm condition number is required:   
-            = '1' or 'O':  1-norm;   
-            = 'I':         Infinity-norm.
-           
-    L       (input) SuperMatrix*
-            The factor L from the factorization Pr*A*Pc=L*U as computed by
-            dgstrf(). Use compressed row subscripts storage for supernodes,
-            i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
- 
-    U       (input) SuperMatrix*
-            The factor U from the factorization Pr*A*Pc=L*U as computed by
-            dgstrf(). Use column-wise storage scheme, i.e., U has types:
-            Stype = SLU_NC, Dtype = SLU_D, Mtype = TRU.
-           
-    ANORM   (input) double
-            If NORM = '1' or 'O', the 1-norm of the original matrix A.   
-            If NORM = 'I', the infinity-norm of the original matrix A.
-           
-    RCOND   (output) double*
-            The reciprocal of the condition number of the matrix A,   
-            computed as RCOND = 1/(norm(A) * norm(inv(A))).
-           
-    INFO    (output) int*
-            = 0:  successful exit   
-            < 0:  if INFO = -i, the i-th argument had an illegal value   
-
-    ===================================================================== 
-*/
-
-    /* Local variables */
-    int    kase, kase1, onenrm, i;
-    double ainvnm;
-    double *work;
-    int    *iwork;
-    extern int drscl_(int *, double *, double *, int *);
-
-    extern int dlacon_(int *, double *, double *, int *, double *, int *);
-
-    
-    /* Test the input parameters. */
-    *info = 0;
-    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
-    if (! onenrm && ! lsame_(norm, "I")) *info = -1;
-    else if (L->nrow < 0 || L->nrow != L->ncol ||
-             L->Stype != SLU_SC || L->Dtype != SLU_D || L->Mtype != SLU_TRLU)
-        *info = -2;
-    else if (U->nrow < 0 || U->nrow != U->ncol ||
-             U->Stype != SLU_NC || U->Dtype != SLU_D || U->Mtype != SLU_TRU) 
-       *info = -3;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("dgscon", &i);
-       return;
-    }
-
-    /* Quick return if possible */
-    *rcond = 0.;
-    if ( L->nrow == 0 || U->nrow == 0) {
-       *rcond = 1.;
-       return;
-    }
-
-    work = doubleCalloc( 3*L->nrow );
-    iwork = intMalloc( L->nrow );
-
-
-    if ( !work || !iwork )
-       ABORT("Malloc fails for work arrays in dgscon.");
-    
-    /* Estimate the norm of inv(A). */
-    ainvnm = 0.;
-    if ( onenrm ) kase1 = 1;
-    else kase1 = 2;
-    kase = 0;
-
-    do {
-       dlacon_(&L->nrow, &work[L->nrow], &work[0], &iwork[0], &ainvnm, &kase);
-
-       if (kase == 0) break;
-
-       if (kase == kase1) {
-           /* Multiply by inv(L). */
-           sp_dtrsv("L", "No trans", "Unit", L, U, &work[0], stat, info);
-
-           /* Multiply by inv(U). */
-           sp_dtrsv("U", "No trans", "Non-unit", L, U, &work[0], stat, info);
-           
-       } else {
-
-           /* Multiply by inv(U'). */
-           sp_dtrsv("U", "Transpose", "Non-unit", L, U, &work[0], stat, info);
-
-           /* Multiply by inv(L'). */
-           sp_dtrsv("L", "Transpose", "Unit", L, U, &work[0], stat, info);
-           
-       }
-
-    } while ( kase != 0 );
-
-    /* Compute the estimate of the reciprocal condition number. */
-    if (ainvnm != 0.) *rcond = (1. / ainvnm) / anorm;
-
-    SUPERLU_FREE (work);
-    SUPERLU_FREE (iwork);
-    return;
-
-} /* dgscon */
-
diff --git a/superlu/dgsequ.c b/superlu/dgsequ.c
deleted file mode 100644
index dcd42a5b..00000000
--- a/superlu/dgsequ.c
+++ /dev/null
@@ -1,206 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-/*
- * File name:  dgsequ.c
- * History:     Modified from LAPACK routine DGEEQU
- */
-#include <math.h>
-#include "slu_ddefs.h"
-
-void
-dgsequ(SuperMatrix *A, double *r, double *c, double *rowcnd,
-       double *colcnd, double *amax, int *info)
-{
-/*    
-    Purpose   
-    =======   
-
-    DGSEQU computes row and column scalings intended to equilibrate an   
-    M-by-N sparse matrix A and reduce its condition number. R returns the row
-    scale factors and C the column scale factors, chosen to try to make   
-    the largest element in each row and column of the matrix B with   
-    elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   
-
-    R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
-    number and BIGNUM = largest safe number.  Use of these scaling   
-    factors is not guaranteed to reduce the condition number of A but   
-    works well in practice.   
-
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
- 
-    Arguments   
-    =========   
-
-    A       (input) SuperMatrix*
-            The matrix of dimension (A->nrow, A->ncol) whose equilibration
-            factors are to be computed. The type of A can be:
-            Stype = SLU_NC; Dtype = SLU_D; Mtype = SLU_GE.
-           
-    R       (output) double*, size A->nrow
-            If INFO = 0 or INFO > M, R contains the row scale factors   
-            for A.
-           
-    C       (output) double*, size A->ncol
-            If INFO = 0,  C contains the column scale factors for A.
-           
-    ROWCND  (output) double*
-            If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
-            smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
-            AMAX is neither too large nor too small, it is not worth   
-            scaling by R.
-           
-    COLCND  (output) double*
-            If INFO = 0, COLCND contains the ratio of the smallest   
-            C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
-            worth scaling by C.
-           
-    AMAX    (output) double*
-            Absolute value of largest matrix element.  If AMAX is very   
-            close to overflow or very close to underflow, the matrix   
-            should be scaled.
-           
-    INFO    (output) int*
-            = 0:  successful exit   
-            < 0:  if INFO = -i, the i-th argument had an illegal value   
-            > 0:  if INFO = i,  and i is   
-                  <= A->nrow:  the i-th row of A is exactly zero   
-                  >  A->ncol:  the (i-M)-th column of A is exactly zero   
-
-    ===================================================================== 
-*/
-
-    /* Local variables */
-    NCformat *Astore;
-    double   *Aval;
-    int i, j, irow;
-    double rcmin, rcmax;
-    double bignum, smlnum;
-    extern double dlamch_(char *);
-    
-    /* Test the input parameters. */
-    *info = 0;
-    if ( A->nrow < 0 || A->ncol < 0 ||
-        A->Stype != SLU_NC || A->Dtype != SLU_D || A->Mtype != SLU_GE )
-       *info = -1;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("dgsequ", &i);
-       return;
-    }
-
-    /* Quick return if possible */
-    if ( A->nrow == 0 || A->ncol == 0 ) {
-       *rowcnd = 1.;
-       *colcnd = 1.;
-       *amax = 0.;
-       return;
-    }
-
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    
-    /* Get machine constants. */
-    smlnum = dlamch_("S");
-    bignum = 1. / smlnum;
-
-    /* Compute row scale factors. */
-    for (i = 0; i < A->nrow; ++i) r[i] = 0.;
-
-    /* Find the maximum element in each row. */
-    for (j = 0; j < A->ncol; ++j)
-       for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-           irow = Astore->rowind[i];
-           r[irow] = SUPERLU_MAX( r[irow], fabs(Aval[i]) );
-       }
-
-    /* Find the maximum and minimum scale factors. */
-    rcmin = bignum;
-    rcmax = 0.;
-    for (i = 0; i < A->nrow; ++i) {
-       rcmax = SUPERLU_MAX(rcmax, r[i]);
-       rcmin = SUPERLU_MIN(rcmin, r[i]);
-    }
-    *amax = rcmax;
-
-    if (rcmin == 0.) {
-       /* Find the first zero scale factor and return an error code. */
-       for (i = 0; i < A->nrow; ++i)
-           if (r[i] == 0.) {
-               *info = i + 1;
-               return;
-           }
-    } else {
-       /* Invert the scale factors. */
-       for (i = 0; i < A->nrow; ++i)
-           r[i] = 1. / SUPERLU_MIN( SUPERLU_MAX( r[i], smlnum ), bignum );
-       /* Compute ROWCND = min(R(I)) / max(R(I)) */
-       *rowcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
-    }
-
-    /* Compute column scale factors */
-    for (j = 0; j < A->ncol; ++j) c[j] = 0.;
-
-    /* Find the maximum element in each column, assuming the row
-       scalings computed above. */
-    for (j = 0; j < A->ncol; ++j)
-       for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-           irow = Astore->rowind[i];
-           c[j] = SUPERLU_MAX( c[j], fabs(Aval[i]) * r[irow] );
-       }
-
-    /* Find the maximum and minimum scale factors. */
-    rcmin = bignum;
-    rcmax = 0.;
-    for (j = 0; j < A->ncol; ++j) {
-       rcmax = SUPERLU_MAX(rcmax, c[j]);
-       rcmin = SUPERLU_MIN(rcmin, c[j]);
-    }
-
-    if (rcmin == 0.) {
-       /* Find the first zero scale factor and return an error code. */
-       for (j = 0; j < A->ncol; ++j)
-           if ( c[j] == 0. ) {
-               *info = A->nrow + j + 1;
-               return;
-           }
-    } else {
-       /* Invert the scale factors. */
-       for (j = 0; j < A->ncol; ++j)
-           c[j] = 1. / SUPERLU_MIN( SUPERLU_MAX( c[j], smlnum ), bignum);
-       /* Compute COLCND = min(C(J)) / max(C(J)) */
-       *colcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
-    }
-
-    return;
-
-} /* dgsequ */
-
-
diff --git a/superlu/dgsrfs.c b/superlu/dgsrfs.c
deleted file mode 100644
index c92aecd2..00000000
--- a/superlu/dgsrfs.c
+++ /dev/null
@@ -1,447 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-/*
- * File name:  dgsrfs.c
- * History:     Modified from lapack routine DGERFS
- */
-#include <math.h>
-#include "slu_ddefs.h"
-
-void
-dgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
-       int *perm_c, int *perm_r, char *equed, double *R, double *C,
-       SuperMatrix *B, SuperMatrix *X, double *ferr, double *berr,
-       SuperLUStat_t *stat, int *info)
-{
-/*
- *   Purpose   
- *   =======   
- *
- *   DGSRFS improves the computed solution to a system of linear   
- *   equations and provides error bounds and backward error estimates for 
- *   the solution.   
- *
- *   If equilibration was performed, the system becomes:
- *           (diag(R)*A_original*diag(C)) * X = diag(R)*B_original.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- *   Arguments   
- *   =========   
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *   
- *   A       (input) SuperMatrix*
- *           The original matrix A in the system, or the scaled A if
- *           equilibration was done. The type of A can be:
- *           Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_GE.
- *    
- *   L       (input) SuperMatrix*
- *          The factor L from the factorization Pr*A*Pc=L*U. Use
- *           compressed row subscripts storage for supernodes, 
- *           i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = 
SLU_TRLU.
- * 
- *   U       (input) SuperMatrix*
- *           The factor U from the factorization Pr*A*Pc=L*U as computed by
- *           dgstrf(). Use column-wise storage scheme, 
- *           i.e., U has types: Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
- *
- *   perm_c  (input) int*, dimension (A->ncol)
- *          Column permutation vector, which defines the 
- *           permutation matrix Pc; perm_c[i] = j means column i of A is 
- *           in position j in A*Pc.
- *
- *   perm_r  (input) int*, dimension (A->nrow)
- *           Row permutation vector, which defines the permutation matrix Pr;
- *           perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- *   equed   (input) Specifies the form of equilibration that was done.
- *           = 'N': No equilibration.
- *           = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
- *           = 'C': Column equilibration, i.e., A was postmultiplied by
- *                  diag(C).
- *           = 'B': Both row and column equilibration, i.e., A was replaced 
- *                  by diag(R)*A*diag(C).
- *
- *   R       (input) double*, dimension (A->nrow)
- *           The row scale factors for A.
- *           If equed = 'R' or 'B', A is premultiplied by diag(R).
- *           If equed = 'N' or 'C', R is not accessed.
- * 
- *   C       (input) double*, dimension (A->ncol)
- *           The column scale factors for A.
- *           If equed = 'C' or 'B', A is postmultiplied by diag(C).
- *           If equed = 'N' or 'R', C is not accessed.
- *
- *   B       (input) SuperMatrix*
- *           B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
- *           The right hand side matrix B.
- *           if equed = 'R' or 'B', B is premultiplied by diag(R).
- *
- *   X       (input/output) SuperMatrix*
- *           X has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
- *           On entry, the solution matrix X, as computed by dgstrs().
- *           On exit, the improved solution matrix X.
- *           if *equed = 'C' or 'B', X should be premultiplied by diag(C)
- *               in order to obtain the solution to the original system.
- *
- *   FERR    (output) double*, dimension (B->ncol)   
- *           The estimated forward error bound for each solution vector   
- *           X(j) (the j-th column of the solution matrix X).   
- *           If XTRUE is the true solution corresponding to X(j), FERR(j) 
- *           is an estimated upper bound for the magnitude of the largest 
- *           element in (X(j) - XTRUE) divided by the magnitude of the   
- *           largest element in X(j).  The estimate is as reliable as   
- *           the estimate for RCOND, and is almost always a slight   
- *           overestimate of the true error.
- *
- *   BERR    (output) double*, dimension (B->ncol)   
- *           The componentwise relative backward error of each solution   
- *           vector X(j) (i.e., the smallest relative change in   
- *           any element of A or B that makes X(j) an exact solution).
- *
- *   stat     (output) SuperLUStat_t*
- *            Record the statistics on runtime and floating-point operation 
count.
- *            See util.h for the definition of 'SuperLUStat_t'.
- *
- *   info    (output) int*   
- *           = 0:  successful exit   
- *            < 0:  if INFO = -i, the i-th argument had an illegal value   
- *
- *    Internal Parameters   
- *    ===================   
- *
- *    ITMAX is the maximum number of steps of iterative refinement.   
- *
- */  
-
-#define ITMAX 5
-    
-    /* Table of constant values */
-    int    ione = 1;
-    double ndone = -1.;
-    double done = 1.;
-    
-    /* Local variables */
-    NCformat *Astore;
-    double   *Aval;
-    SuperMatrix Bjcol;
-    DNformat *Bstore, *Xstore, *Bjcol_store;
-    double   *Bmat, *Xmat, *Bptr, *Xptr;
-    int      kase;
-    double   safe1, safe2;
-    int      i, j, k, irow, nz, count, notran, rowequ, colequ;
-    int      ldb, ldx, nrhs;
-    double   s, xk, lstres, eps, safmin;
-    char     transc[1];
-    trans_t  transt;
-    double   *work;
-    double   *rwork;
-    int      *iwork;
-    extern double dlamch_(char *);
-    extern int dlacon_(int *, double *, double *, int *, double *, int *);
-#ifdef _CRAY
-    extern int SCOPY(int *, double *, int *, double *, int *);
-    extern int SSAXPY(int *, double *, double *, int *, double *, int *);
-#else
-    extern int dcopy_(int *, double *, int *, double *, int *);
-    extern int daxpy_(int *, double *, double *, int *, double *, int *);
-#endif
-
-    Astore = A->Store;
-    Aval   = Astore->nzval;
-    Bstore = B->Store;
-    Xstore = X->Store;
-    Bmat   = Bstore->nzval;
-    Xmat   = Xstore->nzval;
-    ldb    = Bstore->lda;
-    ldx    = Xstore->lda;
-    nrhs   = B->ncol;
-    
-    /* Test the input parameters */
-    *info = 0;
-    notran = (trans == NOTRANS);
-    if ( !notran && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-             A->Stype != SLU_NC || A->Dtype != SLU_D || A->Mtype != SLU_GE )
-       *info = -2;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_D || L->Mtype != SLU_TRLU )
-       *info = -3;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-             U->Stype != SLU_NC || U->Dtype != SLU_D || U->Mtype != SLU_TRU )
-       *info = -4;
-    else if ( ldb < SUPERLU_MAX(0, A->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE )
-        *info = -10;
-    else if ( ldx < SUPERLU_MAX(0, A->nrow) ||
-             X->Stype != SLU_DN || X->Dtype != SLU_D || X->Mtype != SLU_GE )
-       *info = -11;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("dgsrfs", &i);
-       return;
-    }
-
-    /* Quick return if possible */
-    if ( A->nrow == 0 || nrhs == 0) {
-       for (j = 0; j < nrhs; ++j) {
-           ferr[j] = 0.;
-           berr[j] = 0.;
-       }
-       return;
-    }
-
-    rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-    colequ = lsame_(equed, "C") || lsame_(equed, "B");
-    
-    /* Allocate working space */
-    work = doubleMalloc(2*A->nrow);
-    rwork = (double *) SUPERLU_MALLOC( A->nrow * sizeof(double) );
-    iwork = intMalloc(2*A->nrow);
-    if ( !work || !rwork || !iwork ) 
-        ABORT("Malloc fails for work/rwork/iwork.");
-    
-    if ( notran ) {
-       *(unsigned char *)transc = 'N';
-        transt = TRANS;
-    } else {
-       *(unsigned char *)transc = 'T';
-       transt = NOTRANS;
-    }
-
-    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
-    nz     = A->ncol + 1;
-    eps    = dlamch_("Epsilon");
-    safmin = dlamch_("Safe minimum");
-    safe1  = nz * safmin;
-    safe2  = safe1 / eps;
-
-    /* Compute the number of nonzeros in each row (or column) of A */
-    for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
-    if ( notran ) {
-       for (k = 0; k < A->ncol; ++k)
-           for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) 
-               ++iwork[Astore->rowind[i]];
-    } else {
-       for (k = 0; k < A->ncol; ++k)
-           iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
-    }  
-
-    /* Copy one column of RHS B into Bjcol. */
-    Bjcol.Stype = B->Stype;
-    Bjcol.Dtype = B->Dtype;
-    Bjcol.Mtype = B->Mtype;
-    Bjcol.nrow  = B->nrow;
-    Bjcol.ncol  = 1;
-    Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
-    if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
-    Bjcol_store = Bjcol.Store;
-    Bjcol_store->lda = ldb;
-    Bjcol_store->nzval = work; /* address aliasing */
-       
-    /* Do for each right hand side ... */
-    for (j = 0; j < nrhs; ++j) {
-       count = 0;
-       lstres = 3.;
-       Bptr = &Bmat[j*ldb];
-       Xptr = &Xmat[j*ldx];
-
-       while (1) { /* Loop until stopping criterion is satisfied. */
-
-           /* Compute residual R = B - op(A) * X,   
-              where op(A) = A, A**T, or A**H, depending on TRANS. */
-           
-#ifdef _CRAY
-           SCOPY(&A->nrow, Bptr, &ione, work, &ione);
-#else
-           dcopy_(&A->nrow, Bptr, &ione, work, &ione);
-#endif
-           sp_dgemv(transc, ndone, A, Xptr, ione, done, work, ione);
-
-           /* Compute componentwise relative backward error from formula 
-              max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   
-              where abs(Z) is the componentwise absolute value of the matrix
-              or vector Z.  If the i-th component of the denominator is less
-              than SAFE2, then SAFE1 is added to the i-th component of the   
-              numerator and denominator before dividing. */
-
-           for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] );
-           
-           /* Compute abs(op(A))*abs(X) + abs(B). */
-           if (notran) {
-               for (k = 0; k < A->ncol; ++k) {
-                   xk = fabs( Xptr[k] );
-                   for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
-                       rwork[Astore->rowind[i]] += fabs(Aval[i]) * xk;
-               }
-           } else {
-               for (k = 0; k < A->ncol; ++k) {
-                   s = 0.;
-                   for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
-                       irow = Astore->rowind[i];
-                       s += fabs(Aval[i]) * fabs(Xptr[irow]);
-                   }
-                   rwork[k] += s;
-               }
-           }
-           s = 0.;
-           for (i = 0; i < A->nrow; ++i) {
-               if (rwork[i] > safe2)
-                   s = SUPERLU_MAX( s, fabs(work[i]) / rwork[i] );
-               else
-                   s = SUPERLU_MAX( s, (fabs(work[i]) + safe1) / 
-                               (rwork[i] + safe1) );
-           }
-           berr[j] = s;
-
-           /* Test stopping criterion. Continue iterating if   
-              1) The residual BERR(J) is larger than machine epsilon, and   
-              2) BERR(J) decreased by at least a factor of 2 during the   
-                 last iteration, and   
-              3) At most ITMAX iterations tried. */
-
-           if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
-               /* Update solution and try again. */
-               dgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               
-#ifdef _CRAY
-               SAXPY(&A->nrow, &done, work, &ione,
-                      &Xmat[j*ldx], &ione);
-#else
-               daxpy_(&A->nrow, &done, work, &ione,
-                      &Xmat[j*ldx], &ione);
-#endif
-               lstres = berr[j];
-               ++count;
-           } else {
-               break;
-           }
-        
-       } /* end while */
-
-       stat->RefineSteps = count;
-
-       /* Bound error from formula:
-          norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*   
-          ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)   
-          where   
-            norm(Z) is the magnitude of the largest component of Z   
-            inv(op(A)) is the inverse of op(A)   
-            abs(Z) is the componentwise absolute value of the matrix or
-              vector Z   
-            NZ is the maximum number of nonzeros in any row of A, plus 1   
-            EPS is machine epsilon   
-
-          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))   
-          is incremented by SAFE1 if the i-th component of   
-          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   
-
-          Use DLACON to estimate the infinity-norm of the matrix   
-             inv(op(A)) * diag(W),   
-          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
-       
-       for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] );
-       
-       /* Compute abs(op(A))*abs(X) + abs(B). */
-       if ( notran ) {
-           for (k = 0; k < A->ncol; ++k) {
-               xk = fabs( Xptr[k] );
-               for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
-                   rwork[Astore->rowind[i]] += fabs(Aval[i]) * xk;
-           }
-       } else {
-           for (k = 0; k < A->ncol; ++k) {
-               s = 0.;
-               for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
-                   irow = Astore->rowind[i];
-                   xk = fabs( Xptr[irow] );
-                   s += fabs(Aval[i]) * xk;
-               }
-               rwork[k] += s;
-           }
-       }
-       
-       for (i = 0; i < A->nrow; ++i)
-           if (rwork[i] > safe2)
-               rwork[i] = fabs(work[i]) + (iwork[i]+1)*eps*rwork[i];
-           else
-               rwork[i] = fabs(work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
-
-       kase = 0;
-
-       do {
-           dlacon_(&A->nrow, &work[A->nrow], work,
-                   &iwork[A->nrow], &ferr[j], &kase);
-           if (kase == 0) break;
-
-           if (kase == 1) {
-               /* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
-               if ( notran && colequ )
-                   for (i = 0; i < A->ncol; ++i) work[i] *= C[i];
-               else if ( !notran && rowequ )
-                   for (i = 0; i < A->nrow; ++i) work[i] *= R[i];
-               
-               dgstrs (transt, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               
-               for (i = 0; i < A->nrow; ++i) work[i] *= rwork[i];
-           } else {
-               /* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
-               for (i = 0; i < A->nrow; ++i) work[i] *= rwork[i];
-               
-               dgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               
-               if ( notran && colequ )
-                   for (i = 0; i < A->ncol; ++i) work[i] *= C[i];
-               else if ( !notran && rowequ )
-                   for (i = 0; i < A->ncol; ++i) work[i] *= R[i];  
-           }
-           
-       } while ( kase != 0 );
-
-
-       /* Normalize error. */
-       lstres = 0.;
-       if ( notran && colequ ) {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, C[i] * fabs( Xptr[i]) );
-       } else if ( !notran && rowequ ) {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, R[i] * fabs( Xptr[i]) );
-       } else {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, fabs( Xptr[i]) );
-       }
-       if ( lstres != 0. )
-           ferr[j] /= lstres;
-
-    } /* for each RHS j ... */
-    
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(rwork);
-    SUPERLU_FREE(iwork);
-    SUPERLU_FREE(Bjcol.Store);
-
-    return;
-
-} /* dgsrfs */
diff --git a/superlu/dgssv.c b/superlu/dgssv.c
deleted file mode 100644
index 25469f45..00000000
--- a/superlu/dgssv.c
+++ /dev/null
@@ -1,231 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include "slu_ddefs.h"
-
-void
-dgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
-      SuperMatrix *L, SuperMatrix *U, SuperMatrix *B,
-      SuperLUStat_t *stat, int *info )
-{
-/*
- * Purpose
- * =======
- *
- * DGSSV solves the system of linear equations A*X=B, using the
- * LU factorization from DGSTRF. It performs the following steps:
- *
- *   1. If A is stored column-wise (A->Stype = SLU_NC):
- *
- *      1.1. Permute the columns of A, forming A*Pc, where Pc
- *           is a permutation matrix. For more details of this step, 
- *           see sp_preorder.c.
- *
- *      1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined
- *           by Gaussian elimination with partial pivoting.
- *           L is unit lower triangular with offdiagonal entries
- *           bounded by 1 in magnitude, and U is upper triangular.
- *
- *      1.3. Solve the system of equations A*X=B using the factored
- *           form of A.
- *
- *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the
- *      above algorithm to the transpose of A:
- *
- *      2.1. Permute columns of transpose(A) (rows of A),
- *           forming transpose(A)*Pc, where Pc is a permutation matrix. 
- *           For more details of this step, see sp_preorder.c.
- *
- *      2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr
- *           determined by Gaussian elimination with partial pivoting.
- *           L is unit lower triangular with offdiagonal entries
- *           bounded by 1 in magnitude, and U is upper triangular.
- *
- *      2.3. Solve the system of equations A*X=B using the factored
- *           form of A.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- * 
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed and how the
- *         system will be solved.
- *
- * A       (input) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = SLU_NC or SLU_NR; Dtype = SLU_D; Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- * perm_c  (input/output) int*
- *         If A->Stype = SLU_NC, column permutation vector of size A->ncol
- *         which defines the permutation matrix Pc; perm_c[i] = j means 
- *         column i of A is in position j in A*Pc.
- *         If A->Stype = SLU_NR, column permutation vector of size A->nrow
- *         which describes permutation of columns of transpose(A) 
- *         (rows of A) as described above.
- * 
- *         If options->ColPerm = MY_PERMC or options->Fact = SamePattern or
- *            options->Fact = SamePattern_SameRowPerm, it is an input argument.
- *            On exit, perm_c may be overwritten by the product of the input
- *            perm_c and a permutation that postorders the elimination tree
- *            of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
- *            is already in postorder.
- *         Otherwise, it is an output argument.
- * 
- * perm_r  (input/output) int*
- *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
- *         which defines the permutation matrix Pr, and is determined 
- *         by partial pivoting.  perm_r[i] = j means row i of A is in 
- *         position j in Pr*A.
- *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
- *         determines permutation of rows of transpose(A)
- *         (columns of A) as described above.
- *
- *         If options->RowPerm = MY_PERMR or
- *            options->Fact = SamePattern_SameRowPerm, perm_r is an
- *            input argument.
- *         otherwise it is an output argument.
- *
- * L       (output) SuperMatrix*
- *         The factor L from the factorization 
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses compressed row subscripts storage for supernodes, i.e.,
- *         L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
- *         
- * U       (output) SuperMatrix*
- *        The factor U from the factorization 
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *        = 0: successful exit
- *         > 0: if info = i, and i is
- *             <= A->ncol: U(i,i) is exactly zero. The factorization has
- *                been completed, but the factor U is exactly singular,
- *                so the solution could not be computed.
- *             > A->ncol: number of bytes allocated when memory allocation
- *                failure occurred, plus A->ncol.
- *   
- */
-    DNformat *Bstore;
-    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
-    SuperMatrix AC; /* Matrix postmultiplied by Pc */
-    int      lwork = 0, *etree, i;
-    
-    /* Set default values for some parameters */
-    double   drop_tol = 0.;
-    int      panel_size;     /* panel size */
-    int      relax;          /* no of columns in a relaxed snodes */
-    int      permc_spec;
-    trans_t  trans = NOTRANS;
-    double   *utime;
-    double   t;        /* Temporary time */
-
-    /* Test the input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    if ( options->Fact != DOFACT ) *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-        (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
-        A->Dtype != SLU_D || A->Mtype != SLU_GE )
-       *info = -2;
-    else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
-       B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE )
-       *info = -7;
-    if ( *info != 0 ) {
-       i = -(*info);
-       xerbla_("dgssv", &i);
-       return;
-    }
-
-    utime = stat->utime;
-
-    /* Convert A to SLU_NC format when necessary. */
-    if ( A->Stype == SLU_NR ) {
-       NRformat *Astore = A->Store;
-       AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-       dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
-                              Astore->nzval, Astore->colind, Astore->rowptr,
-                              SLU_NC, A->Dtype, A->Mtype);
-       trans = TRANS;
-    } else {
-        if ( A->Stype == SLU_NC ) AA = A;
-    }
-
-    t = SuperLU_timer_();
-    /*
-     * Get column permutation vector perm_c[], according to permc_spec:
-     *   permc_spec = NATURAL:  natural ordering 
-     *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
-     *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
-     *   permc_spec = COLAMD:   approximate minimum degree column ordering
-     *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
-     */
-    permc_spec = options->ColPerm;
-    if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
-      get_perm_c(permc_spec, AA, perm_c);
-    utime[COLPERM] = SuperLU_timer_() - t;
-
-    etree = intMalloc(A->ncol);
-
-    t = SuperLU_timer_();
-    sp_preorder(options, AA, perm_c, etree, &AC);
-    utime[ETREE] = SuperLU_timer_() - t;
-
-    panel_size = sp_ienv(1);
-    relax = sp_ienv(2);
-
-    /*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
-         relax, panel_size, sp_ienv(3), sp_ienv(4));*/
-    t = SuperLU_timer_(); 
-    /* Compute the LU factorization of A. */
-    dgstrf(options, &AC, drop_tol, relax, panel_size,
-          etree, NULL, lwork, perm_c, perm_r, L, U, stat, info);
-    utime[FACT] = SuperLU_timer_() - t;
-
-    t = SuperLU_timer_();
-    if ( *info == 0 ) {
-        /* Solve the system A*X=B, overwriting B with X. */
-        dgstrs (trans, L, U, perm_c, perm_r, B, stat, info);
-    }
-    utime[SOLVE] = SuperLU_timer_() - t;
-
-    SUPERLU_FREE (etree);
-    Destroy_CompCol_Permuted(&AC);
-    if ( A->Stype == SLU_NR ) {
-       Destroy_SuperMatrix_Store(AA);
-       SUPERLU_FREE(AA);
-    }
-
-}
diff --git a/superlu/dgssvx.c b/superlu/dgssvx.c
deleted file mode 100644
index bc7efc3b..00000000
--- a/superlu/dgssvx.c
+++ /dev/null
@@ -1,626 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include "slu_ddefs.h"
-
-void
-dgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
-       int *etree, char *equed, double *R, double *C,
-       SuperMatrix *L, SuperMatrix *U, void *work, int lwork,
-       SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth, 
-       double *rcond, double *ferr, double *berr, 
-       mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info )
-{
-/*
- * Purpose
- * =======
- *
- * DGSSVX solves the system of linear equations A*X=B or A'*X=B, using
- * the LU factorization from dgstrf(). Error bounds on the solution and
- * a condition estimate are also provided. It performs the following steps:
- *
- *   1. If A is stored column-wise (A->Stype = SLU_NC):
- *  
- *      1.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A is
- *           overwritten by diag(R)*A*diag(C) and B by diag(R)*B
- *           (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans
- *           = TRANS or CONJ).
- *
- *      1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
- *           matrix that usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      1.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the matrix A (after equilibration if options->Equil = YES)
- *           as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
- *
- *      1.4. Compute the reciprocal pivot growth factor.
- *
- *      1.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form of 
- *           A is used to estimate the condition number of the matrix A. If
- *           the reciprocal of the condition number is less than machine
- *           precision, info = A->ncol+1 is returned as a warning, but the
- *           routine still goes on to solve for X and computes error bounds
- *           as described below.
- *
- *      1.6. The system of equations is solved for X using the factored form
- *           of A.
- *
- *      1.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      1.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R)
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
- *      to the transpose of A:
- *
- *      2.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A' is
- *           overwritten by diag(R)*A'*diag(C) and B by diag(R)*B 
- *           (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
- *
- *      2.2. Permute columns of transpose(A) (rows of A), 
- *           forming transpose(A)*Pc, where Pc is a permutation matrix that 
- *           usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      2.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the transpose(A) (after equilibration if 
- *           options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the
- *           permutation Pr determined by partial pivoting.
- *
- *      2.4. Compute the reciprocal pivot growth factor.
- *
- *      2.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form 
- *           of transpose(A) is used to estimate the condition number of the
- *           matrix A. If the reciprocal of the condition number
- *           is less than machine precision, info = A->nrow+1 is returned as
- *           a warning, but the routine still goes on to solve for X and
- *           computes error bounds as described below.
- *
- *      2.6. The system of equations is solved for X using the factored form
- *           of transpose(A).
- *
- *      2.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      2.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R) 
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed and how the
- *         system will be solved.
- *
- * A       (input/output) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of the linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = SLU_NC or SLU_NR, Dtype = SLU_D, Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- *         On entry, If options->Fact = FACTORED and equed is not 'N', 
- *         then A must have been equilibrated by the scaling factors in
- *         R and/or C.  
- *         On exit, A is not modified if options->Equil = NO, or if 
- *         options->Equil = YES but equed = 'N' on exit.
- *         Otherwise, if options->Equil = YES and equed is not 'N',
- *         A is scaled as follows:
- *         If A->Stype = SLU_NC:
- *           equed = 'R':  A := diag(R) * A
- *           equed = 'C':  A := A * diag(C)
- *           equed = 'B':  A := diag(R) * A * diag(C).
- *         If A->Stype = SLU_NR:
- *           equed = 'R':  transpose(A) := diag(R) * transpose(A)
- *           equed = 'C':  transpose(A) := transpose(A) * diag(C)
- *           equed = 'B':  transpose(A) := diag(R) * transpose(A) * diag(C).
- *
- * perm_c  (input/output) int*
- *        If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
- *         which defines the permutation matrix Pc; perm_c[i] = j means
- *         column i of A is in position j in A*Pc.
- *         On exit, perm_c may be overwritten by the product of the input
- *         perm_c and a permutation that postorders the elimination tree
- *         of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
- *         is already in postorder.
- *
- *         If A->Stype = SLU_NR, column permutation vector of size A->nrow,
- *         which describes permutation of columns of transpose(A) 
- *         (rows of A) as described above.
- * 
- * perm_r  (input/output) int*
- *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
- *         which defines the permutation matrix Pr, and is determined
- *         by partial pivoting.  perm_r[i] = j means row i of A is in 
- *         position j in Pr*A.
- *
- *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
- *         determines permutation of rows of transpose(A)
- *         (columns of A) as described above.
- *
- *         If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *         will try to use the input perm_r, unless a certain threshold
- *         criterion is violated. In that case, perm_r is overwritten by a
- *         new permutation determined by partial pivoting or diagonal
- *         threshold pivoting.
- *         Otherwise, perm_r is output argument.
- * 
- * etree   (input/output) int*,  dimension (A->ncol)
- *         Elimination tree of Pc'*A'*A*Pc.
- *         If options->Fact != FACTORED and options->Fact != DOFACT,
- *         etree is an input argument, otherwise it is an output argument.
- *         Note: etree is a vector of parent pointers for a forest whose
- *         vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *
- * equed   (input/output) char*
- *         Specifies the form of equilibration that was done.
- *         = 'N': No equilibration.
- *         = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
- *         = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
- *         = 'B': Both row and column equilibration, i.e., A was replaced 
- *                by diag(R)*A*diag(C).
- *         If options->Fact = FACTORED, equed is an input argument,
- *         otherwise it is an output argument.
- *
- * R       (input/output) double*, dimension (A->nrow)
- *         The row scale factors for A or transpose(A).
- *         If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the left by diag(R).
- *         If equed = 'N' or 'C', R is not accessed.
- *         If options->Fact = FACTORED, R is an input argument,
- *             otherwise, R is output.
- *         If options->zFact = FACTORED and equed = 'R' or 'B', each element
- *             of R must be positive.
- * 
- * C       (input/output) double*, dimension (A->ncol)
- *         The column scale factors for A or transpose(A).
- *         If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the right by diag(C).
- *         If equed = 'N' or 'R', C is not accessed.
- *         If options->Fact = FACTORED, C is an input argument,
- *             otherwise, C is output.
- *         If options->Fact = FACTORED and equed = 'C' or 'B', each element
- *             of C must be positive.
- *         
- * L       (output) SuperMatrix*
- *        The factor L from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype SLU_= NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses compressed row subscripts storage for supernodes, i.e.,
- *         L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
- *
- * U       (output) SuperMatrix*
- *        The factor U from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
- *
- * work    (workspace/output) void*, size (lwork) (in bytes)
- *         User supplied workspace, should be large enough
- *         to hold data structures for factors L and U.
- *         On exit, if fact is not 'F', L and U point to this array.
- *
- * lwork   (input) int
- *         Specifies the size of work array in bytes.
- *         = 0:  allocate space internally by system malloc;
- *         > 0:  use user-supplied work array of length lwork in bytes,
- *               returns error if space runs out.
- *         = -1: the routine guesses the amount of space needed without
- *               performing the factorization, and returns it in
- *               mem_usage->total_needed; no other side effects.
- *
- *         See argument 'mem_usage' for memory usage statistics.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         If B->ncol = 0, only LU decomposition is performed, the triangular
- *                         solve is skipped.
- *         On exit,
- *            if equed = 'N', B is not modified; otherwise
- *            if A->Stype = SLU_NC:
- *               if options->Trans = NOTRANS and equed = 'R' or 'B',
- *                  B is overwritten by diag(R)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'C' of 'B',
- *                  B is overwritten by diag(C)*B;
- *            if A->Stype = SLU_NR:
- *               if options->Trans = NOTRANS and equed = 'C' or 'B',
- *                  B is overwritten by diag(C)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'R' of 'B',
- *                  B is overwritten by diag(R)*B.
- *
- * X       (output) SuperMatrix*
- *         X has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE. 
- *         If info = 0 or info = A->ncol+1, X contains the solution matrix
- *         to the original system of equations. Note that A and B are modified
- *         on exit if equed is not 'N', and the solution to the equilibrated
- *         system is inv(diag(C))*X if options->Trans = NOTRANS and
- *         equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C'
- *         and equed = 'R' or 'B'.
- *
- * recip_pivot_growth (output) double*
- *         The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
- *         The infinity norm is used. If recip_pivot_growth is much less
- *         than 1, the stability of the LU factorization could be poor.
- *
- * rcond   (output) double*
- *         The estimate of the reciprocal condition number of the matrix A
- *         after equilibration (if done). If rcond is less than the machine
- *         precision (in particular, if rcond = 0), the matrix is singular
- *         to working precision. This condition is indicated by a return
- *         code of info > 0.
- *
- * FERR    (output) double*, dimension (B->ncol)   
- *         The estimated forward error bound for each solution vector   
- *         X(j) (the j-th column of the solution matrix X).   
- *         If XTRUE is the true solution corresponding to X(j), FERR(j) 
- *         is an estimated upper bound for the magnitude of the largest 
- *         element in (X(j) - XTRUE) divided by the magnitude of the   
- *         largest element in X(j).  The estimate is as reliable as   
- *         the estimate for RCOND, and is almost always a slight   
- *         overestimate of the true error.
- *         If options->IterRefine = NOREFINE, ferr = 1.0.
- *
- * BERR    (output) double*, dimension (B->ncol)
- *         The componentwise relative backward error of each solution   
- *         vector X(j) (i.e., the smallest relative change in   
- *         any element of A or B that makes X(j) an exact solution).
- *         If options->IterRefine = NOREFINE, berr = 1.0.
- *
- * mem_usage (output) mem_usage_t*
- *         Record the memory usage statistics, consisting of following fields:
- *         - for_lu (float)
- *           The amount of space used in bytes for L\U data structures.
- *         - total_needed (float)
- *           The amount of space needed in bytes to perform factorization.
- *         - expansions (int)
- *           The number of memory expansions during the LU factorization.
- *
- * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *         = 0: successful exit   
- *         < 0: if info = -i, the i-th argument had an illegal value   
- *         > 0: if info = i, and i is   
- *              <= A->ncol: U(i,i) is exactly zero. The factorization has   
- *                    been completed, but the factor U is exactly   
- *                    singular, so the solution and error bounds   
- *                    could not be computed.   
- *              = A->ncol+1: U is nonsingular, but RCOND is less than machine
- *                    precision, meaning that the matrix is singular to
- *                    working precision. Nevertheless, the solution and
- *                    error bounds are computed because there are a number
- *                    of situations where the computed solution can be more
- *                    accurate than the value of RCOND would suggest.   
- *              > A->ncol+1: number of bytes allocated when memory allocation
- *                    failure occurred, plus A->ncol.
- *
- */
-
-    DNformat  *Bstore, *Xstore;
-    double    *Bmat, *Xmat;
-    int       ldb, ldx, nrhs;
-    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
-    SuperMatrix AC; /* Matrix postmultiplied by Pc */
-    int       colequ, equil, nofact, notran, rowequ, permc_spec;
-    trans_t   trant;
-    char      norm[1];
-    int       i, j, info1;
-    double    amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin;
-    int       relax, panel_size;
-    double    diag_pivot_thresh, drop_tol;
-    double    t0;      /* temporary time */
-    double    *utime;
-
-    /* External functions */
-    extern double dlangs(char *, SuperMatrix *);
-    extern double dlamch_(char *);
-
-    Bstore = B->Store;
-    Xstore = X->Store;
-    Bmat   = Bstore->nzval;
-    Xmat   = Xstore->nzval;
-    ldb    = Bstore->lda;
-    ldx    = Xstore->lda;
-    nrhs   = B->ncol;
-
-    *info = 0;
-    nofact = (options->Fact != FACTORED);
-    equil = (options->Equil == YES);
-    notran = (options->Trans == NOTRANS);
-    if ( nofact ) {
-       *(unsigned char *)equed = 'N';
-       rowequ = FALSE;
-       colequ = FALSE;
-    } else {
-       rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-       colequ = lsame_(equed, "C") || lsame_(equed, "B");
-       smlnum = dlamch_("Safe minimum");
-       bignum = 1. / smlnum;
-    }
-
-#if 0
-printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
-       options->Fact, options->Trans, *equed);
-#endif
-
-    /* Test the input parameters */
-    if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern &&
-       options->Fact != SamePattern_SameRowPerm &&
-       !notran && options->Trans != TRANS && options->Trans != CONJ &&
-       !equil && options->Equil != NO)
-       *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-             (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
-             A->Dtype != SLU_D || A->Mtype != SLU_GE )
-       *info = -2;
-    else if (options->Fact == FACTORED &&
-            !(rowequ || colequ || lsame_(equed, "N")))
-       *info = -6;
-    else {
-       if (rowequ) {
-           rcmin = bignum;
-           rcmax = 0.;
-           for (j = 0; j < A->nrow; ++j) {
-               rcmin = SUPERLU_MIN(rcmin, R[j]);
-               rcmax = SUPERLU_MAX(rcmax, R[j]);
-           }
-           if (rcmin <= 0.) *info = -7;
-           else if ( A->nrow > 0)
-               rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-           else rowcnd = 1.;
-       }
-       if (colequ && *info == 0) {
-           rcmin = bignum;
-           rcmax = 0.;
-           for (j = 0; j < A->nrow; ++j) {
-               rcmin = SUPERLU_MIN(rcmin, C[j]);
-               rcmax = SUPERLU_MAX(rcmax, C[j]);
-           }
-           if (rcmin <= 0.) *info = -8;
-           else if (A->nrow > 0)
-               colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-           else colcnd = 1.;
-       }
-       if (*info == 0) {
-           if ( lwork < -1 ) *info = -12;
-           else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
-                     B->Stype != SLU_DN || B->Dtype != SLU_D || 
-                     B->Mtype != SLU_GE )
-               *info = -13;
-           else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
-                     (B->ncol != 0 && B->ncol != X->ncol) ||
-                      X->Stype != SLU_DN ||
-                     X->Dtype != SLU_D || X->Mtype != SLU_GE )
-               *info = -14;
-       }
-    }
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("dgssvx", &i);
-       return;
-    }
-    
-    /* Initialization for factor parameters */
-    panel_size = sp_ienv(1);
-    relax      = sp_ienv(2);
-    diag_pivot_thresh = options->DiagPivotThresh;
-    drop_tol   = 0.0;
-
-    utime = stat->utime;
-    
-    /* Convert A to SLU_NC format when necessary. */
-    if ( A->Stype == SLU_NR ) {
-       NRformat *Astore = A->Store;
-       AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-       dCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
-                              Astore->nzval, Astore->colind, Astore->rowptr,
-                              SLU_NC, A->Dtype, A->Mtype);
-       if ( notran ) { /* Reverse the transpose argument. */
-           trant = TRANS;
-           notran = 0;
-       } else {
-           trant = NOTRANS;
-           notran = 1;
-       }
-    } else { /* A->Stype == SLU_NC */
-       trant = options->Trans;
-       AA = A;
-    }
-
-    if ( nofact && equil ) {
-       t0 = SuperLU_timer_();
-       /* Compute row and column scalings to equilibrate the matrix A. */
-       dgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);
-       
-       if ( info1 == 0 ) {
-           /* Equilibrate matrix A. */
-           dlaqgs(AA, R, C, rowcnd, colcnd, amax, equed);
-           rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-           colequ = lsame_(equed, "C") || lsame_(equed, "B");
-       }
-       utime[EQUIL] = SuperLU_timer_() - t0;
-    }
-
-    if ( nrhs > 0 ) {
-        /* Scale the right hand side if equilibration was performed. */
-        if ( notran ) {
-           if ( rowequ ) {
-               for (j = 0; j < nrhs; ++j)
-                   for (i = 0; i < A->nrow; ++i) {
-                       Bmat[i + j*ldb] *= R[i];
-                   }
-           }
-        } else if ( colequ ) {
-           for (j = 0; j < nrhs; ++j)
-               for (i = 0; i < A->nrow; ++i) {
-                   Bmat[i + j*ldb] *= C[i];
-               }
-        }
-    }
-
-    if ( nofact ) {
-       
-        t0 = SuperLU_timer_();
-       /*
-        * Gnet column permutation vector perm_c[], according to permc_spec:
-        *   permc_spec = NATURAL:  natural ordering 
-        *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
-        *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
-        *   permc_spec = COLAMD:   approximate minimum degree column ordering
-        *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
-        */
-       permc_spec = options->ColPerm;
-       if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
-            get_perm_c(permc_spec, AA, perm_c);
-       utime[COLPERM] = SuperLU_timer_() - t0;
-
-       t0 = SuperLU_timer_();
-       sp_preorder(options, AA, perm_c, etree, &AC);
-       utime[ETREE] = SuperLU_timer_() - t0;
-    
-/*     printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
-              relax, panel_size, sp_ienv(3), sp_ienv(4));
-       fflush(stdout); */
-       
-       /* Compute the LU factorization of A*Pc. */
-       t0 = SuperLU_timer_();
-       dgstrf(options, &AC, drop_tol, relax, panel_size,
-              etree, work, lwork, perm_c, perm_r, L, U, stat, info);
-       utime[FACT] = SuperLU_timer_() - t0;
-       
-       if ( lwork == -1 ) {
-           mem_usage->total_needed = *info - A->ncol;
-           return;
-       }
-    }
-
-    if ( options->PivotGrowth ) {
-        if ( *info > 0 ) {
-           if ( *info <= A->ncol ) {
-               /* Compute the reciprocal pivot growth factor of the leading
-                  rank-deficient *info columns of A. */
-               *recip_pivot_growth = dPivotGrowth(*info, AA, perm_c, L, U);
-           }
-           return;
-        }
-
-        /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */
-        *recip_pivot_growth = dPivotGrowth(A->ncol, AA, perm_c, L, U);
-    }
-
-    if ( options->ConditionNumber ) {
-      if (*info == 0) {
-        /* Estimate the reciprocal of the condition number of A. */
-        t0 = SuperLU_timer_();
-        if ( notran ) {
-           *(unsigned char *)norm = '1';
-        } else {
-           *(unsigned char *)norm = 'I';
-        }
-        anorm = dlangs(norm, AA);
-        dgscon(norm, L, U, anorm, rcond, stat, info);
-        utime[RCOND] = SuperLU_timer_() - t0;
-      } else *rcond = 0;
-    }
-    
-    if ( *info == 0 && nrhs > 0 ) {
-        /* Compute the solution matrix X. */
-        for (j = 0; j < nrhs; j++)  /* Save a copy of the right hand sides */
-            for (i = 0; i < B->nrow; i++)
-               Xmat[i + j*ldx] = Bmat[i + j*ldb];
-    
-        t0 = SuperLU_timer_();
-        dgstrs (trant, L, U, perm_c, perm_r, X, stat, info);
-        utime[SOLVE] = SuperLU_timer_() - t0;
-    
-        /* Use iterative refinement to improve the computed solution and 
compute
-           error bounds and backward error estimates for it. */
-        t0 = SuperLU_timer_();
-        if ( options->IterRefine != NOREFINE ) {
-            dgsrfs(trant, AA, L, U, perm_c, perm_r, equed, R, C, B,
-                   X, ferr, berr, stat, info);
-        } else {
-            for (j = 0; j < nrhs; ++j) ferr[j] = berr[j] = 1.0;
-        }
-        utime[REFINE] = SuperLU_timer_() - t0;
-
-        /* Transform the solution matrix X to a solution of the original 
system. */
-        if ( notran ) {
-           if ( colequ ) {
-               for (j = 0; j < nrhs; ++j)
-                   for (i = 0; i < A->nrow; ++i) {
-                        Xmat[i + j*ldx] *= C[i];
-                   }
-           }
-        } else if ( rowequ ) {
-           for (j = 0; j < nrhs; ++j)
-               for (i = 0; i < A->nrow; ++i) {
-                   Xmat[i + j*ldx] *= R[i];
-                }
-        }
-    } /* end if nrhs > 0 */
-
-    if ( *info == 0 && options->ConditionNumber ) {
-        /* Set INFO = A->ncol+1 if the matrix is singular to working 
precision. */
-        if ( *rcond < dlamch_("E") ) *info = A->ncol + 1;
-    }
-
-    if ( *info != -10000000 && nofact ) {
-        dQuerySpace(L, U, mem_usage);
-        Destroy_CompCol_Permuted(&AC);
-    }
-    if ( A->Stype == SLU_NR ) {
-       Destroy_SuperMatrix_Store(AA);
-       SUPERLU_FREE(AA);
-    }
-
-}
diff --git a/superlu/dgstrf.c b/superlu/dgstrf.c
deleted file mode 100644
index 035e624a..00000000
--- a/superlu/dgstrf.c
+++ /dev/null
@@ -1,441 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include "slu_ddefs.h"
-
-void
-dgstrf (superlu_options_t *options, SuperMatrix *A, double drop_tol,
-        int relax, int panel_size, int *etree, void *work, int lwork,
-        int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
-        SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * DGSTRF computes an LU factorization of a general sparse m-by-n
- * matrix A using partial pivoting with row interchanges.
- * The factorization has the form
- *     Pr * A = L * U
- * where Pr is a row permutation matrix, L is lower triangular with unit
- * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper 
- * triangular (upper trapezoidal if A->nrow < A->ncol).
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed.
- *
- * A        (input) SuperMatrix*
- *         Original matrix A, permuted by columns, of dimension
- *          (A->nrow, A->ncol). The type of A can be:
- *          Stype = SLU_NCP; Dtype = SLU_D; Mtype = SLU_GE.
- *
- * drop_tol (input) double (NOT IMPLEMENTED)
- *         Drop tolerance parameter. At step j of the Gaussian elimination,
- *          if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
- *          0 <= drop_tol <= 1. The default value of drop_tol is 0.
- *
- * relax    (input) int
- *          To control degree of relaxing supernodes. If the number
- *          of nodes (columns) in a subtree of the elimination tree is less
- *          than relax, this subtree is considered as one supernode,
- *          regardless of the row structures of those columns.
- *
- * panel_size (input) int
- *          A panel consists of at most panel_size consecutive columns.
- *
- * etree    (input) int*, dimension (A->ncol)
- *          Elimination tree of A'*A.
- *          Note: etree is a vector of parent pointers for a forest whose
- *          vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *          On input, the columns of A should be permuted so that the
- *          etree is in a certain postorder.
- *
- * work     (input/output) void*, size (lwork) (in bytes)
- *          User-supplied work space and space for the output data structures.
- *          Not referenced if lwork = 0;
- *
- * lwork   (input) int
- *         Specifies the size of work array in bytes.
- *         = 0:  allocate space internally by system malloc;
- *         > 0:  use user-supplied work array of length lwork in bytes,
- *               returns error if space runs out.
- *         = -1: the routine guesses the amount of space needed without
- *               performing the factorization, and returns it in
- *               *info; no other side effects.
- *
- * perm_c   (input) int*, dimension (A->ncol)
- *         Column permutation vector, which defines the 
- *          permutation matrix Pc; perm_c[i] = j means column i of A is 
- *          in position j in A*Pc.
- *          When searching for diagonal, perm_c[*] is applied to the
- *          row subscripts of A, so that diagonal threshold pivoting
- *          can find the diagonal of A, rather than that of A*Pc.
- *
- * perm_r   (input/output) int*, dimension (A->nrow)
- *          Row permutation vector which defines the permutation matrix Pr,
- *          perm_r[i] = j means row i of A is in position j in Pr*A.
- *          If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *             will try to use the input perm_r, unless a certain threshold
- *             criterion is violated. In that case, perm_r is overwritten by
- *             a new permutation determined by partial pivoting or diagonal
- *             threshold pivoting.
- *          Otherwise, perm_r is output argument;
- *
- * L        (output) SuperMatrix*
- *          The factor L from the factorization Pr*A=L*U; use compressed row 
- *          subscripts storage for supernodes, i.e., L has type: 
- *          Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
- *
- * U        (output) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *          storage scheme, i.e., U has types: Stype = SLU_NC, 
- *          Dtype = SLU_D, Mtype = SLU_TRU.
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation 
count.
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info     (output) int*
- *          = 0: successful exit
- *          < 0: if info = -i, the i-th argument had an illegal value
- *          > 0: if info = i, and i is
- *             <= A->ncol: U(i,i) is exactly zero. The factorization has
- *                been completed, but the factor U is exactly singular,
- *                and division by zero will occur if it is used to solve a
- *                system of equations.
- *             > A->ncol: number of bytes allocated when memory allocation
- *                failure occurred, plus A->ncol. If lwork = -1, it is
- *                the estimated amount of space needed, plus A->ncol.
- *
- * ======================================================================
- *
- * Local Working Arrays: 
- * ======================
- *   m = number of rows in the matrix
- *   n = number of columns in the matrix
- *
- *   xprune[0:n-1]: xprune[*] points to locations in subscript 
- *     vector lsub[*]. For column i, xprune[i] denotes the point where 
- *     structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need 
- *     to be traversed for symbolic factorization.
- *
- *   marker[0:3*m-1]: marker[i] = j means that node i has been 
- *     reached when working on column j.
- *     Storage: relative to original row subscripts
- *     NOTE: There are 3 of them: marker/marker1 are used for panel dfs, 
- *           see dpanel_dfs.c; marker2 is used for inner-factorization,
- *            see dcolumn_dfs.c.
- *
- *   parent[0:m-1]: parent vector used during dfs
- *      Storage: relative to new row subscripts
- *
- *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs) 
- *     unexplored neighbor of i in lsub[*]
- *
- *   segrep[0:nseg-1]: contains the list of supernodal representatives
- *     in topological order of the dfs. A supernode representative is the 
- *     last column of a supernode.
- *      The maximum size of segrep[] is n.
- *
- *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a 
- *     supernodal representative r, repfnz[r] is the location of the first 
- *     nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
- *     indicates the supernode r has been explored.
- *     NOTE: There are W of them, each used for one column of a panel. 
- *
- *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below 
- *      the panel diagonal. These are filled in during dpanel_dfs(), and are
- *      used later in the inner LU factorization within the panel.
- *     panel_lsub[]/dense[] pair forms the SPA data structure.
- *     NOTE: There are W of them.
- *
- *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
- *                NOTE: there are W of them.
- *
- *   tempv[0:*]: real temporary used for dense numeric kernels;
- *     The size of this array is defined by NUM_TEMPV() in dsp_defs.h.
- *
- */
-    /* Local working arrays */
-    NCPformat *Astore;
-    int       *iperm_r = NULL; /* inverse of perm_r; used when 
-                                  options->Fact == SamePattern_SameRowPerm */
-    int       *iperm_c; /* inverse of perm_c */
-    int       *iwork;
-    double    *dwork;
-    int              *segrep, *repfnz, *parent, *xplore;
-    int              *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide 
SPA */
-    int              *xprune;
-    int              *marker;
-    double    *dense, *tempv;
-    int       *relax_end;
-    double    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int       *xsup, *supno;
-    int       *xlsub, *xlusup, *xusub;
-    int       nzlumax;
-    static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
-
-    /* Local scalars */
-    fact_t    fact = options->Fact;
-    double    diag_pivot_thresh = options->DiagPivotThresh;
-    int       pivrow;   /* pivotal row number in the original matrix A */
-    int       nseg1;   /* no of segments in U-column above panel row jcol */
-    int       nseg;    /* no of segments in each U-column */
-    register int jcol; 
-    register int kcol; /* end column of a relaxed snode */
-    register int icol;
-    register int i, k, jj, new_next, iinfo;
-    int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
-    int       w_def;   /* upper bound on panel width */
-    int       usepr, iperm_r_allocated = 0;
-    int       nnzL, nnzU;
-    int       *panel_histo = stat->panel_histo;
-    flops_t   *ops = stat->ops;
-
-    iinfo    = 0;
-    m        = A->nrow;
-    n        = A->ncol;
-    min_mn   = SUPERLU_MIN(m, n);
-    Astore   = A->Store;
-    a        = Astore->nzval;
-    asub     = Astore->rowind;
-    xa_begin = Astore->colbeg;
-    xa_end   = Astore->colend;
-
-    /* Allocate storage common to the factor routines */
-    *info = dLUMemInit(fact, work, lwork, m, n, Astore->nnz,
-                       panel_size, L, U, &Glu, &iwork, &dwork);
-    if ( *info ) return;
-    
-    xsup    = Glu.xsup;
-    supno   = Glu.supno;
-    xlsub   = Glu.xlsub;
-    xlusup  = Glu.xlusup;
-    xusub   = Glu.xusub;
-    
-    SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
-            &repfnz, &panel_lsub, &xprune, &marker);
-    dSetRWork(m, panel_size, dwork, &dense, &tempv);
-    
-    usepr = (fact == SamePattern_SameRowPerm);
-    if ( usepr ) {
-       /* Compute the inverse of perm_r */
-       iperm_r = (int *) intMalloc(m);
-       for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
-       iperm_r_allocated = 1;
-    }
-    iperm_c = (int *) intMalloc(n);
-    for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
-
-    /* Identify relaxed snodes */
-    relax_end = (int *) intMalloc(n);
-    if ( options->SymmetricMode == YES ) {
-        heap_relax_snode(n, etree, relax, marker, relax_end); 
-    } else {
-        relax_snode(n, etree, relax, marker, relax_end); 
-    }
-    
-    ifill (perm_r, m, EMPTY);
-    ifill (marker, m * NO_MARKER, EMPTY);
-    supno[0] = -1;
-    xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
-    w_def    = panel_size;
-
-    /* 
-     * Work on one "panel" at a time. A panel is one of the following: 
-     *    (a) a relaxed supernode at the bottom of the etree, or
-     *    (b) panel_size contiguous columns, defined by the user
-     */
-    for (jcol = 0; jcol < min_mn; ) {
-
-      if (handle_getfem_callback() != 0) {
-       iinfo = *info = -333333333; goto HOUSTON_WE_HAVE_A_PROBLEM; 
-       break;
-      }
-
-
-       if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
-           kcol = relax_end[jcol];       /* end of the relaxed snode */
-           panel_histo[kcol-jcol+1]++;
-
-           /* --------------------------------------
-            * Factorize the relaxed supernode(jcol:kcol) 
-            * -------------------------------------- */
-           /* Determine the union of the row structure of the snode */
-           if ( (*info = dsnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
-                                   xprune, marker, &Glu)) != 0 )
-               return;
-
-            nextu    = xusub[jcol];
-           nextlu   = xlusup[jcol];
-           jsupno   = supno[jcol];
-           fsupc    = xsup[jsupno];
-           new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
-           nzlumax = Glu.nzlumax;
-           while ( new_next > nzlumax ) {
-               if ( (*info = dLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) 
)
-                   return;
-           }
-    
-           for (icol = jcol; icol<= kcol; icol++) {
-               xusub[icol+1] = nextu;
-               
-               /* Scatter into SPA dense[*] */
-               for (k = xa_begin[icol]; k < xa_end[icol]; k++)
-                   dense[asub[k]] = a[k];
-
-               /* Numeric update within the snode */
-               dsnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);
-
-               if ( (*info = dpivotL(icol, diag_pivot_thresh, &usepr, perm_r,
-                                     iperm_r, iperm_c, &pivrow, &Glu, stat)) )
-                   if ( iinfo == 0 ) iinfo = *info;
-               
-#ifdef DEBUG
-               dprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu);
-#endif
-
-           }
-
-           jcol = icol;
-
-       } else { /* Work on one panel of panel_size columns */
-           
-           /* Adjust panel_size so that a panel won't overlap with the next 
-            * relaxed snode.
-            */
-           panel_size = w_def;
-           for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) 
-               if ( relax_end[k] != EMPTY ) {
-                   panel_size = k - jcol;
-                   break;
-               }
-           if ( k == min_mn ) panel_size = min_mn - jcol;
-           panel_histo[panel_size]++;
-
-           /* symbolic factor on a panel of columns */
-           dpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
-                     dense, panel_lsub, segrep, repfnz, xprune,
-                     marker, parent, xplore, &Glu);
-           
-           /* numeric sup-panel updates in topological order */
-           dpanel_bmod(m, panel_size, jcol, nseg1, dense,
-                       tempv, segrep, repfnz, &Glu, stat);
-           
-           /* Sparse LU within the panel, and below panel diagonal */
-           for ( jj = jcol; jj < jcol + panel_size; jj++) {
-               k = (jj - jcol) * m; /* column index for w-wide arrays */
-
-               nseg = nseg1;   /* Begin after all the panel segments */
-
-               if ((*info = dcolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
-                                       segrep, &repfnz[k], xprune, marker,
-                                       parent, xplore, &Glu)) != 0) 
-                 goto HOUSTON_WE_HAVE_A_PROBLEM;
-
-               /* Numeric updates */
-               if ((*info = dcolumn_bmod(jj, (nseg - nseg1), &dense[k],
-                                        tempv, &segrep[nseg1], &repfnz[k],
-                                        jcol, &Glu, stat)) != 0)
-                 goto HOUSTON_WE_HAVE_A_PROBLEM;
-               
-               /* Copy the U-segments to ucol[*] */
-               if ((*info = dcopy_to_ucol(jj, nseg, segrep, &repfnz[k],
-                                         perm_r, &dense[k], &Glu)) != 0)
-                 goto HOUSTON_WE_HAVE_A_PROBLEM;
-
-               if ( (*info = dpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
-                                     iperm_r, iperm_c, &pivrow, &Glu, stat)) )
-                 goto HOUSTON_WE_HAVE_A_PROBLEM;
-
-               /* Prune columns (0:jj-1) using column jj */
-               dpruneL(jj, perm_r, pivrow, nseg, segrep,
-                        &repfnz[k], xprune, &Glu);
-
-               /* Reset repfnz[] for this column */
-               resetrep_col (nseg, segrep, &repfnz[k]);
-               
-#ifdef DEBUG
-               dprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu);
-#endif
-
-           }
-
-           jcol += panel_size; /* Move to the next panel */
-
-       } /* else */
-
-    } /* for */
-
-    *info = iinfo;
-    
- HOUSTON_WE_HAVE_A_PROBLEM: /* try to avoid ugly leaks.. */
-    if ( m > n ) {
-       k = 0;
-        for (i = 0; i < m; ++i) 
-            if ( perm_r[i] == EMPTY ) {
-               perm_r[i] = n + k;
-               ++k;
-           }
-    }
-
-    if (*info == 0) {
-      countnz(min_mn, xprune, &nnzL, &nnzU, &Glu);
-      fixupL(min_mn, perm_r, &Glu);
-    }
-
-    dLUWorkFree(iwork, dwork, &Glu); /* Free work space and compress storage */
-
-    if ( fact == SamePattern_SameRowPerm ) {
-        /* L and U structures may have changed due to possibly different
-          pivoting, even though the storage is available.
-          There could also be memory expansions, so the array locations
-           may have changed, */
-        ((SCformat *)L->Store)->nnz = nnzL;
-       ((SCformat *)L->Store)->nsuper = Glu.supno[n];
-       ((SCformat *)L->Store)->nzval = Glu.lusup;
-       ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
-       ((SCformat *)L->Store)->rowind = Glu.lsub;
-       ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
-       ((NCformat *)U->Store)->nnz = nnzU;
-       ((NCformat *)U->Store)->nzval = Glu.ucol;
-       ((NCformat *)U->Store)->rowind = Glu.usub;
-       ((NCformat *)U->Store)->colptr = Glu.xusub;
-    } else {
-        dCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, 
-                                Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
-                                Glu.xsup, SLU_SC, SLU_D, SLU_TRLU);
-       dCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, 
-                              Glu.usub, Glu.xusub, SLU_NC, SLU_D, SLU_TRU);
-    }
-    
-    ops[FACT] += ops[TRSV] + ops[GEMV];        
-    
-    if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
-    SUPERLU_FREE (iperm_c);
-    SUPERLU_FREE (relax_end);
-
-}
diff --git a/superlu/dgstrs.c b/superlu/dgstrs.c
deleted file mode 100644
index 8735b879..00000000
--- a/superlu/dgstrs.c
+++ /dev/null
@@ -1,330 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include "slu_ddefs.h"
-extern void dtrsm_();
-extern void dgemm_();
-
-/* 
- * Function prototypes 
- */
-void dusolve(int, int, double*, double*);
-void dlsolve(int, int, double*, double*);
-void dmatvec(int, int, int, double*, double*, double*);
-
-
-void
-dgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
-        int *perm_c, int *perm_r, SuperMatrix *B,
-        SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * DGSTRS solves a system of linear equations A*X=B or A'*X=B
- * with A sparse and B dense, using the LU factorization computed by
- * DGSTRF.
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *
- * L       (input) SuperMatrix*
- *         The factor L from the factorization Pr*A*Pc=L*U as computed by
- *         dgstrf(). Use compressed row subscripts storage for supernodes,
- *         i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
- *
- * U       (input) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U as computed by
- *         dgstrf(). Use column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
- *
- * perm_c  (input) int*, dimension (L->ncol)
- *        Column permutation vector, which defines the 
- *         permutation matrix Pc; perm_c[i] = j means column i of A is 
- *         in position j in A*Pc.
- *
- * perm_r  (input) int*, dimension (L->nrow)
- *         Row permutation vector, which defines the permutation matrix Pr; 
- *         perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation 
count.
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *        = 0: successful exit
- *        < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-#endif
-    int      incx = 1, incy = 1;
-#ifdef USE_VENDOR_BLAS
-    double   alpha = 1.0, beta = 1.0;
-    double   *work_col;
-#endif
-    DNformat *Bstore;
-    double   *Bmat;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    double   *Lval, *Uval;
-    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
-    int      i, j, k, iptr, jcol, n, ldb, nrhs;
-    double   *work, *rhs_work, *soln;
-    flops_t  solve_ops;
-    void dprint_soln();
-
-    /* Test input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    ldb = Bstore->lda;
-    nrhs = B->ncol;
-    if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_D || L->Mtype != SLU_TRLU )
-       *info = -2;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-             U->Stype != SLU_NC || U->Dtype != SLU_D || U->Mtype != SLU_TRU )
-       *info = -3;
-    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE )
-       *info = -6;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("dgstrs", &i);
-       return;
-    }
-
-    n = L->nrow;
-    work = doubleCalloc(n * nrhs);
-    if ( !work ) ABORT("Malloc fails for local work[].");
-    soln = doubleMalloc(n);
-    if ( !soln ) ABORT("Malloc fails for local soln[].");
-
-    Bmat = Bstore->nzval;
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-    
-    if ( trans == NOTRANS ) {
-       /* Permute right hand sides to form Pr*B */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       
-       /* Forward solve PLy=Pb. */
-       for (k = 0; k <= Lstore->nsuper; k++) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           nrow = nsupr - nsupc;
-
-           solve_ops += nsupc * (nsupc - 1) * nrhs;
-           solve_ops += 2 * nrow * nsupc * nrhs;
-           
-           if ( nsupc == 1 ) {
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   luptr = L_NZ_START(fsupc);
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr];
-                   }
-               }
-           } else {
-               luptr = L_NZ_START(fsupc);
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("N", strlen("N"));
-               ftcs3 = _cptofcd("U", strlen("U"));
-               STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               
-               SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-#else
-               dtrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               
-               dgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-#endif
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   work_col = &work[j*n];
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       rhs_work[irow] -= work_col[i]; /* Scatter */
-                       work_col[i] = 0.0;
-                       iptr++;
-                   }
-               }
-#else          
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   dlsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
-                   dmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
-                           &rhs_work[fsupc], &work[0] );
-
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       rhs_work[irow] -= work[i];
-                       work[i] = 0.0;
-                       iptr++;
-                   }
-               }
-#endif             
-           } /* else ... */
-       } /* for L-solve */
-
-#ifdef DEBUG
-       printf("After L-solve: y=\n");
-       dprint_soln(n, nrhs, Bmat);
-#endif
-
-       /*
-        * Back solve Ux=y.
-        */
-       for (k = Lstore->nsuper; k >= 0; k--) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           luptr = L_NZ_START(fsupc);
-
-           solve_ops += nsupc * (nsupc + 1) * nrhs;
-
-           if ( nsupc == 1 ) {
-               rhs_work = &Bmat[0];
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work[fsupc] /= Lval[luptr];
-                   rhs_work += ldb;
-               }
-           } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("U", strlen("U"));
-               ftcs3 = _cptofcd("N", strlen("N"));
-               STRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#else
-               dtrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#endif
-#else          
-               for (j = 0; j < nrhs; j++)
-                   dusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
-#endif         
-           }
-
-           for (j = 0; j < nrhs; ++j) {
-               rhs_work = &Bmat[j*ldb];
-               for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
-                   solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
-                       irow = U_SUB(i);
-                       rhs_work[irow] -= rhs_work[jcol] * Uval[i];
-                   }
-               }
-           }
-           
-       } /* for U-solve */
-
-#ifdef DEBUG
-       printf("After U-solve: x=\n");
-       dprint_soln(n, nrhs, Bmat);
-#endif
-
-       /* Compute the final solution X := Pc*X. */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       
-        stat->ops[SOLVE] = solve_ops;
-
-    } else { /* Solve A'*X=B or CONJ(A)*X=B */
-       /* Permute right hand sides to form Pc'*B. */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-
-       stat->ops[SOLVE] = 0;
-       for (k = 0; k < nrhs; ++k) {
-           
-           /* Multiply by inv(U'). */
-           sp_dtrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
-           
-           /* Multiply by inv(L'). */
-           sp_dtrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
-           
-       }
-       /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-
-    }
-
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(soln);
-}
-
-/*
- * Diagnostic print of the solution vector 
- */
-void
-dprint_soln(int n, int nrhs, double *soln)
-{
-    int i;
-
-    for (i = 0; i < n; i++) 
-       printf("\t%d: %.4f\n", i, soln[i]);
-}
diff --git a/superlu/dgstrsL.c b/superlu/dgstrsL.c
deleted file mode 100644
index 3ac6dfec..00000000
--- a/superlu/dgstrsL.c
+++ /dev/null
@@ -1,230 +0,0 @@
-
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * September 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include "slu_ddefs.h"
-#include "slu_util.h"
-
-
-/* 
- * Function prototypes 
- */
-void dusolve(int, int, double*, double*);
-void dlsolve(int, int, double*, double*);
-void dmatvec(int, int, int, double*, double*, double*);
-
-
-void
-dgstrsL(char *trans, SuperMatrix *L, int *perm_r, SuperMatrix *B, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * DGSTRSL only performs the L-solve using the LU factorization computed
- * by DGSTRF.
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans   (input) char*
- *          Specifies the form of the system of equations:
- *          = 'N':  A * X = B  (No transpose)
- *          = 'T':  A'* X = B  (Transpose)
- *          = 'C':  A**H * X = B  (Conjugate transpose)
- *
- * L       (input) SuperMatrix*
- *         The factor L from the factorization Pr*A*Pc=L*U as computed by
- *         dgstrf(). Use compressed row subscripts storage for supernodes,
- *         i.e., L has types: Stype = SLU_SC, Dtype = SLU_D, Mtype = SLU_TRLU.
- *
- * U       (input) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U as computed by
- *         dgstrf(). Use column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_D, Mtype = SLU_TRU.
- *
- * perm_r  (input) int*, dimension (L->nrow)
- *         Row permutation vector, which defines the permutation matrix Pr; 
- *         perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_D, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * info    (output) int*
- *        = 0: successful exit
- *        < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-#endif
-    int      incx = 1, incy = 1;
-    double   alpha = 1.0, beta = 1.0;
-    DNformat *Bstore;
-    double   *Bmat;
-    SCformat *Lstore;
-    double   *Lval, *Uval;
-    int      nrow, notran;
-    int      fsupc, nsupr, nsupc, luptr, istart, irow;
-    int      i, j, k, iptr, jcol, n, ldb, nrhs;
-    double   *work, *work_col, *rhs_work, *soln;
-    flops_t  solve_ops;
-    extern SuperLUStat_t SuperLUStat;
-    void dprint_soln();
-
-    /* Test input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    ldb = Bstore->lda;
-    nrhs = B->ncol;
-    notran = lsame_(trans, "N");
-    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C") ) *info = -1;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_D || L->Mtype != SLU_TRLU )
-       *info = -2;
-    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_D || B->Mtype != SLU_GE )
-       *info = -4;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("dgstrsL", &i);
-       return;
-    }
-
-    n = L->nrow;
-    work = doubleCalloc(n * nrhs);
-    if ( !work ) ABORT("Malloc fails for local work[].");
-    soln = doubleMalloc(n);
-    if ( !soln ) ABORT("Malloc fails for local soln[].");
-
-    Bmat = Bstore->nzval;
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    solve_ops = 0;
-    
-    if ( notran ) {
-       /* Permute right hand sides to form Pr*B */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       
-       /* Forward solve PLy=Pb. */
-       for (k = 0; k <= Lstore->nsuper; k++) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           nrow = nsupr - nsupc;
-
-           solve_ops += nsupc * (nsupc - 1) * nrhs;
-           solve_ops += 2 * nrow * nsupc * nrhs;
-           
-           if ( nsupc == 1 ) {
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   luptr = L_NZ_START(fsupc);
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr];
-                   }
-               }
-           } else {
-               luptr = L_NZ_START(fsupc);
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("N", strlen("N"));
-               ftcs3 = _cptofcd("U", strlen("U"));
-               STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               
-               SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-#else
-               dtrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               
-               dgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-#endif
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   work_col = &work[j*n];
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       rhs_work[irow] -= work_col[i]; /* Scatter */
-                       work_col[i] = 0.0;
-                       iptr++;
-                   }
-               }
-#else          
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   dlsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
-                   dmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
-                           &rhs_work[fsupc], &work[0] );
-
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       rhs_work[irow] -= work[i];
-                       work[i] = 0.0;
-                       iptr++;
-                   }
-               }
-#endif             
-           } /* else ... */
-       } /* for L-solve */
-
-#ifdef DEBUG
-       printf("After L-solve: y=\n");
-       dprint_soln(n, nrhs, Bmat);
-#endif
-       
-        SuperLUStat.ops[SOLVE] = solve_ops;
-
-    } else { 
-      printf("Transposed solve not implemented.\n");
-      exit(0);
-    }
-
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(soln);
-}
-
-/*
- * Diagnostic print of the solution vector 
- */
-void
-dprint_soln(int n, int nrhs, double *soln)
-{
-    int i;
-
-    for (i = 0; i < n; i++) 
-       printf("\t%d: %.4f\n", i, soln[i]);
-}
diff --git a/superlu/dlacon.c b/superlu/dlacon.c
deleted file mode 100644
index 26d21220..00000000
--- a/superlu/dlacon.c
+++ /dev/null
@@ -1,250 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include <math.h>
-#include "slu_Cnames.h"
-
-int
-dlacon_(int *n, double *v, double *x, int *isgn, double *est, int *kase)
-
-{
-/*
-    Purpose   
-    =======   
-
-    DLACON estimates the 1-norm of a square matrix A.   
-    Reverse communication is used for evaluating matrix-vector products. 
-  
-
-    Arguments   
-    =========   
-
-    N      (input) INT
-           The order of the matrix.  N >= 1.   
-
-    V      (workspace) DOUBLE PRECISION array, dimension (N)   
-           On the final return, V = A*W,  where  EST = norm(V)/norm(W)   
-           (W is not returned).   
-
-    X      (input/output) DOUBLE PRECISION array, dimension (N)   
-           On an intermediate return, X should be overwritten by   
-                 A * X,   if KASE=1,   
-                 A' * X,  if KASE=2,
-           and DLACON must be re-called with all the other parameters   
-           unchanged.   
-
-    ISGN   (workspace) INT array, dimension (N)
-
-    EST    (output) DOUBLE PRECISION   
-           An estimate (a lower bound) for norm(A).   
-
-    KASE   (input/output) INT
-           On the initial call to DLACON, KASE should be 0.   
-           On an intermediate return, KASE will be 1 or 2, indicating   
-           whether X should be overwritten by A * X  or A' * X.   
-           On the final return from DLACON, KASE will again be 0.   
-
-    Further Details   
-    ======= =======   
-
-    Contributed by Nick Higham, University of Manchester.   
-    Originally named CONEST, dated March 16, 1988.   
-
-    Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of 
-    a real or complex matrix, with applications to condition estimation", 
-    ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.   
-    ===================================================================== 
-*/
-
-    /* Table of constant values */
-    int c__1 = 1;
-    double      zero = 0.0;
-    double      one = 1.0;
-    
-    /* Local variables */
-    static int iter;
-    static int jump, jlast;
-    static double altsgn, estold;
-    static int i, j;
-    double temp;
-#ifdef _CRAY
-    extern int ISAMAX(int *, double *, int *);
-    extern double SASUM(int *, double *, int *);
-    extern int SCOPY(int *, double *, int *, double *, int *);
-#else
-    extern int idamax_(int *, double *, int *);
-    extern double dasum_(int *, double *, int *);
-    extern int dcopy_(int *, double *, int *, double *, int *);
-#endif
-#define d_sign(a, b) (b >= 0 ? fabs(a) : -fabs(a))    /* Copy sign */
-#define i_dnnt(a) \
-       ( a>=0 ? floor(a+.5) : -floor(.5-a) ) /* Round to nearest integer */
-
-    if ( *kase == 0 ) {
-       for (i = 0; i < *n; ++i) {
-           x[i] = 1. / (double) (*n);
-       }
-       *kase = 1;
-       jump = 1;
-       return 0;
-    }
-
-    switch (jump) {
-       case 1:  goto L20;
-       case 2:  goto L40;
-       case 3:  goto L70;
-       case 4:  goto L110;
-       case 5:  goto L140;
-    }
-
-    /*     ................ ENTRY   (JUMP = 1)   
-          FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X. */
-  L20:
-    if (*n == 1) {
-       v[0] = x[0];
-       *est = fabs(v[0]);
-       /*        ... QUIT */
-       goto L150;
-    }
-#ifdef _CRAY
-    *est = SASUM(n, x, &c__1);
-#else
-    *est = dasum_(n, x, &c__1);
-#endif
-
-    for (i = 0; i < *n; ++i) {
-       x[i] = d_sign(one, x[i]);
-       isgn[i] = i_dnnt(x[i]);
-    }
-    *kase = 2;
-    jump = 2;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 2)   
-          FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */
-L40:
-#ifdef _CRAY
-    j = ISAMAX(n, &x[0], &c__1);
-#else
-    j = idamax_(n, &x[0], &c__1);
-#endif
-    --j;
-    iter = 2;
-
-    /*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */
-L50:
-    for (i = 0; i < *n; ++i) x[i] = zero;
-    x[j] = one;
-    *kase = 1;
-    jump = 3;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 3)   
-          X HAS BEEN OVERWRITTEN BY A*X. */
-L70:
-#ifdef _CRAY
-    SCOPY(n, x, &c__1, v, &c__1);
-#else
-    dcopy_(n, x, &c__1, v, &c__1);
-#endif
-    estold = *est;
-#ifdef _CRAY
-    *est = SASUM(n, v, &c__1);
-#else
-    *est = dasum_(n, v, &c__1);
-#endif
-
-    for (i = 0; i < *n; ++i)
-       if (i_dnnt(d_sign(one, x[i])) != isgn[i])
-           goto L90;
-
-    /*     REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. */
-    goto L120;
-
-L90:
-    /*     TEST FOR CYCLING. */
-    if (*est <= estold) goto L120;
-
-    for (i = 0; i < *n; ++i) {
-       x[i] = d_sign(one, x[i]);
-       isgn[i] = i_dnnt(x[i]);
-    }
-    *kase = 2;
-    jump = 4;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 4)   
-          X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X. */
-L110:
-    jlast = j;
-#ifdef _CRAY
-    j = ISAMAX(n, &x[0], &c__1);
-#else
-    j = idamax_(n, &x[0], &c__1);
-#endif
-    --j;
-    if (x[jlast] != fabs(x[j]) && iter < 5) {
-       ++iter;
-       goto L50;
-    }
-
-    /*     ITERATION COMPLETE.  FINAL STAGE. */
-L120:
-    altsgn = 1.;
-    for (i = 1; i <= *n; ++i) {
-       x[i-1] = altsgn * ((double)(i - 1) / (double)(*n - 1) + 1.);
-       altsgn = -altsgn;
-    }
-    *kase = 1;
-    jump = 5;
-    return 0;
-    
-    /*     ................ ENTRY   (JUMP = 5)   
-          X HAS BEEN OVERWRITTEN BY A*X. */
-L140:
-#ifdef _CRAY
-    temp = SASUM(n, x, &c__1) / (double)(*n * 3) * 2.;
-#else
-    temp = dasum_(n, x, &c__1) / (double)(*n * 3) * 2.;
-#endif
-    if (temp > *est) {
-#ifdef _CRAY
-       SCOPY(n, &x[0], &c__1, &v[0], &c__1);
-#else
-       dcopy_(n, &x[0], &c__1, &v[0], &c__1);
-#endif
-       *est = temp;
-    }
-
-L150:
-    *kase = 0;
-    return 0;
-
-} /* dlacon_ */
diff --git a/superlu/dlamch.c b/superlu/dlamch.c
deleted file mode 100644
index e01db465..00000000
--- a/superlu/dlamch.c
+++ /dev/null
@@ -1,1004 +0,0 @@
-#include <stdio.h>
-#include "slu_Cnames.h"
-
-#define TRUE_ (1)
-#define FALSE_ (0)
-#define abs(x) ((x) >= 0 ? (x) : -(x))
-#define min(a,b) ((a) <= (b) ? (a) : (b))
-#define max(a,b) ((a) >= (b) ? (a) : (b))
-
-double dlamch_(char *cmach)
-{
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Copyright (c) 1992-2013 The University of Tennessee and The University
-                        of Tennessee Research Foundation.  All rights
-                        reserved.
-       Copyright (c) 2000-2013 The University of California Berkeley. All
-                        rights reserved.
-       Copyright (c) 2006-2013 The University of Colorado Denver.  All rights
-                        reserved.
-
-       Redistribution and use in source and binary forms, with or without
-       modification, are permitted provided that the following conditions are
-       met:
-
-       - Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-
-       - Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer listed
-         in this license in the documentation and/or other materials
-         provided with the distribution.
-
-       - Neither the name of the copyright holders nor the names of its
-         contributors may be used to endorse or promote products derived from
-         this software without specific prior written permission.
-
-       The copyright holders provide no reassurances that the source code
-       provided does not infringe any patent, copyright, or any other
-       intellectual property rights of third parties.  The copyright holders
-       disclaim any liability to any recipient for claims brought against
-       recipient by any third party for infringement of that parties
-       intellectual property rights.
-
-       THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-       "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-       LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-       A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-       OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-       SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-       LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-       DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-       THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-       (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-       OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-    Purpose   
-    =======   
-
-    DLAMCH determines double precision machine parameters.   
-
-    Arguments   
-    =========   
-
-    CMACH   (input) CHARACTER*1   
-            Specifies the value to be returned by DLAMCH:   
-            = 'E' or 'e',   DLAMCH := eps   
-            = 'S' or 's ,   DLAMCH := sfmin   
-            = 'B' or 'b',   DLAMCH := base   
-            = 'P' or 'p',   DLAMCH := eps*base   
-            = 'N' or 'n',   DLAMCH := t   
-            = 'R' or 'r',   DLAMCH := rnd   
-            = 'M' or 'm',   DLAMCH := emin   
-            = 'U' or 'u',   DLAMCH := rmin   
-            = 'L' or 'l',   DLAMCH := emax   
-            = 'O' or 'o',   DLAMCH := rmax   
-
-            where   
-
-            eps   = relative machine precision   
-            sfmin = safe minimum, such that 1/sfmin does not overflow   
-            base  = base of the machine   
-            prec  = eps*base   
-            t     = number of (base) digits in the mantissa   
-            rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise   
-            emin  = minimum exponent before (gradual) underflow   
-            rmin  = underflow threshold - base**(emin-1)   
-            emax  = largest exponent before overflow   
-            rmax  = overflow threshold  - (base**emax)*(1-eps)   
-
-   ===================================================================== 
-*/
-
-    static int first = TRUE_;
-
-    /* System generated locals */
-    int i__1;
-    double ret_val;
-    /* Builtin functions */
-    double pow_di(double *, int *);
-    /* Local variables */
-    static double base;
-    static int beta;
-    static double emin, prec, emax;
-    static int imin, imax;
-    static int lrnd;
-    static double rmin, rmax, t, rmach;
-    extern int lsame_(char *, char *);
-    static double small, sfmin;
-    extern /* Subroutine */ int dlamc2_(int *, int *, int *, 
-           double *, int *, double *, int *, double *);
-    static int it;
-    static double rnd, eps;
-
-    if (first) {
-       first = FALSE_;
-       dlamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax);
-       base = (double) beta;
-       t = (double) it;
-       if (lrnd) {
-           rnd = 1.;
-           i__1 = 1 - it;
-           eps = pow_di(&base, &i__1) / 2;
-       } else {
-           rnd = 0.;
-           i__1 = 1 - it;
-           eps = pow_di(&base, &i__1);
-       }
-       prec = eps * base;
-       emin = (double) imin;
-       emax = (double) imax;
-       sfmin = rmin;
-       small = 1. / rmax;
-       if (small >= sfmin) {
-
-       /* Use SMALL plus a bit, to avoid the possibility of rounding   
-             causing overflow when computing  1/sfmin. */
-           sfmin = small * (eps + 1.);
-       }
-    }
-
-    if (lsame_(cmach, "E")) {
-       rmach = eps;
-    } else if (lsame_(cmach, "S")) {
-       rmach = sfmin;
-    } else if (lsame_(cmach, "B")) {
-       rmach = base;
-    } else if (lsame_(cmach, "P")) {
-       rmach = prec;
-    } else if (lsame_(cmach, "N")) {
-       rmach = t;
-    } else if (lsame_(cmach, "R")) {
-       rmach = rnd;
-    } else if (lsame_(cmach, "M")) {
-       rmach = emin;
-    } else if (lsame_(cmach, "U")) {
-       rmach = rmin;
-    } else if (lsame_(cmach, "L")) {
-       rmach = emax;
-    } else if (lsame_(cmach, "O")) {
-       rmach = rmax;
-    }
-
-    ret_val = rmach;
-    return ret_val;
-
-/*     End of DLAMCH */
-
-} /* dlamch_ */
-
-
-/* Subroutine */ int dlamc1_(int *beta, int *t, int *rnd, int 
-       *ieee1)
-{
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-
-
-    Purpose   
-    =======   
-
-    DLAMC1 determines the machine parameters given by BETA, T, RND, and   
-    IEEE1.   
-
-    Arguments   
-    =========   
-
-    BETA    (output) INT   
-            The base of the machine.   
-
-    T       (output) INT   
-            The number of ( BETA ) digits in the mantissa.   
-
-    RND     (output) INT   
-            Specifies whether proper rounding  ( RND = .TRUE. )  or   
-            chopping  ( RND = .FALSE. )  occurs in addition. This may not 
-  
-            be a reliable guide to the way in which the machine performs 
-  
-            its arithmetic.   
-
-    IEEE1   (output) INT   
-            Specifies whether rounding appears to be done in the IEEE   
-            'round to nearest' style.   
-
-    Further Details   
-    ===============   
-
-    The routine is based on the routine  ENVRON  by Malcolm and   
-    incorporates suggestions by Gentleman and Marovich. See   
-
-       Malcolm M. A. (1972) Algorithms to reveal properties of   
-          floating-point arithmetic. Comms. of the ACM, 15, 949-951.   
-
-       Gentleman W. M. and Marovich S. B. (1974) More on algorithms   
-          that reveal properties of floating point arithmetic units.   
-          Comms. of the ACM, 17, 276-277.   
-
-   ===================================================================== 
-*/
-    /* Initialized data */
-    static int first = TRUE_;
-    /* System generated locals */
-    double d__1, d__2;
-    /* Local variables */
-    static int lrnd;
-    static double a, b, c, f;
-    static int lbeta;
-    static double savec;
-    extern double dlamc3_(double *, double *);
-    static int lieee1;
-    static double t1, t2;
-    static int lt;
-    static double one, qtr;
-
-    if (first) {
-       first = FALSE_;
-       one = 1.;
-
-/*        LBETA,  LIEEE1,  LT and  LRND  are the  local values  of  BE
-TA,   
-          IEEE1, T and RND.   
-
-          Throughout this routine  we use the function  DLAMC3  to ens
-ure   
-          that relevant values are  stored and not held in registers, 
- or   
-          are not affected by optimizers.   
-
-          Compute  a = 2.0**m  with the  smallest positive integer m s
-uch   
-          that   
-
-             fl( a + 1.0 ) = a. */
-
-       a = 1.;
-       c = 1.;
-
-/* +       WHILE( C.EQ.ONE )LOOP */
-L10:
-       if (c == one) {
-           a *= 2;
-           c = dlamc3_(&a, &one);
-           d__1 = -a;
-           c = dlamc3_(&c, &d__1);
-           goto L10;
-       }
-/* +       END WHILE   
-
-          Now compute  b = 2.0**m  with the smallest positive integer 
-m   
-          such that   
-
-             fl( a + b ) .gt. a. */
-
-       b = 1.;
-       c = dlamc3_(&a, &b);
-
-/* +       WHILE( C.EQ.A )LOOP */
-L20:
-       if (c == a) {
-           b *= 2;
-           c = dlamc3_(&a, &b);
-           goto L20;
-       }
-/* +       END WHILE   
-
-          Now compute the base.  a and c  are neighbouring floating po
-int   
-          numbers  in the  interval  ( beta**t, beta**( t + 1 ) )  and
- so   
-          their difference is beta. Adding 0.25 to c is to ensure that
- it   
-          is truncated to beta and not ( beta - 1 ). */
-
-       qtr = one / 4;
-       savec = c;
-       d__1 = -a;
-       c = dlamc3_(&c, &d__1);
-       lbeta = (int) (c + qtr);
-
-/*        Now determine whether rounding or chopping occurs,  by addin
-g a   
-          bit  less  than  beta/2  and a  bit  more  than  beta/2  to 
- a. */
-
-       b = (double) lbeta;
-       d__1 = b / 2;
-       d__2 = -b / 100;
-       f = dlamc3_(&d__1, &d__2);
-       c = dlamc3_(&f, &a);
-       if (c == a) {
-           lrnd = TRUE_;
-       } else {
-           lrnd = FALSE_;
-       }
-       d__1 = b / 2;
-       d__2 = b / 100;
-       f = dlamc3_(&d__1, &d__2);
-       c = dlamc3_(&f, &a);
-       if (lrnd && c == a) {
-           lrnd = FALSE_;
-       }
-
-/*        Try and decide whether rounding is done in the  IEEE  'round
- to   
-          nearest' style. B/2 is half a unit in the last place of the 
-two   
-          numbers A and SAVEC. Furthermore, A is even, i.e. has last  
-bit   
-          zero, and SAVEC is odd. Thus adding B/2 to A should not  cha
-nge   
-          A, but adding B/2 to SAVEC should change SAVEC. */
-
-       d__1 = b / 2;
-       t1 = dlamc3_(&d__1, &a);
-       d__1 = b / 2;
-       t2 = dlamc3_(&d__1, &savec);
-       lieee1 = t1 == a && t2 > savec && lrnd;
-
-/*        Now find  the  mantissa, t.  It should  be the  integer part
- of   
-          log to the base beta of a,  however it is safer to determine
-  t   
-          by powering.  So we find t as the smallest positive integer 
-for   
-          which   
-
-             fl( beta**t + 1.0 ) = 1.0. */
-
-       lt = 0;
-       a = 1.;
-       c = 1.;
-
-/* +       WHILE( C.EQ.ONE )LOOP */
-L30:
-       if (c == one) {
-           ++lt;
-           a *= lbeta;
-           c = dlamc3_(&a, &one);
-           d__1 = -a;
-           c = dlamc3_(&c, &d__1);
-           goto L30;
-       }
-/* +       END WHILE */
-
-    }
-
-    *beta = lbeta;
-    *t = lt;
-    *rnd = lrnd;
-    *ieee1 = lieee1;
-    return 0;
-
-/*     End of DLAMC1 */
-
-} /* dlamc1_ */
-
-
-/* Subroutine */ int dlamc2_(int *beta, int *t, int *rnd, 
-       double *eps, int *emin, double *rmin, int *emax, 
-       double *rmax)
-{
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-
-
-    Purpose   
-    =======   
-
-    DLAMC2 determines the machine parameters specified in its argument   
-    list.   
-
-    Arguments   
-    =========   
-
-    BETA    (output) INT   
-            The base of the machine.   
-
-    T       (output) INT   
-            The number of ( BETA ) digits in the mantissa.   
-
-    RND     (output) INT   
-            Specifies whether proper rounding  ( RND = .TRUE. )  or   
-            chopping  ( RND = .FALSE. )  occurs in addition. This may not 
-  
-            be a reliable guide to the way in which the machine performs 
-  
-            its arithmetic.   
-
-    EPS     (output) DOUBLE PRECISION   
-            The smallest positive number such that   
-
-               fl( 1.0 - EPS ) .LT. 1.0,   
-
-            where fl denotes the computed value.   
-
-    EMIN    (output) INT   
-            The minimum exponent before (gradual) underflow occurs.   
-
-    RMIN    (output) DOUBLE PRECISION   
-            The smallest normalized number for the machine, given by   
-            BASE**( EMIN - 1 ), where  BASE  is the floating point value 
-  
-            of BETA.   
-
-    EMAX    (output) INT   
-            The maximum exponent before overflow occurs.   
-
-    RMAX    (output) DOUBLE PRECISION   
-            The largest positive number for the machine, given by   
-            BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating point 
-  
-            value of BETA.   
-
-    Further Details   
-    ===============   
-
-    The computation of  EPS  is based on a routine PARANOIA by   
-    W. Kahan of the University of California at Berkeley.   
-
-   ===================================================================== 
-*/
-    /* Table of constant values */
-    static int c__1 = 1;
-    
-    /* Initialized data */
-    static int first = TRUE_;
-    static int iwarn = FALSE_;
-    /* System generated locals */
-    int i__1;
-    double d__1, d__2, d__3, d__4, d__5;
-    /* Builtin functions */
-    double pow_di(double *, int *);
-    /* Local variables */
-    static int ieee;
-    static double half;
-    static int lrnd;
-    static double leps, zero, a, b, c;
-    static int i, lbeta;
-    static double rbase;
-    static int lemin, lemax, gnmin;
-    static double small;
-    static int gpmin;
-    static double third, lrmin, lrmax, sixth;
-    extern /* Subroutine */ int dlamc1_(int *, int *, int *, 
-           int *);
-    extern double dlamc3_(double *, double *);
-    static int lieee1;
-    extern /* Subroutine */ int dlamc4_(int *, double *, int *), 
-           dlamc5_(int *, int *, int *, int *, int *, 
-           double *);
-    static int lt, ngnmin, ngpmin;
-    static double one, two;
-
-    if (first) {
-       first = FALSE_;
-       zero = 0.;
-       one = 1.;
-       two = 2.;
-
-/*        LBETA, LT, LRND, LEPS, LEMIN and LRMIN  are the local values
- of   
-          BETA, T, RND, EPS, EMIN and RMIN.   
-
-          Throughout this routine  we use the function  DLAMC3  to ens
-ure   
-          that relevant values are stored  and not held in registers, 
- or   
-          are not affected by optimizers.   
-
-          DLAMC1 returns the parameters  LBETA, LT, LRND and LIEEE1. 
-*/
-
-       dlamc1_(&lbeta, &lt, &lrnd, &lieee1);
-
-/*        Start to find EPS. */
-
-       b = (double) lbeta;
-       i__1 = -lt;
-       a = pow_di(&b, &i__1);
-       leps = a;
-
-/*        Try some tricks to see whether or not this is the correct  E
-PS. */
-
-       b = two / 3;
-       half = one / 2;
-       d__1 = -half;
-       sixth = dlamc3_(&b, &d__1);
-       third = dlamc3_(&sixth, &sixth);
-       d__1 = -half;
-       b = dlamc3_(&third, &d__1);
-       b = dlamc3_(&b, &sixth);
-       b = abs(b);
-       if (b < leps) {
-           b = leps;
-       }
-
-       leps = 1.;
-
-/* +       WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */
-L10:
-       if (leps > b && b > zero) {
-           leps = b;
-           d__1 = half * leps;
-/* Computing 5th power */
-           d__3 = two, d__4 = d__3, d__3 *= d__3;
-/* Computing 2nd power */
-           d__5 = leps;
-           d__2 = d__4 * (d__3 * d__3) * (d__5 * d__5);
-           c = dlamc3_(&d__1, &d__2);
-           d__1 = -c;
-           c = dlamc3_(&half, &d__1);
-           b = dlamc3_(&half, &c);
-           d__1 = -b;
-           c = dlamc3_(&half, &d__1);
-           b = dlamc3_(&half, &c);
-           goto L10;
-       }
-/* +       END WHILE */
-
-       if (a < leps) {
-           leps = a;
-       }
-
-/*        Computation of EPS complete.   
-
-          Now find  EMIN.  Let A = + or - 1, and + or - (1 + BASE**(-3
-)).   
-          Keep dividing  A by BETA until (gradual) underflow occurs. T
-his   
-          is detected when we cannot recover the previous A. */
-
-       rbase = one / lbeta;
-       small = one;
-       for (i = 1; i <= 3; ++i) {
-           d__1 = small * rbase;
-           small = dlamc3_(&d__1, &zero);
-/* L20: */
-       }
-       a = dlamc3_(&one, &small);
-       dlamc4_(&ngpmin, &one, &lbeta);
-       d__1 = -one;
-       dlamc4_(&ngnmin, &d__1, &lbeta);
-       dlamc4_(&gpmin, &a, &lbeta);
-       d__1 = -a;
-       dlamc4_(&gnmin, &d__1, &lbeta);
-       ieee = FALSE_;
-
-       if (ngpmin == ngnmin && gpmin == gnmin) {
-           if (ngpmin == gpmin) {
-               lemin = ngpmin;
-/*            ( Non twos-complement machines, no gradual under
-flow;   
-                e.g.,  VAX ) */
-           } else if (gpmin - ngpmin == 3) {
-               lemin = ngpmin - 1 + lt;
-               ieee = TRUE_;
-/*            ( Non twos-complement machines, with gradual und
-erflow;   
-                e.g., IEEE standard followers ) */
-           } else {
-               lemin = min(ngpmin,gpmin);
-/*            ( A guess; no known machine ) */
-               iwarn = TRUE_;
-           }
-
-       } else if (ngpmin == gpmin && ngnmin == gnmin) {
-           if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) {
-               lemin = max(ngpmin,ngnmin);
-/*            ( Twos-complement machines, no gradual underflow
-;   
-                e.g., CYBER 205 ) */
-           } else {
-               lemin = min(ngpmin,ngnmin);
-/*            ( A guess; no known machine ) */
-               iwarn = TRUE_;
-           }
-
-       } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin)
-                {
-           if (gpmin - min(ngpmin,ngnmin) == 3) {
-               lemin = max(ngpmin,ngnmin) - 1 + lt;
-/*            ( Twos-complement machines with gradual underflo
-w;   
-                no known machine ) */
-           } else {
-               lemin = min(ngpmin,ngnmin);
-/*            ( A guess; no known machine ) */
-               iwarn = TRUE_;
-           }
-
-       } else {
-/* Computing MIN */
-           i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin);
-           lemin = min(i__1,gnmin);
-/*         ( A guess; no known machine ) */
-           iwarn = TRUE_;
-       }
-/* **   
-   Comment out this if block if EMIN is ok */
-       if (iwarn) {
-           first = TRUE_;
-           printf("\n\n WARNING. The value EMIN may be incorrect:- ");
-           printf("EMIN = %8i\n",lemin);
-           printf("If, after inspection, the value EMIN looks acceptable");
-            printf("please comment out \n the IF block as marked within the"); 
-            printf("code of routine DLAMC2, \n otherwise supply EMIN"); 
-            printf("explicitly.\n");
-       }
-/* **   
-
-          Assume IEEE arithmetic if we found denormalised  numbers abo
-ve,   
-          or if arithmetic seems to round in the  IEEE style,  determi
-ned   
-          in routine DLAMC1. A true IEEE machine should have both  thi
-ngs   
-          true; however, faulty machines may have one or the other. */
-
-       ieee = ieee || lieee1;
-
-/*        Compute  RMIN by successive division by  BETA. We could comp
-ute   
-          RMIN as BASE**( EMIN - 1 ),  but some machines underflow dur
-ing   
-          this computation. */
-
-       lrmin = 1.;
-       i__1 = 1 - lemin;
-       for (i = 1; i <= 1-lemin; ++i) {
-           d__1 = lrmin * rbase;
-           lrmin = dlamc3_(&d__1, &zero);
-/* L30: */
-       }
-
-/*        Finally, call DLAMC5 to compute EMAX and RMAX. */
-
-       dlamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax);
-    }
-
-    *beta = lbeta;
-    *t = lt;
-    *rnd = lrnd;
-    *eps = leps;
-    *emin = lemin;
-    *rmin = lrmin;
-    *emax = lemax;
-    *rmax = lrmax;
-
-    return 0;
-
-
-/*     End of DLAMC2 */
-
-} /* dlamc2_ */
-
-
-double dlamc3_(double *a, double *b)
-{
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-
-
-    Purpose   
-    =======   
-
-    DLAMC3  is intended to force  A  and  B  to be stored prior to doing 
-  
-    the addition of  A  and  B ,  for use in situations where optimizers 
-  
-    might hold one of these in a register.   
-
-    Arguments   
-    =========   
-
-    A, B    (input) DOUBLE PRECISION   
-            The values A and B.   
-
-   ===================================================================== 
-*/
-/* >>Start of File<<   
-       System generated locals */
-  volatile double ret_val; /* [added volatile to avoid -O3 optimizations.. 
(julien pommier)] */
-
-    ret_val = *a + *b;
-
-    return ret_val;
-
-/*     End of DLAMC3 */
-
-} /* dlamc3_ */
-
-
-/* Subroutine */ int dlamc4_(int *emin, double *start, int *base)
-{
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-
-
-    Purpose   
-    =======   
-
-    DLAMC4 is a service routine for DLAMC2.   
-
-    Arguments   
-    =========   
-
-    EMIN    (output) EMIN   
-            The minimum exponent before (gradual) underflow, computed by 
-  
-            setting A = START and dividing by BASE until the previous A   
-            can not be recovered.   
-
-    START   (input) DOUBLE PRECISION   
-            The starting point for determining EMIN.   
-
-    BASE    (input) INT   
-            The base of the machine.   
-
-   ===================================================================== 
-*/
-    /* System generated locals */
-    int i__1;
-    double d__1;
-    /* Local variables */
-    static double zero, a;
-    static int i;
-    static double rbase, b1, b2, c1, c2, d1, d2;
-    extern double dlamc3_(double *, double *);
-    static double one;
-
-    a = *start;
-    one = 1.;
-    rbase = one / *base;
-    zero = 0.;
-    *emin = 1;
-    d__1 = a * rbase;
-    b1 = dlamc3_(&d__1, &zero);
-    c1 = a;
-    c2 = a;
-    d1 = a;
-    d2 = a;
-/* +    WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND.   
-      $       ( D1.EQ.A ).AND.( D2.EQ.A )      )LOOP */
-L10:
-    if (c1 == a && c2 == a && d1 == a && d2 == a) {
-       --(*emin);
-       a = b1;
-       d__1 = a / *base;
-       b1 = dlamc3_(&d__1, &zero);
-       d__1 = b1 * *base;
-       c1 = dlamc3_(&d__1, &zero);
-       d1 = zero;
-       i__1 = *base;
-       for (i = 1; i <= *base; ++i) {
-           d1 += b1;
-/* L20: */
-       }
-       d__1 = a * rbase;
-       b2 = dlamc3_(&d__1, &zero);
-       d__1 = b2 / rbase;
-       c2 = dlamc3_(&d__1, &zero);
-       d2 = zero;
-       i__1 = *base;
-       for (i = 1; i <= *base; ++i) {
-           d2 += b2;
-/* L30: */
-       }
-       goto L10;
-    }
-/* +    END WHILE */
-
-    return 0;
-
-/*     End of DLAMC4 */
-
-} /* dlamc4_ */
-
-
-/* Subroutine */ int dlamc5_(int *beta, int *p, int *emin, 
-       int *ieee, int *emax, double *rmax)
-{
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-
-
-    Purpose   
-    =======   
-
-    DLAMC5 attempts to compute RMAX, the largest machine floating-point   
-    number, without overflow.  It assumes that EMAX + abs(EMIN) sum   
-    approximately to a power of 2.  It will fail on machines where this   
-    assumption does not hold, for example, the Cyber 205 (EMIN = -28625, 
-  
-    EMAX = 28718).  It will also fail if the value supplied for EMIN is   
-    too large (i.e. too close to zero), probably with overflow.   
-
-    Arguments   
-    =========   
-
-    BETA    (input) INT   
-            The base of floating-point arithmetic.   
-
-    P       (input) INT   
-            The number of base BETA digits in the mantissa of a   
-            floating-point value.   
-
-    EMIN    (input) INT   
-            The minimum exponent before (gradual) underflow.   
-
-    IEEE    (input) INT   
-            A int flag specifying whether or not the arithmetic   
-            system is thought to comply with the IEEE standard.   
-
-    EMAX    (output) INT   
-            The largest exponent before overflow   
-
-    RMAX    (output) DOUBLE PRECISION   
-            The largest machine floating-point number.   
-
-   ===================================================================== 
-  
-
-
-       First compute LEXP and UEXP, two powers of 2 that bound   
-       abs(EMIN). We then assume that EMAX + abs(EMIN) will sum   
-       approximately to the bound that is closest to abs(EMIN).   
-       (EMAX is the exponent of the required number RMAX). */
-    /* Table of constant values */
-    static double c_b5 = 0.;
-    
-    /* System generated locals */
-    int i__1;
-    double d__1;
-    /* Local variables */
-    static int lexp;
-    static double oldy;
-    static int uexp, i;
-    static double y, z;
-    static int nbits;
-    extern double dlamc3_(double *, double *);
-    static double recbas;
-    static int exbits, expsum, try__;
-
-
-
-    lexp = 1;
-    exbits = 1;
-L10:
-    try__ = lexp << 1;
-    if (try__ <= -(*emin)) {
-       lexp = try__;
-       ++exbits;
-       goto L10;
-    }
-    if (lexp == -(*emin)) {
-       uexp = lexp;
-    } else {
-       uexp = try__;
-       ++exbits;
-    }
-
-/*     Now -LEXP is less than or equal to EMIN, and -UEXP is greater   
-       than or equal to EMIN. EXBITS is the number of bits needed to   
-       store the exponent. */
-
-    if (uexp + *emin > -lexp - *emin) {
-       expsum = lexp << 1;
-    } else {
-       expsum = uexp << 1;
-    }
-
-/*     EXPSUM is the exponent range, approximately equal to   
-       EMAX - EMIN + 1 . */
-
-    *emax = expsum + *emin - 1;
-    nbits = exbits + 1 + *p;
-
-/*     NBITS is the total number of bits needed to store a   
-       floating-point number. */
-
-    if (nbits % 2 == 1 && *beta == 2) {
-
-/*        Either there are an odd number of bits used to store a   
-          floating-point number, which is unlikely, or some bits are 
-  
-          not used in the representation of numbers, which is possible
-,   
-          (e.g. Cray machines) or the mantissa has an implicit bit,   
-          (e.g. IEEE machines, Dec Vax machines), which is perhaps the
-   
-          most likely. We have to assume the last alternative.   
-          If this is true, then we need to reduce EMAX by one because 
-  
-          there must be some way of representing zero in an implicit-b
-it   
-          system. On machines like Cray, we are reducing EMAX by one 
-  
-          unnecessarily. */
-
-       --(*emax);
-    }
-
-    if (*ieee) {
-
-/*        Assume we are on an IEEE machine which reserves one exponent
-   
-          for infinity and NaN. */
-
-       --(*emax);
-    }
-
-/*     Now create RMAX, the largest machine number, which should   
-       be equal to (1.0 - BETA**(-P)) * BETA**EMAX .   
-
-       First compute 1.0 - BETA**(-P), being careful that the   
-       result is less than 1.0 . */
-
-    recbas = 1. / *beta;
-    z = *beta - 1.;
-    y = 0.;
-    i__1 = *p;
-    for (i = 1; i <= *p; ++i) {
-       z *= recbas;
-       if (y < 1.) {
-           oldy = y;
-       }
-       y = dlamc3_(&y, &z);
-/* L20: */
-    }
-    if (y >= 1.) {
-       y = oldy;
-    }
-
-/*     Now multiply by BETA**EMAX to get RMAX. */
-
-    i__1 = *emax;
-    for (i = 1; i <= *emax; ++i) {
-       d__1 = y * *beta;
-       y = dlamc3_(&d__1, &c_b5);
-/* L30: */
-    }
-
-    *rmax = y;
-    return 0;
-
-/*     End of DLAMC5 */
-
-} /* dlamc5_ */
-
-double pow_di(double *ap, int *bp)
-{
-    double pow, x;
-    int n;
-
-    pow = 1;
-    x = *ap;
-    n = *bp;
-
-    if(n != 0){
-       if(n < 0) {
-           n = -n;
-           x = 1/x;
-       }
-       for( ; ; ) {
-           if(n & 01) pow *= x;
-           if(n >>= 1) x *= x;
-           else break;
-       }
-    }
-    return(pow);
-}
-
diff --git a/superlu/dlangs.c b/superlu/dlangs.c
deleted file mode 100644
index 811fb473..00000000
--- a/superlu/dlangs.c
+++ /dev/null
@@ -1,132 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-/*
- * File name:  dlangs.c
- * History:     Modified from lapack routine DLANGE
- */
-#include <math.h>
-#include "slu_ddefs.h"
-
-double dlangs(char *norm, SuperMatrix *A)
-{
-/* 
-    Purpose   
-    =======   
-
-    DLANGS returns the value of the one norm, or the Frobenius norm, or 
-    the infinity norm, or the element of largest absolute value of a 
-    real matrix A.   
-
-    Description   
-    ===========   
-
-    DLANGE returns the value   
-
-       DLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
-                (   
-                ( norm1(A),         NORM = '1', 'O' or 'o'   
-                (   
-                ( normI(A),         NORM = 'I' or 'i'   
-                (   
-                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'   
-
-    where  norm1  denotes the  one norm of a matrix (maximum column sum), 
-    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
-    normF  denotes the  Frobenius norm of a matrix (square root of sum of 
-    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   
-
-    Arguments   
-    =========   
-
-    NORM    (input) CHARACTER*1   
-            Specifies the value to be returned in DLANGE as described above.   
-    A       (input) SuperMatrix*
-            The M by N sparse matrix A. 
-
-   ===================================================================== 
-*/
-    
-    /* Local variables */
-    NCformat *Astore;
-    double   *Aval;
-    int      i, j, irow;
-    double   value, sum;
-    double   *rwork;
-
-    Astore = A->Store;
-    Aval   = Astore->nzval;
-    
-    if ( SUPERLU_MIN(A->nrow, A->ncol) == 0) {
-       value = 0.;
-       
-    } else if (lsame_(norm, "M")) {
-       /* Find max(abs(A(i,j))). */
-       value = 0.;
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++)
-               value = SUPERLU_MAX( value, fabs( Aval[i]) );
-       
-    } else if (lsame_(norm, "O") || *(unsigned char *)norm == '1') {
-       /* Find norm1(A). */
-       value = 0.;
-       for (j = 0; j < A->ncol; ++j) {
-           sum = 0.;
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) 
-               sum += fabs(Aval[i]);
-           value = SUPERLU_MAX(value,sum);
-       }
-       
-    } else if (lsame_(norm, "I")) {
-       /* Find normI(A). */
-       if ( !(rwork = (double *) SUPERLU_MALLOC(A->nrow * sizeof(double))) )
-           ABORT("SUPERLU_MALLOC fails for rwork.");
-       for (i = 0; i < A->nrow; ++i) rwork[i] = 0.;
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) {
-               irow = Astore->rowind[i];
-               rwork[irow] += fabs(Aval[i]);
-           }
-       value = 0.;
-       for (i = 0; i < A->nrow; ++i)
-           value = SUPERLU_MAX(value, rwork[i]);
-       
-       SUPERLU_FREE (rwork);
-       
-    } else if (lsame_(norm, "F") || lsame_(norm, "E")) {
-       /* Find normF(A). */
-       ABORT("Not implemented.");
-    } else
-       ABORT("Illegal norm specified.");
-
-    return (value);
-
-} /* dlangs */
-
diff --git a/superlu/dlaqgs.c b/superlu/dlaqgs.c
deleted file mode 100644
index 807a5c1c..00000000
--- a/superlu/dlaqgs.c
+++ /dev/null
@@ -1,158 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-/*
- * File name:  dlaqgs.c
- * History:     Modified from LAPACK routine DLAQGE
- */
-#include <math.h>
-#include "slu_ddefs.h"
-
-void
-dlaqgs(SuperMatrix *A, double *r, double *c, 
-       double rowcnd, double colcnd, double amax, char *equed)
-{
-/*
-    Purpose   
-    =======   
-
-    DLAQGS equilibrates a general sparse M by N matrix A using the row and   
-    scaling factors in the vectors R and C.   
-
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
-
-    Arguments   
-    =========   
-
-    A       (input/output) SuperMatrix*
-            On exit, the equilibrated matrix.  See EQUED for the form of 
-            the equilibrated matrix. The type of A can be:
-           Stype = NC; Dtype = SLU_D; Mtype = GE.
-           
-    R       (input) double*, dimension (A->nrow)
-            The row scale factors for A.
-           
-    C       (input) double*, dimension (A->ncol)
-            The column scale factors for A.
-           
-    ROWCND  (input) double
-            Ratio of the smallest R(i) to the largest R(i).
-           
-    COLCND  (input) double
-            Ratio of the smallest C(i) to the largest C(i).
-           
-    AMAX    (input) double
-            Absolute value of largest matrix entry.
-           
-    EQUED   (output) char*
-            Specifies the form of equilibration that was done.   
-            = 'N':  No equilibration   
-            = 'R':  Row equilibration, i.e., A has been premultiplied by  
-                    diag(R).   
-            = 'C':  Column equilibration, i.e., A has been postmultiplied  
-                    by diag(C).   
-            = 'B':  Both row and column equilibration, i.e., A has been
-                    replaced by diag(R) * A * diag(C).   
-
-    Internal Parameters   
-    ===================   
-
-    THRESH is a threshold value used to decide if row or column scaling   
-    should be done based on the ratio of the row or column scaling   
-    factors.  If ROWCND < THRESH, row scaling is done, and if   
-    COLCND < THRESH, column scaling is done.   
-
-    LARGE and SMALL are threshold values used to decide if row scaling   
-    should be done based on the absolute size of the largest matrix   
-    element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.   
-
-    ===================================================================== 
-*/
-
-#define THRESH    (0.1)
-    
-    /* Local variables */
-    NCformat *Astore;
-    double   *Aval;
-    int i, j, irow;
-    double large, small, cj;
-    extern double dlamch_(char *);
-
-
-    /* Quick return if possible */
-    if (A->nrow <= 0 || A->ncol <= 0) {
-       *(unsigned char *)equed = 'N';
-       return;
-    }
-
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    
-    /* Initialize LARGE and SMALL. */
-    small = dlamch_("Safe minimum") / dlamch_("Precision");
-    large = 1. / small;
-
-    if (rowcnd >= THRESH && amax >= small && amax <= large) {
-       if (colcnd >= THRESH)
-           *(unsigned char *)equed = 'N';
-       else {
-           /* Column scaling */
-           for (j = 0; j < A->ncol; ++j) {
-               cj = c[j];
-               for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                   Aval[i] *= cj;
-                }
-           }
-           *(unsigned char *)equed = 'C';
-       }
-    } else if (colcnd >= THRESH) {
-       /* Row scaling, no column scaling */
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-               irow = Astore->rowind[i];
-               Aval[i] *= r[irow];
-           }
-       *(unsigned char *)equed = 'R';
-    } else {
-       /* Row and column scaling */
-       for (j = 0; j < A->ncol; ++j) {
-           cj = c[j];
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-               irow = Astore->rowind[i];
-               Aval[i] *= cj * r[irow];
-           }
-       }
-       *(unsigned char *)equed = 'B';
-    }
-
-    return;
-
-} /* dlaqgs */
-
diff --git a/superlu/dmemory.c b/superlu/dmemory.c
deleted file mode 100644
index 4ca23363..00000000
--- a/superlu/dmemory.c
+++ /dev/null
@@ -1,690 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include "slu_ddefs.h"
-
-/* Constants */
-#define NO_MEMTYPE  4      /* 0: lusup;
-                             1: ucol;
-                             2: lsub;
-                             3: usub */
-#define GluIntArray(n)   (5 * (n) + 5)
-
-/* Internal prototypes */
-void  *dexpand (int *, MemType,int, int, GlobalLU_t *);
-int   dLUWorkInit (int, int, int, int **, double **, LU_space_t);
-void  copy_mem_double (int, void *, void *);
-void  dStackCompress (GlobalLU_t *);
-void  dSetupSpace (void *, int, LU_space_t *);
-void  *duser_malloc (int, int);
-void  duser_free (int, int);
-
-/* External prototypes (in memory.c - prec-indep) */
-extern void    copy_mem_int    (int, void *, void *);
-extern void    user_bcopy      (char *, char *, int);
-
-/* Headers for 4 types of dynamatically managed memory */
-typedef struct e_node {
-    int size;      /* length of the memory that has been used */
-    void *mem;     /* pointer to the new malloc'd store */
-} ExpHeader;
-
-typedef struct {
-    int  size;
-    int  used;
-    int  top1;  /* grow upward, relative to &array[0] */
-    int  top2;  /* grow downward */
-    void *array;
-} LU_stack_t;
-
-/* Variables local to this file */
-static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */
-static LU_stack_t stack;
-static int no_expand;
-
-/* Macros to manipulate stack */
-#define StackFull(x)         ( x + stack.used >= stack.size )
-#define NotDoubleAlign(addr) ( (long int)addr & 7 )
-#define DoubleAlign(addr)    ( ((long int)addr + 7) & ~7L )
-#define TempSpace(m, w)      ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
-                             (w + 1) * m * sizeof(double) )
-#define Reduce(alpha)        ((alpha + 1) / 2)  /* i.e. (alpha-1)/2 + 1 */
-
-
-
-
-/*
- * Setup the memory model to be used for factorization.
- *    lwork = 0: use system malloc;
- *    lwork > 0: use user-supplied work[] space.
- */
-void dSetupSpace(void *work, int lwork, LU_space_t *MemModel)
-{
-    if ( lwork == 0 ) {
-       *MemModel = SYSTEM; /* malloc/free */
-    } else if ( lwork > 0 ) {
-       *MemModel = USER;   /* user provided space */
-       stack.used = 0;
-       stack.top1 = 0;
-       stack.top2 = (lwork/4)*4; /* must be word addressable */
-       stack.size = stack.top2;
-       stack.array = (void *) work;
-    }
-}
-
-
-
-void *duser_malloc(int bytes, int which_end)
-{
-    void *buf;
-    
-    if ( StackFull(bytes) ) return (NULL);
-
-    if ( which_end == HEAD ) {
-       buf = (char*) stack.array + stack.top1;
-       stack.top1 += bytes;
-    } else {
-       stack.top2 -= bytes;
-       buf = (char*) stack.array + stack.top2;
-    }
-    
-    stack.used += bytes;
-    return buf;
-}
-
-
-void duser_free(int bytes, int which_end)
-{
-    if ( which_end == HEAD ) {
-       stack.top1 -= bytes;
-    } else {
-       stack.top2 += bytes;
-    }
-    stack.used -= bytes;
-}
-
-
-
-/*
- * mem_usage consists of the following fields:
- *    - for_lu (float)
- *      The amount of space used in bytes for the L\U data structures.
- *    - total_needed (float)
- *      The amount of space needed in bytes to perform factorization.
- *    - expansions (int)
- *      Number of memory expansions during the LU factorization.
- */
-int dQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
-{
-    SCformat *Lstore;
-    NCformat *Ustore;
-    register int n, iword, dword, panel_size = sp_ienv(1);
-
-    Lstore = L->Store;
-    Ustore = U->Store;
-    n = L->ncol;
-    iword = sizeof(int);
-    dword = sizeof(double);
-
-    /* For LU factors */
-    mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
-                                dword + Lstore->rowind_colptr[n] * iword );
-    mem_usage->for_lu += (float)( (n + 1) * iword +
-                                Ustore->colptr[n] * (dword + iword) );
-
-    /* Working storage to support factorization */
-    mem_usage->total_needed = mem_usage->for_lu +
-       (float)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
-               (panel_size + 1) * n * dword );
-
-    mem_usage->expansions = --no_expand;
-
-    return 0;
-} /* dQuerySpace */
-
-/*
- * Allocate storage for the data structures common to all factor routines.
- * For those unpredictable size, make a guess as FILL * nnz(A).
- * Return value:
- *     If lwork = -1, return the estimated amount of space required, plus n;
- *     otherwise, return the amount of space actually allocated when
- *     memory allocation failure occurred.
- */
-int
-dLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
-         int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu,
-         int **iwork, double **dwork)
-{
-    int      info, iword, dword;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    int      *xsup, *supno;
-    int      *lsub, *xlsub;
-    double   *lusup;
-    int      *xlusup;
-    double   *ucol;
-    int      *usub, *xusub;
-    int      nzlmax, nzumax, nzlumax;
-    int      FILL = sp_ienv(6);
-    
-    Glu->n    = n;
-    no_expand = 0;
-    iword     = sizeof(int);
-    dword     = sizeof(double);
-
-    if ( !expanders )  
-        expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader));
-    if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
-    
-    if ( fact != SamePattern_SameRowPerm ) {
-       /* Guess for L\U factors */
-       nzumax = nzlumax = FILL * annz;
-       nzlmax = SUPERLU_MAX(1, FILL/4.) * annz;
-
-       if ( lwork == -1 ) {
-           return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
-                   + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
-        } else {
-           dSetupSpace(work, lwork, &Glu->MemModel);
-       }
-       
-#if ( PRNTlevel >= 1 )
-       printf("dLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n", 
-              FILL, nzlmax, nzumax);
-       fflush(stdout);
-#endif 
-       
-       /* Integer pointers for L\U factors */
-       if ( Glu->MemModel == SYSTEM ) {
-           xsup   = intMalloc(n+1);
-           supno  = intMalloc(n+1);
-           xlsub  = intMalloc(n+1);
-           xlusup = intMalloc(n+1);
-           xusub  = intMalloc(n+1);
-       } else {
-           xsup   = (int *)duser_malloc((n+1) * iword, HEAD);
-           supno  = (int *)duser_malloc((n+1) * iword, HEAD);
-           xlsub  = (int *)duser_malloc((n+1) * iword, HEAD);
-           xlusup = (int *)duser_malloc((n+1) * iword, HEAD);
-           xusub  = (int *)duser_malloc((n+1) * iword, HEAD);
-       }
-
-       lusup = (double *) dexpand( &nzlumax, LUSUP, 0, 0, Glu );
-       ucol  = (double *) dexpand( &nzumax, UCOL, 0, 0, Glu );
-       lsub  = (int *)    dexpand( &nzlmax, LSUB, 0, 0, Glu );
-       usub  = (int *)    dexpand( &nzumax, USUB, 0, 1, Glu );
-
-       while ( !lusup || !ucol || !lsub || !usub ) {
-           if ( Glu->MemModel == SYSTEM ) {
-               SUPERLU_FREE(lusup); 
-               SUPERLU_FREE(ucol); 
-               SUPERLU_FREE(lsub); 
-               SUPERLU_FREE(usub);
-           } else {
-               duser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD);
-           }
-           nzlumax /= 2;
-           nzumax /= 2;
-           nzlmax /= 2;
-           if ( nzlumax < annz ) {
-               printf("Not enough memory to perform factorization.\n");
-               return (dmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
-           }
-#if ( PRNTlevel >= 1)
-           printf("dLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", 
-                  nzlmax, nzumax);
-           fflush(stdout);
-#endif
-           lusup = (double *) dexpand( &nzlumax, LUSUP, 0, 0, Glu );
-           ucol  = (double *) dexpand( &nzumax, UCOL, 0, 0, Glu );
-           lsub  = (int *)    dexpand( &nzlmax, LSUB, 0, 0, Glu );
-           usub  = (int *)    dexpand( &nzumax, USUB, 0, 1, Glu );
-       }
-       
-    } else {
-       /* fact == SamePattern_SameRowPerm */
-       Lstore   = L->Store;
-       Ustore   = U->Store;
-       xsup     = Lstore->sup_to_col;
-       supno    = Lstore->col_to_sup;
-       xlsub    = Lstore->rowind_colptr;
-       xlusup   = Lstore->nzval_colptr;
-       xusub    = Ustore->colptr;
-       nzlmax   = Glu->nzlmax;    /* max from previous factorization */
-       nzumax   = Glu->nzumax;
-       nzlumax  = Glu->nzlumax;
-       
-       if ( lwork == -1 ) {
-           return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
-                   + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
-        } else if ( lwork == 0 ) {
-           Glu->MemModel = SYSTEM;
-       } else {
-           Glu->MemModel = USER;
-           stack.top2 = (lwork/4)*4; /* must be word-addressable */
-           stack.size = stack.top2;
-       }
-       
-       lsub  = expanders[LSUB].mem  = Lstore->rowind;
-       lusup = expanders[LUSUP].mem = Lstore->nzval;
-       usub  = expanders[USUB].mem  = Ustore->rowind;
-       ucol  = expanders[UCOL].mem  = Ustore->nzval;;
-       expanders[LSUB].size         = nzlmax;
-       expanders[LUSUP].size        = nzlumax;
-       expanders[USUB].size         = nzumax;
-       expanders[UCOL].size         = nzumax;  
-    }
-
-    Glu->xsup    = xsup;
-    Glu->supno   = supno;
-    Glu->lsub    = lsub;
-    Glu->xlsub   = xlsub;
-    Glu->lusup   = lusup;
-    Glu->xlusup  = xlusup;
-    Glu->ucol    = ucol;
-    Glu->usub    = usub;
-    Glu->xusub   = xusub;
-    Glu->nzlmax  = nzlmax;
-    Glu->nzumax  = nzumax;
-    Glu->nzlumax = nzlumax;
-    
-    info = dLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel);
-    if ( info )
-       return ( info + dmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
-    
-    ++no_expand;
-    return 0;
-    
-} /* dLUMemInit */
-
-/* Allocate known working storage. Returns 0 if success, otherwise
-   returns the number of bytes allocated so far when failure occurred. */
-int
-dLUWorkInit(int m, int n, int panel_size, int **iworkptr, 
-            double **dworkptr, LU_space_t MemModel)
-{
-    int    isize, dsize, extra;
-    double *old_ptr;
-    int    maxsuper = sp_ienv(3),
-           rowblk   = sp_ienv(4);
-
-    isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
-    dsize = (m * panel_size +
-            NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(double);
-    
-    if ( MemModel == SYSTEM ) 
-       *iworkptr = (int *) intCalloc(isize/sizeof(int));
-    else
-       *iworkptr = (int *) duser_malloc(isize, TAIL);
-    if ( ! *iworkptr ) {
-       fprintf(stderr, "dLUWorkInit: malloc fails for local iworkptr[]\n");
-       return (isize + n);
-    }
-
-    if ( MemModel == SYSTEM )
-       *dworkptr = (double *) SUPERLU_MALLOC(dsize);
-    else {
-       *dworkptr = (double *) duser_malloc(dsize, TAIL);
-       if ( NotDoubleAlign(*dworkptr) ) {
-           old_ptr = *dworkptr;
-           *dworkptr = (double*) DoubleAlign(*dworkptr);
-           *dworkptr = (double*) ((double*)*dworkptr - 1);
-           extra = (char*)old_ptr - (char*)*dworkptr;
-#ifdef DEBUG       
-           printf("dLUWorkInit: not aligned, extra %d\n", extra);
-#endif     
-           stack.top2 -= extra;
-           stack.used += extra;
-       }
-    }
-    if ( ! *dworkptr ) {
-       fprintf(stderr, "malloc fails for local dworkptr[].");
-       return (isize + dsize + n);
-    }
-       
-    return 0;
-}
-
-
-/*
- * Set up pointers for real working arrays.
- */
-void
-dSetRWork(int m, int panel_size, double *dworkptr,
-        double **dense, double **tempv)
-{
-    double zero = 0.0;
-
-    int maxsuper = sp_ienv(3),
-        rowblk   = sp_ienv(4);
-    *dense = dworkptr;
-    *tempv = *dense + panel_size*m;
-    dfill (*dense, m * panel_size, zero);
-    dfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);     
-}
-       
-/*
- * Free the working storage used by factor routines.
- */
-void dLUWorkFree(int *iwork, double *dwork, GlobalLU_t *Glu)
-{
-    if ( Glu->MemModel == SYSTEM ) {
-       SUPERLU_FREE (iwork);
-       SUPERLU_FREE (dwork);
-    } else {
-       stack.used -= (stack.size - stack.top2);
-       stack.top2 = stack.size;
-/*     dStackCompress(Glu);  */
-    }
-    
-    SUPERLU_FREE (expanders);  
-    expanders = 0;
-}
-
-/* Expand the data structures for L and U during the factorization.
- * Return value:   0 - successful return
- *               > 0 - number of bytes allocated when run out of space
- */
-int
-dLUMemXpand(int jcol,
-          int next,          /* number of elements currently in the factors */
-          MemType mem_type,  /* which type of memory to expand  */
-          int *maxlen,       /* modified - maximum length of a data structure 
*/
-          GlobalLU_t *Glu    /* modified - global LU data structures */
-          )
-{
-    void   *new_mem;
-    
-#ifdef DEBUG    
-    printf("dLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n",
-          jcol, next, *maxlen, mem_type);
-#endif    
-
-    if (mem_type == USUB) 
-       new_mem = dexpand(maxlen, mem_type, next, 1, Glu);
-    else
-       new_mem = dexpand(maxlen, mem_type, next, 0, Glu);
-    
-    if ( !new_mem ) {
-       int    nzlmax  = Glu->nzlmax;
-       int    nzumax  = Glu->nzumax;
-       int    nzlumax = Glu->nzlumax;
-       fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
-       return (dmemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
-    }
-
-    switch ( mem_type ) {
-      case LUSUP:
-       Glu->lusup   = (double *) new_mem;
-       Glu->nzlumax = *maxlen;
-       break;
-      case UCOL:
-       Glu->ucol   = (double *) new_mem;
-       Glu->nzumax = *maxlen;
-       break;
-      case LSUB:
-       Glu->lsub   = (int *) new_mem;
-       Glu->nzlmax = *maxlen;
-       break;
-      case USUB:
-       Glu->usub   = (int *) new_mem;
-       Glu->nzumax = *maxlen;
-       break;
-    }
-    
-    return 0;
-    
-}
-
-
-
-void
-copy_mem_double(int howmany, void *old, void *new)
-{
-    register int i;
-    double *dold = old;
-    double *dnew = new;
-    for (i = 0; i < howmany; i++) dnew[i] = dold[i];
-}
-
-/*
- * Expand the existing storage to accommodate more fill-ins.
- */
-void
-*dexpand (
-        int *prev_len,   /* length used from previous call */
-        MemType type,    /* which part of the memory to expand */
-        int len_to_copy, /* size of the memory to be copied to new store */
-        int keep_prev,   /* = 1: use prev_len;
-                            = 0: compute new_len to expand */
-        GlobalLU_t *Glu  /* modified - global LU data structures */
-       )
-{
-    float    EXPAND = 1.5;
-    float    alpha;
-    void     *new_mem, *old_mem;
-    int      new_len, tries, lword, extra, bytes_to_copy;
-
-    alpha = EXPAND;
-
-    if ( no_expand == 0 || keep_prev ) /* First time allocate requested */
-        new_len = *prev_len;
-    else {
-       new_len = alpha * *prev_len;
-    }
-    
-    if ( type == LSUB || type == USUB ) lword = sizeof(int);
-    else lword = sizeof(double);
-
-    if ( Glu->MemModel == SYSTEM ) {
-       new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
-       if ( no_expand != 0 ) {
-           tries = 0;
-           if ( keep_prev ) {
-               if ( !new_mem ) return (NULL);
-           } else {
-               while ( !new_mem ) {
-                   if ( ++tries > 10 ) return (NULL);
-                   alpha = Reduce(alpha);
-                   new_len = alpha * *prev_len;
-                   new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
-               }
-           }
-           if ( type == LSUB || type == USUB ) {
-               copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
-           } else {
-               copy_mem_double(len_to_copy, expanders[type].mem, new_mem);
-           }
-           SUPERLU_FREE (expanders[type].mem);
-       }
-       expanders[type].mem = (void *) new_mem;
-       
-    } else { /* MemModel == USER */
-       if ( no_expand == 0 ) {
-           new_mem = duser_malloc(new_len * lword, HEAD);
-           if ( NotDoubleAlign(new_mem) &&
-               (type == LUSUP || type == UCOL) ) {
-               old_mem = new_mem;
-               new_mem = (void *)DoubleAlign(new_mem);
-               extra = (char*)new_mem - (char*)old_mem;
-#ifdef DEBUG           
-               printf("expand(): not aligned, extra %d\n", extra);
-#endif         
-               stack.top1 += extra;
-               stack.used += extra;
-           }
-           expanders[type].mem = (void *) new_mem;
-       }
-       else {
-           tries = 0;
-           extra = (new_len - *prev_len) * lword;
-           if ( keep_prev ) {
-               if ( StackFull(extra) ) return (NULL);
-           } else {
-               while ( StackFull(extra) ) {
-                   if ( ++tries > 10 ) return (NULL);
-                   alpha = Reduce(alpha);
-                   new_len = alpha * *prev_len;
-                   extra = (new_len - *prev_len) * lword;          
-               }
-           }
-
-           if ( type != USUB ) {
-               new_mem = (void*)((char*)expanders[type + 1].mem + extra);
-               bytes_to_copy = (char*)stack.array + stack.top1
-                   - (char*)expanders[type + 1].mem;
-               user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
-
-               if ( type < USUB ) {
-                   Glu->usub = expanders[USUB].mem =
-                       (void*)((char*)expanders[USUB].mem + extra);
-               }
-               if ( type < LSUB ) {
-                   Glu->lsub = expanders[LSUB].mem =
-                       (void*)((char*)expanders[LSUB].mem + extra);
-               }
-               if ( type < UCOL ) {
-                   Glu->ucol = expanders[UCOL].mem =
-                       (void*)((char*)expanders[UCOL].mem + extra);
-               }
-               stack.top1 += extra;
-               stack.used += extra;
-               if ( type == UCOL ) {
-                   stack.top1 += extra;   /* Add same amount for USUB */
-                   stack.used += extra;
-               }
-               
-           } /* if ... */
-
-       } /* else ... */
-    }
-
-    expanders[type].size = new_len;
-    *prev_len = new_len;
-    if ( no_expand ) ++no_expand;
-    
-    return (void *) expanders[type].mem;
-    
-} /* dexpand */
-
-
-/*
- * Compress the work[] array to remove fragmentation.
- */
-void
-dStackCompress(GlobalLU_t *Glu)
-{
-    register int iword, dword, ndim;
-    char    *last, *fragment;
-    int      *ifrom, *ito;
-    double   *dfrom, *dto;
-    int      *xlsub, *lsub, *xusub, *usub, *xlusup;
-    double   *ucol, *lusup;
-    
-    iword = sizeof(int);
-    dword = sizeof(double);
-    ndim = Glu->n;
-
-    xlsub  = Glu->xlsub;
-    lsub   = Glu->lsub;
-    xusub  = Glu->xusub;
-    usub   = Glu->usub;
-    xlusup = Glu->xlusup;
-    ucol   = Glu->ucol;
-    lusup  = Glu->lusup;
-    
-    dfrom = ucol;
-    dto = (double *)((char*)lusup + xlusup[ndim] * dword);
-    copy_mem_double(xusub[ndim], dfrom, dto);
-    ucol = dto;
-
-    ifrom = lsub;
-    ito = (int *) ((char*)ucol + xusub[ndim] * iword);
-    copy_mem_int(xlsub[ndim], ifrom, ito);
-    lsub = ito;
-    
-    ifrom = usub;
-    ito = (int *) ((char*)lsub + xlsub[ndim] * iword);
-    copy_mem_int(xusub[ndim], ifrom, ito);
-    usub = ito;
-    
-    last = (char*)usub + xusub[ndim] * iword;
-    fragment = (char*) (((char*)stack.array + stack.top1) - last);
-    stack.used -= (long int) fragment;
-    stack.top1 -= (long int) fragment;
-
-    Glu->ucol = ucol;
-    Glu->lsub = lsub;
-    Glu->usub = usub;
-    
-#ifdef DEBUG
-    printf("dStackCompress: fragment %d\n", fragment);
-    /* for (last = 0; last < ndim; ++last)
-       print_lu_col("After compress:", last, 0);*/
-#endif    
-    
-}
-
-/*
- * Allocate storage for original matrix A
- */
-void
-dallocateA(int n, int nnz, double **a, int **asub, int **xa)
-{
-    *a    = (double *) doubleMalloc(nnz);
-    *asub = (int *) intMalloc(nnz);
-    *xa   = (int *) intMalloc(n+1);
-}
-
-
-double *doubleMalloc(int n)
-{
-    double *buf;
-    buf = (double *) SUPERLU_MALLOC((size_t)n * sizeof(double)); 
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC failed for buf in doubleMalloc()\n");
-    }
-    return (buf);
-}
-
-double *doubleCalloc(int n)
-{
-    double *buf;
-    register int i;
-    double zero = 0.0;
-    buf = (double *) SUPERLU_MALLOC((size_t)n * sizeof(double));
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC failed for buf in doubleCalloc()\n");
-    }
-    for (i = 0; i < n; ++i) buf[i] = zero;
-    return (buf);
-}
-
-
-int dmemory_usage(const int nzlmax, const int nzumax, 
-                 const int nzlumax, const int n)
-{
-    register int iword, dword;
-
-    iword   = sizeof(int);
-    dword   = sizeof(double);
-    
-    return (10 * n * iword +
-           nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
-
-}
diff --git a/superlu/dmyblas2.c b/superlu/dmyblas2.c
deleted file mode 100644
index c5dad344..00000000
--- a/superlu/dmyblas2.c
+++ /dev/null
@@ -1,246 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-/*
- * File name:          dmyblas2.c
- * Purpose:
- *     Level 2 BLAS operations: solves and matvec, written in C.
- * Note:
- *     This is only used when the system lacks an efficient BLAS library.
- */
-
-/*
- * Solves a dense UNIT lower triangular system. The unit lower 
- * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 
- * The solution will be returned in the rhs vector.
- */
-void dlsolve ( int ldm, int ncol, double *M, double *rhs )
-{
-    int k;
-    double x0, x1, x2, x3, x4, x5, x6, x7;
-    double *M0;
-    register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
-    register int firstcol = 0;
-
-    M0 = &M[0];
-
-    while ( firstcol < ncol - 7 ) { /* Do 8 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-      Mki2 = Mki1 + ldm + 1;
-      Mki3 = Mki2 + ldm + 1;
-      Mki4 = Mki3 + ldm + 1;
-      Mki5 = Mki4 + ldm + 1;
-      Mki6 = Mki5 + ldm + 1;
-      Mki7 = Mki6 + ldm + 1;
-
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-      x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
-      x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
-      x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-                          - x3 * *Mki3++;
-      x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-                          - x3 * *Mki3++ - x4 * *Mki4++;
-      x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-                          - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++;
-      x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-                          - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++
-                          - x6 * *Mki6++;
-
-      rhs[++firstcol] = x1;
-      rhs[++firstcol] = x2;
-      rhs[++firstcol] = x3;
-      rhs[++firstcol] = x4;
-      rhs[++firstcol] = x5;
-      rhs[++firstcol] = x6;
-      rhs[++firstcol] = x7;
-      ++firstcol;
-    
-      for (k = firstcol; k < ncol; k++)
-       rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
-                       - x2 * *Mki2++ - x3 * *Mki3++
-                        - x4 * *Mki4++ - x5 * *Mki5++
-                       - x6 * *Mki6++ - x7 * *Mki7++;
- 
-      M0 += 8 * ldm + 8;
-    }
-
-    while ( firstcol < ncol - 3 ) { /* Do 4 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-      Mki2 = Mki1 + ldm + 1;
-      Mki3 = Mki2 + ldm + 1;
-
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-      x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
-      x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
-
-      rhs[++firstcol] = x1;
-      rhs[++firstcol] = x2;
-      rhs[++firstcol] = x3;
-      ++firstcol;
-    
-      for (k = firstcol; k < ncol; k++)
-       rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
-                       - x2 * *Mki2++ - x3 * *Mki3++;
- 
-      M0 += 4 * ldm + 4;
-    }
-
-    if ( firstcol < ncol - 1 ) { /* Do 2 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-
-      rhs[++firstcol] = x1;
-      ++firstcol;
-    
-      for (k = firstcol; k < ncol; k++)
-       rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++;
- 
-    }
-    
-}
-
-/*
- * Solves a dense upper triangular system. The upper triangular matrix is
- * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
- * in the rhs vector.
- */
-void
-dusolve ( ldm, ncol, M, rhs )
-int ldm;       /* in */
-int ncol;      /* in */
-double *M;     /* in */
-double *rhs;   /* modified */
-{
-    double xj;
-    int jcol, j, irow;
-
-    jcol = ncol - 1;
-
-    for (j = 0; j < ncol; j++) {
-
-       xj = rhs[jcol] / M[jcol + jcol*ldm];            /* M(jcol, jcol) */
-       rhs[jcol] = xj;
-       
-       for (irow = 0; irow < jcol; irow++)
-           rhs[irow] -= xj * M[irow + jcol*ldm];       /* M(irow, jcol) */
-
-       jcol--;
-
-    }
-}
-
-
-/*
- * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
- * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
- */
-void dmatvec ( ldm, nrow, ncol, M, vec, Mxvec )
-
-int ldm;       /* in -- leading dimension of M */
-int nrow;      /* in */ 
-int ncol;      /* in */
-double *M;     /* in */
-double *vec;   /* in */
-double *Mxvec; /* in/out */
-
-{
-    double vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
-    double *M0;
-    register double *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
-    register int firstcol = 0;
-    int k;
-
-    M0 = &M[0];
-    while ( firstcol < ncol - 7 ) {    /* Do 8 columns */
-
-       Mki0 = M0;
-       Mki1 = Mki0 + ldm;
-        Mki2 = Mki1 + ldm;
-        Mki3 = Mki2 + ldm;
-       Mki4 = Mki3 + ldm;
-       Mki5 = Mki4 + ldm;
-       Mki6 = Mki5 + ldm;
-       Mki7 = Mki6 + ldm;
-
-       vi0 = vec[firstcol++];
-       vi1 = vec[firstcol++];
-       vi2 = vec[firstcol++];
-       vi3 = vec[firstcol++];  
-       vi4 = vec[firstcol++];
-       vi5 = vec[firstcol++];
-       vi6 = vec[firstcol++];
-       vi7 = vec[firstcol++];  
-
-       for (k = 0; k < nrow; k++) 
-           Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
-                     + vi2 * *Mki2++ + vi3 * *Mki3++ 
-                     + vi4 * *Mki4++ + vi5 * *Mki5++
-                     + vi6 * *Mki6++ + vi7 * *Mki7++;
-
-       M0 += 8 * ldm;
-    }
-
-    while ( firstcol < ncol - 3 ) {    /* Do 4 columns */
-
-       Mki0 = M0;
-       Mki1 = Mki0 + ldm;
-       Mki2 = Mki1 + ldm;
-       Mki3 = Mki2 + ldm;
-
-       vi0 = vec[firstcol++];
-       vi1 = vec[firstcol++];
-       vi2 = vec[firstcol++];
-       vi3 = vec[firstcol++];  
-       for (k = 0; k < nrow; k++) 
-           Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
-                     + vi2 * *Mki2++ + vi3 * *Mki3++ ;
-
-       M0 += 4 * ldm;
-    }
-
-    while ( firstcol < ncol ) {                /* Do 1 column */
-
-       Mki0 = M0;
-       vi0 = vec[firstcol++];
-       for (k = 0; k < nrow; k++)
-           Mxvec[k] += vi0 * *Mki0++;
-
-       M0 += ldm;
-    }
-       
-}
-
diff --git a/superlu/dpanel_bmod.c b/superlu/dpanel_bmod.c
deleted file mode 100644
index 0e1cc00a..00000000
--- a/superlu/dpanel_bmod.c
+++ /dev/null
@@ -1,449 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_ddefs.h"
-extern void dtrsv_();
-extern void dgemv_();
-
-/* 
- * Function prototypes 
- */
-void dlsolve(int, int, double *, double *);
-void dmatvec(int, int, int, double *, double *, double *);
-extern void dcheck_tempv();
-
-void
-dpanel_bmod (
-           const int  m,          /* in - number of rows in the matrix */
-           const int  w,          /* in */
-           const int  jcol,       /* in */
-           const int  nseg,       /* in */
-           double     *dense,     /* out, of size n by w */
-           double     *tempv,     /* working array */
-           int        *segrep,    /* in */
-           int        *repfnz,    /* in, of size n by w */
-           GlobalLU_t *Glu,       /* modified */
-           SuperLUStat_t *stat    /* output */
-           )
-{
-/* 
- * Purpose
- * =======
- *
- *    Performs numeric block updates (sup-panel) in topological order.
- *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
- *    Special processing on the supernodal portion of L\U[*,j]
- *
- *    Before entering this routine, the original nonzeros in the panel 
- *    were already copied into the spa[m,w].
- *
- *    Updated/Output parameters-
- *     dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned 
- *      collectively in the m-by-w vector dense[*]. 
- *
- */
-
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-         ftcs2 = _cptofcd("N", strlen("N")),
-         ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    int          incx = 1, incy = 1;
-    double       alpha, beta;
-#endif
-
-    register int k, ksub;
-    int          fsupc, nsupc, nsupr, nrow;
-    int          krep, krep_ind;
-    double       ukj, ukj1, ukj2;
-    int          luptr, luptr1, luptr2;
-    int          segsze;
-    int          block_nrow;  /* no of rows in a block row */
-    register int lptr;       /* Points to the row subscripts of a supernode */
-    int          kfnz, irow, no_zeros; 
-    register int isub, isub1, i;
-    register int jj;         /* Index through each column in the panel */
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    double       *lusup;
-    int          *xlusup;
-    int          *repfnz_col; /* repfnz[] for a column in the panel */
-    double       *dense_col;  /* dense[] for a column in the panel */
-    double       *tempv1;             /* Used in 1-D update */
-    double       *TriTmp, *MatvecTmp; /* used in 2-D update */
-    double      zero = 0.0;
-    double      one = 1.0;
-    register int ldaTmp;
-    register int r_ind, r_hi;
-    static   int first = 1, maxsuper, rowblk, colblk;
-    flops_t  *ops = stat->ops;
-    
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    
-    if ( first ) {
-       maxsuper = sp_ienv(3);
-       rowblk   = sp_ienv(4);
-       colblk   = sp_ienv(5);
-       first = 0;
-    }
-    ldaTmp = maxsuper + rowblk;
-
-    /* 
-     * For each nonz supernode segment of U[*,j] in topological order 
-     */
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */
-
-       /* krep = representative of current k-th supernode
-        * fsupc = first supernodal column
-        * nsupc = no of columns in a supernode
-        * nsupr = no of rows in a supernode
-        */
-        krep = segrep[k--];
-       fsupc = xsup[supno[krep]];
-       nsupc = krep - fsupc + 1;
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];
-       nrow = nsupr - nsupc;
-       lptr = xlsub[fsupc];
-       krep_ind = lptr + nsupc - 1;
-
-       repfnz_col = repfnz;
-       dense_col = dense;
-       
-       if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */
-
-           TriTmp = tempv;
-       
-           /* Sequence through each column in panel -- triangular solves */
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {
-
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
-           
-               segsze = krep - kfnz + 1;
-               luptr = xlusup[fsupc];
-
-               ops[TRSV] += segsze * (segsze - 1);
-               ops[GEMV] += 2 * nrow * segsze;
-       
-               /* Case 1: Update U-segment of size 1 -- col-col update */
-               if ( segsze == 1 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc;
-
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
-                       irow = lsub[i];
-                       dense_col[irow] -= ukj * lusup[luptr];
-                       ++luptr;
-                   }
-
-               } else if ( segsze <= 3 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   ukj1 = dense_col[lsub[krep_ind - 1]];
-                   luptr += nsupr*(nsupc-1) + nsupc-1;
-                   luptr1 = luptr - nsupr;
-
-                   if ( segsze == 2 ) {
-                       ukj -= ukj1 * lusup[luptr1];
-                       dense_col[lsub[krep_ind]] = ukj;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           luptr++; luptr1++;
-                           dense_col[irow] -= (ukj*lusup[luptr]
-                                               + ukj1*lusup[luptr1]);
-                       }
-                   } else {
-                       ukj2 = dense_col[lsub[krep_ind - 2]];
-                       luptr2 = luptr1 - nsupr;
-                       ukj1 -= ukj2 * lusup[luptr2-1];
-                       ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
-                       dense_col[lsub[krep_ind]] = ukj;
-                       dense_col[lsub[krep_ind-1]] = ukj1;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           luptr++; luptr1++; luptr2++;
-                           dense_col[irow] -= ( ukj*lusup[luptr]
-                             + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
-                       }
-                   }
-
-               } else  {       /* segsze >= 4 */
-                   
-                   /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
-                      holds the result of triangular solves.    */
-                   no_zeros = kfnz - fsupc;
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; ++i) {
-                       irow = lsub[isub];
-                       TriTmp[i] = dense_col[irow]; /* Gather */
-                       ++isub;
-                   }
-                   
-                   /* start effective triangle */
-                   luptr += nsupr * no_zeros + no_zeros;
-
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-                   STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                          &nsupr, TriTmp, &incx );
-#else
-                   dtrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                          &nsupr, TriTmp, &incx );
-#endif
-#else          
-                   dlsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
-#endif
-                   
-
-               } /* else ... */
-           
-           }  /* for jj ... end tri-solves */
-
-           /* Block row updates; push all the way into dense[*] block */
-           for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {
-               
-               r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
-               block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
-               luptr = xlusup[fsupc] + nsupc + r_ind;
-               isub1 = lptr + nsupc + r_ind;
-               
-               repfnz_col = repfnz;
-               TriTmp = tempv;
-               dense_col = dense;
-               
-               /* Sequence through each column in panel -- matrix-vector */
-               for (jj = jcol; jj < jcol + w; jj++,
-                    repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
-                   
-                   kfnz = repfnz_col[krep];
-                   if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
-                   
-                   segsze = krep - kfnz + 1;
-                   if ( segsze <= 3 ) continue;   /* skip unrolled cases */
-                   
-                   /* Perform a block update, and scatter the result of
-                      matrix-vector to dense[].                 */
-                   no_zeros = kfnz - fsupc;
-                   luptr1 = luptr + nsupr * no_zeros;
-                   MatvecTmp = &TriTmp[maxsuper];
-                   
-#ifdef USE_VENDOR_BLAS
-                   alpha = one; 
-                    beta = zero;
-#ifdef _CRAY
-                   SGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1], 
-                          &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
-#else
-                   dgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1], 
-                          &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
-#endif
-#else
-                   dmatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
-                          TriTmp, MatvecTmp);
-#endif
-                   
-                   /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
-                    * such that MatvecTmp[*] can be re-used for the
-                    * the next blok row update. dense[] will be copied into 
-                    * global store after the whole panel has been finished.
-                    */
-                   isub = isub1;
-                   for (i = 0; i < block_nrow; i++) {
-                       irow = lsub[isub];
-                       dense_col[irow] -= MatvecTmp[i];
-                       MatvecTmp[i] = zero;
-                       ++isub;
-                   }
-                   
-               } /* for jj ... */
-               
-           } /* for each block row ... */
-           
-           /* Scatter the triangular solves into SPA dense[*] */
-           repfnz_col = repfnz;
-           TriTmp = tempv;
-           dense_col = dense;
-           
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
-               
-               segsze = krep - kfnz + 1;
-               if ( segsze <= 3 ) continue; /* skip unrolled cases */
-               
-               no_zeros = kfnz - fsupc;                
-               isub = lptr + no_zeros;
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   dense_col[irow] = TriTmp[i];
-                   TriTmp[i] = zero;
-                   ++isub;
-               }
-               
-           } /* for jj ... */
-           
-       } else { /* 1-D block modification */
-           
-           
-           /* Sequence through each column in the panel */
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m) {
-               
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
-               
-               segsze = krep - kfnz + 1;
-               luptr = xlusup[fsupc];
-
-               ops[TRSV] += segsze * (segsze - 1);
-               ops[GEMV] += 2 * nrow * segsze;
-               
-               /* Case 1: Update U-segment of size 1 -- col-col update */
-               if ( segsze == 1 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc;
-
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
-                       irow = lsub[i];
-                       dense_col[irow] -= ukj * lusup[luptr];
-                       ++luptr;
-                   }
-
-               } else if ( segsze <= 3 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc-1;
-                   ukj1 = dense_col[lsub[krep_ind - 1]];
-                   luptr1 = luptr - nsupr;
-
-                   if ( segsze == 2 ) {
-                       ukj -= ukj1 * lusup[luptr1];
-                       dense_col[lsub[krep_ind]] = ukj;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           ++luptr;  ++luptr1;
-                           dense_col[irow] -= (ukj*lusup[luptr]
-                                               + ukj1*lusup[luptr1]);
-                       }
-                   } else {
-                       ukj2 = dense_col[lsub[krep_ind - 2]];
-                       luptr2 = luptr1 - nsupr;
-                       ukj1 -= ukj2 * lusup[luptr2-1];
-                       ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
-                       dense_col[lsub[krep_ind]] = ukj;
-                       dense_col[lsub[krep_ind-1]] = ukj1;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           ++luptr; ++luptr1; ++luptr2;
-                           dense_col[irow] -= ( ukj*lusup[luptr]
-                             + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
-                       }
-                   }
-
-               } else  { /* segsze >= 4 */
-                   /* 
-                    * Perform a triangular solve and block update,
-                    * then scatter the result of sup-col update to dense[].
-                    */
-                   no_zeros = kfnz - fsupc;
-                   
-                   /* Copy U[*,j] segment from dense[*] to tempv[*]: 
-                    *    The result of triangular solve is in tempv[*];
-                    *    The result of matrix vector update is in dense_col[*]
-                    */
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; ++i) {
-                       irow = lsub[isub];
-                       tempv[i] = dense_col[irow]; /* Gather */
-                       ++isub;
-                   }
-                   
-                   /* start effective triangle */
-                   luptr += nsupr * no_zeros + no_zeros;
-                   
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-                   STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                          &nsupr, tempv, &incx );
-#else
-                   dtrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                          &nsupr, tempv, &incx );
-#endif
-                   
-                   luptr += segsze;    /* Dense matrix-vector */
-                   tempv1 = &tempv[segsze];
-                    alpha = one;
-                    beta = zero;
-#ifdef _CRAY
-                   SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
-                          &nsupr, tempv, &incx, &beta, tempv1, &incy );
-#else
-                   dgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
-                          &nsupr, tempv, &incx, &beta, tempv1, &incy );
-#endif
-#else
-                   dlsolve ( nsupr, segsze, &lusup[luptr], tempv );
-                   
-                   luptr += segsze;        /* Dense matrix-vector */
-                   tempv1 = &tempv[segsze];
-                   dmatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
-#endif
-                   
-                   /* Scatter tempv[*] into SPA dense[*] temporarily, such
-                    * that tempv[*] can be used for the triangular solve of
-                    * the next column of the panel. They will be copied into 
-                    * ucol[*] after the whole panel has been finished.
-                    */
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; i++) {
-                       irow = lsub[isub];
-                       dense_col[irow] = tempv[i];
-                       tempv[i] = zero;
-                       isub++;
-                   }
-                   
-                   /* Scatter the update from tempv1[*] into SPA dense[*] */
-                   /* Start dense rectangular L */
-                   for (i = 0; i < nrow; i++) {
-                       irow = lsub[isub];
-                       dense_col[irow] -= tempv1[i];
-                       tempv1[i] = zero;
-                       ++isub; 
-                   }
-                   
-               } /* else segsze>=4 ... */
-               
-           } /* for each column in the panel... */
-           
-       } /* else 1-D update ... */
-
-    } /* for each updating supernode ... */
-
-}
-
-
-
diff --git a/superlu/dpanel_dfs.c b/superlu/dpanel_dfs.c
deleted file mode 100644
index 75783bc5..00000000
--- a/superlu/dpanel_dfs.c
+++ /dev/null
@@ -1,256 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_ddefs.h"
-
-void
-dpanel_dfs (
-          const int  m,           /* in - number of rows in the matrix */
-          const int  w,           /* in */
-          const int  jcol,        /* in */
-          SuperMatrix *A,       /* in - original matrix */
-          int        *perm_r,     /* in */
-          int        *nseg,       /* out */
-          double     *dense,      /* out */
-          int        *panel_lsub, /* out */
-          int        *segrep,     /* out */
-          int        *repfnz,     /* out */
-          int        *xprune,     /* out */
-          int        *marker,     /* out */     
-          int        *parent,     /* working array */
-          int        *xplore,     /* working array */
-          GlobalLU_t *Glu         /* modified */
-          )
-{
-/*
- * Purpose
- * =======
- *
- *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
- *
- *   A supernode representative is the last column of a supernode.
- *   The nonzeros in U[*,j] are segments that end at supernodal
- *   representatives.
- *
- *   The routine returns one list of the supernodal representatives
- *   in topological order of the dfs that generates them. This list is
- *   a superset of the topological order of each individual column within
- *   the panel. 
- *   The location of the first nonzero in each supernodal segment
- *   (supernodal entry location) is also returned. Each column has a 
- *   separate list for this purpose.
- *
- *   Two marker arrays are used for dfs:
- *     marker[i] == jj, if i was visited during dfs of current column jj;
- *     marker1[i] >= jcol, if i was visited by earlier columns in this panel;
- *
- *   marker: A-row --> A-row/col (0/1)
- *   repfnz: SuperA-col --> PA-row
- *   parent: SuperA-col --> SuperA-col
- *   xplore: SuperA-col --> index to L-structure
- *
- */
-    NCPformat *Astore;
-    double    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int              krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
-    int       k, krow, kmark, kperm;
-    int       xdfs, maxdfs, kpar;
-    int       jj;         /* index through each column in the panel */
-    int       *marker1;           /* marker1[jj] >= jcol if vertex jj was 
visited 
-                             by a previous column within this panel.   */
-    int       *repfnz_col; /* start of each column in the panel */
-    double    *dense_col;  /* start of each column in the panel */
-    int       nextl_col;   /* next available position in panel_lsub[*,jj] */
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-
-    /* Initialize pointers */
-    Astore     = A->Store;
-    a          = Astore->nzval;
-    asub       = Astore->rowind;
-    xa_begin   = Astore->colbeg;
-    xa_end     = Astore->colend;
-    marker1    = marker + m;
-    repfnz_col = repfnz;
-    dense_col  = dense;
-    *nseg      = 0;
-    xsup       = Glu->xsup;
-    supno      = Glu->supno;
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-
-    /* For each column in the panel */
-    for (jj = jcol; jj < jcol + w; jj++) {
-       nextl_col = (jj - jcol) * m;
-
-#ifdef CHK_DFS
-       printf("\npanel col %d: ", jj);
-#endif
-
-       /* For each nonz in A[*,jj] do dfs */
-       for (k = xa_begin[jj]; k < xa_end[jj]; k++) {
-           krow = asub[k];
-            dense_col[krow] = a[k];
-           kmark = marker[krow];       
-           if ( kmark == jj ) 
-               continue;     /* krow visited before, go to the next nonzero */
-
-           /* For each unmarked nbr krow of jj
-            * krow is in L: place it in structure of L[*,jj]
-            */
-           marker[krow] = jj;
-           kperm = perm_r[krow];
-           
-           if ( kperm == EMPTY ) {
-               panel_lsub[nextl_col++] = krow; /* krow is indexed into A */
-           }
-           /* 
-            * krow is in U: if its supernode-rep krep
-            * has been explored, update repfnz[*]
-            */
-           else {
-               
-               krep = xsup[supno[kperm]+1] - 1;
-               myfnz = repfnz_col[krep];
-               
-#ifdef CHK_DFS
-               printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, 
kperm);
-#endif
-               if ( myfnz != EMPTY ) { /* Representative visited before */
-                   if ( myfnz > kperm ) repfnz_col[krep] = kperm;
-                   /* continue; */
-               }
-               else {
-                   /* Otherwise, perform dfs starting at krep */
-                   oldrep = EMPTY;
-                   parent[krep] = oldrep;
-                   repfnz_col[krep] = kperm;
-                   xdfs = xlsub[krep];
-                   maxdfs = xprune[krep];
-                   
-#ifdef CHK_DFS 
-                   printf("  xdfs %d, maxdfs %d: ", xdfs, maxdfs);
-                   for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                   printf("\n");
-#endif
-                   do {
-                       /* 
-                        * For each unmarked kchild of krep 
-                        */
-                       while ( xdfs < maxdfs ) {
-                           
-                           kchild = lsub[xdfs];
-                           xdfs++;
-                           chmark = marker[kchild];
-                           
-                           if ( chmark != jj ) { /* Not reached yet */
-                               marker[kchild] = jj;
-                               chperm = perm_r[kchild];
-                             
-                               /* Case kchild is in L: place it in L[*,j] */
-                               if ( chperm == EMPTY ) {
-                                   panel_lsub[nextl_col++] = kchild;
-                               } 
-                               /* Case kchild is in U: 
-                                *   chrep = its supernode-rep. If its rep has 
-                                *   been explored, update its repfnz[*]
-                                */
-                               else {
-                                   
-                                   chrep = xsup[supno[chperm]+1] - 1;
-                                   myfnz = repfnz_col[chrep];
-#ifdef CHK_DFS
-                                   printf("chrep %d,myfnz %d,perm_r[%d] 
%d\n",chrep,myfnz,kchild,chperm);
-#endif
-                                   if ( myfnz != EMPTY ) { /* Visited before */
-                                       if ( myfnz > chperm )
-                                           repfnz_col[chrep] = chperm;
-                                   }
-                                   else {
-                                       /* Cont. dfs at snode-rep of kchild */
-                                       xplore[krep] = xdfs;    
-                                       oldrep = krep;
-                                       krep = chrep; /* Go deeper down G(L) */
-                                       parent[krep] = oldrep;
-                                       repfnz_col[krep] = chperm;
-                                       xdfs = xlsub[krep];     
-                                       maxdfs = xprune[krep];
-#ifdef CHK_DFS 
-                                       printf("  xdfs %d, maxdfs %d: ", xdfs, 
maxdfs);
-                                       for (i = xdfs; i < maxdfs; i++) 
printf(" %d", lsub[i]); 
-                                       printf("\n");
-#endif
-                                   } /* else */
-                                 
-                               } /* else */
-                             
-                           } /* if... */
-                           
-                       } /* while xdfs < maxdfs */
-                       
-                       /* krow has no more unexplored nbrs:
-                        *    Place snode-rep krep in postorder DFS, if this 
-                        *    segment is seen for the first time. (Note that
-                        *    "repfnz[krep]" may change later.)
-                        *    Backtrack dfs to its parent.
-                        */
-                       if ( marker1[krep] < jcol ) {
-                           segrep[*nseg] = krep;
-                           ++(*nseg);
-                           marker1[krep] = jj;
-                       }
-                       
-                       kpar = parent[krep]; /* Pop stack, mimic recursion */
-                       if ( kpar == EMPTY ) break; /* dfs done */
-                       krep = kpar;
-                       xdfs = xplore[krep];
-                       maxdfs = xprune[krep];
-                       
-#ifdef CHK_DFS 
-                       printf("  pop stack: krep %d,xdfs %d,maxdfs %d: ", 
krep,xdfs,maxdfs);
-                       for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                       printf("\n");
-#endif
-                   } while ( kpar != EMPTY ); /* do-while - until empty stack 
*/
-                   
-               } /* else */
-               
-           } /* else */
-           
-       } /* for each nonz in A[*,jj] */
-       
-       repfnz_col += m;    /* Move to next column */
-        dense_col += m;
-       
-    } /* for jj ... */
-    
-}
diff --git a/superlu/dpivotL.c b/superlu/dpivotL.c
deleted file mode 100644
index 6ba206b1..00000000
--- a/superlu/dpivotL.c
+++ /dev/null
@@ -1,170 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include "slu_ddefs.h"
-
-#undef DEBUG
-
-int
-dpivotL(
-        const int  jcol,     /* in */
-        const double u,      /* in - diagonal pivoting threshold */
-        int        *usepr,   /* re-use the pivot sequence given by 
perm_r/iperm_r */
-        int        *perm_r,  /* may be modified */
-        int        *iperm_r, /* in - inverse of perm_r */
-        int        *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */
-        int        *pivrow,  /* out */
-        GlobalLU_t *Glu,     /* modified - global LU data structures */
-       SuperLUStat_t *stat  /* output */
-       )
-{
-/*
- * Purpose
- * =======
- *   Performs the numerical pivoting on the current column of L,
- *   and the CDIV operation.
- *
- *   Pivot policy:
- *   (1) Compute thresh = u * max_(i>=j) abs(A_ij);
- *   (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
- *           pivot row = k;
- *       ELSE IF abs(A_jj) >= thresh THEN
- *           pivot row = j;
- *       ELSE
- *           pivot row = m;
- * 
- *   Note: If you absolutely want to use a given pivot order, then set u=0.0.
- *
- *   Return value: 0      success;
- *                 i > 0  U(i,i) is exactly zero.
- *
- */
-    int          fsupc;            /* first column in the supernode */
-    int          nsupc;            /* no of columns in the supernode */
-    int          nsupr;     /* no of rows in the supernode */
-    int          lptr;     /* points to the starting subscript of the 
supernode */
-    int          pivptr, old_pivptr, diag, diagind;
-    double       pivmax, rtemp, thresh;
-    double       temp;
-    double       *lu_sup_ptr; 
-    double       *lu_col_ptr;
-    int          *lsub_ptr;
-    int          isub, icol, k, itemp;
-    int          *lsub, *xlsub;
-    double       *lusup;
-    int          *xlusup;
-    flops_t      *ops = stat->ops;
-
-    /* Initialize pointers */
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    lusup      = Glu->lusup;
-    xlusup     = Glu->xlusup;
-    fsupc      = (Glu->xsup)[(Glu->supno)[jcol]];
-    nsupc      = jcol - fsupc;         /* excluding jcol; nsupc >= 0 */
-    lptr       = xlsub[fsupc];
-    nsupr      = xlsub[fsupc+1] - lptr;
-    lu_sup_ptr = &lusup[xlusup[fsupc]];        /* start of the current 
supernode */
-    lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */
-    lsub_ptr   = &lsub[lptr];  /* start of row indices of the supernode */
-
-#ifdef DEBUG
-if ( jcol == MIN_COL ) {
-    printf("Before cdiv: col %d\n", jcol);
-    for (k = nsupc; k < nsupr; k++) 
-       printf("  lu[%d] %f\n", lsub_ptr[k], lu_col_ptr[k]);
-}
-#endif
-    
-    /* Determine the largest abs numerical value for partial pivoting;
-       Also search for user-specified pivot, and diagonal element. */
-    if ( *usepr ) *pivrow = iperm_r[jcol];
-    diagind = iperm_c[jcol];
-    pivmax = 0.0;
-    pivptr = nsupc;
-    diag = EMPTY;
-    old_pivptr = nsupc;
-    for (isub = nsupc; isub < nsupr; ++isub) {
-       rtemp = fabs (lu_col_ptr[isub]);
-       if ( rtemp > pivmax ) {
-           pivmax = rtemp;
-           pivptr = isub;
-       }
-       if ( *usepr && lsub_ptr[isub] == *pivrow ) old_pivptr = isub;
-       if ( lsub_ptr[isub] == diagind ) diag = isub;
-    }
-
-    /* Test for singularity */
-    if ( pivmax == 0.0 ) {
-       *pivrow = lsub_ptr[pivptr];
-       perm_r[*pivrow] = jcol;
-       *usepr = 0;
-       return (jcol+1);
-    }
-
-    thresh = u * pivmax;
-    
-    /* Choose appropriate pivotal element by our policy. */
-    if ( *usepr ) {
-        rtemp = fabs (lu_col_ptr[old_pivptr]);
-       if ( rtemp != 0.0 && rtemp >= thresh )
-           pivptr = old_pivptr;
-       else
-           *usepr = 0;
-    }
-    if ( *usepr == 0 ) {
-       /* Use diagonal pivot? */
-       if ( diag >= 0 ) { /* diagonal exists */
-           rtemp = fabs (lu_col_ptr[diag]);
-           if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
-        }
-       *pivrow = lsub_ptr[pivptr];
-    }
-    
-    /* Record pivot row */
-    perm_r[*pivrow] = jcol;
-    
-    /* Interchange row subscripts */
-    if ( pivptr != nsupc ) {
-       itemp = lsub_ptr[pivptr];
-       lsub_ptr[pivptr] = lsub_ptr[nsupc];
-       lsub_ptr[nsupc] = itemp;
-
-       /* Interchange numerical values as well, for the whole snode, such 
-        * that L is indexed the same way as A.
-        */
-       for (icol = 0; icol <= nsupc; icol++) {
-           itemp = pivptr + icol * nsupr;
-           temp = lu_sup_ptr[itemp];
-           lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
-           lu_sup_ptr[nsupc + icol*nsupr] = temp;
-       }
-    } /* if */
-
-    /* cdiv operation */
-    ops[FACT] += nsupr - nsupc;
-
-    temp = 1.0 / lu_col_ptr[nsupc];
-    for (k = nsupc+1; k < nsupr; k++) 
-       lu_col_ptr[k] *= temp;
-
-    return 0;
-}
-
diff --git a/superlu/dpivotgrowth.c b/superlu/dpivotgrowth.c
deleted file mode 100644
index 6e641199..00000000
--- a/superlu/dpivotgrowth.c
+++ /dev/null
@@ -1,129 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include <math.h>
-#include "slu_ddefs.h"
-
-double
-dPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, 
-             SuperMatrix *L, SuperMatrix *U)
-{
-/*
- * Purpose
- * =======
- *
- * Compute the reciprocal pivot growth factor of the leading ncols columns
- * of the matrix, using the formula:
- *     min_j ( max_i(abs(A_ij)) / max_i(abs(U_ij)) )
- *
- * Arguments
- * =========
- *
- * ncols    (input) int
- *          The number of columns of matrices A, L and U.
- *
- * A        (input) SuperMatrix*
- *         Original matrix A, permuted by columns, of dimension
- *          (A->nrow, A->ncol). The type of A can be:
- *          Stype = NC; Dtype = SLU_D; Mtype = GE.
- *
- * L        (output) SuperMatrix*
- *          The factor L from the factorization Pr*A=L*U; use compressed row 
- *          subscripts storage for supernodes, i.e., L has type: 
- *          Stype = SC; Dtype = SLU_D; Mtype = TRLU.
- *
- * U        (output) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *          storage scheme, i.e., U has types: Stype = NC;
- *          Dtype = SLU_D; Mtype = TRU.
- *
- */
-    NCformat *Astore;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    double  *Aval, *Lval, *Uval;
-    int      fsupc, nsupr, luptr, nz_in_U;
-    int      i, j, k, oldcol;
-    int      *inv_perm_c;
-    double   rpg, maxaj, maxuj;
-    extern   double dlamch_(char *);
-    double   smlnum;
-    double   *luval;
-   
-    /* Get machine constants. */
-    smlnum = dlamch_("S");
-    rpg = 1. / smlnum;
-
-    Astore = A->Store;
-    Lstore = L->Store;
-    Ustore = U->Store;
-    Aval = Astore->nzval;
-    Lval = Lstore->nzval;
-    Uval = Ustore->nzval;
-    
-    inv_perm_c = (int *) SUPERLU_MALLOC(A->ncol*sizeof(int));
-    for (j = 0; j < A->ncol; ++j) inv_perm_c[perm_c[j]] = j;
-
-    for (k = 0; k <= Lstore->nsuper; ++k) {
-       fsupc = L_FST_SUPC(k);
-       nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-       luptr = L_NZ_START(fsupc);
-       luval = &Lval[luptr];
-       nz_in_U = 1;
-       
-       for (j = fsupc; j < L_FST_SUPC(k+1) && j < ncols; ++j) {
-           maxaj = 0.;
-            oldcol = inv_perm_c[j];
-           for (i = Astore->colptr[oldcol]; i < Astore->colptr[oldcol+1]; ++i)
-               maxaj = SUPERLU_MAX( maxaj, fabs(Aval[i]) );
-       
-           maxuj = 0.;
-           for (i = Ustore->colptr[j]; i < Ustore->colptr[j+1]; i++)
-               maxuj = SUPERLU_MAX( maxuj, fabs(Uval[i]) );
-           
-           /* Supernode */
-           for (i = 0; i < nz_in_U; ++i)
-               maxuj = SUPERLU_MAX( maxuj, fabs(luval[i]) );
-
-           ++nz_in_U;
-           luval += nsupr;
-
-           if ( maxuj == 0. )
-               rpg = SUPERLU_MIN( rpg, 1.);
-           else
-               rpg = SUPERLU_MIN( rpg, maxaj / maxuj );
-       }
-       
-       if ( j >= ncols ) break;
-    }
-
-    SUPERLU_FREE(inv_perm_c);
-    return (rpg);
-}
diff --git a/superlu/dpruneL.c b/superlu/dpruneL.c
deleted file mode 100644
index 670d061c..00000000
--- a/superlu/dpruneL.c
+++ /dev/null
@@ -1,156 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_ddefs.h"
-
-void
-dpruneL(
-       const int  jcol,             /* in */
-       const int  *perm_r,   /* in */
-       const int  pivrow,    /* in */
-       const int  nseg,             /* in */
-       const int  *segrep,   /* in */
-       const int  *repfnz,   /* in */
-       int        *xprune,   /* out */
-       GlobalLU_t *Glu       /* modified - global LU data structures */
-       )
-{
-/*
- * Purpose
- * =======
- *   Prunes the L-structure of supernodes whose L-structure
- *   contains the current pivot row "pivrow"
- *
- */
-    double     utemp;
-    int        jsupno, irep, irep1, kmin, kmax, krow, movnum;
-    int        i, ktemp, minloc, maxloc;
-    int        do_prune; /* logical variable */
-    int        *xsup, *supno;
-    int        *lsub, *xlsub;
-    double     *lusup;
-    int        *xlusup;
-
-    xsup       = Glu->xsup;
-    supno      = Glu->supno;
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    lusup      = Glu->lusup;
-    xlusup     = Glu->xlusup;
-    
-    /*
-     * For each supernode-rep irep in U[*,j]
-     */
-    jsupno = supno[jcol];
-    for (i = 0; i < nseg; i++) {
-
-       irep = segrep[i];
-       irep1 = irep + 1;
-       do_prune = FALSE;
-
-       /* Don't prune with a zero U-segment */
-       if ( repfnz[irep] == EMPTY )
-               continue;
-
-       /* If a snode overlaps with the next panel, then the U-segment 
-        * is fragmented into two parts -- irep and irep1. We should let
-        * pruning occur at the rep-column in irep1's snode. 
-        */
-       if ( supno[irep] == supno[irep1] )      /* Don't prune */
-               continue;
-
-       /*
-        * If it has not been pruned & it has a nonz in row L[pivrow,i]
-        */
-       if ( supno[irep] != jsupno ) {
-           if ( xprune[irep] >= xlsub[irep1] ) {
-               kmin = xlsub[irep];
-               kmax = xlsub[irep1] - 1;
-               for (krow = kmin; krow <= kmax; krow++) 
-                   if ( lsub[krow] == pivrow ) {
-                       do_prune = TRUE;
-                       break;
-                   }
-           }
-           
-           if ( do_prune ) {
-
-               /* Do a quicksort-type partition
-                * movnum=TRUE means that the num values have to be exchanged.
-                */
-               movnum = FALSE;
-               if ( irep == xsup[supno[irep]] ) /* Snode of size 1 */
-                       movnum = TRUE;
-
-               while ( kmin <= kmax ) {
-
-                   if ( perm_r[lsub[kmax]] == EMPTY ) 
-                       kmax--;
-                   else if ( perm_r[lsub[kmin]] != EMPTY )
-                       kmin++;
-                   else { /* kmin below pivrow, and kmax above pivrow: 
-                           *   interchange the two subscripts
-                           */
-                       ktemp = lsub[kmin];
-                       lsub[kmin] = lsub[kmax];
-                       lsub[kmax] = ktemp;
-
-                       /* If the supernode has only one column, then we
-                        * only keep one set of subscripts. For any subscript 
-                        * interchange performed, similar interchange must be 
-                        * done on the numerical values.
-                        */
-                       if ( movnum ) {
-                           minloc = xlusup[irep] + (kmin - xlsub[irep]);
-                           maxloc = xlusup[irep] + (kmax - xlsub[irep]);
-                           utemp = lusup[minloc];
-                           lusup[minloc] = lusup[maxloc];
-                           lusup[maxloc] = utemp;
-                       }
-
-                       kmin++;
-                       kmax--;
-
-                   }
-
-               } /* while */
-
-               xprune[irep] = kmin;    /* Pruning */
-
-#ifdef CHK_PRUNE
-       printf("    After dpruneL(),using col %d:  xprune[%d] = %d\n", 
-                       jcol, irep, kmin);
-#endif
-           } /* if do_prune */
-
-       } /* if */
-
-    } /* for each U-segment... */
-}
diff --git a/superlu/dreadhb.c b/superlu/dreadhb.c
deleted file mode 100644
index fb22c543..00000000
--- a/superlu/dreadhb.c
+++ /dev/null
@@ -1,277 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_ddefs.h"
-
-
-/* Eat up the rest of the current line */
-int dDumpLine(FILE *fp)
-{
-    register int c;
-    while ((c = fgetc(fp)) != '\n') ;
-    return 0;
-}
-
-int dParseIntFormat(char *buf, int *num, int *size)
-{
-    char *tmp;
-
-    tmp = buf;
-    while (*tmp++ != '(') ;
-    sscanf(tmp, "%d", num);
-    while (*tmp != 'I' && *tmp != 'i') ++tmp;
-    ++tmp;
-    sscanf(tmp, "%d", size);
-    return 0;
-}
-
-int dParseFloatFormat(char *buf, int *num, int *size)
-{
-    char *tmp, *period;
-    
-    tmp = buf;
-    while (*tmp++ != '(') ;
-    *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
-    while (*tmp != 'E' && *tmp != 'e' && *tmp != 'D' && *tmp != 'd'
-          && *tmp != 'F' && *tmp != 'f') {
-        /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the
-           num picked up refers to P, which should be skipped. */
-        if (*tmp=='p' || *tmp=='P') {
-           ++tmp;
-           *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
-        } else {
-           ++tmp;
-        }
-    }
-    ++tmp;
-    period = tmp;
-    while (*period != '.' && *period != ')') ++period ;
-    *period = '\0';
-    *size = atoi(tmp); /*sscanf(tmp, "%2d", size);*/
-
-    return 0;
-}
-
-int dReadVector(FILE *fp, int n, int *where, int perline, int persize)
-{
-    register int i, j, item;
-    char tmp, buf[100], *dummy;
-    
-    i = 0;
-    while (i < n) {
-       dummy = fgets(buf, 100, fp);    /* read a line at a time */
-       for (j=0; j<perline && i<n; j++) {
-           tmp = buf[(j+1)*persize];     /* save the char at that place */
-           buf[(j+1)*persize] = 0;       /* null terminate */
-           item = atoi(&buf[j*persize]); 
-           buf[(j+1)*persize] = tmp;     /* recover the char at that place */
-           where[i++] = item - 1;
-       }
-    }
-
-    return 0;
-}
-
-int dReadValues(FILE *fp, int n, double *destination, int perline, int persize)
-{
-    register int i, j, k, s;
-    char tmp, buf[100], *dummy;
-    
-    i = 0;
-    while (i < n) {
-       dummy = fgets(buf, 100, fp);    /* read a line at a time */
-       for (j=0; j<perline && i<n; j++) {
-           tmp = buf[(j+1)*persize];     /* save the char at that place */
-           buf[(j+1)*persize] = 0;       /* null terminate */
-           s = j*persize;
-           for (k = 0; k < persize; ++k) /* No D_ format in C */
-               if ( buf[s+k] == 'D' || buf[s+k] == 'd' ) buf[s+k] = 'E';
-           destination[i++] = atof(&buf[s]);
-           buf[(j+1)*persize] = tmp;     /* recover the char at that place */
-       }
-    }
-
-    return 0;
-}
-
-
-
-void
-dreadhb(int *nrow, int *ncol, int *nonz,
-       double **nzval, int **rowind, int **colptr)
-{
-/* 
- * Purpose
- * =======
- * 
- * Read a DOUBLE PRECISION matrix stored in Harwell-Boeing format 
- * as described below.
- * 
- * Line 1 (A72,A8) 
- *     Col. 1 - 72   Title (TITLE) 
- *     Col. 73 - 80  Key (KEY) 
- * 
- * Line 2 (5I14) 
- *     Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
- *     Col. 15 - 28  Number of lines for pointers (PTRCRD) 
- *     Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
- *     Col. 43 - 56  Number of lines for numerical values (VALCRD) 
- *     Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
- *                    (including starting guesses and solution vectors 
- *                    if present) 
- *                   (zero indicates no right-hand side data is present) 
- *
- * Line 3 (A3, 11X, 4I14) 
- *     Col. 1 - 3    Matrix type (see below) (MXTYPE) 
- *     Col. 15 - 28  Number of rows (or variables) (NROW) 
- *     Col. 29 - 42  Number of columns (or elements) (NCOL) 
- *     Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
- *                   (equal to number of entries for assembled matrices) 
- *     Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
- *                   (zero in the case of assembled matrices) 
- * Line 4 (2A16, 2A20) 
- *     Col. 1 - 16   Format for pointers (PTRFMT) 
- *     Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
- *     Col. 33 - 52  Format for numerical values of coefficient matrix 
(VALFMT) 
- *     Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
- *
- * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
- *     Col. 1        Right-hand side type: 
- *                       F for full storage or M for same format as matrix 
- *     Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
- *     Col. 3        X if an exact solution vector(s) is supplied. 
- *     Col. 15 - 28  Number of right-hand sides (NRHS) 
- *     Col. 29 - 42  Number of row indices (NRHSIX) 
- *                   (ignored in case of unassembled matrices) 
- *
- * The three character type field on line 3 describes the matrix type. 
- * The following table lists the permitted values for each of the three 
- * characters. As an example of the type field, RSA denotes that the matrix 
- * is real, symmetric, and assembled. 
- *
- * First Character: 
- *     R Real matrix 
- *     C Complex matrix 
- *     P Pattern only (no numerical values supplied) 
- *
- * Second Character: 
- *     S Symmetric 
- *     U Unsymmetric 
- *     H Hermitian 
- *     Z Skew symmetric 
- *     R Rectangular 
- *
- * Third Character: 
- *     A Assembled 
- *     E Elemental matrices (unassembled) 
- *
- */
-
-    register int i, numer_lines = 0, rhscrd = 0;
-    int tmp, colnum, colsize, rownum, rowsize, valnum, valsize, dummy;
-    char buf[100], type[4], key[10], *dummyc;
-    FILE *fp;
-
-    fp = stdin;
-
-    /* Line 1 */
-    dummyc = fgets(buf, 100, fp);
-    fputs(buf, stdout);
-#if 0
-    dummy = fscanf(fp, "%72c", buf); buf[72] = 0;
-    printf("Title: %s", buf);
-    dummy += fscanf(fp, "%8c", key);  key[8] = 0;
-    printf("Key: %s\n", key);
-    dDumpLine(fp);
-#endif
-
-    /* Line 2 */
-    for (i=0; i<5; i++) {
-       dummy += fscanf(fp, "%14c", buf); buf[14] = 0;
-       sscanf(buf, "%d", &tmp);
-       if (i == 3) numer_lines = tmp;
-       if (i == 4 && tmp) rhscrd = tmp;
-    }
-    dDumpLine(fp);
-
-    /* Line 3 */
-    dummy += fscanf(fp, "%3c", type);
-    dummy += fscanf(fp, "%11c", buf); /* pad */
-    type[3] = 0;
-#ifdef DEBUG
-    printf("Matrix type %s\n", type);
-#endif
-    
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", nrow);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", ncol);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", nonz);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", &tmp);
-    
-    if (tmp != 0)
-         printf("This is not an assembled matrix!\n");
-    if (*nrow != *ncol)
-       printf("Matrix is not square.\n");
-    dDumpLine(fp);
-
-    /* Allocate storage for the three arrays ( nzval, rowind, colptr ) */
-    dallocateA(*ncol, *nonz, nzval, rowind, colptr);
-
-    /* Line 4: format statement */
-    dummy += fscanf(fp, "%16c", buf);
-    dParseIntFormat(buf, &colnum, &colsize);
-    dummy += fscanf(fp, "%16c", buf);
-    dParseIntFormat(buf, &rownum, &rowsize);
-    dummy += fscanf(fp, "%20c", buf);
-    dParseFloatFormat(buf, &valnum, &valsize);
-    dummy += fscanf(fp, "%20c", buf);
-    dDumpLine(fp);
-
-    /* Line 5: right-hand side */    
-    if ( rhscrd ) dDumpLine(fp); /* skip RHSFMT */
-    
-#ifdef DEBUG
-    printf("%d rows, %d nonzeros\n", *nrow, *nonz);
-    printf("colnum %d, colsize %d\n", colnum, colsize);
-    printf("rownum %d, rowsize %d\n", rownum, rowsize);
-    printf("valnum %d, valsize %d\n", valnum, valsize);
-#endif
-    
-    dReadVector(fp, *ncol+1, *colptr, colnum, colsize);
-    dReadVector(fp, *nonz, *rowind, rownum, rowsize);
-    if ( numer_lines ) {
-        dReadValues(fp, *nonz, *nzval, valnum, valsize);
-    }
-    
-    fclose(fp);
-
-}
-
diff --git a/superlu/dsnode_bmod.c b/superlu/dsnode_bmod.c
deleted file mode 100644
index e2bac68c..00000000
--- a/superlu/dsnode_bmod.c
+++ /dev/null
@@ -1,114 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include "slu_ddefs.h"
-extern void dtrsv_();
-extern void dgemv_();
-
-/*
- * Performs numeric block updates within the relaxed snode. 
- */
-int
-dsnode_bmod (
-           const int  jcol,      /* in */
-           const int  jsupno,    /* in */
-           const int  fsupc,     /* in */
-           double     *dense,    /* in */
-           double     *tempv,    /* working array */
-           GlobalLU_t *Glu,      /* modified */
-           SuperLUStat_t *stat   /* output */
-           )
-{
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-        ftcs2 = _cptofcd("N", strlen("N")),
-        ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    int            incx = 1, incy = 1;
-    double         alpha = -1.0, beta = 1.0;
-#endif
-
-    int            luptr, nsupc, nsupr, nrow;
-    int            isub, irow, i, iptr; 
-    register int   ufirst, nextlu;
-    int            *lsub, *xlsub;
-    double         *lusup;
-    int            *xlusup;
-    flops_t *ops = stat->ops;
-
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-
-    nextlu = xlusup[jcol];
-    
-    /*
-     * Process the supernodal portion of L\U[*,j]
-     */
-    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
-       irow = lsub[isub];
-       lusup[nextlu] = dense[irow];
-       dense[irow] = 0;
-       ++nextlu;
-    }
-
-    xlusup[jcol + 1] = nextlu; /* Initialize xlusup for next column */
-    
-    if ( fsupc < jcol ) {
-
-       luptr = xlusup[fsupc];
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];
-       nsupc = jcol - fsupc;   /* Excluding jcol */
-       ufirst = xlusup[jcol];  /* Points to the beginning of column
-                                  jcol in supernode L\U(jsupno). */
-       nrow = nsupr - nsupc;
-
-       ops[TRSV] += nsupc * (nsupc - 1);
-       ops[GEMV] += 2 * nrow * nsupc;
-
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-       STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, 
-             &lusup[ufirst], &incx );
-       SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
-               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-#else
-       dtrsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, 
-             &lusup[ufirst], &incx );
-       dgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
-               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-#endif
-#else
-       dlsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
-       dmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 
-                       &lusup[ufirst], &tempv[0] );
-
-        /* Scatter tempv[*] into lusup[*] */
-       iptr = ufirst + nsupc;
-       for (i = 0; i < nrow; i++) {
-           lusup[iptr++] -= tempv[i];
-           tempv[i] = 0.0;
-       }
-#endif
-
-    }
-
-    return 0;
-}
diff --git a/superlu/dsnode_dfs.c b/superlu/dsnode_dfs.c
deleted file mode 100644
index d1c3c483..00000000
--- a/superlu/dsnode_dfs.c
+++ /dev/null
@@ -1,113 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_ddefs.h"
-
-int
-dsnode_dfs (
-          const int  jcol,         /* in - start of the supernode */
-          const int  kcol,         /* in - end of the supernode */
-          const int  *asub,        /* in */
-          const int  *xa_begin,    /* in */
-          const int  *xa_end,      /* in */
-          int        *xprune,      /* out */
-          int        *marker,      /* modified */
-          GlobalLU_t *Glu          /* modified */
-          )
-{
-/* Purpose
- * =======
- *    dsnode_dfs() - Determine the union of the row structures of those 
- *    columns within the relaxed snode.
- *    Note: The relaxed snodes are leaves of the supernodal etree, therefore, 
- *    the portion outside the rectangular supernode must be zero.
- *
- * Return value
- * ============
- *     0   success;
- *    >0   number of bytes allocated when run out of memory.
- *
- */
-    register int i, k, ifrom, ito, nextl, new_next;
-    int          nsuper, krow, kmark, mem_error;
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    int          nzlmax;
-    
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    nzlmax  = Glu->nzlmax;
-
-    nsuper = ++supno[jcol];    /* Next available supernode number */
-    nextl = xlsub[jcol];
-
-    for (i = jcol; i <= kcol; i++) {
-       /* For each nonzero in A[*,i] */
-       for (k = xa_begin[i]; k < xa_end[i]; k++) {     
-           krow = asub[k];
-           kmark = marker[krow];
-           if ( kmark != kcol ) { /* First time visit krow */
-               marker[krow] = kcol;
-               lsub[nextl++] = krow;
-               if ( nextl >= nzlmax ) {
-                   if ( mem_error = dLUMemXpand(jcol, nextl, LSUB, &nzlmax, 
Glu) )
-                       return (mem_error);
-                   lsub = Glu->lsub;
-               }
-           }
-       }
-       supno[i] = nsuper;
-    }
-
-    /* Supernode > 1, then make a copy of the subscripts for pruning */
-    if ( jcol < kcol ) {
-       new_next = nextl + (nextl - xlsub[jcol]);
-       while ( new_next > nzlmax ) {
-           if ( mem_error = dLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-               return (mem_error);
-           lsub = Glu->lsub;
-       }
-       ito = nextl;
-       for (ifrom = xlsub[jcol]; ifrom < nextl; )
-           lsub[ito++] = lsub[ifrom++];        
-        for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl;
-       nextl = ito;
-    }
-
-    xsup[nsuper+1] = kcol + 1;
-    supno[kcol+1]  = nsuper;
-    xprune[kcol]   = nextl;
-    xlsub[kcol+1]  = nextl;
-
-    return 0;
-}
-
diff --git a/superlu/dsp_blas2.c b/superlu/dsp_blas2.c
deleted file mode 100644
index 5ef8c228..00000000
--- a/superlu/dsp_blas2.c
+++ /dev/null
@@ -1,498 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-/*
- * File name:          dsp_blas2.c
- * Purpose:            Sparse BLAS 2, using some dense BLAS 2 operations.
- */
-
-#include "slu_ddefs.h"
-extern void dtrsv_();
-extern void dgemv_();
-
-/* 
- * Function prototypes 
- */
-void dusolve(int, int, double*, double*);
-void dlsolve(int, int, double*, double*);
-void dmatvec(int, int, int, double*, double*, double*);
-
-
-int
-sp_dtrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
-         SuperMatrix *U, double *x, SuperLUStat_t *stat, int *info)
-{
-/*
- *   Purpose
- *   =======
- *
- *   sp_dtrsv() solves one of the systems of equations   
- *       A*x = b,   or   A'*x = b,
- *   where b and x are n element vectors and A is a sparse unit , or   
- *   non-unit, upper or lower triangular matrix.   
- *   No test for singularity or near-singularity is included in this   
- *   routine. Such tests must be performed before calling this routine.   
- *
- *   Parameters   
- *   ==========   
- *
- *   uplo   - (input) char*
- *            On entry, uplo specifies whether the matrix is an upper or   
- *             lower triangular matrix as follows:   
- *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
- *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
- *
- *   trans  - (input) char*
- *             On entry, trans specifies the equations to be solved as   
- *             follows:   
- *                trans = 'N' or 'n'   A*x = b.   
- *                trans = 'T' or 't'   A'*x = b.
- *                trans = 'C' or 'c'   A'*x = b.   
- *
- *   diag   - (input) char*
- *             On entry, diag specifies whether or not A is unit   
- *             triangular as follows:   
- *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
- *                diag = 'N' or 'n'   A is not assumed to be unit   
- *                                    triangular.   
- *          
- *   L       - (input) SuperMatrix*
- *            The factor L from the factorization Pr*A*Pc=L*U. Use
- *             compressed row subscripts storage for supernodes,
- *             i.e., L has types: Stype = SC, Dtype = SLU_D, Mtype = TRLU.
- *
- *   U       - (input) SuperMatrix*
- *             The factor U from the factorization Pr*A*Pc=L*U.
- *             U has types: Stype = NC, Dtype = SLU_D, Mtype = TRU.
- *    
- *   x       - (input/output) double*
- *             Before entry, the incremented array X must contain the n   
- *             element right-hand side vector b. On exit, X is overwritten 
- *             with the solution vector x.
- *
- *   info    - (output) int*
- *             If *info = -i, the i-th argument had an illegal value.
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-        ftcs2 = _cptofcd("N", strlen("N")),
-        ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    SCformat *Lstore;
-    NCformat *Ustore;
-    double   *Lval, *Uval;
-    int incx = 1, incy = 1;
-    double alpha = 1.0, beta = 1.0;
-    int nrow;
-    int fsupc, nsupr, nsupc, luptr, istart, irow;
-    int i, k, iptr, jcol;
-    double *work;
-    flops_t solve_ops;
-
-    /* Test the input parameters */
-    *info = 0;
-    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
-    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && 
-              !lsame_(trans, "C")) *info = -2;
-    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("sp_dtrsv", &i);
-       return 0;
-    }
-
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-
-    if ( !(work = doubleCalloc(L->nrow)) )
-       ABORT("Malloc fails for work in sp_dtrsv().");
-    
-    if ( lsame_(trans, "N") ) {        /* Form x := inv(A)*x. */
-       
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := inv(L)*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               nrow = nsupr - nsupc;
-
-               solve_ops += nsupc * (nsupc - 1);
-               solve_ops += 2 * nrow * nsupc;
-
-               if ( nsupc == 1 ) {
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       x[irow] -= x[fsupc] * Lval[luptr];
-                   }
-               } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-                   STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-               
-                   SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-                       &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#else
-                   if (nsupr < nsupc) {
-                     fprintf(stderr, "BAD ARGUMENT for dtrsv: N=%d LDA=%d\n", 
nsupc, nsupr);
-                     return (*info = -10000000);
-                   }
-                   dtrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-               
-                   dgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-                       &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#endif
-#else
-                   dlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
-               
-                   dmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
-                             &x[fsupc], &work[0] );
-#endif         
-               
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; ++i, ++iptr) {
-                       irow = L_SUB(iptr);
-                       x[irow] -= work[i];     /* Scatter */
-                       work[i] = 0.0;
-
-                   }
-               }
-           } /* for k ... */
-           
-       } else {
-           /* Form x := inv(U)*x */
-           
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = Lstore->nsuper; k >= 0; k--) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               
-               solve_ops += nsupc * (nsupc + 1);
-
-               if ( nsupc == 1 ) {
-                   x[fsupc] /= Lval[luptr];
-                   for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
-                       irow = U_SUB(i);
-                       x[irow] -= x[fsupc] * Uval[i];
-                   }
-               } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-                   STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
-                      &x[fsupc], &incx);
-#else
-                   if (nsupr < nsupc) {
-                     fprintf(stderr, "BAD ARGUMENT for dtrsv: N=%d LDA=%d\n", 
nsupc, nsupr);
-                     return (*info = -10000000);
-                   }
-                   dtrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#endif
-#else          
-                   dusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
-#endif         
-
-                   for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                       solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                       for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
-                               i++) {
-                           irow = U_SUB(i);
-                           x[irow] -= x[jcol] * Uval[i];
-                       }
-                    }
-               }
-           } /* for k ... */
-           
-       }
-    } else { /* Form x := inv(A')*x */
-       
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := inv(L')*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = Lstore->nsuper; k >= 0; --k) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-
-               solve_ops += 2 * (nsupr - nsupc) * nsupc;
-
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   iptr = istart + nsupc;
-                   for (i = L_NZ_START(jcol) + nsupc; 
-                               i < L_NZ_START(jcol+1); i++) {
-                       irow = L_SUB(iptr);
-                       x[jcol] -= x[irow] * Lval[i];
-                       iptr++;
-                   }
-               }
-               
-               if ( nsupc > 1 ) {
-                   solve_ops += nsupc * (nsupc - 1);
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("L", strlen("L"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("U", strlen("U"));
-                   STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-#else
-                   if (nsupr < nsupc) {
-                     fprintf(stderr, "BAD ARGUMENT for dtrsv: N=%d LDA=%d\n", 
nsupc, nsupr);
-                     return (*info = -10000000);
-                   }
-                   dtrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-#endif
-               }
-           }
-       } else {
-           /* Form x := inv(U')*x */
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
-                       irow = U_SUB(i);
-                       x[jcol] -= x[irow] * Uval[i];
-                   }
-               }
-
-               solve_ops += nsupc * (nsupc + 1);
-
-               if ( nsupc == 1 ) {
-                   x[fsupc] /= Lval[luptr];
-               } else {
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("U", strlen("U"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("N", strlen("N"));
-                   STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#else
-                   if (nsupr < nsupc) {
-                     fprintf(stderr, "BAD ARGUMENT for dtrsv: N=%d LDA=%d\n", 
nsupc, nsupr);
-                     return (*info = -10000000);
-                   }
-                   dtrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#endif
-               }
-           } /* for k ... */
-       }
-    }
-
-    stat->ops[SOLVE] += solve_ops;
-    SUPERLU_FREE(work);
-    return 0;
-}
-
-
-
-
-int
-sp_dgemv(char *trans, double alpha, SuperMatrix *A, double *x, 
-        int incx, double beta, double *y, int incy)
-{
-/*  Purpose   
-    =======   
-
-    sp_dgemv()  performs one of the matrix-vector operations   
-       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
-    where alpha and beta are scalars, x and y are vectors and A is a
-    sparse A->nrow by A->ncol matrix.   
-
-    Parameters   
-    ==========   
-
-    TRANS  - (input) char*
-             On entry, TRANS specifies the operation to be performed as   
-             follows:   
-                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
-                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
-                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
-
-    ALPHA  - (input) double
-             On entry, ALPHA specifies the scalar alpha.   
-
-    A      - (input) SuperMatrix*
-             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
-             Currently, the type of A can be:
-                 Stype = NC or NCP; Dtype = SLU_D; Mtype = GE. 
-             In the future, more general A can be handled.
-
-    X      - (input) double*, array of DIMENSION at least   
-             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
-             Before entry, the incremented array X must contain the   
-             vector x.   
-
-    INCX   - (input) int
-             On entry, INCX specifies the increment for the elements of   
-             X. INCX must not be zero.   
-
-    BETA   - (input) double
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then Y need not be set on input.   
-
-    Y      - (output) double*,  array of DIMENSION at least   
-             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
-             Before entry with BETA non-zero, the incremented array Y   
-             must contain the vector y. On exit, Y is overwritten by the 
-             updated vector y.
-            
-    INCY   - (input) int
-             On entry, INCY specifies the increment for the elements of   
-             Y. INCY must not be zero.   
-
-    ==== Sparse Level 2 Blas routine.   
-*/
-
-    /* Local variables */
-    NCformat *Astore;
-    double   *Aval;
-    int info;
-    double temp;
-    int lenx, leny, i, j, irow;
-    int iy, jx, jy, kx, ky;
-    int notran;
-
-    notran = lsame_(trans, "N");
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    
-    /* Test the input parameters */
-    info = 0;
-    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
-    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
-    else if (incx == 0) info = 5;
-    else if (incy == 0)        info = 8;
-    if (info != 0) {
-       xerbla_("sp_dgemv ", &info);
-       return 0;
-    }
-
-    /* Quick return if possible. */
-    if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
-       return 0;
-
-    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
-       up the start points in  X  and  Y. */
-    if (lsame_(trans, "N")) {
-       lenx = A->ncol;
-       leny = A->nrow;
-    } else {
-       lenx = A->nrow;
-       leny = A->ncol;
-    }
-    if (incx > 0) kx = 0;
-    else kx =  - (lenx - 1) * incx;
-    if (incy > 0) ky = 0;
-    else ky =  - (leny - 1) * incy;
-
-    /* Start the operations. In this version the elements of A are   
-       accessed sequentially with one pass through A. */
-    /* First form  y := beta*y. */
-    if (beta != 1.) {
-       if (incy == 1) {
-           if (beta == 0.)
-               for (i = 0; i < leny; ++i) y[i] = 0.;
-           else
-               for (i = 0; i < leny; ++i) y[i] = beta * y[i];
-       } else {
-           iy = ky;
-           if (beta == 0.)
-               for (i = 0; i < leny; ++i) {
-                   y[iy] = 0.;
-                   iy += incy;
-               }
-           else
-               for (i = 0; i < leny; ++i) {
-                   y[iy] = beta * y[iy];
-                   iy += incy;
-               }
-       }
-    }
-    
-    if (alpha == 0.) return 0;
-
-    if ( notran ) {
-       /* Form  y := alpha*A*x + y. */
-       jx = kx;
-       if (incy == 1) {
-           for (j = 0; j < A->ncol; ++j) {
-               if (x[jx] != 0.) {
-                   temp = alpha * x[jx];
-                   for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                       irow = Astore->rowind[i];
-                       y[irow] += temp * Aval[i];
-                   }
-               }
-               jx += incx;
-           }
-       } else {
-           ABORT("Not implemented.");
-       }
-    } else {
-       /* Form  y := alpha*A'*x + y. */
-       jy = ky;
-       if (incx == 1) {
-           for (j = 0; j < A->ncol; ++j) {
-               temp = 0.;
-               for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                   irow = Astore->rowind[i];
-                   temp += Aval[i] * x[irow];
-               }
-               y[jy] += alpha * temp;
-               jy += incy;
-           }
-       } else {
-           ABORT("Not implemented.");
-       }
-    }
-    return 0;
-} /* sp_dgemv */
-
-
-
diff --git a/superlu/dsp_blas3.c b/superlu/dsp_blas3.c
deleted file mode 100644
index 00d200a6..00000000
--- a/superlu/dsp_blas3.c
+++ /dev/null
@@ -1,141 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-/*
- * File name:          sp_blas3.c
- * Purpose:            Sparse BLAS3, using some dense BLAS3 operations.
- */
-
-#include "slu_ddefs.h"
-
-int
-sp_dgemm(char *transa, char *transb, int m, int n, int k, 
-         double alpha, SuperMatrix *A, double *b, int ldb, 
-         double beta, double *c, int ldc)
-{
-/*  Purpose   
-    =======   
-
-    sp_d performs one of the matrix-matrix operations   
-
-       C := alpha*op( A )*op( B ) + beta*C,   
-
-    where  op( X ) is one of 
-
-       op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
-
-    alpha and beta are scalars, and A, B and C are matrices, with op( A ) 
-    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. 
-  
-
-    Parameters   
-    ==========   
-
-    TRANSA - (input) char*
-             On entry, TRANSA specifies the form of op( A ) to be used in 
-             the matrix multiplication as follows:   
-                TRANSA = 'N' or 'n',  op( A ) = A.   
-                TRANSA = 'T' or 't',  op( A ) = A'.   
-                TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).   
-             Unchanged on exit.   
-
-    TRANSB - (input) char*
-             On entry, TRANSB specifies the form of op( B ) to be used in 
-             the matrix multiplication as follows:   
-                TRANSB = 'N' or 'n',  op( B ) = B.   
-                TRANSB = 'T' or 't',  op( B ) = B'.   
-                TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).   
-             Unchanged on exit.   
-
-    M      - (input) int   
-             On entry,  M  specifies  the number of rows of the matrix 
-            op( A ) and of the matrix C.  M must be at least zero. 
-            Unchanged on exit.   
-
-    N      - (input) int
-             On entry,  N specifies the number of columns of the matrix 
-            op( B ) and the number of columns of the matrix C. N must be 
-            at least zero.
-            Unchanged on exit.   
-
-    K      - (input) int
-             On entry, K specifies the number of columns of the matrix 
-            op( A ) and the number of rows of the matrix op( B ). K must 
-            be at least  zero.   
-             Unchanged on exit.
-            
-    ALPHA  - (input) double
-             On entry, ALPHA specifies the scalar alpha.   
-
-    A      - (input) SuperMatrix*
-             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
-             Currently, the type of A can be:
-                 Stype = NC or NCP; Dtype = SLU_D; Mtype = GE. 
-             In the future, more general A can be handled.
-
-    B      - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is 
-             n when TRANSB = 'N' or 'n',  and is  k otherwise.   
-             Before entry with  TRANSB = 'N' or 'n',  the leading k by n 
-             part of the array B must contain the matrix B, otherwise 
-             the leading n by k part of the array B must contain the 
-             matrix B.   
-             Unchanged on exit.   
-
-    LDB    - (input) int
-             On entry, LDB specifies the first dimension of B as declared 
-             in the calling (sub) program. LDB must be at least max( 1, n ).  
-             Unchanged on exit.   
-
-    BETA   - (input) double
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then C need not be set on input.   
-
-    C      - DOUBLE PRECISION array of DIMENSION ( LDC, n ).   
-             Before entry, the leading m by n part of the array C must 
-             contain the matrix C,  except when beta is zero, in which 
-             case C need not be set on entry.   
-             On exit, the array C is overwritten by the m by n matrix 
-            ( alpha*op( A )*B + beta*C ).   
-
-    LDC    - (input) int
-             On entry, LDC specifies the first dimension of C as declared 
-             in the calling (sub)program. LDC must be at least max(1,m).   
-             Unchanged on exit.   
-
-    ==== Sparse Level 3 Blas routine.   
-*/
-    int    incx = 1, incy = 1;
-    int    j;
-
-    for (j = 0; j < n; ++j) {
-       sp_dgemv(transa, alpha, A, &b[ldb*j], incx, beta, &c[ldc*j], incy);
-    }
-    return 0;    
-}
diff --git a/superlu/dutil.c b/superlu/dutil.c
deleted file mode 100644
index bb4c5c4f..00000000
--- a/superlu/dutil.c
+++ /dev/null
@@ -1,479 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-
-#include <math.h>
-#include "slu_ddefs.h"
-
-void
-dCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-                      double *nzval, int *rowind, int *colptr,
-                      Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-{
-    NCformat *Astore;
-
-    A->Stype = stype;
-    A->Dtype = dtype;
-    A->Mtype = mtype;
-    A->nrow = m;
-    A->ncol = n;
-    A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) );
-    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
-    Astore = A->Store;
-    Astore->nnz = nnz;
-    Astore->nzval = nzval;
-    Astore->rowind = rowind;
-    Astore->colptr = colptr;
-}
-
-void
-dCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-                      double *nzval, int *colind, int *rowptr,
-                      Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-{
-    NRformat *Astore;
-
-    A->Stype = stype;
-    A->Dtype = dtype;
-    A->Mtype = mtype;
-    A->nrow = m;
-    A->ncol = n;
-    A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) );
-    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
-    Astore = A->Store;
-    Astore->nnz = nnz;
-    Astore->nzval = nzval;
-    Astore->colind = colind;
-    Astore->rowptr = rowptr;
-}
-
-/* Copy matrix A into matrix B. */
-void
-dCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
-{
-    NCformat *Astore, *Bstore;
-    int      ncol, nnz, i;
-
-    B->Stype = A->Stype;
-    B->Dtype = A->Dtype;
-    B->Mtype = A->Mtype;
-    B->nrow  = A->nrow;;
-    B->ncol  = ncol = A->ncol;
-    Astore   = (NCformat *) A->Store;
-    Bstore   = (NCformat *) B->Store;
-    Bstore->nnz = nnz = Astore->nnz;
-    for (i = 0; i < nnz; ++i)
-       ((double *)Bstore->nzval)[i] = ((double *)Astore->nzval)[i];
-    for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
-    for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
-}
-
-
-void
-dCreate_Dense_Matrix(SuperMatrix *X, int m, int n, double *x, int ldx,
-                   Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-{
-    DNformat    *Xstore;
-    
-    X->Stype = stype;
-    X->Dtype = dtype;
-    X->Mtype = mtype;
-    X->nrow = m;
-    X->ncol = n;
-    X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
-    if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
-    Xstore = (DNformat *) X->Store;
-    Xstore->lda = ldx;
-    Xstore->nzval = (double *) x;
-}
-
-void
-dCopy_Dense_Matrix(int M, int N, double *X, int ldx,
-                       double *Y, int ldy)
-{
-/*
- *
- *  Purpose
- *  =======
- *
- *  Copies a two-dimensional matrix X to another matrix Y.
- */
-    int    i, j;
-    
-    for (j = 0; j < N; ++j)
-        for (i = 0; i < M; ++i)
-            Y[i + j*ldy] = X[i + j*ldx];
-}
-
-void
-dCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz, 
-                       double *nzval, int *nzval_colptr, int *rowind,
-                       int *rowind_colptr, int *col_to_sup, int *sup_to_col,
-                       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-{
-    SCformat *Lstore;
-
-    L->Stype = stype;
-    L->Dtype = dtype;
-    L->Mtype = mtype;
-    L->nrow = m;
-    L->ncol = n;
-    L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) );
-    if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store");
-    Lstore = L->Store;
-    Lstore->nnz = nnz;
-    Lstore->nsuper = col_to_sup[n];
-    Lstore->nzval = nzval;
-    Lstore->nzval_colptr = nzval_colptr;
-    Lstore->rowind = rowind;
-    Lstore->rowind_colptr = rowind_colptr;
-    Lstore->col_to_sup = col_to_sup;
-    Lstore->sup_to_col = sup_to_col;
-
-}
-
-
-/*
- * Convert a row compressed storage into a column compressed storage.
- */
-void
-dCompRow_to_CompCol(int m, int n, int nnz, 
-                   double *a, int *colind, int *rowptr,
-                   double **at, int **rowind, int **colptr)
-{
-    register int i, j, col, relpos;
-    int *marker;
-
-    /* Allocate storage for another copy of the matrix. */
-    *at = (double *) doubleMalloc(nnz);
-    *rowind = (int *) intMalloc(nnz);
-    *colptr = (int *) intMalloc(n+1);
-    marker = (int *) intCalloc(n);
-    
-    /* Get counts of each column of A, and set up column pointers */
-    for (i = 0; i < m; ++i)
-       for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
-    (*colptr)[0] = 0;
-    for (j = 0; j < n; ++j) {
-       (*colptr)[j+1] = (*colptr)[j] + marker[j];
-       marker[j] = (*colptr)[j];
-    }
-
-    /* Transfer the matrix into the compressed column storage. */
-    for (i = 0; i < m; ++i) {
-       for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
-           col = colind[j];
-           relpos = marker[col];
-           (*rowind)[relpos] = i;
-           (*at)[relpos] = a[j];
-           ++marker[col];
-       }
-    }
-
-    SUPERLU_FREE(marker);
-}
-
-
-void
-dPrint_CompCol_Matrix(char *what, SuperMatrix *A)
-{
-    NCformat     *Astore;
-    register int i,n;
-    double       *dp;
-    
-    printf("\nCompCol matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    n = A->ncol;
-    Astore = (NCformat *) A->Store;
-    dp = (double *) Astore->nzval;
-    printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
-    printf("nzval: ");
-    for (i = 0; i < Astore->colptr[n]; ++i) printf("%f  ", dp[i]);
-    printf("\nrowind: ");
-    for (i = 0; i < Astore->colptr[n]; ++i) printf("%d  ", Astore->rowind[i]);
-    printf("\ncolptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->colptr[i]);
-    printf("\n");
-    fflush(stdout);
-}
-
-void
-dPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
-{
-    SCformat     *Astore;
-    register int i, j, k, c, d, n, nsup;
-    double       *dp;
-    int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
-    
-    printf("\nSuperNode matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    n = A->ncol;
-    Astore = (SCformat *) A->Store;
-    dp = (double *) Astore->nzval;
-    col_to_sup = Astore->col_to_sup;
-    sup_to_col = Astore->sup_to_col;
-    rowind_colptr = Astore->rowind_colptr;
-    rowind = Astore->rowind;
-    printf("nrow %d, ncol %d, nnz %d, nsuper %d\n", 
-          A->nrow,A->ncol,Astore->nnz,Astore->nsuper);
-    printf("nzval:\n");
-    for (k = 0; k <= Astore->nsuper; ++k) {
-      c = sup_to_col[k];
-      nsup = sup_to_col[k+1] - c;
-      for (j = c; j < c + nsup; ++j) {
-       d = Astore->nzval_colptr[j];
-       for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
-         printf("%d\t%d\t%e\n", rowind[i], j, dp[d++]);
-       }
-      }
-    }
-#if 0
-    for (i = 0; i < Astore->nzval_colptr[n]; ++i) printf("%f  ", dp[i]);
-#endif
-    printf("\nnzval_colptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->nzval_colptr[i]);
-    printf("\nrowind: ");
-    for (i = 0; i < Astore->rowind_colptr[n]; ++i) 
-        printf("%d  ", Astore->rowind[i]);
-    printf("\nrowind_colptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->rowind_colptr[i]);
-    printf("\ncol_to_sup: ");
-    for (i = 0; i < n; ++i) printf("%d  ", col_to_sup[i]);
-    printf("\nsup_to_col: ");
-    for (i = 0; i <= Astore->nsuper+1; ++i) 
-        printf("%d  ", sup_to_col[i]);
-    printf("\n");
-    fflush(stdout);
-}
-
-void
-dPrint_Dense_Matrix(char *what, SuperMatrix *A)
-{
-    DNformat     *Astore;
-    register int i, j, lda = Astore->lda;
-    double       *dp;
-    
-    printf("\nDense matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    Astore = (DNformat *) A->Store;
-    dp = (double *) Astore->nzval;
-    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda);
-    printf("\nnzval: ");
-    for (j = 0; j < A->ncol; ++j) {
-        for (i = 0; i < A->nrow; ++i) printf("%f  ", dp[i + j*lda]);
-        printf("\n");
-    }
-    printf("\n");
-    fflush(stdout);
-}
-
-/*
- * Diagnostic print of column "jcol" in the U/L factor.
- */
-void
-dprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
-{
-    int     i, k, fsupc;
-    int     *xsup, *supno;
-    int     *xlsub, *lsub;
-    double  *lusup;
-    int     *xlusup;
-    double  *ucol;
-    int     *usub, *xusub;
-
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    
-    printf("%s", msg);
-    printf("col %d: pivrow %d, supno %d, xprune %d\n", 
-          jcol, pivrow, supno[jcol], xprune[jcol]);
-    
-    printf("\tU-col:\n");
-    for (i = xusub[jcol]; i < xusub[jcol+1]; i++)
-       printf("\t%d%10.4f\n", usub[i], ucol[i]);
-    printf("\tL-col in rectangular snode:\n");
-    fsupc = xsup[supno[jcol]]; /* first col of the snode */
-    i = xlsub[fsupc];
-    k = xlusup[jcol];
-    while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) {
-       printf("\t%d\t%10.4f\n", lsub[i], lusup[k]);
-       i++; k++;
-    }
-    fflush(stdout);
-}
-
-
-/*
- * Check whether tempv[] == 0. This should be true before and after 
- * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". 
- */
-void dcheck_tempv(int n, double *tempv)
-{
-    int i;
-       
-    for (i = 0; i < n; i++) {
-       if (tempv[i] != 0.0) 
-       {
-           fprintf(stderr,"tempv[%d] = %f\n", i,tempv[i]);
-           ABORT("dcheck_tempv");
-       }
-    }
-}
-
-
-void
-dGenXtrue(int n, int nrhs, double *x, int ldx)
-{
-    int  i, j;
-    for (j = 0; j < nrhs; ++j)
-       for (i = 0; i < n; ++i) {
-           x[i + j*ldx] = 1.0;/* + (double)(i+1.)/n;*/
-       }
-}
-
-/*
- * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
- */
-void
-dFillRHS(trans_t trans, int nrhs, double *x, int ldx,
-         SuperMatrix *A, SuperMatrix *B)
-{
-    NCformat *Astore;
-    double   *Aval;
-    DNformat *Bstore;
-    double   *rhs;
-    double one = 1.0;
-    double zero = 0.0;
-    int      ldc;
-    char transc[1];
-
-    Astore = A->Store;
-    Aval   = (double *) Astore->nzval;
-    Bstore = B->Store;
-    rhs    = Bstore->nzval;
-    ldc    = Bstore->lda;
-    
-    if ( trans == NOTRANS ) *(unsigned char *)transc = 'N';
-    else *(unsigned char *)transc = 'T';
-
-    sp_dgemm(transc, "N", A->nrow, nrhs, A->ncol, one, A,
-            x, ldx, zero, rhs, ldc);
-
-}
-
-/* 
- * Fills a double precision array with a given value.
- */
-void 
-dfill(double *a, int alen, double dval)
-{
-    register int i;
-    for (i = 0; i < alen; i++) a[i] = dval;
-}
-
-
-
-/* 
- * Check the inf-norm of the error vector 
- */
-void dinf_norm_error(int nrhs, SuperMatrix *X, double *xtrue)
-{
-    DNformat *Xstore;
-    double err, xnorm;
-    double *Xmat, *soln_work;
-    int i, j;
-
-    Xstore = X->Store;
-    Xmat = Xstore->nzval;
-
-    for (j = 0; j < nrhs; j++) {
-      soln_work = &Xmat[j*Xstore->lda];
-      err = xnorm = 0.0;
-      for (i = 0; i < X->nrow; i++) {
-       err = SUPERLU_MAX(err, fabs(soln_work[i] - xtrue[i]));
-       xnorm = SUPERLU_MAX(xnorm, fabs(soln_work[i]));
-      }
-      err = err / xnorm;
-      printf("||X - Xtrue||/||X|| = %e\n", err);
-    }
-}
-
-
-
-/* Print performance of the code. */
-void
-dPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
-           double rpg, double rcond, double *ferr,
-           double *berr, char *equed, SuperLUStat_t *stat)
-{
-    SCformat *Lstore;
-    NCformat *Ustore;
-    double   *utime;
-    flops_t  *ops;
-    
-    utime = stat->utime;
-    ops   = stat->ops;
-    
-    if ( utime[FACT] != 0. )
-       printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
-              ops[FACT]*1e-6/utime[FACT]);
-    printf("Identify relaxed snodes    = %8.2f\n", utime[RELAX]);
-    if ( utime[SOLVE] != 0. )
-       printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE],
-              ops[SOLVE]*1e-6/utime[SOLVE]);
-    
-    Lstore = (SCformat *) L->Store;
-    Ustore = (NCformat *) U->Store;
-    printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz);
-    printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz);
-    printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
-       
-    printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
-          mem_usage->for_lu/1e6, mem_usage->total_needed/1e6,
-          mem_usage->expansions);
-       
-    printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n");
-    printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n",
-          utime[FACT], ops[FACT]*1e-6/utime[FACT],
-          utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE],
-          utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]);
-    
-    printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n");
-    printf("NUM:\t%e\t%e\t%e\t%e\t%s\n",
-          rpg, rcond, ferr[0], berr[0], equed);
-    
-}
-
-
-
-
-int print_double_vec(char *what, int n, double *vec)
-{
-    int i;
-    printf("%s: n %d\n", what, n);
-    for (i = 0; i < n; ++i) printf("%d\t%f\n", i, vec[i]);
-    return 0;
-}
-
diff --git a/superlu/dzsum1.c b/superlu/dzsum1.c
deleted file mode 100644
index d968326a..00000000
--- a/superlu/dzsum1.c
+++ /dev/null
@@ -1,102 +0,0 @@
-#include "slu_Cnames.h"
-#include "slu_dcomplex.h"
-
-double dzsum1_(int *n, doublecomplex *cx, int *incx)
-{
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*! @file dzsum1.c
- * \brief Takes sum of the absolute values of a complex vector and returns a 
double precision result
- *
- * <pre>
- *     -- LAPACK auxiliary routine (version 2.0) --   
- *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
- *     Courant Institute, Argonne National Lab, and Rice University   
- *     October 31, 1992   
- * </pre>
- */
-/*
-
-
-
-    Purpose   
-    =======   
-
-    DZSUM1 takes the sum of the absolute values of a complex   
-    vector and returns a double precision result.   
-
-    Based on DZASUM from the Level 1 BLAS.   
-    The change is to use the 'genuine' absolute value.   
-
-    Contributed by Nick Higham for use with ZLACON.   
-
-    Arguments   
-    =========   
-
-    N       (input) INT   
-            The number of elements in the vector CX.   
-
-    CX      (input) COMPLEX*16 array, dimension (N)   
-            The vector whose elements will be summed.   
-
-    INCX    (input) INT   
-            The spacing between successive values of CX.  INCX > 0.   
-
-    ===================================================================== 
-*/  
-
-    /* Builtin functions */
-    double z_abs(doublecomplex *);
-    
-    /* Local variables */
-    int i, nincx;
-    double stemp;
-
-
-#define CX(I) cx[(I)-1]
-
-    stemp = 0.;
-    if (*n <= 0) {
-       return stemp;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-
-    /*     CODE FOR INCREMENT NOT EQUAL TO 1 */
-
-    nincx = *n * *incx;
-    for (i = 1; *incx < 0 ? i >= nincx : i <= nincx; i += *incx) {
-
-       /*        NEXT LINE MODIFIED. */
-
-       stemp += z_abs(&CX(i));
-/* L10: */
-    }
-    
-    return stemp;
-
-    /*     CODE FOR INCREMENT EQUAL TO 1 */
-
-L20:
-    for (i = 1; i <= *n; ++i) {
-
-       /*        NEXT LINE MODIFIED. */
-
-       stemp += z_abs(&CX(i));
-/* L30: */
-    }
-    
-    return stemp;
-
-    /*     End of DZSUM1 */
-
-} /* dzsum1_ */
-
diff --git a/superlu/f2c_lite.c b/superlu/f2c_lite.c
deleted file mode 100644
index 7dfb0d75..00000000
--- a/superlu/f2c_lite.c
+++ /dev/null
@@ -1,391 +0,0 @@
-/*
-  Copyright: 1992-2007 The University of Tennessee.  All rights reserved.
-  License:
-    LAPACK is a freely-available software package. It is available from
-    netlib via anonymous ftp and the World Wide Web. Thus, it can be
-    included in commercial software packages (and has been). We only ask
-    that proper credit be given to the authors.
- 
-    Like all software, it is copyrighted. It is not trademarked, but we do
-    ask the following:
-
-    If you modify the source for these routines we ask that you change the
-    name of the routine and comment the changes made to the original.
-*/
-
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "BLAS_f2c.h"
-
-
-extern void s_wsfe() {;}
-extern void e_wsfe() {;}
-extern void do_fio() {;}
-
-
-
-#ifdef KR_headers
-extern double sqrt();
-double f__cabs(real, imag) double real, imag;
-#else
-#undef abs
-
-double f__cabs(double real, double imag)
-#endif
-{
-double temp;
-
-if(real < 0)
-       real = -real;
-if(imag < 0)
-       imag = -imag;
-if(imag > real){
-       temp = real;
-       real = imag;
-       imag = temp;
-}
-if((imag+real) == real)
-       return((double)real);
-
-temp = imag/real;
-temp = real*sqrt(1.0 + temp*temp);  /*overflow!!*/
-return(temp);
-}
-
-
-#define log10e 0.43429448190325182765
-
-#ifdef KR_headers
-double log();
-double d_lg10(x) doublereal *x;
-#else
-#undef abs
-
-double d_lg10(doublereal *x)
-#endif
-{
-return( log10e * log(*x) );
-}
-
-
-#ifdef KR_headers
-double d_sign(a,b) doublereal *a, *b;
-#else
-double d_sign(doublereal *a, doublereal *b)
-#endif
-{
-double x;
-x = (*a >= 0 ? *a : - *a);
-return( *b >= 0 ? x : -x);
-}
-
-
-#ifdef KR_headers
-double floor();
-integer i_dnnt(x) doublereal *x;
-#else
-#undef abs
-
-integer i_dnnt(doublereal *x)
-#endif
-{
-return( (*x)>=0 ?
-       floor(*x + .5) : -floor(.5 - *x) );
-}
-
-
-#ifdef KR_headers
-double pow();
-double pow_dd(ap, bp) doublereal *ap, *bp;
-#else
-#undef abs
-
-double pow_dd(doublereal *ap, doublereal *bp)
-#endif
-{
-return(pow(*ap, *bp) );
-}
-
-/* Unless compiled with -DNO_OVERWRITE, this variant of s_cat allows the
- * target of a concatenation to appear on its right-hand side (contrary
- * to the Fortran 77 Standard, but in accordance with Fortran 90).
- */
-#define NO_OVERWRITE
-
-
-#ifndef NO_OVERWRITE
-
-#undef abs
-#ifdef KR_headers
- extern char *F77_aloc();
- extern void free();
- extern void exit_();
-#else
-
- extern char *F77_aloc(ftnlen, char*);
-#endif
-
-#endif /* NO_OVERWRITE */
-
- VOID
-#ifdef KR_headers
-s_cat(lp, rpp, rnp, np, ll) char *lp, *rpp[]; ftnlen rnp[], *np, ll;
-#else
-s_cat(char *lp, char *rpp[], ftnlen rnp[], ftnlen *np, ftnlen ll)
-#endif
-{
-       ftnlen i, nc;
-       char *rp;
-       ftnlen n = *np;
-#ifndef NO_OVERWRITE
-       ftnlen L, m;
-       char *lp0, *lp1;
-
-       lp0 = 0;
-       lp1 = lp;
-       L = ll;
-       i = 0;
-       while(i < n) {
-               rp = rpp[i];
-               m = rnp[i++];
-               if (rp >= lp1 || rp + m <= lp) {
-                       if ((L -= m) <= 0) {
-                               n = i;
-                               break;
-                               }
-                       lp1 += m;
-                       continue;
-                       }
-               lp0 = lp;
-               lp = lp1 = F77_aloc(L = ll, "s_cat");
-               break;
-               }
-       lp1 = lp;
-#endif /* NO_OVERWRITE */
-       for(i = 0 ; i < n ; ++i) {
-               nc = ll;
-               if(rnp[i] < nc)
-                       nc = rnp[i];
-               ll -= nc;
-               rp = rpp[i];
-               while(--nc >= 0)
-                       *lp++ = *rp++;
-               }
-       while(--ll >= 0)
-               *lp++ = ' ';
-#ifndef NO_OVERWRITE
-       if (lp0) {
-               memmove(lp0, lp1, L);
-               free(lp1);
-               }
-#endif
-       }
-
-
-/* compare two strings */
-
-#ifdef KR_headers
-integer s_cmp(a0, b0, la, lb) char *a0, *b0; ftnlen la, lb;
-#else
-integer s_cmp(char *a0, char *b0, ftnlen la, ftnlen lb)
-#endif
-{
-register unsigned char *a, *aend, *b, *bend;
-a = (unsigned char *)a0;
-b = (unsigned char *)b0;
-aend = a + la;
-bend = b + lb;
-
-if(la <= lb)
-       {
-       while(a < aend)
-               if(*a != *b)
-                       return( *a - *b );
-               else
-                       { ++a; ++b; }
-
-       while(b < bend)
-               if(*b != ' ')
-                       return( ' ' - *b );
-               else    ++b;
-       }
-
-else
-       {
-       while(b < bend)
-               if(*a == *b)
-                       { ++a; ++b; }
-               else
-                       return( *a - *b );
-       while(a < aend)
-               if(*a != ' ')
-                       return(*a - ' ');
-               else    ++a;
-       }
-return(0);
-}
-/* Unless compiled with -DNO_OVERWRITE, this variant of s_copy allows the
- * target of an assignment to appear on its right-hand side (contrary
- * to the Fortran 77 Standard, but in accordance with Fortran 90),
- * as in  a(2:5) = a(4:7) .
- */
-
-
-
-/* assign strings:  a = b */
-
-#ifdef KR_headers
-VOID s_copy(a, b, la, lb) register char *a, *b; ftnlen la, lb;
-#else
-void s_copy(register char *a, register char *b, ftnlen la, ftnlen lb)
-#endif
-{
-       register char *aend, *bend;
-
-       aend = a + la;
-
-       if(la <= lb)
-#ifndef NO_OVERWRITE
-               if (a <= b || a >= b + la)
-#endif
-                       while(a < aend)
-                               *a++ = *b++;
-#ifndef NO_OVERWRITE
-               else
-                       for(b += la; a < aend; )
-                               *--aend = *--b;
-#endif
-
-       else {
-               bend = b + lb;
-#ifndef NO_OVERWRITE
-               if (a <= b || a >= bend)
-#endif
-                       while(b < bend)
-                               *a++ = *b++;
-#ifndef NO_OVERWRITE
-               else {
-                       a += lb;
-                       while(b < bend)
-                               *--a = *--bend;
-                       a += lb;
-                       }
-#endif
-               while(a < aend)
-                       *a++ = ' ';
-               }
-       }
-
-
-#ifdef KR_headers
-double sqrt(), f__cabs();
-VOID z_sqrt(r, z) doublecomplex *r, *z;
-#else
-#undef abs
-
-extern double f__cabs(double, double);
-void z_sqrt(doublecomplex *r, doublecomplex *z)
-#endif
-{
-double mag;
-
-if( (mag = f__cabs(z->r, z->i)) == 0.)
-       r->r = r->i = 0.;
-else if(z->r > 0)
-       {
-       r->r = sqrt(0.5 * (mag + z->r) );
-       r->i = z->i / r->r / 2;
-       }
-else
-       {
-       r->i = sqrt(0.5 * (mag - z->r) );
-       if(z->i < 0)
-               r->i = - r->i;
-       r->r = z->i / r->i / 2;
-       }
-}
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef KR_headers
-integer pow_ii(ap, bp) integer *ap, *bp;
-#else
-integer pow_ii(integer *ap, integer *bp)
-#endif
-{
-       integer pow, x, n;
-       unsigned long u;
-
-       x = *ap;
-       n = *bp;
-
-       if (n <= 0) {
-               if (n == 0 || x == 1)
-                       return 1;
-               if (x != -1)
-                       return x == 0 ? 1/x : 0;
-               n = -n;
-               }
-       u = n;
-       for(pow = 1; ; )
-               {
-               if(u & 01)
-                       pow *= x;
-               if(u >>= 1)
-                       x *= x;
-               else
-                       break;
-               }
-       return(pow);
-       }
-#ifdef __cplusplus
-}
-#endif
-
-#ifdef KR_headers
-extern void f_exit();
-VOID s_stop(s, n) char *s; ftnlen n;
-#else
-#undef abs
-#undef min
-#undef max
-#ifdef __cplusplus
-extern "C" {
-#endif
-#ifdef __cplusplus
-extern "C" {
-#endif
-void f_exit(void);
-
-int s_stop(char *s, ftnlen n)
-#endif
-{
-int i;
-
-if(n > 0)
-       {
-       fprintf(stderr, "STOP ");
-       for(i = 0; i<n ; ++i)
-               putc(*s++, stderr);
-       fprintf(stderr, " statement executed\n");
-       }
-#ifdef NO_ONEXIT
-f_exit();
-#endif
-exit(0);
-
-/* We cannot avoid (useless) compiler diagnostics here:                */
-/* some compilers complain if there is no return statement,    */
-/* and others complain that this one cannot be reached.                */
-
-return 0; /* NOT REACHED */
-}
-#ifdef __cplusplus
-}
-#endif
-#ifdef __cplusplus
-}
-#endif
diff --git a/superlu/get_perm_c.c b/superlu/get_perm_c.c
deleted file mode 100644
index dd6558ac..00000000
--- a/superlu/get_perm_c.c
+++ /dev/null
@@ -1,472 +0,0 @@
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_ddefs.h"
-#include "colamd.h"
-
-extern int  genmmd_(int *, int *, int *, int *, int *, int *, int *, 
-                   int *, int *, int *, int *, int *);
-
-void
-get_colamd(
-          const int m,  /* number of rows in matrix A. */
-          const int n,  /* number of columns in matrix A. */
-          const int nnz,/* number of nonzeros in matrix A. */
-          int *colptr,  /* column pointer of size n+1 for matrix A. */
-          int *rowind,  /* row indices of size nz for matrix A. */
-          int *perm_c   /* out - the column permutation vector. */
-          )
-{
-    int Alen, *A, i, info, *p;
-    double knobs[COLAMD_KNOBS];
-    int stats[COLAMD_STATS];
-
-    Alen = colamd_recommended(nnz, m, n);
-
-    colamd_set_defaults(knobs);
-
-    if (!(A = (int *) SUPERLU_MALLOC(Alen * sizeof(int))) )
-        ABORT("Malloc fails for A[]");
-    if (!(p = (int *) SUPERLU_MALLOC((n+1) * sizeof(int))) )
-        ABORT("Malloc fails for p[]");
-    for (i = 0; i <= n; ++i) p[i] = colptr[i];
-    for (i = 0; i < nnz; ++i) A[i] = rowind[i];
-    info = colamd(m, n, Alen, A, p, knobs, stats);
-    if ( info == FALSE ) ABORT("COLAMD failed");
-
-    for (i = 0; i < n; ++i) perm_c[p[i]] = i;
-
-    SUPERLU_FREE(A);
-    SUPERLU_FREE(p);
-}
-
-void
-getata(
-       const int m,      /* number of rows in matrix A. */
-       const int n,      /* number of columns in matrix A. */
-       const int nz,     /* number of nonzeros in matrix A */
-       int *colptr,      /* column pointer of size n+1 for matrix A. */
-       int *rowind,      /* row indices of size nz for matrix A. */
-       int *atanz,       /* out - on exit, returns the actual number of
-                            nonzeros in matrix A'*A. */
-       int **ata_colptr, /* out - size n+1 */
-       int **ata_rowind  /* out - size *atanz */
-       )
-/*
- * Purpose
- * =======
- *
- * Form the structure of A'*A. A is an m-by-n matrix in column oriented
- * format represented by (colptr, rowind). The output A'*A is in column
- * oriented format (symmetrically, also row oriented), represented by
- * (ata_colptr, ata_rowind).
- *
- * This routine is modified from GETATA routine by Tim Davis.
- * The complexity of this algorithm is: SUM_{i=1,m} r(i)^2,
- * i.e., the sum of the square of the row counts.
- *
- * Questions
- * =========
- *     o  Do I need to withhold the *dense* rows?
- *     o  How do I know the number of nonzeros in A'*A?
- * 
- */
-{
-    register int i, j, k, col, num_nz, ti, trow;
-    int *marker, *b_colptr, *b_rowind;
-    int *t_colptr, *t_rowind; /* a column oriented form of T = A' */
-
-    if ( !(marker = (int*) SUPERLU_MALLOC((SUPERLU_MAX(m,n)+1)*sizeof(int))) )
-       ABORT("SUPERLU_MALLOC fails for marker[]");
-    if ( !(t_colptr = (int*) SUPERLU_MALLOC((m+1) * sizeof(int))) )
-       ABORT("SUPERLU_MALLOC t_colptr[]");
-    if ( !(t_rowind = (int*) SUPERLU_MALLOC(nz * sizeof(int))) )
-       ABORT("SUPERLU_MALLOC fails for t_rowind[]");
-
-    
-    /* Get counts of each column of T, and set up column pointers */
-    for (i = 0; i < m; ++i) marker[i] = 0;
-    for (j = 0; j < n; ++j) {
-       for (i = colptr[j]; i < colptr[j+1]; ++i)
-           ++marker[rowind[i]];
-    }
-    t_colptr[0] = 0;
-    for (i = 0; i < m; ++i) {
-       t_colptr[i+1] = t_colptr[i] + marker[i];
-       marker[i] = t_colptr[i];
-    }
-
-    /* Transpose the matrix from A to T */
-    for (j = 0; j < n; ++j)
-       for (i = colptr[j]; i < colptr[j+1]; ++i) {
-           col = rowind[i];
-           t_rowind[marker[col]] = j;
-           ++marker[col];
-       }
-
-    
-    /* ----------------------------------------------------------------
-       compute B = T * A, where column j of B is:
-
-       Struct (B_*j) =    UNION   ( Struct (T_*k) )
-                        A_kj != 0
-
-       do not include the diagonal entry
-   
-       ( Partition A as: A = (A_*1, ..., A_*n)
-         Then B = T * A = (T * A_*1, ..., T * A_*n), where
-         T * A_*j = (T_*1, ..., T_*m) * A_*j.  )
-       ---------------------------------------------------------------- */
-
-    /* Zero the diagonal flag */
-    for (i = 0; i < n; ++i) marker[i] = -1;
-
-    /* First pass determines number of nonzeros in B */
-    num_nz = 0;
-    for (j = 0; j < n; ++j) {
-       /* Flag the diagonal so it's not included in the B matrix */
-       marker[j] = j;
-
-       for (i = colptr[j]; i < colptr[j+1]; ++i) {
-           /* A_kj is nonzero, add pattern of column T_*k to B_*j */
-           k = rowind[i];
-           for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) {
-               trow = t_rowind[ti];
-               if ( marker[trow] != j ) {
-                   marker[trow] = j;
-                   num_nz++;
-               }
-           }
-       }
-    }
-    *atanz = num_nz;
-    
-    /* Allocate storage for A'*A */
-    if ( !(*ata_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
-       ABORT("SUPERLU_MALLOC fails for ata_colptr[]");
-    if ( *atanz ) {
-       if ( !(*ata_rowind = (int*) SUPERLU_MALLOC( *atanz * sizeof(int)) ) )
-           ABORT("SUPERLU_MALLOC fails for ata_rowind[]");
-    }
-    b_colptr = *ata_colptr; /* aliasing */
-    b_rowind = *ata_rowind;
-    
-    /* Zero the diagonal flag */
-    for (i = 0; i < n; ++i) marker[i] = -1;
-    
-    /* Compute each column of B, one at a time */
-    num_nz = 0;
-    for (j = 0; j < n; ++j) {
-       b_colptr[j] = num_nz;
-       
-       /* Flag the diagonal so it's not included in the B matrix */
-       marker[j] = j;
-
-       for (i = colptr[j]; i < colptr[j+1]; ++i) {
-           /* A_kj is nonzero, add pattern of column T_*k to B_*j */
-           k = rowind[i];
-           for (ti = t_colptr[k]; ti < t_colptr[k+1]; ++ti) {
-               trow = t_rowind[ti];
-               if ( marker[trow] != j ) {
-                   marker[trow] = j;
-                   b_rowind[num_nz++] = trow;
-               }
-           }
-       }
-    }
-    b_colptr[n] = num_nz;
-       
-    SUPERLU_FREE(marker);
-    SUPERLU_FREE(t_colptr);
-    SUPERLU_FREE(t_rowind);
-}
-
-
-void
-at_plus_a(
-         const int n,      /* number of columns in matrix A. */
-         const int nz,     /* number of nonzeros in matrix A */
-         int *colptr,      /* column pointer of size n+1 for matrix A. */
-         int *rowind,      /* row indices of size nz for matrix A. */
-         int *bnz,         /* out - on exit, returns the actual number of
-                               nonzeros in matrix A'*A. */
-         int **b_colptr,   /* out - size n+1 */
-         int **b_rowind    /* out - size *bnz */
-         )
-{
-/*
- * Purpose
- * =======
- *
- * Form the structure of A'+A. A is an n-by-n matrix in column oriented
- * format represented by (colptr, rowind). The output A'+A is in column
- * oriented format (symmetrically, also row oriented), represented by
- * (b_colptr, b_rowind).
- *
- */
-    register int i, j, k, col, num_nz;
-    int *t_colptr, *t_rowind; /* a column oriented form of T = A' */
-    int *marker;
-
-    if ( !(marker = (int*) SUPERLU_MALLOC( n * sizeof(int)) ) )
-       ABORT("SUPERLU_MALLOC fails for marker[]");
-    if ( !(t_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
-       ABORT("SUPERLU_MALLOC fails for t_colptr[]");
-    if ( !(t_rowind = (int*) SUPERLU_MALLOC( nz * sizeof(int)) ) )
-       ABORT("SUPERLU_MALLOC fails t_rowind[]");
-
-    
-    /* Get counts of each column of T, and set up column pointers */
-    for (i = 0; i < n; ++i) marker[i] = 0;
-    for (j = 0; j < n; ++j) {
-       for (i = colptr[j]; i < colptr[j+1]; ++i)
-           ++marker[rowind[i]];
-    }
-    t_colptr[0] = 0;
-    for (i = 0; i < n; ++i) {
-       t_colptr[i+1] = t_colptr[i] + marker[i];
-       marker[i] = t_colptr[i];
-    }
-
-    /* Transpose the matrix from A to T */
-    for (j = 0; j < n; ++j)
-       for (i = colptr[j]; i < colptr[j+1]; ++i) {
-           col = rowind[i];
-           t_rowind[marker[col]] = j;
-           ++marker[col];
-       }
-
-
-    /* ----------------------------------------------------------------
-       compute B = A + T, where column j of B is:
-
-       Struct (B_*j) = Struct (A_*k) UNION Struct (T_*k)
-
-       do not include the diagonal entry
-       ---------------------------------------------------------------- */
-
-    /* Zero the diagonal flag */
-    for (i = 0; i < n; ++i) marker[i] = -1;
-
-    /* First pass determines number of nonzeros in B */
-    num_nz = 0;
-    for (j = 0; j < n; ++j) {
-       /* Flag the diagonal so it's not included in the B matrix */
-       marker[j] = j;
-
-       /* Add pattern of column A_*k to B_*j */
-       for (i = colptr[j]; i < colptr[j+1]; ++i) {
-           k = rowind[i];
-           if ( marker[k] != j ) {
-               marker[k] = j;
-               ++num_nz;
-           }
-       }
-
-       /* Add pattern of column T_*k to B_*j */
-       for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) {
-           k = t_rowind[i];
-           if ( marker[k] != j ) {
-               marker[k] = j;
-               ++num_nz;
-           }
-       }
-    }
-    *bnz = num_nz;
-    
-    /* Allocate storage for A+A' */
-    if ( !(*b_colptr = (int*) SUPERLU_MALLOC( (n+1) * sizeof(int)) ) )
-       ABORT("SUPERLU_MALLOC fails for b_colptr[]");
-    if ( *bnz) {
-      if ( !(*b_rowind = (int*) SUPERLU_MALLOC( *bnz * sizeof(int)) ) )
-       ABORT("SUPERLU_MALLOC fails for b_rowind[]");
-    }
-    
-    /* Zero the diagonal flag */
-    for (i = 0; i < n; ++i) marker[i] = -1;
-    
-    /* Compute each column of B, one at a time */
-    num_nz = 0;
-    for (j = 0; j < n; ++j) {
-       (*b_colptr)[j] = num_nz;
-       
-       /* Flag the diagonal so it's not included in the B matrix */
-       marker[j] = j;
-
-       /* Add pattern of column A_*k to B_*j */
-       for (i = colptr[j]; i < colptr[j+1]; ++i) {
-           k = rowind[i];
-           if ( marker[k] != j ) {
-               marker[k] = j;
-               (*b_rowind)[num_nz++] = k;
-           }
-       }
-
-       /* Add pattern of column T_*k to B_*j */
-       for (i = t_colptr[j]; i < t_colptr[j+1]; ++i) {
-           k = t_rowind[i];
-           if ( marker[k] != j ) {
-               marker[k] = j;
-               (*b_rowind)[num_nz++] = k;
-           }
-       }
-    }
-    (*b_colptr)[n] = num_nz;
-       
-    SUPERLU_FREE(marker);
-    SUPERLU_FREE(t_colptr);
-    SUPERLU_FREE(t_rowind);
-}
-
-void
-get_perm_c(int ispec, SuperMatrix *A, int *perm_c)
-/*
- * Purpose
- * =======
- *
- * GET_PERM_C obtains a permutation matrix Pc, by applying the multiple
- * minimum degree ordering code by Joseph Liu to matrix A'*A or A+A'.
- * or using approximate minimum degree column ordering by Davis et. al.
- * The LU factorization of A*Pc tends to have less fill than the LU 
- * factorization of A.
- *
- * Arguments
- * =========
- *
- * ispec   (input) int
- *         Specifies the type of column ordering to reduce fill:
- *         = 1: minimum degree on the structure of A^T * A
- *         = 2: minimum degree on the structure of A^T + A
- *         = 3: approximate minimum degree for unsymmetric matrices
- *         If ispec == 0, the natural ordering (i.e., Pc = I) is returned.
- * 
- * A       (input) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of the linear equations is A->nrow. Currently, the type of A 
- *         can be: Stype = NC; Dtype = _D; Mtype = GE. In the future,
- *         more general A can be handled.
- *
- * perm_c  (output) int*
- *        Column permutation vector of size A->ncol, which defines the 
- *         permutation matrix Pc; perm_c[i] = j means column i of A is 
- *         in position j in A*Pc.
- *
- */
-{
-    NCformat *Astore = A->Store;
-    int m, n, bnz = 0, *b_colptr, i;
-    int delta, maxint, nofsub, *invp;
-    int *b_rowind, *dhead, *qsize, *llist, *marker;
-    double t, SuperLU_timer_();
-    
-    m = A->nrow;
-    n = A->ncol;
-
-    t = SuperLU_timer_();
-    switch ( ispec ) {
-        case 0: /* Natural ordering */
-             for (i = 0; i < n; ++i) perm_c[i] = i;
-#if ( PRNTlevel>=1 )
-             printf("Use natural column ordering.\n");
-#endif
-             return;
-        case 1: /* Minimum degree ordering on A'*A */
-             getata(m, n, Astore->nnz, Astore->colptr, Astore->rowind,
-                    &bnz, &b_colptr, &b_rowind);
-#if ( PRNTlevel>=1 )
-             printf("Use minimum degree ordering on A'*A.\n");
-#endif
-             t = SuperLU_timer_() - t;
-             /*printf("Form A'*A time = %8.3f\n", t);*/
-             break;
-        case 2: /* Minimum degree ordering on A'+A */
-             if ( m != n ) ABORT("Matrix is not square");
-             at_plus_a(n, Astore->nnz, Astore->colptr, Astore->rowind,
-                       &bnz, &b_colptr, &b_rowind);
-#if ( PRNTlevel>=1 )
-             printf("Use minimum degree ordering on A'+A.\n");
-#endif
-             t = SuperLU_timer_() - t;
-             /*printf("Form A'+A time = %8.3f\n", t);*/
-             break;
-        case 3: /* Approximate minimum degree column ordering. */
-             get_colamd(m, n, Astore->nnz, Astore->colptr, Astore->rowind,
-                        perm_c);
-#if ( PRNTlevel>=1 )
-             printf(".. Use approximate minimum degree column ordering.\n");
-#endif
-             return; 
-        default:
-             ABORT("Invalid ISPEC");
-    }
-
-    if ( bnz != 0 ) {
-       t = SuperLU_timer_();
-
-       /* Initialize and allocate storage for GENMMD. */
-       delta = 1; /* DELTA is a parameter to allow the choice of nodes
-                     whose degree <= min-degree + DELTA. */
-       maxint = 2147483647; /* 2**31 - 1 */
-       invp = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
-       if ( !invp ) ABORT("SUPERLU_MALLOC fails for invp.");
-       dhead = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
-       if ( !dhead ) ABORT("SUPERLU_MALLOC fails for dhead.");
-       qsize = (int *) SUPERLU_MALLOC((n+delta)*sizeof(int));
-       if ( !qsize ) ABORT("SUPERLU_MALLOC fails for qsize.");
-       llist = (int *) SUPERLU_MALLOC(n*sizeof(int));
-       if ( !llist ) ABORT("SUPERLU_MALLOC fails for llist.");
-       marker = (int *) SUPERLU_MALLOC(n*sizeof(int));
-       if ( !marker ) ABORT("SUPERLU_MALLOC fails for marker.");
-
-       /* Transform adjacency list into 1-based indexing required by GENMMD.*/
-       for (i = 0; i <= n; ++i) ++b_colptr[i];
-       for (i = 0; i < bnz; ++i) ++b_rowind[i];
-       
-       genmmd_(&n, b_colptr, b_rowind, perm_c, invp, &delta, dhead, 
-               qsize, llist, marker, &maxint, &nofsub);
-
-       /* Transform perm_c into 0-based indexing. */
-       for (i = 0; i < n; ++i) --perm_c[i];
-
-       SUPERLU_FREE(invp);
-       SUPERLU_FREE(dhead);
-       SUPERLU_FREE(qsize);
-       SUPERLU_FREE(llist);
-       SUPERLU_FREE(marker);
-       SUPERLU_FREE(b_rowind);
-
-       t = SuperLU_timer_() - t;
-       /*  printf("call GENMMD time = %8.3f\n", t);*/
-
-    } else { /* Empty adjacency structure */
-       for (i = 0; i < n; ++i) perm_c[i] = i;
-    }
-
-    SUPERLU_FREE(b_colptr);
-}
diff --git a/superlu/heap_relax_snode.c b/superlu/heap_relax_snode.c
deleted file mode 100644
index 80ed279b..00000000
--- a/superlu/heap_relax_snode.c
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include "slu_ddefs.h"
-
-void
-heap_relax_snode (
-            const     int n,
-            int       *et,           /* column elimination tree */
-            const int relax_columns, /* max no of columns allowed in a
-                                        relaxed snode */
-            int       *descendants,  /* no of descendants of each node
-                                        in the etree */
-            int       *relax_end     /* last column in a supernode */
-            )
-{
-/*
- * Purpose
- * =======
- *    relax_snode() - Identify the initial relaxed supernodes, assuming that 
- *    the matrix has been reordered according to the postorder of the etree.
- *
- */ 
-    register int i, j, k, l, parent;
-    register int snode_start;  /* beginning of a snode */
-    int *et_save, *post, *inv_post, *iwork;
-    int nsuper_et = 0, nsuper_et_post = 0;
-
-    /* The etree may not be postordered, but is heap ordered. */
-
-    iwork = (int*) intMalloc(3*n+2); 
-    if ( !iwork ) ABORT("SUPERLU_MALLOC fails for iwork[]");
-    inv_post = iwork + n+1;
-    et_save = inv_post + n+1;
-
-    /* Post order etree */
-    post = (int *) TreePostorder(n, et);
-    for (i = 0; i < n+1; ++i) inv_post[post[i]] = i;
-
-    /* Renumber etree in postorder */
-    for (i = 0; i < n; ++i) {
-        iwork[post[i]] = post[et[i]];
-       et_save[i] = et[i]; /* Save the original etree */
-    }
-    for (i = 0; i < n; ++i) et[i] = iwork[i];
-
-    /* Compute the number of descendants of each node in the etree */
-    ifill (relax_end, n, EMPTY);
-    for (j = 0; j < n; j++) descendants[j] = 0;
-    for (j = 0; j < n; j++) {
-       parent = et[j];
-       if ( parent != n )  /* not the dummy root */
-           descendants[parent] += descendants[j] + 1;
-    }
-
-    /* Identify the relaxed supernodes by postorder traversal of the etree. */
-    for (j = 0; j < n; ) { 
-       parent = et[j];
-        snode_start = j;
-       while ( parent != n && descendants[parent] < relax_columns ) {
-           j = parent;
-           parent = et[j];
-       }
-       /* Found a supernode in postordered etree; j is the last column. */
-       ++nsuper_et_post;
-       k = n;
-       for (i = snode_start; i <= j; ++i)
-           k = SUPERLU_MIN(k, inv_post[i]);
-       l = inv_post[j];
-       if ( (l - k) == (j - snode_start) ) {
-           /* It's also a supernode in the original etree */
-           relax_end[k] = l;           /* Last column is recorded */
-           ++nsuper_et;
-       } else {
-           for (i = snode_start; i <= j; ++i) {
-               l = inv_post[i];
-               if ( descendants[i] == 0 ) relax_end[l] = l;
-           }
-       }
-       j++;
-       /* Search for a new leaf */
-       while ( descendants[j] != 0 && j < n ) j++;
-    }
-
-#if ( PRNTlevel>=1 )
-    printf(".. heap_snode_relax:\n"
-          "\tNo of relaxed snodes in postordered etree:\t%d\n"
-          "\tNo of relaxed snodes in original etree:\t%d\n",
-          nsuper_et_post, nsuper_et);
-#endif
-
-    /* Recover the original etree */
-    for (i = 0; i < n; ++i) et[i] = et_save[i];
-
-    SUPERLU_FREE(post);
-    SUPERLU_FREE(iwork);
-}
-
-
diff --git a/superlu/icmax1.c b/superlu/icmax1.c
deleted file mode 100644
index d5d15411..00000000
--- a/superlu/icmax1.c
+++ /dev/null
@@ -1,124 +0,0 @@
-#include <math.h>
-#include "slu_scomplex.h"
-#include "slu_Cnames.h"
-
-int icmax1_(int *n, complex *cx, int *incx)
-{
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*! @file icmax1.c
- * \brief Finds the index of the element whose real part has maximum absolute 
value
- *
- * <pre>
- *     -- LAPACK auxiliary routine (version 2.0) --   
- *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
- *     Courant Institute, Argonne National Lab, and Rice University   
- *     October 31, 1992   
- * </pre>
- */
-/*
-
-    Purpose   
-    =======   
-
-    ICMAX1 finds the index of the element whose real part has maximum   
-    absolute value.   
-
-    Based on ICAMAX from Level 1 BLAS.   
-    The change is to use the 'genuine' absolute value.   
-
-    Contributed by Nick Higham for use with CLACON.   
-
-    Arguments   
-    =========   
-
-    N       (input) INT   
-            The number of elements in the vector CX.   
-
-    CX      (input) COMPLEX array, dimension (N)   
-            The vector whose elements will be summed.   
-
-    INCX    (input) INT   
-            The spacing between successive values of CX.  INCX >= 1.   
-
-   ===================================================================== 
-  
-
-
-       NEXT LINE IS THE ONLY MODIFICATION.   
-
-    
-   Parameter adjustments   
-       Function Body */
-    /* System generated locals */
-    int ret_val, i__1, i__2;
-    float r__1;
-    /* Local variables */
-    static float smax;
-    static int i, ix;
-
-
-#define CX(I) cx[(I)-1]
-
-
-    ret_val = 0;
-    if (*n < 1) {
-       return ret_val;
-    }
-    ret_val = 1;
-    if (*n == 1) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L30;
-    }
-
-/*     CODE FOR INCREMENT NOT EQUAL TO 1 */
-
-    ix = 1;
-    smax = (r__1 = CX(1).r, fabs(r__1));
-    ix += *incx;
-    i__1 = *n;
-    for (i = 2; i <= *n; ++i) {
-       i__2 = ix;
-       if ((r__1 = CX(ix).r, fabs(r__1)) <= smax) {
-           goto L10;
-       }
-       ret_val = i;
-       i__2 = ix;
-       smax = (r__1 = CX(ix).r, fabs(r__1));
-L10:
-       ix += *incx;
-/* L20: */
-    }
-    return ret_val;
-
-/*     CODE FOR INCREMENT EQUAL TO 1 */
-
-L30:
-    smax = (r__1 = CX(1).r, fabs(r__1));
-    i__1 = *n;
-    for (i = 2; i <= *n; ++i) {
-       i__2 = i;
-       if ((r__1 = CX(i).r, fabs(r__1)) <= smax) {
-           goto L40;
-       }
-       ret_val = i;
-       i__2 = i;
-       smax = (r__1 = CX(i).r, fabs(r__1));
-L40:
-       ;
-    }
-    return ret_val;
-
-/*     End of ICMAX1 */
-
-} /* icmax1_ */
-
diff --git a/superlu/izmax1.c b/superlu/izmax1.c
deleted file mode 100644
index 75b3279e..00000000
--- a/superlu/izmax1.c
+++ /dev/null
@@ -1,117 +0,0 @@
-#include <math.h>
-#include "slu_Cnames.h"
-#include "slu_dcomplex.h"
-
-int
-izmax1_(int *n, doublecomplex *cx, int *incx)
-{
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*! @file izmax1.c
- * \brief Finds the index of the element whose real part has maximum absolute 
value
- *
- * <pre>
- *     -- LAPACK auxiliary routine (version 2.0) --   
- *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
- *     Courant Institute, Argonne National Lab, and Rice University   
- *     October 31, 1992   
- * </pre>
- */
-/*  
-    Purpose   
-    =======   
-
-    IZMAX1 finds the index of the element whose real part has maximum   
-    absolute value.   
-
-    Based on IZAMAX from Level 1 BLAS.   
-    The change is to use the 'genuine' absolute value.   
-
-    Contributed by Nick Higham for use with ZLACON.   
-
-    Arguments   
-    =========   
-
-    N       (input) INT   
-            The number of elements in the vector CX.   
-
-    CX      (input) COMPLEX*16 array, dimension (N)   
-            The vector whose elements will be summed.   
-
-    INCX    (input) INT   
-            The spacing between successive values of CX.  INCX >= 1.   
-
-   ===================================================================== 
-*/  
-
-    /* System generated locals */
-    int ret_val, i__1, i__2;
-    double d__1;
-    
-    /* Local variables */
-    double smax;
-    int i, ix;
-
-#define CX(I) cx[(I)-1]
-
-    ret_val = 0;
-    if (*n < 1) {
-       return ret_val;
-    }
-    ret_val = 1;
-    if (*n == 1) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L30;
-    }
-
-/*     CODE FOR INCREMENT NOT EQUAL TO 1 */
-
-    ix = 1;
-    smax = (d__1 = CX(1).r, fabs(d__1));
-    ix += *incx;
-    i__1 = *n;
-    for (i = 2; i <= *n; ++i) {
-       i__2 = ix;
-       if ((d__1 = CX(ix).r, fabs(d__1)) <= smax) {
-           goto L10;
-       }
-       ret_val = i;
-       i__2 = ix;
-       smax = (d__1 = CX(ix).r, fabs(d__1));
-L10:
-       ix += *incx;
-/* L20: */
-    }
-    return ret_val;
-
-/*     CODE FOR INCREMENT EQUAL TO 1 */
-
-L30:
-    smax = (d__1 = CX(1).r, fabs(d__1));
-    i__1 = *n;
-    for (i = 2; i <= *n; ++i) {
-       i__2 = i;
-       if ((d__1 = CX(i).r, fabs(d__1)) <= smax) {
-           goto L40;
-       }
-       ret_val = i;
-       i__2 = i;
-       smax = (d__1 = CX(i).r, fabs(d__1));
-L40:
-       ;
-    }
-    return ret_val;
-
-/*     End of IZMAX1 */
-
-} /* izmax1_ */
-
diff --git a/superlu/lsame.c b/superlu/lsame.c
deleted file mode 100644
index e235b88d..00000000
--- a/superlu/lsame.c
+++ /dev/null
@@ -1,111 +0,0 @@
-#include "slu_Cnames.h"
-
-int lsame_(char *ca, char *cb)
-{
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Copyright (c) 1992-2013 The University of Tennessee and The University
-                        of Tennessee Research Foundation.  All rights
-                        reserved.
-       Copyright (c) 2000-2013 The University of California Berkeley. All
-                        rights reserved.
-       Copyright (c) 2006-2013 The University of Colorado Denver.  All rights
-                        reserved.
-
-       Redistribution and use in source and binary forms, with or without
-       modification, are permitted provided that the following conditions are
-       met:
-
-       - Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-
-       - Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer listed
-         in this license in the documentation and/or other materials
-         provided with the distribution.
-
-       - Neither the name of the copyright holders nor the names of its
-         contributors may be used to endorse or promote products derived from
-         this software without specific prior written permission.
-
-       The copyright holders provide no reassurances that the source code
-       provided does not infringe any patent, copyright, or any other
-       intellectual property rights of third parties.  The copyright holders
-       disclaim any liability to any recipient for claims brought against
-       recipient by any third party for infringement of that parties
-       intellectual property rights.
-
-       THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-       "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-       LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-       A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-       OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-       SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-       LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-       DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-       THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-       (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-       OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-    Purpose   
-    =======   
-
-    LSAME returns .TRUE. if CA is the same letter as CB regardless of case.   
-
-    Arguments   
-    =========   
-
-    CA      (input) CHARACTER*1   
-    CB      (input) CHARACTER*1   
-            CA and CB specify the single characters to be compared.   
-
-   ===================================================================== 
-*/  
-
-    /* System generated locals */
-    int ret_val;
-    
-    /* Local variables */
-    int inta, intb, zcode;
-
-    ret_val = *(unsigned char *)ca == *(unsigned char *)cb;
-    if (ret_val) {
-       return ret_val;
-    }
-
-    /* Now test for equivalence if both characters are alphabetic. */
-
-    zcode = 'Z';
-
-    /* Use 'Z' rather than 'A' so that ASCII can be detected on Prime   
-       machines, on which ICHAR returns a value with bit 8 set.   
-       ICHAR('A') on Prime machines returns 193 which is the same as   
-       ICHAR('A') on an EBCDIC machine. */
-
-    inta = *(unsigned char *)ca;
-    intb = *(unsigned char *)cb;
-
-    if (zcode == 90 || zcode == 122) {
-       /* ASCII is assumed - ZCODE is the ASCII code of either lower or   
-          upper case 'Z'. */
-       if (inta >= 97 && inta <= 122) inta += -32;
-       if (intb >= 97 && intb <= 122) intb += -32;
-
-    } else if (zcode == 233 || zcode == 169) {
-       /* EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or   
-          upper case 'Z'. */
-       if (inta >= 129 && inta <= 137 || inta >= 145 && inta <= 153 || inta 
-               >= 162 && inta <= 169)
-           inta += 64;
-       if (intb >= 129 && intb <= 137 || intb >= 145 && intb <= 153 || intb 
-               >= 162 && intb <= 169)
-           intb += 64;
-    } else if (zcode == 218 || zcode == 250) {
-       /* ASCII is assumed, on Prime machines - ZCODE is the ASCII code   
-          plus 128 of either lower or upper case 'Z'. */
-       if (inta >= 225 && inta <= 250) inta += -32;
-       if (intb >= 225 && intb <= 250) intb += -32;
-    }
-    ret_val = inta == intb;
-    return ret_val;
-    
-} /* lsame_ */
diff --git a/superlu/memory.c b/superlu/memory.c
deleted file mode 100644
index c614e14d..00000000
--- a/superlu/memory.c
+++ /dev/null
@@ -1,230 +0,0 @@
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-/** Precision-independent memory-related routines.
-    (Shared by [sdcz]memory.c) **/
-
-#include "slu_ddefs.h"
-
-
-#if ( DEBUGlevel>=1 )           /* Debug malloc/free. */
-int superlu_malloc_total = 0;
-
-#define PAD_FACTOR  2
-#define DWORD  (sizeof(double)) /* Be sure it's no smaller than double. */
-/* size_t is usually defined as 'unsigned long' */
-
-void *superlu_malloc(size_t size)
-{
-    char *buf;
-
-    buf = (char *) malloc(size + DWORD);
-    if ( !buf ) {
-       printf("superlu_malloc fails: malloc_total %.0f MB, size %ld\n",
-              superlu_malloc_total*1e-6, size);
-       ABORT("superlu_malloc: out of memory");
-    }
-
-    ((int_t *) buf)[0] = size;
-#if 0
-    superlu_malloc_total += size + DWORD;
-#else
-    superlu_malloc_total += size;
-#endif
-    return (void *) (buf + DWORD);
-}
-
-void superlu_free(void *addr)
-{
-    char *p = ((char *) addr) - DWORD;
-
-    if ( !addr )
-       ABORT("superlu_free: tried to free NULL pointer");
-
-    if ( !p )
-       ABORT("superlu_free: tried to free NULL+DWORD pointer");
-
-    { 
-       int_t n = ((int_t *) p)[0];
-       
-       if ( !n )
-           ABORT("superlu_free: tried to free a freed pointer");
-       *((int_t *) p) = 0; /* Set to zero to detect duplicate free's. */
-#if 0  
-       superlu_malloc_total -= (n + DWORD);
-#else
-       superlu_malloc_total -= n;
-#endif
-
-       if ( superlu_malloc_total < 0 )
-           ABORT("superlu_malloc_total went negative!");
-       
-       /*free (addr);*/
-       free (p);
-    }
-
-}
-
-#else   /* production mode */
-
-void *superlu_malloc(size_t size)
-{
-    void *buf;
-    buf = (void *) malloc(size);
-    return (buf);
-}
-
-void superlu_free(void *addr)
-{
-    free (addr);
-}
-
-#endif
-
-
-/*
- * Set up pointers for integer working arrays.
- */
-void
-SetIWork(int m, int n, int panel_size, int *iworkptr, int **segrep,
-        int **parent, int **xplore, int **repfnz, int **panel_lsub,
-        int **xprune, int **marker)
-{
-    *segrep = iworkptr;
-    *parent = iworkptr + m;
-    *xplore = *parent + m;
-    *repfnz = *xplore + m;
-    *panel_lsub = *repfnz + panel_size * m;
-    *xprune = *panel_lsub + panel_size * m;
-    *marker = *xprune + n;
-    ifill (*repfnz, m * panel_size, EMPTY);
-    ifill (*panel_lsub, m * panel_size, EMPTY);
-}
-
-
-void
-copy_mem_int(int howmany, void *old, void *new)
-{
-    register int i;
-    int *iold = old;
-    int *inew = new;
-    for (i = 0; i < howmany; i++) inew[i] = iold[i];
-}
-
-
-void
-user_bcopy(char *src, char *dest, int bytes)
-{
-    char *s_ptr, *d_ptr;
-
-    s_ptr = src + bytes - 1;
-    d_ptr = dest + bytes - 1;
-    for (; d_ptr >= dest; --s_ptr, --d_ptr ) *d_ptr = *s_ptr;
-}
-
-
-
-int *intMalloc(int n)
-{
-    int *buf;
-    buf = (int *) SUPERLU_MALLOC(n * sizeof(int));
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC fails for buf in intMalloc()");
-    }
-    return (buf);
-}
-
-int *intCalloc(int n)
-{
-    int *buf;
-    register int i;
-    buf = (int *) SUPERLU_MALLOC(n * sizeof(int));
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC fails for buf in intCalloc()");
-    }
-    for (i = 0; i < n; ++i) buf[i] = 0;
-    return (buf);
-}
-
-
-
-#if 0
-check_expanders()
-{
-    int p;
-    printf("Check expanders:\n");
-    for (p = 0; p < NO_MEMTYPE; p++) {
-       printf("type %d, size %d, mem %d\n",
-              p, expanders[p].size, (int)expanders[p].mem);
-    }
-
-    return 0;
-}
-
-
-StackInfo()
-{
-    printf("Stack: size %d, used %d, top1 %d, top2 %d\n",
-          stack.size, stack.used, stack.top1, stack.top2);
-    return 0;
-}
-
-
-
-PrintStack(char *msg, GlobalLU_t *Glu)
-{
-    int i;
-    int *xlsub, *lsub, *xusub, *usub;
-
-    xlsub = Glu->xlsub;
-    lsub  = Glu->lsub;
-    xusub = Glu->xusub;
-    usub  = Glu->usub;
-
-    printf("%s\n", msg);
-    
-/*    printf("\nUCOL: ");
-    for (i = 0; i < xusub[ndim]; ++i)
-       printf("%f  ", ucol[i]);
-
-    printf("\nLSUB: ");
-    for (i = 0; i < xlsub[ndim]; ++i)
-       printf("%d  ", lsub[i]);
-
-    printf("\nUSUB: ");
-    for (i = 0; i < xusub[ndim]; ++i)
-       printf("%d  ", usub[i]);
-
-    printf("\n");*/
-    return 0;
-}   
-#endif
-
-
-
diff --git a/superlu/mmd.c b/superlu/mmd.c
deleted file mode 100644
index 1fb196c9..00000000
--- a/superlu/mmd.c
+++ /dev/null
@@ -1,1021 +0,0 @@
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-typedef int shortint;
-
-/* *************************************************************** */
-/* *************************************************************** */
-/* ****     GENMMD ..... MULTIPLE MINIMUM EXTERNAL DEGREE     **** */
-/* *************************************************************** */
-/* *************************************************************** */
-
-/*     AUTHOR - JOSEPH W.H. LIU */
-/*              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
-
-/*     PURPOSE - THIS ROUTINE IMPLEMENTS THE MINIMUM DEGREE */
-/*        ALGORITHM.  IT MAKES USE OF THE IMPLICIT REPRESENTATION */
-/*        OF ELIMINATION GRAPHS BY QUOTIENT GRAPHS, AND THE */
-/*        NOTION OF INDISTINGUISHABLE NODES.  IT ALSO IMPLEMENTS */
-/*        THE MODIFICATIONS BY MULTIPLE ELIMINATION AND MINIMUM */
-/*        EXTERNAL DEGREE. */
-/*        --------------------------------------------- */
-/*        CAUTION - THE ADJACENCY VECTOR ADJNCY WILL BE */
-/*        DESTROYED. */
-/*        --------------------------------------------- */
-
-/*     INPUT PARAMETERS - */
-/*        NEQNS  - NUMBER OF EQUATIONS. */
-/*        (XADJ,ADJNCY) - THE ADJACENCY STRUCTURE. */
-/*        DELTA  - TOLERANCE VALUE FOR MULTIPLE ELIMINATION. */
-/*        MAXINT - MAXIMUM MACHINE REPRESENTABLE (SHORT) INTEGER */
-/*                 (ANY SMALLER ESTIMATE WILL DO) FOR MARKING */
-/*                 NODES. */
-
-/*     OUTPUT PARAMETERS - */
-/*        PERM   - THE MINIMUM DEGREE ORDERING. */
-/*        INVP   - THE INVERSE OF PERM. */
-/*        NOFSUB - AN UPPER BOUND ON THE NUMBER OF NONZERO */
-/*                 SUBSCRIPTS FOR THE COMPRESSED STORAGE SCHEME. */
-
-/*     WORKING PARAMETERS - */
-/*        DHEAD  - VECTOR FOR HEAD OF DEGREE LISTS. */
-/*        INVP   - USED TEMPORARILY FOR DEGREE FORWARD LINK. */
-/*        PERM   - USED TEMPORARILY FOR DEGREE BACKWARD LINK. */
-/*        QSIZE  - VECTOR FOR SIZE OF SUPERNODES. */
-/*        LLIST  - VECTOR FOR TEMPORARY LINKED LISTS. */
-/*        MARKER - A TEMPORARY MARKER VECTOR. */
-
-/*     PROGRAM SUBROUTINES - */
-/*        MMDELM, MMDINT, MMDNUM, MMDUPD. */
-
-/* *************************************************************** */
-
-/* Subroutine */ int genmmd_(int *neqns, int *xadj, shortint *adjncy, 
-       shortint *invp, shortint *perm, int *delta, shortint *dhead, 
-       shortint *qsize, shortint *llist, shortint *marker, int *maxint, 
-       int *nofsub)
-{
-    /* System generated locals */
-    int i__1;
-
-    /* Local variables */
-    static int mdeg, ehead, i, mdlmt, mdnode;
-    extern /* Subroutine */ int mmdelm_(int *, int *, shortint *, 
-           shortint *, shortint *, shortint *, shortint *, shortint *, 
-           shortint *, int *, int *), mmdupd_(int *, int *, 
-           int *, shortint *, int *, int *, shortint *, shortint 
-           *, shortint *, shortint *, shortint *, shortint *, int *, 
-           int *), mmdint_(int *, int *, shortint *, shortint *, 
-           shortint *, shortint *, shortint *, shortint *, shortint *), 
-           mmdnum_(int *, shortint *, shortint *, shortint *);
-    static int nextmd, tag, num;
-
-
-/* *************************************************************** */
-
-
-/* *************************************************************** */
-
-    /* Parameter adjustments */
-    --marker;
-    --llist;
-    --qsize;
-    --dhead;
-    --perm;
-    --invp;
-    --adjncy;
-    --xadj;
-
-    /* Function Body */
-    if (*neqns <= 0) {
-       return 0;
-    }
-
-/*        ------------------------------------------------ */
-/*        INITIALIZATION FOR THE MINIMUM DEGREE ALGORITHM. */
-/*        ------------------------------------------------ */
-    *nofsub = 0;
-    mmdint_(neqns, &xadj[1], &adjncy[1], &dhead[1], &invp[1], &perm[1], &
-           qsize[1], &llist[1], &marker[1]);
-
-/*        ---------------------------------------------- */
-/*        NUM COUNTS THE NUMBER OF ORDERED NODES PLUS 1. */
-/*        ---------------------------------------------- */
-    num = 1;
-
-/*        ----------------------------- */
-/*        ELIMINATE ALL ISOLATED NODES. */
-/*        ----------------------------- */
-    nextmd = dhead[1];
-L100:
-    if (nextmd <= 0) {
-       goto L200;
-    }
-    mdnode = nextmd;
-    nextmd = invp[mdnode];
-    marker[mdnode] = *maxint;
-    invp[mdnode] = -num;
-    ++num;
-    goto L100;
-
-L200:
-/*        ---------------------------------------- */
-/*        SEARCH FOR NODE OF THE MINIMUM DEGREE. */
-/*        MDEG IS THE CURRENT MINIMUM DEGREE; */
-/*        TAG IS USED TO FACILITATE MARKING NODES. */
-/*        ---------------------------------------- */
-    if (num > *neqns) {
-       goto L1000;
-    }
-    tag = 1;
-    dhead[1] = 0;
-    mdeg = 2;
-L300:
-    if (dhead[mdeg] > 0) {
-       goto L400;
-    }
-    ++mdeg;
-    goto L300;
-L400:
-/*            ------------------------------------------------- */
-/*            USE VALUE OF DELTA TO SET UP MDLMT, WHICH GOVERNS */
-/*            WHEN A DEGREE UPDATE IS TO BE PERFORMED. */
-/*            ------------------------------------------------- */
-    mdlmt = mdeg + *delta;
-    ehead = 0;
-
-L500:
-    mdnode = dhead[mdeg];
-    if (mdnode > 0) {
-       goto L600;
-    }
-    ++mdeg;
-    if (mdeg > mdlmt) {
-       goto L900;
-    }
-    goto L500;
-L600:
-/*                ---------------------------------------- */
-/*                REMOVE MDNODE FROM THE DEGREE STRUCTURE. */
-/*                ---------------------------------------- */
-    nextmd = invp[mdnode];
-    dhead[mdeg] = nextmd;
-    if (nextmd > 0) {
-       perm[nextmd] = -mdeg;
-    }
-    invp[mdnode] = -num;
-    *nofsub = *nofsub + mdeg + qsize[mdnode] - 2;
-    if (num + qsize[mdnode] > *neqns) {
-       goto L1000;
-    }
-/*                ---------------------------------------------- */
-/*                ELIMINATE MDNODE AND PERFORM QUOTIENT GRAPH */
-/*                TRANSFORMATION.  RESET TAG VALUE IF NECESSARY. */
-/*                ---------------------------------------------- */
-    ++tag;
-    if (tag < *maxint) {
-       goto L800;
-    }
-    tag = 1;
-    i__1 = *neqns;
-    for (i = 1; i <= i__1; ++i) {
-       if (marker[i] < *maxint) {
-           marker[i] = 0;
-       }
-/* L700: */
-    }
-L800:
-    mmdelm_(&mdnode, &xadj[1], &adjncy[1], &dhead[1], &invp[1], &perm[1], &
-           qsize[1], &llist[1], &marker[1], maxint, &tag);
-    num += qsize[mdnode];
-    llist[mdnode] = ehead;
-    ehead = mdnode;
-    if (*delta >= 0) {
-       goto L500;
-    }
-L900:
-/*            ------------------------------------------- */
-/*            UPDATE DEGREES OF THE NODES INVOLVED IN THE */
-/*            MINIMUM DEGREE NODES ELIMINATION. */
-/*            ------------------------------------------- */
-    if (num > *neqns) {
-       goto L1000;
-    }
-    mmdupd_(&ehead, neqns, &xadj[1], &adjncy[1], delta, &mdeg, &dhead[1], &
-           invp[1], &perm[1], &qsize[1], &llist[1], &marker[1], maxint, &tag)
-           ;
-    goto L300;
-
-L1000:
-    mmdnum_(neqns, &perm[1], &invp[1], &qsize[1]);
-    return 0;
-
-} /* genmmd_ */
-
-/* *************************************************************** */
-/* *************************************************************** */
-/* ***     MMDINT ..... MULT MINIMUM DEGREE INITIALIZATION     *** */
-/* *************************************************************** */
-/* *************************************************************** */
-
-/*     AUTHOR - JOSEPH W.H. LIU */
-/*              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
-
-/*     PURPOSE - THIS ROUTINE PERFORMS INITIALIZATION FOR THE */
-/*        MULTIPLE ELIMINATION VERSION OF THE MINIMUM DEGREE */
-/*        ALGORITHM. */
-
-/*     INPUT PARAMETERS - */
-/*        NEQNS  - NUMBER OF EQUATIONS. */
-/*        (XADJ,ADJNCY) - ADJACENCY STRUCTURE. */
-
-/*     OUTPUT PARAMETERS - */
-/*        (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */
-/*        QSIZE  - SIZE OF SUPERNODE (INITIALIZED TO ONE). */
-/*        LLIST  - LINKED LIST. */
-/*        MARKER - MARKER VECTOR. */
-
-/* *************************************************************** */
-
-/* Subroutine */ int mmdint_(int *neqns, int *xadj, shortint *adjncy, 
-       shortint *dhead, shortint *dforw, shortint *dbakw, shortint *qsize, 
-       shortint *llist, shortint *marker)
-{
-    /* System generated locals */
-    int i__1;
-
-    /* Local variables */
-    static int ndeg, node, fnode;
-
-
-/* *************************************************************** */
-
-
-/* *************************************************************** */
-
-    /* Parameter adjustments */
-    --marker;
-    --llist;
-    --qsize;
-    --dbakw;
-    --dforw;
-    --dhead;
-    --adjncy;
-    --xadj;
-
-    /* Function Body */
-    i__1 = *neqns;
-    for (node = 1; node <= i__1; ++node) {
-       dhead[node] = 0;
-       qsize[node] = 1;
-       marker[node] = 0;
-       llist[node] = 0;
-/* L100: */
-    }
-/*        ------------------------------------------ */
-/*        INITIALIZE THE DEGREE DOUBLY LINKED LISTS. */
-/*        ------------------------------------------ */
-    i__1 = *neqns;
-    for (node = 1; node <= i__1; ++node) {
-       ndeg = xadj[node + 1] - xadj[node] + 1;
-       fnode = dhead[ndeg];
-       dforw[node] = fnode;
-       dhead[ndeg] = node;
-       if (fnode > 0) {
-           dbakw[fnode] = node;
-       }
-       dbakw[node] = -ndeg;
-/* L200: */
-    }
-    return 0;
-
-} /* mmdint_ */
-
-/* *************************************************************** */
-/* *************************************************************** */
-/* **     MMDELM ..... MULTIPLE MINIMUM DEGREE ELIMINATION     *** */
-/* *************************************************************** */
-/* *************************************************************** */
-
-/*     AUTHOR - JOSEPH W.H. LIU */
-/*              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
-
-/*     PURPOSE - THIS ROUTINE ELIMINATES THE NODE MDNODE OF */
-/*        MINIMUM DEGREE FROM THE ADJACENCY STRUCTURE, WHICH */
-/*        IS STORED IN THE QUOTIENT GRAPH FORMAT.  IT ALSO */
-/*        TRANSFORMS THE QUOTIENT GRAPH REPRESENTATION OF THE */
-/*        ELIMINATION GRAPH. */
-
-/*     INPUT PARAMETERS - */
-/*        MDNODE - NODE OF MINIMUM DEGREE. */
-/*        MAXINT - ESTIMATE OF MAXIMUM REPRESENTABLE (SHORT) */
-/*                 INT. */
-/*        TAG    - TAG VALUE. */
-
-/*     UPDATED PARAMETERS - */
-/*        (XADJ,ADJNCY) - UPDATED ADJACENCY STRUCTURE. */
-/*        (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */
-/*        QSIZE  - SIZE OF SUPERNODE. */
-/*        MARKER - MARKER VECTOR. */
-/*        LLIST  - TEMPORARY LINKED LIST OF ELIMINATED NABORS. */
-
-/* *************************************************************** */
-
-/* Subroutine */ int mmdelm_(int *mdnode, int *xadj, shortint *adjncy,
-        shortint *dhead, shortint *dforw, shortint *dbakw, shortint *qsize, 
-       shortint *llist, shortint *marker, int *maxint, int *tag)
-{
-    /* System generated locals */
-    int i__1, i__2;
-
-    /* Local variables */
-    static int node, link, rloc, rlmt, i, j, nabor, rnode, elmnt, xqnbr, 
-           istop, jstop, istrt, jstrt, nxnode, pvnode, nqnbrs, npv;
-
-
-/* *************************************************************** */
-
-
-/* *************************************************************** */
-
-/*        ----------------------------------------------- */
-/*        FIND REACHABLE SET AND PLACE IN DATA STRUCTURE. */
-/*        ----------------------------------------------- */
-    /* Parameter adjustments */
-    --marker;
-    --llist;
-    --qsize;
-    --dbakw;
-    --dforw;
-    --dhead;
-    --adjncy;
-    --xadj;
-
-    /* Function Body */
-    marker[*mdnode] = *tag;
-    istrt = xadj[*mdnode];
-    istop = xadj[*mdnode + 1] - 1;
-/*        ------------------------------------------------------- */
-/*        ELMNT POINTS TO THE BEGINNING OF THE LIST OF ELIMINATED */
-/*        NABORS OF MDNODE, AND RLOC GIVES THE STORAGE LOCATION */
-/*        FOR THE NEXT REACHABLE NODE. */
-/*        ------------------------------------------------------- */
-    elmnt = 0;
-    rloc = istrt;
-    rlmt = istop;
-    i__1 = istop;
-    for (i = istrt; i <= i__1; ++i) {
-       nabor = adjncy[i];
-       if (nabor == 0) {
-           goto L300;
-       }
-       if (marker[nabor] >= *tag) {
-           goto L200;
-       }
-       marker[nabor] = *tag;
-       if (dforw[nabor] < 0) {
-           goto L100;
-       }
-       adjncy[rloc] = nabor;
-       ++rloc;
-       goto L200;
-L100:
-       llist[nabor] = elmnt;
-       elmnt = nabor;
-L200:
-       ;
-    }
-L300:
-/*            ----------------------------------------------------- */
-/*            MERGE WITH REACHABLE NODES FROM GENERALIZED ELEMENTS. */
-/*            ----------------------------------------------------- */
-    if (elmnt <= 0) {
-       goto L1000;
-    }
-    adjncy[rlmt] = -elmnt;
-    link = elmnt;
-L400:
-    jstrt = xadj[link];
-    jstop = xadj[link + 1] - 1;
-    i__1 = jstop;
-    for (j = jstrt; j <= i__1; ++j) {
-       node = adjncy[j];
-       link = -node;
-       if (node < 0) {
-           goto L400;
-       } else if (node == 0) {
-           goto L900;
-       } else {
-           goto L500;
-       }
-L500:
-       if (marker[node] >= *tag || dforw[node] < 0) {
-           goto L800;
-       }
-       marker[node] = *tag;
-/*                            --------------------------------- */
-/*                            USE STORAGE FROM ELIMINATED NODES */
-/*                            IF NECESSARY. */
-/*                            --------------------------------- */
-L600:
-       if (rloc < rlmt) {
-           goto L700;
-       }
-       link = -adjncy[rlmt];
-       rloc = xadj[link];
-       rlmt = xadj[link + 1] - 1;
-       goto L600;
-L700:
-       adjncy[rloc] = node;
-       ++rloc;
-L800:
-       ;
-    }
-L900:
-    elmnt = llist[elmnt];
-    goto L300;
-L1000:
-    if (rloc <= rlmt) {
-       adjncy[rloc] = 0;
-    }
-/*        -------------------------------------------------------- */
-/*        FOR EACH NODE IN THE REACHABLE SET, DO THE FOLLOWING ... */
-/*        -------------------------------------------------------- */
-    link = *mdnode;
-L1100:
-    istrt = xadj[link];
-    istop = xadj[link + 1] - 1;
-    i__1 = istop;
-    for (i = istrt; i <= i__1; ++i) {
-       rnode = adjncy[i];
-       link = -rnode;
-       if (rnode < 0) {
-           goto L1100;
-       } else if (rnode == 0) {
-           goto L1800;
-       } else {
-           goto L1200;
-       }
-L1200:
-/*                -------------------------------------------- */
-/*                IF RNODE IS IN THE DEGREE LIST STRUCTURE ... */
-/*                -------------------------------------------- */
-       pvnode = dbakw[rnode];
-       if (pvnode == 0 || pvnode == -(*maxint)) {
-           goto L1300;
-       }
-/*                    ------------------------------------- */
-/*                    THEN REMOVE RNODE FROM THE STRUCTURE. */
-/*                    ------------------------------------- */
-       nxnode = dforw[rnode];
-       if (nxnode > 0) {
-           dbakw[nxnode] = pvnode;
-       }
-       if (pvnode > 0) {
-           dforw[pvnode] = nxnode;
-       }
-       npv = -pvnode;
-       if (pvnode < 0) {
-           dhead[npv] = nxnode;
-       }
-L1300:
-/*                ---------------------------------------- */
-/*                PURGE INACTIVE QUOTIENT NABORS OF RNODE. */
-/*                ---------------------------------------- */
-       jstrt = xadj[rnode];
-       jstop = xadj[rnode + 1] - 1;
-       xqnbr = jstrt;
-       i__2 = jstop;
-       for (j = jstrt; j <= i__2; ++j) {
-           nabor = adjncy[j];
-           if (nabor == 0) {
-               goto L1500;
-           }
-           if (marker[nabor] >= *tag) {
-               goto L1400;
-           }
-           adjncy[xqnbr] = nabor;
-           ++xqnbr;
-L1400:
-           ;
-       }
-L1500:
-/*                ---------------------------------------- */
-/*                IF NO ACTIVE NABOR AFTER THE PURGING ... */
-/*                ---------------------------------------- */
-       nqnbrs = xqnbr - jstrt;
-       if (nqnbrs > 0) {
-           goto L1600;
-       }
-/*                    ----------------------------- */
-/*                    THEN MERGE RNODE WITH MDNODE. */
-/*                    ----------------------------- */
-       qsize[*mdnode] += qsize[rnode];
-       qsize[rnode] = 0;
-       marker[rnode] = *maxint;
-       dforw[rnode] = -(*mdnode);
-       dbakw[rnode] = -(*maxint);
-       goto L1700;
-L1600:
-/*                -------------------------------------- */
-/*                ELSE FLAG RNODE FOR DEGREE UPDATE, AND */
-/*                ADD MDNODE AS A NABOR OF RNODE. */
-/*                -------------------------------------- */
-       dforw[rnode] = nqnbrs + 1;
-       dbakw[rnode] = 0;
-       adjncy[xqnbr] = *mdnode;
-       ++xqnbr;
-       if (xqnbr <= jstop) {
-           adjncy[xqnbr] = 0;
-       }
-
-L1700:
-       ;
-    }
-L1800:
-    return 0;
-
-} /* mmdelm_ */
-
-/* *************************************************************** */
-/* *************************************************************** */
-/* *****     MMDUPD ..... MULTIPLE MINIMUM DEGREE UPDATE     ***** */
-/* *************************************************************** */
-/* *************************************************************** */
-
-/*     AUTHOR - JOSEPH W.H. LIU */
-/*              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
-
-/*     PURPOSE - THIS ROUTINE UPDATES THE DEGREES OF NODES */
-/*        AFTER A MULTIPLE ELIMINATION STEP. */
-
-/*     INPUT PARAMETERS - */
-/*        EHEAD  - THE BEGINNING OF THE LIST OF ELIMINATED */
-/*                 NODES (I.E., NEWLY FORMED ELEMENTS). */
-/*        NEQNS  - NUMBER OF EQUATIONS. */
-/*        (XADJ,ADJNCY) - ADJACENCY STRUCTURE. */
-/*        DELTA  - TOLERANCE VALUE FOR MULTIPLE ELIMINATION. */
-/*        MAXINT - MAXIMUM MACHINE REPRESENTABLE (SHORT) */
-/*                 INTEGER. */
-
-/*     UPDATED PARAMETERS - */
-/*        MDEG   - NEW MINIMUM DEGREE AFTER DEGREE UPDATE. */
-/*        (DHEAD,DFORW,DBAKW) - DEGREE DOUBLY LINKED STRUCTURE. */
-/*        QSIZE  - SIZE OF SUPERNODE. */
-/*        LLIST  - WORKING LINKED LIST. */
-/*        MARKER - MARKER VECTOR FOR DEGREE UPDATE. */
-/*        TAG    - TAG VALUE. */
-
-/* *************************************************************** */
-
-/* Subroutine */ int mmdupd_(int *ehead, int *neqns, int *xadj, 
-       shortint *adjncy, int *delta, int *mdeg, shortint *dhead, 
-       shortint *dforw, shortint *dbakw, shortint *qsize, shortint *llist, 
-       shortint *marker, int *maxint, int *tag)
-{
-    /* System generated locals */
-    int i__1, i__2;
-
-    /* Local variables */
-    static int node, mtag, link, mdeg0, i, j, enode, fnode, nabor, elmnt, 
-           istop, jstop, q2head, istrt, jstrt, qxhead, iq2, deg, deg0;
-
-
-/* *************************************************************** */
-
-
-/* *************************************************************** */
-
-    /* Parameter adjustments */
-    --marker;
-    --llist;
-    --qsize;
-    --dbakw;
-    --dforw;
-    --dhead;
-    --adjncy;
-    --xadj;
-
-    /* Function Body */
-    mdeg0 = *mdeg + *delta;
-    elmnt = *ehead;
-L100:
-/*            ------------------------------------------------------- */
-/*            FOR EACH OF THE NEWLY FORMED ELEMENT, DO THE FOLLOWING. */
-/*            (RESET TAG VALUE IF NECESSARY.) */
-/*            ------------------------------------------------------- */
-    if (elmnt <= 0) {
-       return 0;
-    }
-    mtag = *tag + mdeg0;
-    if (mtag < *maxint) {
-       goto L300;
-    }
-    *tag = 1;
-    i__1 = *neqns;
-    for (i = 1; i <= i__1; ++i) {
-       if (marker[i] < *maxint) {
-           marker[i] = 0;
-       }
-/* L200: */
-    }
-    mtag = *tag + mdeg0;
-L300:
-/*            --------------------------------------------- */
-/*            CREATE TWO LINKED LISTS FROM NODES ASSOCIATED */
-/*            WITH ELMNT: ONE WITH TWO NABORS (Q2HEAD) IN */
-/*            ADJACENCY STRUCTURE, AND THE OTHER WITH MORE */
-/*            THAN TWO NABORS (QXHEAD).  ALSO COMPUTE DEG0, */
-/*            NUMBER OF NODES IN THIS ELEMENT. */
-/*            --------------------------------------------- */
-    q2head = 0;
-    qxhead = 0;
-    deg0 = 0;
-    link = elmnt;
-L400:
-    istrt = xadj[link];
-    istop = xadj[link + 1] - 1;
-    i__1 = istop;
-    for (i = istrt; i <= i__1; ++i) {
-       enode = adjncy[i];
-       link = -enode;
-       if (enode < 0) {
-           goto L400;
-       } else if (enode == 0) {
-           goto L800;
-       } else {
-           goto L500;
-       }
-
-L500:
-       if (qsize[enode] == 0) {
-           goto L700;
-       }
-       deg0 += qsize[enode];
-       marker[enode] = mtag;
-/*                        ---------------------------------- */
-/*                        IF ENODE REQUIRES A DEGREE UPDATE, */
-/*                        THEN DO THE FOLLOWING. */
-/*                        ---------------------------------- */
-       if (dbakw[enode] != 0) {
-           goto L700;
-       }
-/*                            --------------------------------------- 
-*/
-/*                            PLACE EITHER IN QXHEAD OR Q2HEAD LISTS. 
-*/
-/*                            --------------------------------------- 
-*/
-       if (dforw[enode] == 2) {
-           goto L600;
-       }
-       llist[enode] = qxhead;
-       qxhead = enode;
-       goto L700;
-L600:
-       llist[enode] = q2head;
-       q2head = enode;
-L700:
-       ;
-    }
-L800:
-/*            -------------------------------------------- */
-/*            FOR EACH ENODE IN Q2 LIST, DO THE FOLLOWING. */
-/*            -------------------------------------------- */
-    enode = q2head;
-    iq2 = 1;
-L900:
-    if (enode <= 0) {
-       goto L1500;
-    }
-    if (dbakw[enode] != 0) {
-       goto L2200;
-    }
-    ++(*tag);
-    deg = deg0;
-/*                    ------------------------------------------ */
-/*                    IDENTIFY THE OTHER ADJACENT ELEMENT NABOR. */
-/*                    ------------------------------------------ */
-    istrt = xadj[enode];
-    nabor = adjncy[istrt];
-    if (nabor == elmnt) {
-       nabor = adjncy[istrt + 1];
-    }
-/*                    ------------------------------------------------ */
-/*                    IF NABOR IS UNELIMINATED, INCREASE DEGREE COUNT. */
-/*                    ------------------------------------------------ */
-    link = nabor;
-    if (dforw[nabor] < 0) {
-       goto L1000;
-    }
-    deg += qsize[nabor];
-    goto L2100;
-L1000:
-/*                        -------------------------------------------- */
-/*                        OTHERWISE, FOR EACH NODE IN THE 2ND ELEMENT, */
-/*                        DO THE FOLLOWING. */
-/*                        -------------------------------------------- */
-    istrt = xadj[link];
-    istop = xadj[link + 1] - 1;
-    i__1 = istop;
-    for (i = istrt; i <= i__1; ++i) {
-       node = adjncy[i];
-       link = -node;
-       if (node == enode) {
-           goto L1400;
-       }
-       if (node < 0) {
-           goto L1000;
-       } else if (node == 0) {
-           goto L2100;
-       } else {
-           goto L1100;
-       }
-
-L1100:
-       if (qsize[node] == 0) {
-           goto L1400;
-       }
-       if (marker[node] >= *tag) {
-           goto L1200;
-       }
-/*                                -----------------------------------
--- */
-/*                                CASE WHEN NODE IS NOT YET CONSIDERED
-. */
-/*                                -----------------------------------
--- */
-       marker[node] = *tag;
-       deg += qsize[node];
-       goto L1400;
-L1200:
-/*                            ----------------------------------------
- */
-/*                            CASE WHEN NODE IS INDISTINGUISHABLE FROM
- */
-/*                            ENODE.  MERGE THEM INTO A NEW SUPERNODE.
- */
-/*                            ----------------------------------------
- */
-       if (dbakw[node] != 0) {
-           goto L1400;
-       }
-       if (dforw[node] != 2) {
-           goto L1300;
-       }
-       qsize[enode] += qsize[node];
-       qsize[node] = 0;
-       marker[node] = *maxint;
-       dforw[node] = -enode;
-       dbakw[node] = -(*maxint);
-       goto L1400;
-L1300:
-/*                            -------------------------------------- 
-*/
-/*                            CASE WHEN NODE IS OUTMATCHED BY ENODE. 
-*/
-/*                            -------------------------------------- 
-*/
-       if (dbakw[node] == 0) {
-           dbakw[node] = -(*maxint);
-       }
-L1400:
-       ;
-    }
-    goto L2100;
-L1500:
-/*                ------------------------------------------------ */
-/*                FOR EACH ENODE IN THE QX LIST, DO THE FOLLOWING. */
-/*                ------------------------------------------------ */
-    enode = qxhead;
-    iq2 = 0;
-L1600:
-    if (enode <= 0) {
-       goto L2300;
-    }
-    if (dbakw[enode] != 0) {
-       goto L2200;
-    }
-    ++(*tag);
-    deg = deg0;
-/*                        --------------------------------- */
-/*                        FOR EACH UNMARKED NABOR OF ENODE, */
-/*                        DO THE FOLLOWING. */
-/*                        --------------------------------- */
-    istrt = xadj[enode];
-    istop = xadj[enode + 1] - 1;
-    i__1 = istop;
-    for (i = istrt; i <= i__1; ++i) {
-       nabor = adjncy[i];
-       if (nabor == 0) {
-           goto L2100;
-       }
-       if (marker[nabor] >= *tag) {
-           goto L2000;
-       }
-       marker[nabor] = *tag;
-       link = nabor;
-/*                                ------------------------------ */
-/*                                IF UNELIMINATED, INCLUDE IT IN */
-/*                                DEG COUNT. */
-/*                                ------------------------------ */
-       if (dforw[nabor] < 0) {
-           goto L1700;
-       }
-       deg += qsize[nabor];
-       goto L2000;
-L1700:
-/*                                    ------------------------------- 
-*/
-/*                                    IF ELIMINATED, INCLUDE UNMARKED 
-*/
-/*                                    NODES IN THIS ELEMENT INTO THE 
-*/
-/*                                    DEGREE COUNT. */
-/*                                    ------------------------------- 
-*/
-       jstrt = xadj[link];
-       jstop = xadj[link + 1] - 1;
-       i__2 = jstop;
-       for (j = jstrt; j <= i__2; ++j) {
-           node = adjncy[j];
-           link = -node;
-           if (node < 0) {
-               goto L1700;
-           } else if (node == 0) {
-               goto L2000;
-           } else {
-               goto L1800;
-           }
-
-L1800:
-           if (marker[node] >= *tag) {
-               goto L1900;
-           }
-           marker[node] = *tag;
-           deg += qsize[node];
-L1900:
-           ;
-       }
-L2000:
-       ;
-    }
-L2100:
-/*                    ------------------------------------------- */
-/*                    UPDATE EXTERNAL DEGREE OF ENODE IN DEGREE */
-/*                    STRUCTURE, AND MDEG (MIN DEG) IF NECESSARY. */
-/*                    ------------------------------------------- */
-    deg = deg - qsize[enode] + 1;
-    fnode = dhead[deg];
-    dforw[enode] = fnode;
-    dbakw[enode] = -deg;
-    if (fnode > 0) {
-       dbakw[fnode] = enode;
-    }
-    dhead[deg] = enode;
-    if (deg < *mdeg) {
-       *mdeg = deg;
-    }
-L2200:
-/*                    ---------------------------------- */
-/*                    GET NEXT ENODE IN CURRENT ELEMENT. */
-/*                    ---------------------------------- */
-    enode = llist[enode];
-    if (iq2 == 1) {
-       goto L900;
-    }
-    goto L1600;
-L2300:
-/*            ----------------------------- */
-/*            GET NEXT ELEMENT IN THE LIST. */
-/*            ----------------------------- */
-    *tag = mtag;
-    elmnt = llist[elmnt];
-    goto L100;
-
-} /* mmdupd_ */
-
-/* *************************************************************** */
-/* *************************************************************** */
-/* *****     MMDNUM ..... MULTI MINIMUM DEGREE NUMBERING     ***** */
-/* *************************************************************** */
-/* *************************************************************** */
-
-/*     AUTHOR - JOSEPH W.H. LIU */
-/*              DEPT OF COMPUTER SCIENCE, YORK UNIVERSITY. */
-
-/*     PURPOSE - THIS ROUTINE PERFORMS THE FINAL STEP IN */
-/*        PRODUCING THE PERMUTATION AND INVERSE PERMUTATION */
-/*        VECTORS IN THE MULTIPLE ELIMINATION VERSION OF THE */
-/*        MINIMUM DEGREE ORDERING ALGORITHM. */
-
-/*     INPUT PARAMETERS - */
-/*        NEQNS  - NUMBER OF EQUATIONS. */
-/*        QSIZE  - SIZE OF SUPERNODES AT ELIMINATION. */
-
-/*     UPDATED PARAMETERS - */
-/*        INVP   - INVERSE PERMUTATION VECTOR.  ON INPUT, */
-/*                 IF QSIZE(NODE)=0, THEN NODE HAS BEEN MERGED */
-/*                 INTO THE NODE -INVP(NODE); OTHERWISE, */
-/*                 -INVP(NODE) IS ITS INVERSE LABELLING. */
-
-/*     OUTPUT PARAMETERS - */
-/*        PERM   - THE PERMUTATION VECTOR. */
-
-/* *************************************************************** */
-
-/* Subroutine */ int mmdnum_(int *neqns, shortint *perm, shortint *invp, 
-       shortint *qsize)
-{
-    /* System generated locals */
-    int i__1;
-
-    /* Local variables */
-    static int node, root, nextf, father, nqsize, num;
-
-
-/* *************************************************************** */
-
-
-/* *************************************************************** */
-
-    /* Parameter adjustments */
-    --qsize;
-    --invp;
-    --perm;
-
-    /* Function Body */
-    i__1 = *neqns;
-    for (node = 1; node <= i__1; ++node) {
-       nqsize = qsize[node];
-       if (nqsize <= 0) {
-           perm[node] = invp[node];
-       }
-       if (nqsize > 0) {
-           perm[node] = -invp[node];
-       }
-/* L100: */
-    }
-/*        ------------------------------------------------------ */
-/*        FOR EACH NODE WHICH HAS BEEN MERGED, DO THE FOLLOWING. */
-/*        ------------------------------------------------------ */
-    i__1 = *neqns;
-    for (node = 1; node <= i__1; ++node) {
-       if (perm[node] > 0) {
-           goto L500;
-       }
-/*                ----------------------------------------- */
-/*                TRACE THE MERGED TREE UNTIL ONE WHICH HAS */
-/*                NOT BEEN MERGED, CALL IT ROOT. */
-/*                ----------------------------------------- */
-       father = node;
-L200:
-       if (perm[father] > 0) {
-           goto L300;
-       }
-       father = -perm[father];
-       goto L200;
-L300:
-/*                ----------------------- */
-/*                NUMBER NODE AFTER ROOT. */
-/*                ----------------------- */
-       root = father;
-       num = perm[root] + 1;
-       invp[node] = -num;
-       perm[root] = num;
-/*                ------------------------ */
-/*                SHORTEN THE MERGED TREE. */
-/*                ------------------------ */
-       father = node;
-L400:
-       nextf = -perm[father];
-       if (nextf <= 0) {
-           goto L500;
-       }
-       perm[father] = -root;
-       father = nextf;
-       goto L400;
-L500:
-       ;
-    }
-/*        ---------------------- */
-/*        READY TO COMPUTE PERM. */
-/*        ---------------------- */
-    i__1 = *neqns;
-    for (node = 1; node <= i__1; ++node) {
-       num = -invp[node];
-       invp[node] = num;
-       perm[num] = node;
-/* L600: */
-    }
-    return 0;
-
-} /* mmdnum_ */
-
diff --git a/superlu/relax_snode.c b/superlu/relax_snode.c
deleted file mode 100644
index 8937ac03..00000000
--- a/superlu/relax_snode.c
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_ddefs.h"
-
-void
-relax_snode (
-            const     int n,
-            int       *et,           /* column elimination tree */
-            const int relax_columns, /* max no of columns allowed in a
-                                        relaxed snode */
-            int       *descendants,  /* no of descendants of each node
-                                        in the etree */
-            int       *relax_end     /* last column in a supernode */
-            )
-{
-/*
- * Purpose
- * =======
- *    relax_snode() - Identify the initial relaxed supernodes, assuming that 
- *    the matrix has been reordered according to the postorder of the etree.
- *
- */ 
-    register int j, parent;
-    register int snode_start;  /* beginning of a snode */
-    
-    ifill (relax_end, n, EMPTY);
-    for (j = 0; j < n; j++) descendants[j] = 0;
-
-    /* Compute the number of descendants of each node in the etree */
-    for (j = 0; j < n; j++) {
-       parent = et[j];
-       if ( parent != n )  /* not the dummy root */
-           descendants[parent] += descendants[j] + 1;
-    }
-
-    /* Identify the relaxed supernodes by postorder traversal of the etree. */
-    for (j = 0; j < n; ) { 
-       parent = et[j];
-        snode_start = j;
-       while ( parent != n && descendants[parent] < relax_columns ) {
-           j = parent;
-           parent = et[j];
-       }
-       /* Found a supernode with j being the last column. */
-       relax_end[snode_start] = j;             /* Last column is recorded */
-       j++;
-       /* Search for a new leaf */
-       while ( descendants[j] != 0 && j < n ) j++;
-    }
-
-    /*printf("No of relaxed snodes: %d; relaxed columns: %d\n", 
-               nsuper, no_relaxed_col); */
-}
diff --git a/superlu/scolumn_bmod.c b/superlu/scolumn_bmod.c
deleted file mode 100644
index 64a1d0b4..00000000
--- a/superlu/scolumn_bmod.c
+++ /dev/null
@@ -1,360 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_sdefs.h"
-extern void strsv_();
-extern void sgemv_();
-
-
-/* 
- * Function prototypes 
- */
-void susolve(int, int, float*, float*);
-void slsolve(int, int, float*, float*);
-void smatvec(int, int, int, float*, float*, float*);
-
-
-
-/* Return value:   0 - successful return
- *               > 0 - number of bytes allocated when run out of space
- */
-int
-scolumn_bmod (
-            const int  jcol,     /* in */
-            const int  nseg,     /* in */
-            float     *dense,    /* in */
-            float     *tempv,    /* working array */
-            int        *segrep,  /* in */
-            int        *repfnz,  /* in */
-            int        fpanelc,  /* in -- first column in the current panel */
-            GlobalLU_t *Glu,     /* modified */
-            SuperLUStat_t *stat  /* output */
-            )
-{
-/*
- * Purpose:
- * ========
- *    Performs numeric block updates (sup-col) in topological order.
- *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
- *    Special processing on the supernodal portion of L\U[*,j]
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-         ftcs2 = _cptofcd("N", strlen("N")),
-         ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    int         incx = 1, incy = 1;
-    float      alpha, beta;
-    
-    /* krep = representative of current k-th supernode
-     * fsupc = first supernodal column
-     * nsupc = no of columns in supernode
-     * nsupr = no of rows in supernode (used as leading dimension)
-     * luptr = location of supernodal LU-block in storage
-     * kfnz = first nonz in the k-th supernodal segment
-     * no_zeros = no of leading zeros in a supernodal U-segment
-     */
-    float       ukj, ukj1, ukj2;
-    int          luptr, luptr1, luptr2;
-    int          fsupc, nsupc, nsupr, segsze;
-    int          nrow;   /* No of rows in the matrix of matrix-vector */
-    int          jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
-    register int lptr, kfnz, isub, irow, i;
-    register int no_zeros, new_next; 
-    int          ufirst, nextlu;
-    int          fst_col; /* First column within small LU update */
-    int          d_fsupc; /* Distance between the first column of the current
-                            panel and the first column of the current snode. */
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    float       *lusup;
-    int          *xlusup;
-    int          nzlumax;
-    float       *tempv1;
-    float      zero = 0.0;
-    float      one = 1.0;
-    float      none = -1.0;
-    int          mem_error;
-    flops_t      *ops = stat->ops;
-
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    nzlumax = Glu->nzlumax;
-    jcolp1 = jcol + 1;
-    jsupno = supno[jcol];
-    
-    /* 
-     * For each nonz supernode segment of U[*,j] in topological order 
-     */
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-
-       krep = segrep[k];
-       k--;
-       ksupno = supno[krep];
-       if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
-
-           fsupc = xsup[ksupno];
-           fst_col = SUPERLU_MAX ( fsupc, fpanelc );
-
-           /* Distance from the current supernode to the current panel; 
-              d_fsupc=0 if fsupc > fpanelc. */
-           d_fsupc = fst_col - fsupc; 
-
-           luptr = xlusup[fst_col] + d_fsupc;
-           lptr = xlsub[fsupc] + d_fsupc;
-
-           kfnz = repfnz[krep];
-           kfnz = SUPERLU_MAX ( kfnz, fpanelc );
-
-           segsze = krep - kfnz + 1;
-           nsupc = krep - fst_col + 1;
-           nsupr = xlsub[fsupc+1] - xlsub[fsupc];      /* Leading dimension */
-           nrow = nsupr - d_fsupc - nsupc;
-           krep_ind = lptr + nsupc - 1;
-
-           ops[TRSV] += segsze * (segsze - 1);
-           ops[GEMV] += 2 * nrow * segsze;
-
-
-           /* 
-            * Case 1: Update U-segment of size 1 -- col-col update 
-            */
-           if ( segsze == 1 ) {
-               ukj = dense[lsub[krep_ind]];
-               luptr += nsupr*(nsupc-1) + nsupc;
-
-               for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                   irow = lsub[i];
-                   dense[irow] -=  ukj*lusup[luptr];
-                   luptr++;
-               }
-
-           } else if ( segsze <= 3 ) {
-               ukj = dense[lsub[krep_ind]];
-               luptr += nsupr*(nsupc-1) + nsupc-1;
-               ukj1 = dense[lsub[krep_ind - 1]];
-               luptr1 = luptr - nsupr;
-
-               if ( segsze == 2 ) { /* Case 2: 2cols-col update */
-                   ukj -= ukj1 * lusup[luptr1];
-                   dense[lsub[krep_ind]] = ukj;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                       irow = lsub[i];
-                       luptr++;
-                       luptr1++;
-                       dense[irow] -= ( ukj*lusup[luptr]
-                                       + ukj1*lusup[luptr1] );
-                   }
-               } else { /* Case 3: 3cols-col update */
-                   ukj2 = dense[lsub[krep_ind - 2]];
-                   luptr2 = luptr1 - nsupr;
-                   ukj1 -= ukj2 * lusup[luptr2-1];
-                   ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
-                   dense[lsub[krep_ind]] = ukj;
-                   dense[lsub[krep_ind-1]] = ukj1;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                       irow = lsub[i];
-                       luptr++;
-                       luptr1++;
-                       luptr2++;
-                       dense[irow] -= ( ukj*lusup[luptr]
-                            + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
-                   }
-               }
-
-
-
-           } else {
-               /*
-                * Case: sup-col update
-                * Perform a triangular solve and block update,
-                * then scatter the result of sup-col update to dense
-                */
-
-               no_zeros = kfnz - fst_col;
-
-               /* Copy U[*,j] segment from dense[*] to tempv[*] */
-               isub = lptr + no_zeros;
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   tempv[i] = dense[irow];
-                   ++isub; 
-               }
-
-               /* Dense triangular solve -- start effective triangle */
-               luptr += nsupr * no_zeros + no_zeros; 
-               
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-               STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                      &nsupr, tempv, &incx );
-#else          
-               strsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                      &nsupr, tempv, &incx );
-#endif         
-               luptr += segsze;  /* Dense matrix-vector */
-               tempv1 = &tempv[segsze];
-                alpha = one;
-                beta = zero;
-#ifdef _CRAY
-               SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
-                      &nsupr, tempv, &incx, &beta, tempv1, &incy );
-#else
-               sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
-                      &nsupr, tempv, &incx, &beta, tempv1, &incy );
-#endif
-#else
-               slsolve ( nsupr, segsze, &lusup[luptr], tempv );
-
-               luptr += segsze;  /* Dense matrix-vector */
-               tempv1 = &tempv[segsze];
-               smatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
-#endif
-               
-               
-                /* Scatter tempv[] into SPA dense[] as a temporary storage */
-                isub = lptr + no_zeros;
-                for (i = 0; i < segsze; i++) {
-                    irow = lsub[isub];
-                    dense[irow] = tempv[i];
-                    tempv[i] = zero;
-                    ++isub;
-                }
-
-               /* Scatter tempv1[] into SPA dense[] */
-               for (i = 0; i < nrow; i++) {
-                   irow = lsub[isub];
-                   dense[irow] -= tempv1[i];
-                   tempv1[i] = zero;
-                   ++isub;
-               }
-           }
-           
-       } /* if jsupno ... */
-
-    } /* for each segment... */
-
-    /*
-     * Process the supernodal portion of L\U[*,j]
-     */
-    nextlu = xlusup[jcol];
-    fsupc = xsup[jsupno];
-
-    /* Copy the SPA dense into L\U[*,j] */
-    new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
-    while ( new_next > nzlumax ) {
-       if (mem_error = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu))
-           return (mem_error);
-       lusup = Glu->lusup;
-       lsub = Glu->lsub;
-    }
-
-    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
-       irow = lsub[isub];
-       lusup[nextlu] = dense[irow];
-        dense[irow] = zero;
-       ++nextlu;
-    }
-
-    xlusup[jcolp1] = nextlu;   /* Close L\U[*,jcol] */
-
-    /* For more updates within the panel (also within the current supernode), 
-     * should start from the first column of the panel, or the first column 
-     * of the supernode, whichever is bigger. There are 2 cases:
-     *    1) fsupc < fpanelc, then fst_col := fpanelc
-     *    2) fsupc >= fpanelc, then fst_col := fsupc
-     */
-    fst_col = SUPERLU_MAX ( fsupc, fpanelc );
-
-    if ( fst_col < jcol ) {
-
-       /* Distance between the current supernode and the current panel.
-          d_fsupc=0 if fsupc >= fpanelc. */
-       d_fsupc = fst_col - fsupc;
-
-       lptr = xlsub[fsupc] + d_fsupc;
-       luptr = xlusup[fst_col] + d_fsupc;
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];  /* Leading dimension */
-       nsupc = jcol - fst_col; /* Excluding jcol */
-       nrow = nsupr - d_fsupc - nsupc;
-
-       /* Points to the beginning of jcol in snode L\U(jsupno) */
-       ufirst = xlusup[jcol] + d_fsupc;        
-
-       ops[TRSV] += nsupc * (nsupc - 1);
-       ops[GEMV] += 2 * nrow * nsupc;
-       
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-       STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
-              &nsupr, &lusup[ufirst], &incx );
-#else
-       strsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
-              &nsupr, &lusup[ufirst], &incx );
-#endif
-       
-       alpha = none; beta = one; /* y := beta*y + alpha*A*x */
-
-#ifdef _CRAY
-       SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
-              &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-#else
-       sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
-              &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-#endif
-#else
-       slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
-
-       smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
-               &lusup[ufirst], tempv );
-       
-        /* Copy updates from tempv[*] into lusup[*] */
-       isub = ufirst + nsupc;
-       for (i = 0; i < nrow; i++) {
-           lusup[isub] -= tempv[i];
-           tempv[i] = 0.0;
-           ++isub;
-       }
-
-#endif
-       
-       
-    } /* if fst_col < jcol ... */ 
-
-    return 0;
-}
diff --git a/superlu/scolumn_dfs.c b/superlu/scolumn_dfs.c
deleted file mode 100644
index a9bd3c2f..00000000
--- a/superlu/scolumn_dfs.c
+++ /dev/null
@@ -1,278 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_sdefs.h"
-
-/* What type of supernodes we want */
-#define T2_SUPER
-
-int
-scolumn_dfs(
-          const int  m,         /* in - number of rows in the matrix */
-          const int  jcol,      /* in */
-          int        *perm_r,   /* in */
-          int        *nseg,     /* modified - with new segments appended */
-          int        *lsub_col, /* in - defines the RHS vector to start the 
dfs */
-          int        *segrep,   /* modified - with new segments appended */
-          int        *repfnz,   /* modified */
-          int        *xprune,   /* modified */
-          int        *marker,   /* modified */
-          int        *parent,   /* working array */
-          int        *xplore,   /* working array */
-          GlobalLU_t *Glu       /* modified */
-          )
-{
-/* 
- * Purpose
- * =======
- *   "column_dfs" performs a symbolic factorization on column jcol, and
- *   decide the supernode boundary.
- *
- *   This routine does not use numeric values, but only use the RHS 
- *   row indices to start the dfs.
- *
- *   A supernode representative is the last column of a supernode.
- *   The nonzeros in U[*,j] are segments that end at supernodal
- *   representatives. The routine returns a list of such supernodal 
- *   representatives in topological order of the dfs that generates them.
- *   The location of the first nonzero in each such supernodal segment
- *   (supernodal entry location) is also returned.
- *
- * Local parameters
- * ================
- *   nseg: no of segments in current U[*,j]
- *   jsuper: jsuper=EMPTY if column j does not belong to the same
- *     supernode as j-1. Otherwise, jsuper=nsuper.
- *
- *   marker2: A-row --> A-row/col (0/1)
- *   repfnz: SuperA-col --> PA-row
- *   parent: SuperA-col --> SuperA-col
- *   xplore: SuperA-col --> index to L-structure
- *
- * Return value
- * ============
- *     0  success;
- *   > 0  number of bytes allocated when run out of space.
- *
- */
-    int     jcolp1, jcolm1, jsuper, nsuper, nextl;
-    int     k, krep, krow, kmark, kperm;
-    int     *marker2;           /* Used for small panel LU */
-    int            fsupc;              /* First column of a snode */
-    int     myfnz;             /* First nonz column of a U-segment */
-    int            chperm, chmark, chrep, kchild;
-    int     xdfs, maxdfs, kpar, oldrep;
-    int     jptr, jm1ptr;
-    int     ito, ifrom, istop; /* Used to compress row subscripts */
-    int     mem_error;
-    int     *xsup, *supno, *lsub, *xlsub;
-    int     nzlmax;
-    static  int  first = 1, maxsuper;
-    
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    nzlmax  = Glu->nzlmax;
-
-    if ( first ) {
-       maxsuper = sp_ienv(3);
-       first = 0;
-    }
-    jcolp1  = jcol + 1;
-    jcolm1  = jcol - 1;
-    nsuper  = supno[jcol];
-    jsuper  = nsuper;
-    nextl   = xlsub[jcol];
-    marker2 = &marker[2*m];
-
-
-    /* For each nonzero in A[*,jcol] do dfs */
-    for (k = 0; lsub_col[k] != EMPTY; k++) {
-
-       krow = lsub_col[k];
-       lsub_col[k] = EMPTY;
-       kmark = marker2[krow];          
-
-       /* krow was visited before, go to the next nonz */
-        if ( kmark == jcol ) continue; 
-
-       /* For each unmarked nbr krow of jcol
-        *      krow is in L: place it in structure of L[*,jcol]
-        */
-       marker2[krow] = jcol;
-       kperm = perm_r[krow];
-
-       if ( kperm == EMPTY ) {
-           lsub[nextl++] = krow;       /* krow is indexed into A */
-           if ( nextl >= nzlmax ) {
-               if ( mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-                   return (mem_error);
-               lsub = Glu->lsub;
-           }
-            if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing 
*/
-       } else {
-           /*  krow is in U: if its supernode-rep krep
-            *  has been explored, update repfnz[*]
-            */
-           krep = xsup[supno[kperm]+1] - 1;
-           myfnz = repfnz[krep];
-
-           if ( myfnz != EMPTY ) {     /* Visited before */
-               if ( myfnz > kperm ) repfnz[krep] = kperm;
-               /* continue; */
-           }
-           else {
-               /* Otherwise, perform dfs starting at krep */
-               oldrep = EMPTY;
-               parent[krep] = oldrep;
-               repfnz[krep] = kperm;
-               xdfs = xlsub[krep];
-               maxdfs = xprune[krep];
-
-               do {
-                   /* 
-                    * For each unmarked kchild of krep 
-                    */
-                   while ( xdfs < maxdfs ) {
-
-                       kchild = lsub[xdfs];
-                       xdfs++;
-                       chmark = marker2[kchild];
-
-                       if ( chmark != jcol ) { /* Not reached yet */
-                           marker2[kchild] = jcol;
-                           chperm = perm_r[kchild];
-
-                           /* Case kchild is in L: place it in L[*,k] */
-                           if ( chperm == EMPTY ) {
-                               lsub[nextl++] = kchild;
-                               if ( nextl >= nzlmax ) {
-                                   if ( mem_error =
-                                        
sLUMemXpand(jcol,nextl,LSUB,&nzlmax,Glu) )
-                                       return (mem_error);
-                                   lsub = Glu->lsub;
-                               }
-                               if ( chmark != jcolm1 ) jsuper = EMPTY;
-                           } else {
-                               /* Case kchild is in U: 
-                                *   chrep = its supernode-rep. If its rep has 
-                                *   been explored, update its repfnz[*]
-                                */
-                               chrep = xsup[supno[chperm]+1] - 1;
-                               myfnz = repfnz[chrep];
-                               if ( myfnz != EMPTY ) { /* Visited before */
-                                   if ( myfnz > chperm )
-                                       repfnz[chrep] = chperm;
-                               } else {
-                                   /* Continue dfs at super-rep of kchild */
-                                   xplore[krep] = xdfs;        
-                                   oldrep = krep;
-                                   krep = chrep; /* Go deeper down G(L^t) */
-                                   parent[krep] = oldrep;
-                                   repfnz[krep] = chperm;
-                                   xdfs = xlsub[krep];     
-                                   maxdfs = xprune[krep];
-                               } /* else */
-
-                          } /* else */
-
-                       } /* if */
-
-                   } /* while */
-
-                   /* krow has no more unexplored nbrs;
-                    *    place supernode-rep krep in postorder DFS.
-                    *    backtrack dfs to its parent
-                    */
-                   segrep[*nseg] = krep;
-                   ++(*nseg);
-                   kpar = parent[krep]; /* Pop from stack, mimic recursion */
-                   if ( kpar == EMPTY ) break; /* dfs done */
-                   krep = kpar;
-                   xdfs = xplore[krep];
-                   maxdfs = xprune[krep];
-
-               } while ( kpar != EMPTY );      /* Until empty stack */
-
-           } /* else */
-
-       } /* else */
-
-    } /* for each nonzero ... */
-
-    /* Check to see if j belongs in the same supernode as j-1 */
-    if ( jcol == 0 ) { /* Do nothing for column 0 */
-       nsuper = supno[0] = 0;
-    } else {
-       fsupc = xsup[nsuper];
-       jptr = xlsub[jcol];     /* Not compressed yet */
-       jm1ptr = xlsub[jcolm1];
-
-#ifdef T2_SUPER
-       if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY;
-#endif
-       /* Make sure the number of columns in a supernode doesn't
-          exceed threshold. */
-       if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY;
-
-       /* If jcol starts a new supernode, reclaim storage space in
-        * lsub from the previous supernode. Note we only store
-        * the subscript set of the first and last columns of
-        * a supernode. (first for num values, last for pruning)
-        */
-       if ( jsuper == EMPTY ) {        /* starts a new supernode */
-           if ( (fsupc < jcolm1-1) ) { /* >= 3 columns in nsuper */
-#ifdef CHK_COMPRESS
-               printf("  Compress lsub[] at super %d-%d\n", fsupc, jcolm1);
-#endif
-               ito = xlsub[fsupc+1];
-               xlsub[jcolm1] = ito;
-               istop = ito + jptr - jm1ptr;
-               xprune[jcolm1] = istop; /* Initialize xprune[jcol-1] */
-               xlsub[jcol] = istop;
-               for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
-                   lsub[ito] = lsub[ifrom];
-               nextl = ito;            /* = istop + length(jcol) */
-           }
-           nsuper++;
-           supno[jcol] = nsuper;
-       } /* if a new supernode */
-
-    }  /* else: jcol > 0 */ 
-    
-    /* Tidy up the pointers before exit */
-    xsup[nsuper+1] = jcolp1;
-    supno[jcolp1]  = nsuper;
-    xprune[jcol]   = nextl;    /* Initialize upper bound for pruning */
-    xlsub[jcolp1]  = nextl;
-
-    return 0;
-}
diff --git a/superlu/scomplex.c b/superlu/scomplex.c
deleted file mode 100644
index d916f062..00000000
--- a/superlu/scomplex.c
+++ /dev/null
@@ -1,127 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-/*
- * This file defines common arithmetic operations for complex type.
- */
-#include <math.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include "slu_scomplex.h"
-
-
-/* Complex Division c = a/b */
-void c_div(complex *c, complex *a, complex *b)
-{
-    float ratio, den;
-    float abr, abi, cr, ci;
-  
-    if( (abr = b->r) < 0.)
-       abr = - abr;
-    if( (abi = b->i) < 0.)
-       abi = - abi;
-    if( abr <= abi ) {
-       if (abi == 0) {
-           fprintf(stderr, "z_div.c: division by zero\n");
-            exit(-1);
-       }         
-       ratio = b->r / b->i ;
-       den = b->i * (1 + ratio*ratio);
-       cr = (a->r*ratio + a->i) / den;
-       ci = (a->i*ratio - a->r) / den;
-    } else {
-       ratio = b->i / b->r ;
-       den = b->r * (1 + ratio*ratio);
-       cr = (a->r + a->i*ratio) / den;
-       ci = (a->i - a->r*ratio) / den;
-    }
-    c->r = cr;
-    c->i = ci;
-}
-
-
-/* Returns sqrt(z.r^2 + z.i^2) */
-double c_abs(complex *z)
-{
-    float temp;
-    float real = z->r;
-    float imag = z->i;
-
-    if (real < 0) real = -real;
-    if (imag < 0) imag = -imag;
-    if (imag > real) {
-       temp = real;
-       real = imag;
-       imag = temp;
-    }
-    if ((real+imag) == real) return(real);
-  
-    temp = imag/real;
-    temp = real*sqrt(1.0 + temp*temp);  /*overflow!!*/
-    return (temp);
-}
-
-
-/* Approximates the abs */
-/* Returns abs(z.r) + abs(z.i) */
-double c_abs1(complex *z)
-{
-    float real = z->r;
-    float imag = z->i;
-  
-    if (real < 0) real = -real;
-    if (imag < 0) imag = -imag;
-
-    return (real + imag);
-}
-
-/* Return the exponentiation */
-void c_exp(complex *r, complex *z)
-{
-    float expx;
-
-    expx = exp(z->r);
-    r->r = expx * cos(z->i);
-    r->i = expx * sin(z->i);
-}
-
-/* Return the complex conjugate */
-void r_cnjg(complex *r, complex *z)
-{
-    r->r = z->r;
-    r->i = -z->i;
-}
-
-/* Return the imaginary part */
-double r_imag(complex *z)
-{
-    return (z->i);
-}
-
-
diff --git a/superlu/scopy_to_ucol.c b/superlu/scopy_to_ucol.c
deleted file mode 100644
index 44a237e5..00000000
--- a/superlu/scopy_to_ucol.c
+++ /dev/null
@@ -1,112 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_sdefs.h"
-
-int
-scopy_to_ucol(
-             int        jcol,    /* in */
-             int        nseg,    /* in */
-             int        *segrep,  /* in */
-             int        *repfnz,  /* in */
-             int        *perm_r,  /* in */
-             float     *dense,   /* modified - reset to zero on return */
-             GlobalLU_t *Glu      /* modified */
-             )
-{
-/* 
- * Gather from SPA dense[*] to global ucol[*].
- */
-    int ksub, krep, ksupno;
-    int i, k, kfnz, segsze;
-    int fsupc, isub, irow;
-    int jsupno, nextu;
-    int new_next, mem_error;
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-    float    *ucol;
-    int       *usub, *xusub;
-    int       nzumax;
-
-    float zero = 0.0;
-
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    nzumax  = Glu->nzumax;
-    
-    jsupno = supno[jcol];
-    nextu  = xusub[jcol];
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-       krep = segrep[k--];
-       ksupno = supno[krep];
-
-       if ( ksupno != jsupno ) { /* Should go into ucol[] */
-           kfnz = repfnz[krep];
-           if ( kfnz != EMPTY ) {      /* Nonzero U-segment */
-
-               fsupc = xsup[ksupno];
-               isub = xlsub[fsupc] + kfnz - fsupc;
-               segsze = krep - kfnz + 1;
-
-               new_next = nextu + segsze;
-               while ( new_next > nzumax ) {
-                   if (mem_error = sLUMemXpand(jcol, nextu, UCOL, &nzumax, 
Glu))
-                       return (mem_error);
-                   ucol = Glu->ucol;
-                   if (mem_error = sLUMemXpand(jcol, nextu, USUB, &nzumax, 
Glu))
-                       return (mem_error);
-                   usub = Glu->usub;
-                   lsub = Glu->lsub;
-               }
-               
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   usub[nextu] = perm_r[irow];
-                   ucol[nextu] = dense[irow];
-                   dense[irow] = zero;
-                   nextu++;
-                   isub++;
-               } 
-
-           }
-
-       }
-
-    } /* for each segment... */
-
-    xusub[jcol + 1] = nextu;      /* Close U[*,jcol] */
-    return 0;
-}
diff --git a/superlu/scsum1.c b/superlu/scsum1.c
deleted file mode 100644
index 42ebfe32..00000000
--- a/superlu/scsum1.c
+++ /dev/null
@@ -1,111 +0,0 @@
-#include "slu_Cnames.h"
-#include "slu_scomplex.h"
-
-double scsum1_(int *n, complex *cx, int *incx)
-{
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*! @file scsum1.c
- * \brief Takes sum of the absolute values of a complex vector and returns a 
single precision result
- *
- * <pre>
- *     -- LAPACK auxiliary routine (version 2.0) --   
- *     Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
- *     Courant Institute, Argonne National Lab, and Rice University   
- *     October 31, 1992   
- * </pre>
- */
-/* 
-
-    Purpose   
-    =======   
-
-    SCSUM1 takes the sum of the absolute values of a complex   
-    vector and returns a single precision result.   
-
-    Based on SCASUM from the Level 1 BLAS.   
-    The change is to use the 'genuine' absolute value.   
-
-    Contributed by Nick Higham for use with CLACON.   
-
-    Arguments   
-    =========   
-
-    N       (input) INT
-            The number of elements in the vector CX.   
-
-    CX      (input) COMPLEX array, dimension (N)   
-            The vector whose elements will be summed.   
-
-    INCX    (input) INT
-            The spacing between successive values of CX.  INCX > 0.   
-
-    ===================================================================== 
-  
-
-
-    
-   Parameter adjustments   
-       Function Body */
-    /* System generated locals */
-    int i__1, i__2;
-    float ret_val;
-    /* Builtin functions */
-    double c_abs(complex *);
-    /* Local variables */
-    static int i, nincx;
-    static float stemp;
-
-
-#define CX(I) cx[(I)-1]
-
-
-    ret_val = 0.f;
-    stemp = 0.f;
-    if (*n <= 0) {
-       return ret_val;
-    }
-    if (*incx == 1) {
-       goto L20;
-    }
-
-/*     CODE FOR INCREMENT NOT EQUAL TO 1 */
-
-    nincx = *n * *incx;
-    i__1 = nincx;
-    i__2 = *incx;
-    for (i = 1; *incx < 0 ? i >= nincx : i <= nincx; i += *incx) {
-
-/*        NEXT LINE MODIFIED. */
-
-       stemp += c_abs(&CX(i));
-/* L10: */
-    }
-    ret_val = stemp;
-    return ret_val;
-
-/*     CODE FOR INCREMENT EQUAL TO 1 */
-
-L20:
-    i__2 = *n;
-    for (i = 1; i <= *n; ++i) {
-
-/*        NEXT LINE MODIFIED. */
-
-       stemp += c_abs(&CX(i));
-/* L30: */
-    }
-    ret_val = stemp;
-    return ret_val;
-
-/*     End of SCSUM1 */
-
-} /* scsum1_ */
-
diff --git a/superlu/sgscon.c b/superlu/sgscon.c
deleted file mode 100644
index cc44712e..00000000
--- a/superlu/sgscon.c
+++ /dev/null
@@ -1,155 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
- * File name:  sgscon.c
- * History:     Modified from lapack routines SGECON.
- */
-#include <math.h>
-#include "slu_sdefs.h"
-
-void
-sgscon(char *norm, SuperMatrix *L, SuperMatrix *U,
-       float anorm, float *rcond, SuperLUStat_t *stat, int *info)
-{
-/*
-    Purpose   
-    =======   
-
-    SGSCON estimates the reciprocal of the condition number of a general 
-    real matrix A, in either the 1-norm or the infinity-norm, using   
-    the LU factorization computed by SGETRF.   
-
-    An estimate is obtained for norm(inv(A)), and the reciprocal of the   
-    condition number is computed as   
-       RCOND = 1 / ( norm(A) * norm(inv(A)) ).   
-
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
- 
-    Arguments   
-    =========   
-
-    NORM    (input) char*
-            Specifies whether the 1-norm condition number or the   
-            infinity-norm condition number is required:   
-            = '1' or 'O':  1-norm;   
-            = 'I':         Infinity-norm.
-           
-    L       (input) SuperMatrix*
-            The factor L from the factorization Pr*A*Pc=L*U as computed by
-            sgstrf(). Use compressed row subscripts storage for supernodes,
-            i.e., L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
- 
-    U       (input) SuperMatrix*
-            The factor U from the factorization Pr*A*Pc=L*U as computed by
-            sgstrf(). Use column-wise storage scheme, i.e., U has types:
-            Stype = SLU_NC, Dtype = SLU_S, Mtype = TRU.
-           
-    ANORM   (input) float
-            If NORM = '1' or 'O', the 1-norm of the original matrix A.   
-            If NORM = 'I', the infinity-norm of the original matrix A.
-           
-    RCOND   (output) float*
-            The reciprocal of the condition number of the matrix A,   
-            computed as RCOND = 1/(norm(A) * norm(inv(A))).
-           
-    INFO    (output) int*
-            = 0:  successful exit   
-            < 0:  if INFO = -i, the i-th argument had an illegal value   
-
-    ===================================================================== 
-*/
-
-    /* Local variables */
-    int    kase, kase1, onenrm, i;
-    float ainvnm;
-    float *work;
-    int    *iwork;
-    extern int srscl_(int *, float *, float *, int *);
-
-    extern int slacon_(int *, float *, float *, int *, float *, int *);
-
-    
-    /* Test the input parameters. */
-    *info = 0;
-    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
-    if (! onenrm && ! lsame_(norm, "I")) *info = -1;
-    else if (L->nrow < 0 || L->nrow != L->ncol ||
-             L->Stype != SLU_SC || L->Dtype != SLU_S || L->Mtype != SLU_TRLU)
-        *info = -2;
-    else if (U->nrow < 0 || U->nrow != U->ncol ||
-             U->Stype != SLU_NC || U->Dtype != SLU_S || U->Mtype != SLU_TRU) 
-       *info = -3;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("sgscon", &i);
-       return;
-    }
-
-    /* Quick return if possible */
-    *rcond = 0.;
-    if ( L->nrow == 0 || U->nrow == 0) {
-       *rcond = 1.;
-       return;
-    }
-
-    work = floatCalloc( 3*L->nrow );
-    iwork = intMalloc( L->nrow );
-
-
-    if ( !work || !iwork )
-       ABORT("Malloc fails for work arrays in sgscon.");
-    
-    /* Estimate the norm of inv(A). */
-    ainvnm = 0.;
-    if ( onenrm ) kase1 = 1;
-    else kase1 = 2;
-    kase = 0;
-
-    do {
-       slacon_(&L->nrow, &work[L->nrow], &work[0], &iwork[0], &ainvnm, &kase);
-
-       if (kase == 0) break;
-
-       if (kase == kase1) {
-           /* Multiply by inv(L). */
-           sp_strsv("L", "No trans", "Unit", L, U, &work[0], stat, info);
-
-           /* Multiply by inv(U). */
-           sp_strsv("U", "No trans", "Non-unit", L, U, &work[0], stat, info);
-           
-       } else {
-
-           /* Multiply by inv(U'). */
-           sp_strsv("U", "Transpose", "Non-unit", L, U, &work[0], stat, info);
-
-           /* Multiply by inv(L'). */
-           sp_strsv("L", "Transpose", "Unit", L, U, &work[0], stat, info);
-           
-       }
-
-    } while ( kase != 0 );
-
-    /* Compute the estimate of the reciprocal condition number. */
-    if (ainvnm != 0.) *rcond = (1. / ainvnm) / anorm;
-
-    SUPERLU_FREE (work);
-    SUPERLU_FREE (iwork);
-    return;
-
-} /* sgscon */
-
diff --git a/superlu/sgsequ.c b/superlu/sgsequ.c
deleted file mode 100644
index b24e147f..00000000
--- a/superlu/sgsequ.c
+++ /dev/null
@@ -1,205 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-/*
- * File name:  sgsequ.c
- * History:     Modified from LAPACK routine SGEEQU
- */
-#include <math.h>
-#include "slu_sdefs.h"
-
-void
-sgsequ(SuperMatrix *A, float *r, float *c, float *rowcnd,
-       float *colcnd, float *amax, int *info)
-{
-/*    
-    Purpose   
-    =======   
-
-    SGSEQU computes row and column scalings intended to equilibrate an   
-    M-by-N sparse matrix A and reduce its condition number. R returns the row
-    scale factors and C the column scale factors, chosen to try to make   
-    the largest element in each row and column of the matrix B with   
-    elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   
-
-    R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
-    number and BIGNUM = largest safe number.  Use of these scaling   
-    factors is not guaranteed to reduce the condition number of A but   
-    works well in practice.   
-
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
- 
-    Arguments   
-    =========   
-
-    A       (input) SuperMatrix*
-            The matrix of dimension (A->nrow, A->ncol) whose equilibration
-            factors are to be computed. The type of A can be:
-            Stype = SLU_NC; Dtype = SLU_S; Mtype = SLU_GE.
-           
-    R       (output) float*, size A->nrow
-            If INFO = 0 or INFO > M, R contains the row scale factors   
-            for A.
-           
-    C       (output) float*, size A->ncol
-            If INFO = 0,  C contains the column scale factors for A.
-           
-    ROWCND  (output) float*
-            If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
-            smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
-            AMAX is neither too large nor too small, it is not worth   
-            scaling by R.
-           
-    COLCND  (output) float*
-            If INFO = 0, COLCND contains the ratio of the smallest   
-            C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
-            worth scaling by C.
-           
-    AMAX    (output) float*
-            Absolute value of largest matrix element.  If AMAX is very   
-            close to overflow or very close to underflow, the matrix   
-            should be scaled.
-           
-    INFO    (output) int*
-            = 0:  successful exit   
-            < 0:  if INFO = -i, the i-th argument had an illegal value   
-            > 0:  if INFO = i,  and i is   
-                  <= A->nrow:  the i-th row of A is exactly zero   
-                  >  A->ncol:  the (i-M)-th column of A is exactly zero   
-
-    ===================================================================== 
-*/
-
-    /* Local variables */
-    NCformat *Astore;
-    float   *Aval;
-    int i, j, irow;
-    float rcmin, rcmax;
-    float bignum, smlnum;
-    extern double slamch_(char *);
-    
-    /* Test the input parameters. */
-    *info = 0;
-    if ( A->nrow < 0 || A->ncol < 0 ||
-        A->Stype != SLU_NC || A->Dtype != SLU_S || A->Mtype != SLU_GE )
-       *info = -1;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("sgsequ", &i);
-       return;
-    }
-
-    /* Quick return if possible */
-    if ( A->nrow == 0 || A->ncol == 0 ) {
-       *rowcnd = 1.;
-       *colcnd = 1.;
-       *amax = 0.;
-       return;
-    }
-
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    
-    /* Get machine constants. */
-    smlnum = slamch_("S");
-    bignum = 1. / smlnum;
-
-    /* Compute row scale factors. */
-    for (i = 0; i < A->nrow; ++i) r[i] = 0.;
-
-    /* Find the maximum element in each row. */
-    for (j = 0; j < A->ncol; ++j)
-       for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-           irow = Astore->rowind[i];
-           r[irow] = SUPERLU_MAX( r[irow], fabs(Aval[i]) );
-       }
-
-    /* Find the maximum and minimum scale factors. */
-    rcmin = bignum;
-    rcmax = 0.;
-    for (i = 0; i < A->nrow; ++i) {
-       rcmax = SUPERLU_MAX(rcmax, r[i]);
-       rcmin = SUPERLU_MIN(rcmin, r[i]);
-    }
-    *amax = rcmax;
-
-    if (rcmin == 0.) {
-       /* Find the first zero scale factor and return an error code. */
-       for (i = 0; i < A->nrow; ++i)
-           if (r[i] == 0.) {
-               *info = i + 1;
-               return;
-           }
-    } else {
-       /* Invert the scale factors. */
-       for (i = 0; i < A->nrow; ++i)
-           r[i] = 1. / SUPERLU_MIN( SUPERLU_MAX( r[i], smlnum ), bignum );
-       /* Compute ROWCND = min(R(I)) / max(R(I)) */
-       *rowcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
-    }
-
-    /* Compute column scale factors */
-    for (j = 0; j < A->ncol; ++j) c[j] = 0.;
-
-    /* Find the maximum element in each column, assuming the row
-       scalings computed above. */
-    for (j = 0; j < A->ncol; ++j)
-       for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-           irow = Astore->rowind[i];
-           c[j] = SUPERLU_MAX( c[j], fabs(Aval[i]) * r[irow] );
-       }
-
-    /* Find the maximum and minimum scale factors. */
-    rcmin = bignum;
-    rcmax = 0.;
-    for (j = 0; j < A->ncol; ++j) {
-       rcmax = SUPERLU_MAX(rcmax, c[j]);
-       rcmin = SUPERLU_MIN(rcmin, c[j]);
-    }
-
-    if (rcmin == 0.) {
-       /* Find the first zero scale factor and return an error code. */
-       for (j = 0; j < A->ncol; ++j)
-           if ( c[j] == 0. ) {
-               *info = A->nrow + j + 1;
-               return;
-           }
-    } else {
-       /* Invert the scale factors. */
-       for (j = 0; j < A->ncol; ++j)
-           c[j] = 1. / SUPERLU_MIN( SUPERLU_MAX( c[j], smlnum ), bignum);
-       /* Compute COLCND = min(C(J)) / max(C(J)) */
-       *colcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
-    }
-
-    return;
-
-} /* sgsequ */
-
-
diff --git a/superlu/sgsrfs.c b/superlu/sgsrfs.c
deleted file mode 100644
index c95c4433..00000000
--- a/superlu/sgsrfs.c
+++ /dev/null
@@ -1,446 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
- * File name:  sgsrfs.c
- * History:     Modified from lapack routine SGERFS
- */
-#include <math.h>
-#include "slu_sdefs.h"
-
-void
-sgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
-       int *perm_c, int *perm_r, char *equed, float *R, float *C,
-       SuperMatrix *B, SuperMatrix *X, float *ferr, float *berr,
-       SuperLUStat_t *stat, int *info)
-{
-/*
- *   Purpose   
- *   =======   
- *
- *   SGSRFS improves the computed solution to a system of linear   
- *   equations and provides error bounds and backward error estimates for 
- *   the solution.   
- *
- *   If equilibration was performed, the system becomes:
- *           (diag(R)*A_original*diag(C)) * X = diag(R)*B_original.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- *   Arguments   
- *   =========   
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *   
- *   A       (input) SuperMatrix*
- *           The original matrix A in the system, or the scaled A if
- *           equilibration was done. The type of A can be:
- *           Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_GE.
- *    
- *   L       (input) SuperMatrix*
- *          The factor L from the factorization Pr*A*Pc=L*U. Use
- *           compressed row subscripts storage for supernodes, 
- *           i.e., L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = 
SLU_TRLU.
- * 
- *   U       (input) SuperMatrix*
- *           The factor U from the factorization Pr*A*Pc=L*U as computed by
- *           sgstrf(). Use column-wise storage scheme, 
- *           i.e., U has types: Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
- *
- *   perm_c  (input) int*, dimension (A->ncol)
- *          Column permutation vector, which defines the 
- *           permutation matrix Pc; perm_c[i] = j means column i of A is 
- *           in position j in A*Pc.
- *
- *   perm_r  (input) int*, dimension (A->nrow)
- *           Row permutation vector, which defines the permutation matrix Pr;
- *           perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- *   equed   (input) Specifies the form of equilibration that was done.
- *           = 'N': No equilibration.
- *           = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
- *           = 'C': Column equilibration, i.e., A was postmultiplied by
- *                  diag(C).
- *           = 'B': Both row and column equilibration, i.e., A was replaced 
- *                  by diag(R)*A*diag(C).
- *
- *   R       (input) float*, dimension (A->nrow)
- *           The row scale factors for A.
- *           If equed = 'R' or 'B', A is premultiplied by diag(R).
- *           If equed = 'N' or 'C', R is not accessed.
- * 
- *   C       (input) float*, dimension (A->ncol)
- *           The column scale factors for A.
- *           If equed = 'C' or 'B', A is postmultiplied by diag(C).
- *           If equed = 'N' or 'R', C is not accessed.
- *
- *   B       (input) SuperMatrix*
- *           B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
- *           The right hand side matrix B.
- *           if equed = 'R' or 'B', B is premultiplied by diag(R).
- *
- *   X       (input/output) SuperMatrix*
- *           X has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
- *           On entry, the solution matrix X, as computed by sgstrs().
- *           On exit, the improved solution matrix X.
- *           if *equed = 'C' or 'B', X should be premultiplied by diag(C)
- *               in order to obtain the solution to the original system.
- *
- *   FERR    (output) float*, dimension (B->ncol)   
- *           The estimated forward error bound for each solution vector   
- *           X(j) (the j-th column of the solution matrix X).   
- *           If XTRUE is the true solution corresponding to X(j), FERR(j) 
- *           is an estimated upper bound for the magnitude of the largest 
- *           element in (X(j) - XTRUE) divided by the magnitude of the   
- *           largest element in X(j).  The estimate is as reliable as   
- *           the estimate for RCOND, and is almost always a slight   
- *           overestimate of the true error.
- *
- *   BERR    (output) float*, dimension (B->ncol)   
- *           The componentwise relative backward error of each solution   
- *           vector X(j) (i.e., the smallest relative change in   
- *           any element of A or B that makes X(j) an exact solution).
- *
- *   stat     (output) SuperLUStat_t*
- *            Record the statistics on runtime and floating-point operation 
count.
- *            See util.h for the definition of 'SuperLUStat_t'.
- *
- *   info    (output) int*   
- *           = 0:  successful exit   
- *            < 0:  if INFO = -i, the i-th argument had an illegal value   
- *
- *    Internal Parameters   
- *    ===================   
- *
- *    ITMAX is the maximum number of steps of iterative refinement.   
- *
- */  
-
-#define ITMAX 5
-    
-    /* Table of constant values */
-    int    ione = 1;
-    float ndone = -1.;
-    float done = 1.;
-    
-    /* Local variables */
-    NCformat *Astore;
-    float   *Aval;
-    SuperMatrix Bjcol;
-    DNformat *Bstore, *Xstore, *Bjcol_store;
-    float   *Bmat, *Xmat, *Bptr, *Xptr;
-    int      kase;
-    float   safe1, safe2;
-    int      i, j, k, irow, nz, count, notran, rowequ, colequ;
-    int      ldb, ldx, nrhs;
-    float   s, xk, lstres, eps, safmin;
-    char     transc[1];
-    trans_t  transt;
-    float   *work;
-    float   *rwork;
-    int      *iwork;
-    extern double slamch_(char *);
-    extern int slacon_(int *, float *, float *, int *, float *, int *);
-#ifdef _CRAY
-    extern int SCOPY(int *, float *, int *, float *, int *);
-    extern int SSAXPY(int *, float *, float *, int *, float *, int *);
-#else
-    extern int scopy_(int *, float *, int *, float *, int *);
-    extern int saxpy_(int *, float *, float *, int *, float *, int *);
-#endif
-
-    Astore = A->Store;
-    Aval   = Astore->nzval;
-    Bstore = B->Store;
-    Xstore = X->Store;
-    Bmat   = Bstore->nzval;
-    Xmat   = Xstore->nzval;
-    ldb    = Bstore->lda;
-    ldx    = Xstore->lda;
-    nrhs   = B->ncol;
-    
-    /* Test the input parameters */
-    *info = 0;
-    notran = (trans == NOTRANS);
-    if ( !notran && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-             A->Stype != SLU_NC || A->Dtype != SLU_S || A->Mtype != SLU_GE )
-       *info = -2;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_S || L->Mtype != SLU_TRLU )
-       *info = -3;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-             U->Stype != SLU_NC || U->Dtype != SLU_S || U->Mtype != SLU_TRU )
-       *info = -4;
-    else if ( ldb < SUPERLU_MAX(0, A->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
-        *info = -10;
-    else if ( ldx < SUPERLU_MAX(0, A->nrow) ||
-             X->Stype != SLU_DN || X->Dtype != SLU_S || X->Mtype != SLU_GE )
-       *info = -11;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("sgsrfs", &i);
-       return;
-    }
-
-    /* Quick return if possible */
-    if ( A->nrow == 0 || nrhs == 0) {
-       for (j = 0; j < nrhs; ++j) {
-           ferr[j] = 0.;
-           berr[j] = 0.;
-       }
-       return;
-    }
-
-    rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-    colequ = lsame_(equed, "C") || lsame_(equed, "B");
-    
-    /* Allocate working space */
-    work = floatMalloc(2*A->nrow);
-    rwork = (float *) SUPERLU_MALLOC( A->nrow * sizeof(float) );
-    iwork = intMalloc(2*A->nrow);
-    if ( !work || !rwork || !iwork ) 
-        ABORT("Malloc fails for work/rwork/iwork.");
-    
-    if ( notran ) {
-       *(unsigned char *)transc = 'N';
-        transt = TRANS;
-    } else {
-       *(unsigned char *)transc = 'T';
-       transt = NOTRANS;
-    }
-
-    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
-    nz     = A->ncol + 1;
-    eps    = slamch_("Epsilon");
-    safmin = slamch_("Safe minimum");
-    safe1  = nz * safmin;
-    safe2  = safe1 / eps;
-
-    /* Compute the number of nonzeros in each row (or column) of A */
-    for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
-    if ( notran ) {
-       for (k = 0; k < A->ncol; ++k)
-           for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) 
-               ++iwork[Astore->rowind[i]];
-    } else {
-       for (k = 0; k < A->ncol; ++k)
-           iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
-    }  
-
-    /* Copy one column of RHS B into Bjcol. */
-    Bjcol.Stype = B->Stype;
-    Bjcol.Dtype = B->Dtype;
-    Bjcol.Mtype = B->Mtype;
-    Bjcol.nrow  = B->nrow;
-    Bjcol.ncol  = 1;
-    Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
-    if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
-    Bjcol_store = Bjcol.Store;
-    Bjcol_store->lda = ldb;
-    Bjcol_store->nzval = work; /* address aliasing */
-       
-    /* Do for each right hand side ... */
-    for (j = 0; j < nrhs; ++j) {
-       count = 0;
-       lstres = 3.;
-       Bptr = &Bmat[j*ldb];
-       Xptr = &Xmat[j*ldx];
-
-       while (1) { /* Loop until stopping criterion is satisfied. */
-
-           /* Compute residual R = B - op(A) * X,   
-              where op(A) = A, A**T, or A**H, depending on TRANS. */
-           
-#ifdef _CRAY
-           SCOPY(&A->nrow, Bptr, &ione, work, &ione);
-#else
-           scopy_(&A->nrow, Bptr, &ione, work, &ione);
-#endif
-           sp_sgemv(transc, ndone, A, Xptr, ione, done, work, ione);
-
-           /* Compute componentwise relative backward error from formula 
-              max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   
-              where abs(Z) is the componentwise absolute value of the matrix
-              or vector Z.  If the i-th component of the denominator is less
-              than SAFE2, then SAFE1 is added to the i-th component of the   
-              numerator and denominator before dividing. */
-
-           for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] );
-           
-           /* Compute abs(op(A))*abs(X) + abs(B). */
-           if (notran) {
-               for (k = 0; k < A->ncol; ++k) {
-                   xk = fabs( Xptr[k] );
-                   for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
-                       rwork[Astore->rowind[i]] += fabs(Aval[i]) * xk;
-               }
-           } else {
-               for (k = 0; k < A->ncol; ++k) {
-                   s = 0.;
-                   for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
-                       irow = Astore->rowind[i];
-                       s += fabs(Aval[i]) * fabs(Xptr[irow]);
-                   }
-                   rwork[k] += s;
-               }
-           }
-           s = 0.;
-           for (i = 0; i < A->nrow; ++i) {
-               if (rwork[i] > safe2)
-                   s = SUPERLU_MAX( s, fabs(work[i]) / rwork[i] );
-               else
-                   s = SUPERLU_MAX( s, (fabs(work[i]) + safe1) / 
-                               (rwork[i] + safe1) );
-           }
-           berr[j] = s;
-
-           /* Test stopping criterion. Continue iterating if   
-              1) The residual BERR(J) is larger than machine epsilon, and   
-              2) BERR(J) decreased by at least a factor of 2 during the   
-                 last iteration, and   
-              3) At most ITMAX iterations tried. */
-
-           if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
-               /* Update solution and try again. */
-               sgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               
-#ifdef _CRAY
-               SAXPY(&A->nrow, &done, work, &ione,
-                      &Xmat[j*ldx], &ione);
-#else
-               saxpy_(&A->nrow, &done, work, &ione,
-                      &Xmat[j*ldx], &ione);
-#endif
-               lstres = berr[j];
-               ++count;
-           } else {
-               break;
-           }
-        
-       } /* end while */
-
-       stat->RefineSteps = count;
-
-       /* Bound error from formula:
-          norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*   
-          ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)   
-          where   
-            norm(Z) is the magnitude of the largest component of Z   
-            inv(op(A)) is the inverse of op(A)   
-            abs(Z) is the componentwise absolute value of the matrix or
-              vector Z   
-            NZ is the maximum number of nonzeros in any row of A, plus 1   
-            EPS is machine epsilon   
-
-          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))   
-          is incremented by SAFE1 if the i-th component of   
-          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   
-
-          Use SLACON to estimate the infinity-norm of the matrix   
-             inv(op(A)) * diag(W),   
-          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
-       
-       for (i = 0; i < A->nrow; ++i) rwork[i] = fabs( Bptr[i] );
-       
-       /* Compute abs(op(A))*abs(X) + abs(B). */
-       if ( notran ) {
-           for (k = 0; k < A->ncol; ++k) {
-               xk = fabs( Xptr[k] );
-               for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
-                   rwork[Astore->rowind[i]] += fabs(Aval[i]) * xk;
-           }
-       } else {
-           for (k = 0; k < A->ncol; ++k) {
-               s = 0.;
-               for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
-                   irow = Astore->rowind[i];
-                   xk = fabs( Xptr[irow] );
-                   s += fabs(Aval[i]) * xk;
-               }
-               rwork[k] += s;
-           }
-       }
-       
-       for (i = 0; i < A->nrow; ++i)
-           if (rwork[i] > safe2)
-               rwork[i] = fabs(work[i]) + (iwork[i]+1)*eps*rwork[i];
-           else
-               rwork[i] = fabs(work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
-
-       kase = 0;
-
-       do {
-           slacon_(&A->nrow, &work[A->nrow], work,
-                   &iwork[A->nrow], &ferr[j], &kase);
-           if (kase == 0) break;
-
-           if (kase == 1) {
-               /* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
-               if ( notran && colequ )
-                   for (i = 0; i < A->ncol; ++i) work[i] *= C[i];
-               else if ( !notran && rowequ )
-                   for (i = 0; i < A->nrow; ++i) work[i] *= R[i];
-               
-               sgstrs (transt, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               
-               for (i = 0; i < A->nrow; ++i) work[i] *= rwork[i];
-           } else {
-               /* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
-               for (i = 0; i < A->nrow; ++i) work[i] *= rwork[i];
-               
-               sgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               
-               if ( notran && colequ )
-                   for (i = 0; i < A->ncol; ++i) work[i] *= C[i];
-               else if ( !notran && rowequ )
-                   for (i = 0; i < A->ncol; ++i) work[i] *= R[i];  
-           }
-           
-       } while ( kase != 0 );
-
-
-       /* Normalize error. */
-       lstres = 0.;
-       if ( notran && colequ ) {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, C[i] * fabs( Xptr[i]) );
-       } else if ( !notran && rowequ ) {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, R[i] * fabs( Xptr[i]) );
-       } else {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, fabs( Xptr[i]) );
-       }
-       if ( lstres != 0. )
-           ferr[j] /= lstres;
-
-    } /* for each RHS j ... */
-    
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(rwork);
-    SUPERLU_FREE(iwork);
-    SUPERLU_FREE(Bjcol.Store);
-
-    return;
-
-} /* sgsrfs */
diff --git a/superlu/sgssv.c b/superlu/sgssv.c
deleted file mode 100644
index 3c657806..00000000
--- a/superlu/sgssv.c
+++ /dev/null
@@ -1,230 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-#include "slu_sdefs.h"
-
-void
-sgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
-      SuperMatrix *L, SuperMatrix *U, SuperMatrix *B,
-      SuperLUStat_t *stat, int *info )
-{
-/*
- * Purpose
- * =======
- *
- * SGSSV solves the system of linear equations A*X=B, using the
- * LU factorization from SGSTRF. It performs the following steps:
- *
- *   1. If A is stored column-wise (A->Stype = SLU_NC):
- *
- *      1.1. Permute the columns of A, forming A*Pc, where Pc
- *           is a permutation matrix. For more details of this step, 
- *           see sp_preorder.c.
- *
- *      1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined
- *           by Gaussian elimination with partial pivoting.
- *           L is unit lower triangular with offdiagonal entries
- *           bounded by 1 in magnitude, and U is upper triangular.
- *
- *      1.3. Solve the system of equations A*X=B using the factored
- *           form of A.
- *
- *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the
- *      above algorithm to the transpose of A:
- *
- *      2.1. Permute columns of transpose(A) (rows of A),
- *           forming transpose(A)*Pc, where Pc is a permutation matrix. 
- *           For more details of this step, see sp_preorder.c.
- *
- *      2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr
- *           determined by Gaussian elimination with partial pivoting.
- *           L is unit lower triangular with offdiagonal entries
- *           bounded by 1 in magnitude, and U is upper triangular.
- *
- *      2.3. Solve the system of equations A*X=B using the factored
- *           form of A.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- * 
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed and how the
- *         system will be solved.
- *
- * A       (input) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = SLU_NC or SLU_NR; Dtype = SLU_S; Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- * perm_c  (input/output) int*
- *         If A->Stype = SLU_NC, column permutation vector of size A->ncol
- *         which defines the permutation matrix Pc; perm_c[i] = j means 
- *         column i of A is in position j in A*Pc.
- *         If A->Stype = SLU_NR, column permutation vector of size A->nrow
- *         which describes permutation of columns of transpose(A) 
- *         (rows of A) as described above.
- * 
- *         If options->ColPerm = MY_PERMC or options->Fact = SamePattern or
- *            options->Fact = SamePattern_SameRowPerm, it is an input argument.
- *            On exit, perm_c may be overwritten by the product of the input
- *            perm_c and a permutation that postorders the elimination tree
- *            of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
- *            is already in postorder.
- *         Otherwise, it is an output argument.
- * 
- * perm_r  (input/output) int*
- *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
- *         which defines the permutation matrix Pr, and is determined 
- *         by partial pivoting.  perm_r[i] = j means row i of A is in 
- *         position j in Pr*A.
- *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
- *         determines permutation of rows of transpose(A)
- *         (columns of A) as described above.
- *
- *         If options->RowPerm = MY_PERMR or
- *            options->Fact = SamePattern_SameRowPerm, perm_r is an
- *            input argument.
- *         otherwise it is an output argument.
- *
- * L       (output) SuperMatrix*
- *         The factor L from the factorization 
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses compressed row subscripts storage for supernodes, i.e.,
- *         L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
- *         
- * U       (output) SuperMatrix*
- *        The factor U from the factorization 
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *        = 0: successful exit
- *         > 0: if info = i, and i is
- *             <= A->ncol: U(i,i) is exactly zero. The factorization has
- *                been completed, but the factor U is exactly singular,
- *                so the solution could not be computed.
- *             > A->ncol: number of bytes allocated when memory allocation
- *                failure occurred, plus A->ncol.
- *   
- */
-    DNformat *Bstore;
-    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
-    SuperMatrix AC; /* Matrix postmultiplied by Pc */
-    int      lwork = 0, *etree, i;
-    
-    /* Set default values for some parameters */
-    float   drop_tol = 0.;
-    int      panel_size;     /* panel size */
-    int      relax;          /* no of columns in a relaxed snodes */
-    int      permc_spec;
-    trans_t  trans = NOTRANS;
-    double   *utime;
-    double   t;        /* Temporary time */
-
-    /* Test the input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    if ( options->Fact != DOFACT ) *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-        (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
-        A->Dtype != SLU_S || A->Mtype != SLU_GE )
-       *info = -2;
-    else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
-       B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
-       *info = -7;
-    if ( *info != 0 ) {
-       i = -(*info);
-       xerbla_("sgssv", &i);
-       return;
-    }
-
-    utime = stat->utime;
-
-    /* Convert A to SLU_NC format when necessary. */
-    if ( A->Stype == SLU_NR ) {
-       NRformat *Astore = A->Store;
-       AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-       sCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
-                              Astore->nzval, Astore->colind, Astore->rowptr,
-                              SLU_NC, A->Dtype, A->Mtype);
-       trans = TRANS;
-    } else {
-        if ( A->Stype == SLU_NC ) AA = A;
-    }
-
-    t = SuperLU_timer_();
-    /*
-     * Get column permutation vector perm_c[], according to permc_spec:
-     *   permc_spec = NATURAL:  natural ordering 
-     *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
-     *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
-     *   permc_spec = COLAMD:   approximate minimum degree column ordering
-     *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
-     */
-    permc_spec = options->ColPerm;
-    if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
-      get_perm_c(permc_spec, AA, perm_c);
-    utime[COLPERM] = SuperLU_timer_() - t;
-
-    etree = intMalloc(A->ncol);
-
-    t = SuperLU_timer_();
-    sp_preorder(options, AA, perm_c, etree, &AC);
-    utime[ETREE] = SuperLU_timer_() - t;
-
-    panel_size = sp_ienv(1);
-    relax = sp_ienv(2);
-
-    /*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
-         relax, panel_size, sp_ienv(3), sp_ienv(4));*/
-    t = SuperLU_timer_(); 
-    /* Compute the LU factorization of A. */
-    sgstrf(options, &AC, drop_tol, relax, panel_size,
-          etree, NULL, lwork, perm_c, perm_r, L, U, stat, info);
-    utime[FACT] = SuperLU_timer_() - t;
-
-    t = SuperLU_timer_();
-    if ( *info == 0 ) {
-        /* Solve the system A*X=B, overwriting B with X. */
-        sgstrs (trans, L, U, perm_c, perm_r, B, stat, info);
-    }
-    utime[SOLVE] = SuperLU_timer_() - t;
-
-    SUPERLU_FREE (etree);
-    Destroy_CompCol_Permuted(&AC);
-    if ( A->Stype == SLU_NR ) {
-       Destroy_SuperMatrix_Store(AA);
-       SUPERLU_FREE(AA);
-    }
-
-}
diff --git a/superlu/sgssvx.c b/superlu/sgssvx.c
deleted file mode 100644
index c8392abe..00000000
--- a/superlu/sgssvx.c
+++ /dev/null
@@ -1,623 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-#include "slu_sdefs.h"
-
-void
-sgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
-       int *etree, char *equed, float *R, float *C,
-       SuperMatrix *L, SuperMatrix *U, void *work, int lwork,
-       SuperMatrix *B, SuperMatrix *X, float *recip_pivot_growth, 
-       float *rcond, float *ferr, float *berr, 
-       mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info )
-{
-/*
- * Purpose
- * =======
- *
- * SGSSVX solves the system of linear equations A*X=B or A'*X=B, using
- * the LU factorization from sgstrf(). Error bounds on the solution and
- * a condition estimate are also provided. It performs the following steps:
- *
- *   1. If A is stored column-wise (A->Stype = SLU_NC):
- *  
- *      1.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A is
- *           overwritten by diag(R)*A*diag(C) and B by diag(R)*B
- *           (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans
- *           = TRANS or CONJ).
- *
- *      1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
- *           matrix that usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      1.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the matrix A (after equilibration if options->Equil = YES)
- *           as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
- *
- *      1.4. Compute the reciprocal pivot growth factor.
- *
- *      1.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form of 
- *           A is used to estimate the condition number of the matrix A. If
- *           the reciprocal of the condition number is less than machine
- *           precision, info = A->ncol+1 is returned as a warning, but the
- *           routine still goes on to solve for X and computes error bounds
- *           as described below.
- *
- *      1.6. The system of equations is solved for X using the factored form
- *           of A.
- *
- *      1.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      1.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R)
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
- *      to the transpose of A:
- *
- *      2.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A' is
- *           overwritten by diag(R)*A'*diag(C) and B by diag(R)*B 
- *           (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
- *
- *      2.2. Permute columns of transpose(A) (rows of A), 
- *           forming transpose(A)*Pc, where Pc is a permutation matrix that 
- *           usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      2.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the transpose(A) (after equilibration if 
- *           options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the
- *           permutation Pr determined by partial pivoting.
- *
- *      2.4. Compute the reciprocal pivot growth factor.
- *
- *      2.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form 
- *           of transpose(A) is used to estimate the condition number of the
- *           matrix A. If the reciprocal of the condition number
- *           is less than machine precision, info = A->nrow+1 is returned as
- *           a warning, but the routine still goes on to solve for X and
- *           computes error bounds as described below.
- *
- *      2.6. The system of equations is solved for X using the factored form
- *           of transpose(A).
- *
- *      2.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      2.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R) 
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed and how the
- *         system will be solved.
- *
- * A       (input/output) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of the linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = SLU_NC or SLU_NR, Dtype = SLU_D, Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- *         On entry, If options->Fact = FACTORED and equed is not 'N', 
- *         then A must have been equilibrated by the scaling factors in
- *         R and/or C.  
- *         On exit, A is not modified if options->Equil = NO, or if 
- *         options->Equil = YES but equed = 'N' on exit.
- *         Otherwise, if options->Equil = YES and equed is not 'N',
- *         A is scaled as follows:
- *         If A->Stype = SLU_NC:
- *           equed = 'R':  A := diag(R) * A
- *           equed = 'C':  A := A * diag(C)
- *           equed = 'B':  A := diag(R) * A * diag(C).
- *         If A->Stype = SLU_NR:
- *           equed = 'R':  transpose(A) := diag(R) * transpose(A)
- *           equed = 'C':  transpose(A) := transpose(A) * diag(C)
- *           equed = 'B':  transpose(A) := diag(R) * transpose(A) * diag(C).
- *
- * perm_c  (input/output) int*
- *        If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
- *         which defines the permutation matrix Pc; perm_c[i] = j means
- *         column i of A is in position j in A*Pc.
- *         On exit, perm_c may be overwritten by the product of the input
- *         perm_c and a permutation that postorders the elimination tree
- *         of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
- *         is already in postorder.
- *
- *         If A->Stype = SLU_NR, column permutation vector of size A->nrow,
- *         which describes permutation of columns of transpose(A) 
- *         (rows of A) as described above.
- * 
- * perm_r  (input/output) int*
- *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
- *         which defines the permutation matrix Pr, and is determined
- *         by partial pivoting.  perm_r[i] = j means row i of A is in 
- *         position j in Pr*A.
- *
- *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
- *         determines permutation of rows of transpose(A)
- *         (columns of A) as described above.
- *
- *         If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *         will try to use the input perm_r, unless a certain threshold
- *         criterion is violated. In that case, perm_r is overwritten by a
- *         new permutation determined by partial pivoting or diagonal
- *         threshold pivoting.
- *         Otherwise, perm_r is output argument.
- * 
- * etree   (input/output) int*,  dimension (A->ncol)
- *         Elimination tree of Pc'*A'*A*Pc.
- *         If options->Fact != FACTORED and options->Fact != DOFACT,
- *         etree is an input argument, otherwise it is an output argument.
- *         Note: etree is a vector of parent pointers for a forest whose
- *         vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *
- * equed   (input/output) char*
- *         Specifies the form of equilibration that was done.
- *         = 'N': No equilibration.
- *         = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
- *         = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
- *         = 'B': Both row and column equilibration, i.e., A was replaced 
- *                by diag(R)*A*diag(C).
- *         If options->Fact = FACTORED, equed is an input argument,
- *         otherwise it is an output argument.
- *
- * R       (input/output) float*, dimension (A->nrow)
- *         The row scale factors for A or transpose(A).
- *         If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the left by diag(R).
- *         If equed = 'N' or 'C', R is not accessed.
- *         If options->Fact = FACTORED, R is an input argument,
- *             otherwise, R is output.
- *         If options->zFact = FACTORED and equed = 'R' or 'B', each element
- *             of R must be positive.
- * 
- * C       (input/output) float*, dimension (A->ncol)
- *         The column scale factors for A or transpose(A).
- *         If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the right by diag(C).
- *         If equed = 'N' or 'R', C is not accessed.
- *         If options->Fact = FACTORED, C is an input argument,
- *             otherwise, C is output.
- *         If options->Fact = FACTORED and equed = 'C' or 'B', each element
- *             of C must be positive.
- *         
- * L       (output) SuperMatrix*
- *        The factor L from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype SLU_= NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses compressed row subscripts storage for supernodes, i.e.,
- *         L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
- *
- * U       (output) SuperMatrix*
- *        The factor U from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
- *
- * work    (workspace/output) void*, size (lwork) (in bytes)
- *         User supplied workspace, should be large enough
- *         to hold data structures for factors L and U.
- *         On exit, if fact is not 'F', L and U point to this array.
- *
- * lwork   (input) int
- *         Specifies the size of work array in bytes.
- *         = 0:  allocate space internally by system malloc;
- *         > 0:  use user-supplied work array of length lwork in bytes,
- *               returns error if space runs out.
- *         = -1: the routine guesses the amount of space needed without
- *               performing the factorization, and returns it in
- *               mem_usage->total_needed; no other side effects.
- *
- *         See argument 'mem_usage' for memory usage statistics.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         If B->ncol = 0, only LU decomposition is performed, the triangular
- *                         solve is skipped.
- *         On exit,
- *            if equed = 'N', B is not modified; otherwise
- *            if A->Stype = SLU_NC:
- *               if options->Trans = NOTRANS and equed = 'R' or 'B',
- *                  B is overwritten by diag(R)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'C' of 'B',
- *                  B is overwritten by diag(C)*B;
- *            if A->Stype = SLU_NR:
- *               if options->Trans = NOTRANS and equed = 'C' or 'B',
- *                  B is overwritten by diag(C)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'R' of 'B',
- *                  B is overwritten by diag(R)*B.
- *
- * X       (output) SuperMatrix*
- *         X has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE. 
- *         If info = 0 or info = A->ncol+1, X contains the solution matrix
- *         to the original system of equations. Note that A and B are modified
- *         on exit if equed is not 'N', and the solution to the equilibrated
- *         system is inv(diag(C))*X if options->Trans = NOTRANS and
- *         equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C'
- *         and equed = 'R' or 'B'.
- *
- * recip_pivot_growth (output) float*
- *         The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
- *         The infinity norm is used. If recip_pivot_growth is much less
- *         than 1, the stability of the LU factorization could be poor.
- *
- * rcond   (output) float*
- *         The estimate of the reciprocal condition number of the matrix A
- *         after equilibration (if done). If rcond is less than the machine
- *         precision (in particular, if rcond = 0), the matrix is singular
- *         to working precision. This condition is indicated by a return
- *         code of info > 0.
- *
- * FERR    (output) float*, dimension (B->ncol)   
- *         The estimated forward error bound for each solution vector   
- *         X(j) (the j-th column of the solution matrix X).   
- *         If XTRUE is the true solution corresponding to X(j), FERR(j) 
- *         is an estimated upper bound for the magnitude of the largest 
- *         element in (X(j) - XTRUE) divided by the magnitude of the   
- *         largest element in X(j).  The estimate is as reliable as   
- *         the estimate for RCOND, and is almost always a slight   
- *         overestimate of the true error.
- *         If options->IterRefine = NOREFINE, ferr = 1.0.
- *
- * BERR    (output) float*, dimension (B->ncol)
- *         The componentwise relative backward error of each solution   
- *         vector X(j) (i.e., the smallest relative change in   
- *         any element of A or B that makes X(j) an exact solution).
- *         If options->IterRefine = NOREFINE, berr = 1.0.
- *
- * mem_usage (output) mem_usage_t*
- *         Record the memory usage statistics, consisting of following fields:
- *         - for_lu (float)
- *           The amount of space used in bytes for L\U data structures.
- *         - total_needed (float)
- *           The amount of space needed in bytes to perform factorization.
- *         - expansions (int)
- *           The number of memory expansions during the LU factorization.
- *
- * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *         = 0: successful exit   
- *         < 0: if info = -i, the i-th argument had an illegal value   
- *         > 0: if info = i, and i is   
- *              <= A->ncol: U(i,i) is exactly zero. The factorization has   
- *                    been completed, but the factor U is exactly   
- *                    singular, so the solution and error bounds   
- *                    could not be computed.   
- *              = A->ncol+1: U is nonsingular, but RCOND is less than machine
- *                    precision, meaning that the matrix is singular to
- *                    working precision. Nevertheless, the solution and
- *                    error bounds are computed because there are a number
- *                    of situations where the computed solution can be more
- *                    accurate than the value of RCOND would suggest.   
- *              > A->ncol+1: number of bytes allocated when memory allocation
- *                    failure occurred, plus A->ncol.
- *
- */
-
-    DNformat  *Bstore, *Xstore;
-    float    *Bmat, *Xmat;
-    int       ldb, ldx, nrhs;
-    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
-    SuperMatrix AC; /* Matrix postmultiplied by Pc */
-    int       colequ, equil, nofact, notran, rowequ, permc_spec;
-    trans_t   trant;
-    char      norm[1];
-    int       i, j, info1;
-    float    amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin;
-    int       relax, panel_size;
-    float    diag_pivot_thresh, drop_tol;
-    double    t0;      /* temporary time */
-    double    *utime;
-
-    /* External functions */
-    extern float slangs(char *, SuperMatrix *);
-    extern double slamch_(char *);
-
-    Bstore = B->Store;
-    Xstore = X->Store;
-    Bmat   = Bstore->nzval;
-    Xmat   = Xstore->nzval;
-    ldb    = Bstore->lda;
-    ldx    = Xstore->lda;
-    nrhs   = B->ncol;
-
-    *info = 0;
-    nofact = (options->Fact != FACTORED);
-    equil = (options->Equil == YES);
-    notran = (options->Trans == NOTRANS);
-    if ( nofact ) {
-       *(unsigned char *)equed = 'N';
-       rowequ = FALSE;
-       colequ = FALSE;
-    } else {
-       rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-       colequ = lsame_(equed, "C") || lsame_(equed, "B");
-       smlnum = slamch_("Safe minimum");
-       bignum = 1. / smlnum;
-    }
-
-#if 0
-printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
-       options->Fact, options->Trans, *equed);
-#endif
-
-    /* Test the input parameters */
-    if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern &&
-       options->Fact != SamePattern_SameRowPerm &&
-       !notran && options->Trans != TRANS && options->Trans != CONJ &&
-       !equil && options->Equil != NO)
-       *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-             (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
-             A->Dtype != SLU_S || A->Mtype != SLU_GE )
-       *info = -2;
-    else if (options->Fact == FACTORED &&
-            !(rowequ || colequ || lsame_(equed, "N")))
-       *info = -6;
-    else {
-       if (rowequ) {
-           rcmin = bignum;
-           rcmax = 0.;
-           for (j = 0; j < A->nrow; ++j) {
-               rcmin = SUPERLU_MIN(rcmin, R[j]);
-               rcmax = SUPERLU_MAX(rcmax, R[j]);
-           }
-           if (rcmin <= 0.) *info = -7;
-           else if ( A->nrow > 0)
-               rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-           else rowcnd = 1.;
-       }
-       if (colequ && *info == 0) {
-           rcmin = bignum;
-           rcmax = 0.;
-           for (j = 0; j < A->nrow; ++j) {
-               rcmin = SUPERLU_MIN(rcmin, C[j]);
-               rcmax = SUPERLU_MAX(rcmax, C[j]);
-           }
-           if (rcmin <= 0.) *info = -8;
-           else if (A->nrow > 0)
-               colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-           else colcnd = 1.;
-       }
-       if (*info == 0) {
-           if ( lwork < -1 ) *info = -12;
-           else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
-                     B->Stype != SLU_DN || B->Dtype != SLU_S || 
-                     B->Mtype != SLU_GE )
-               *info = -13;
-           else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
-                     (B->ncol != 0 && B->ncol != X->ncol) ||
-                      X->Stype != SLU_DN ||
-                     X->Dtype != SLU_S || X->Mtype != SLU_GE )
-               *info = -14;
-       }
-    }
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("sgssvx", &i);
-       return;
-    }
-    
-    /* Initialization for factor parameters */
-    panel_size = sp_ienv(1);
-    relax      = sp_ienv(2);
-    diag_pivot_thresh = options->DiagPivotThresh;
-    drop_tol   = 0.0;
-
-    utime = stat->utime;
-    
-    /* Convert A to SLU_NC format when necessary. */
-    if ( A->Stype == SLU_NR ) {
-       NRformat *Astore = A->Store;
-       AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-       sCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
-                              Astore->nzval, Astore->colind, Astore->rowptr,
-                              SLU_NC, A->Dtype, A->Mtype);
-       if ( notran ) { /* Reverse the transpose argument. */
-           trant = TRANS;
-           notran = 0;
-       } else {
-           trant = NOTRANS;
-           notran = 1;
-       }
-    } else { /* A->Stype == SLU_NC */
-       trant = options->Trans;
-       AA = A;
-    }
-
-    if ( nofact && equil ) {
-       t0 = SuperLU_timer_();
-       /* Compute row and column scalings to equilibrate the matrix A. */
-       sgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);
-       
-       if ( info1 == 0 ) {
-           /* Equilibrate matrix A. */
-           slaqgs(AA, R, C, rowcnd, colcnd, amax, equed);
-           rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-           colequ = lsame_(equed, "C") || lsame_(equed, "B");
-       }
-       utime[EQUIL] = SuperLU_timer_() - t0;
-    }
-
-    if ( nrhs > 0 ) {
-        /* Scale the right hand side if equilibration was performed. */
-        if ( notran ) {
-           if ( rowequ ) {
-               for (j = 0; j < nrhs; ++j)
-                   for (i = 0; i < A->nrow; ++i) {
-                       Bmat[i + j*ldb] *= R[i];
-                   }
-           }
-        } else if ( colequ ) {
-           for (j = 0; j < nrhs; ++j)
-               for (i = 0; i < A->nrow; ++i) {
-                   Bmat[i + j*ldb] *= C[i];
-               }
-        }
-    }
-
-    if ( nofact ) {
-       
-        t0 = SuperLU_timer_();
-       /*
-        * Gnet column permutation vector perm_c[], according to permc_spec:
-        *   permc_spec = NATURAL:  natural ordering 
-        *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
-        *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
-        *   permc_spec = COLAMD:   approximate minimum degree column ordering
-        *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
-        */
-       permc_spec = options->ColPerm;
-       if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
-            get_perm_c(permc_spec, AA, perm_c);
-       utime[COLPERM] = SuperLU_timer_() - t0;
-
-       t0 = SuperLU_timer_();
-       sp_preorder(options, AA, perm_c, etree, &AC);
-       utime[ETREE] = SuperLU_timer_() - t0;
-    
-/*     printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
-              relax, panel_size, sp_ienv(3), sp_ienv(4));
-       fflush(stdout); */
-       
-       /* Compute the LU factorization of A*Pc. */
-       t0 = SuperLU_timer_();
-       sgstrf(options, &AC, drop_tol, relax, panel_size,
-              etree, work, lwork, perm_c, perm_r, L, U, stat, info);
-       utime[FACT] = SuperLU_timer_() - t0;
-       
-       if ( lwork == -1 ) {
-           mem_usage->total_needed = *info - A->ncol;
-           return;
-       }
-    }
-
-    if ( options->PivotGrowth ) {
-        if ( *info > 0 ) {
-           if ( *info <= A->ncol ) {
-               /* Compute the reciprocal pivot growth factor of the leading
-                  rank-deficient *info columns of A. */
-               *recip_pivot_growth = sPivotGrowth(*info, AA, perm_c, L, U);
-           }
-           return;
-        }
-
-        /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */
-        *recip_pivot_growth = sPivotGrowth(A->ncol, AA, perm_c, L, U);
-    }
-
-    if ( options->ConditionNumber ) {
-        /* Estimate the reciprocal of the condition number of A. */
-        t0 = SuperLU_timer_();
-        if ( notran ) {
-           *(unsigned char *)norm = '1';
-        } else {
-           *(unsigned char *)norm = 'I';
-        }
-        anorm = slangs(norm, AA);
-        sgscon(norm, L, U, anorm, rcond, stat, info);
-        utime[RCOND] = SuperLU_timer_() - t0;
-    }
-    
-    if ( nrhs > 0 ) {
-        /* Compute the solution matrix X. */
-        for (j = 0; j < nrhs; j++)  /* Save a copy of the right hand sides */
-            for (i = 0; i < B->nrow; i++)
-               Xmat[i + j*ldx] = Bmat[i + j*ldb];
-    
-        t0 = SuperLU_timer_();
-        sgstrs (trant, L, U, perm_c, perm_r, X, stat, info);
-        utime[SOLVE] = SuperLU_timer_() - t0;
-    
-        /* Use iterative refinement to improve the computed solution and 
compute
-           error bounds and backward error estimates for it. */
-        t0 = SuperLU_timer_();
-        if ( options->IterRefine != NOREFINE ) {
-            sgsrfs(trant, AA, L, U, perm_c, perm_r, equed, R, C, B,
-                   X, ferr, berr, stat, info);
-        } else {
-            for (j = 0; j < nrhs; ++j) ferr[j] = berr[j] = 1.0;
-        }
-        utime[REFINE] = SuperLU_timer_() - t0;
-
-        /* Transform the solution matrix X to a solution of the original 
system. */
-        if ( notran ) {
-           if ( colequ ) {
-               for (j = 0; j < nrhs; ++j)
-                   for (i = 0; i < A->nrow; ++i) {
-                        Xmat[i + j*ldx] *= C[i];
-                   }
-           }
-        } else if ( rowequ ) {
-           for (j = 0; j < nrhs; ++j)
-               for (i = 0; i < A->nrow; ++i) {
-                   Xmat[i + j*ldx] *= R[i];
-                }
-        }
-    } /* end if nrhs > 0 */
-
-    if ( options->ConditionNumber ) {
-        /* Set INFO = A->ncol+1 if the matrix is singular to working 
precision. */
-        if ( *rcond < slamch_("E") ) *info = A->ncol + 1;
-    }
-
-    if ( nofact ) {
-        sQuerySpace(L, U, mem_usage);
-        Destroy_CompCol_Permuted(&AC);
-    }
-    if ( A->Stype == SLU_NR ) {
-       Destroy_SuperMatrix_Store(AA);
-       SUPERLU_FREE(AA);
-    }
-
-}
diff --git a/superlu/sgstrf.c b/superlu/sgstrf.c
deleted file mode 100644
index 4da42045..00000000
--- a/superlu/sgstrf.c
+++ /dev/null
@@ -1,431 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-#include "slu_sdefs.h"
-extern void countnz();
-extern void fixupL();
-
-void
-sgstrf (superlu_options_t *options, SuperMatrix *A, float drop_tol,
-        int relax, int panel_size, int *etree, void *work, int lwork,
-        int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
-        SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * SGSTRF computes an LU factorization of a general sparse m-by-n
- * matrix A using partial pivoting with row interchanges.
- * The factorization has the form
- *     Pr * A = L * U
- * where Pr is a row permutation matrix, L is lower triangular with unit
- * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper 
- * triangular (upper trapezoidal if A->nrow < A->ncol).
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed.
- *
- * A        (input) SuperMatrix*
- *         Original matrix A, permuted by columns, of dimension
- *          (A->nrow, A->ncol). The type of A can be:
- *          Stype = SLU_NCP; Dtype = SLU_S; Mtype = SLU_GE.
- *
- * drop_tol (input) float (NOT IMPLEMENTED)
- *         Drop tolerance parameter. At step j of the Gaussian elimination,
- *          if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
- *          0 <= drop_tol <= 1. The default value of drop_tol is 0.
- *
- * relax    (input) int
- *          To control degree of relaxing supernodes. If the number
- *          of nodes (columns) in a subtree of the elimination tree is less
- *          than relax, this subtree is considered as one supernode,
- *          regardless of the row structures of those columns.
- *
- * panel_size (input) int
- *          A panel consists of at most panel_size consecutive columns.
- *
- * etree    (input) int*, dimension (A->ncol)
- *          Elimination tree of A'*A.
- *          Note: etree is a vector of parent pointers for a forest whose
- *          vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *          On input, the columns of A should be permuted so that the
- *          etree is in a certain postorder.
- *
- * work     (input/output) void*, size (lwork) (in bytes)
- *          User-supplied work space and space for the output data structures.
- *          Not referenced if lwork = 0;
- *
- * lwork   (input) int
- *         Specifies the size of work array in bytes.
- *         = 0:  allocate space internally by system malloc;
- *         > 0:  use user-supplied work array of length lwork in bytes,
- *               returns error if space runs out.
- *         = -1: the routine guesses the amount of space needed without
- *               performing the factorization, and returns it in
- *               *info; no other side effects.
- *
- * perm_c   (input) int*, dimension (A->ncol)
- *         Column permutation vector, which defines the 
- *          permutation matrix Pc; perm_c[i] = j means column i of A is 
- *          in position j in A*Pc.
- *          When searching for diagonal, perm_c[*] is applied to the
- *          row subscripts of A, so that diagonal threshold pivoting
- *          can find the diagonal of A, rather than that of A*Pc.
- *
- * perm_r   (input/output) int*, dimension (A->nrow)
- *          Row permutation vector which defines the permutation matrix Pr,
- *          perm_r[i] = j means row i of A is in position j in Pr*A.
- *          If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *             will try to use the input perm_r, unless a certain threshold
- *             criterion is violated. In that case, perm_r is overwritten by
- *             a new permutation determined by partial pivoting or diagonal
- *             threshold pivoting.
- *          Otherwise, perm_r is output argument;
- *
- * L        (output) SuperMatrix*
- *          The factor L from the factorization Pr*A=L*U; use compressed row 
- *          subscripts storage for supernodes, i.e., L has type: 
- *          Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
- *
- * U        (output) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *          storage scheme, i.e., U has types: Stype = SLU_NC, 
- *          Dtype = SLU_S, Mtype = SLU_TRU.
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation 
count.
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info     (output) int*
- *          = 0: successful exit
- *          < 0: if info = -i, the i-th argument had an illegal value
- *          > 0: if info = i, and i is
- *             <= A->ncol: U(i,i) is exactly zero. The factorization has
- *                been completed, but the factor U is exactly singular,
- *                and division by zero will occur if it is used to solve a
- *                system of equations.
- *             > A->ncol: number of bytes allocated when memory allocation
- *                failure occurred, plus A->ncol. If lwork = -1, it is
- *                the estimated amount of space needed, plus A->ncol.
- *
- * ======================================================================
- *
- * Local Working Arrays: 
- * ======================
- *   m = number of rows in the matrix
- *   n = number of columns in the matrix
- *
- *   xprune[0:n-1]: xprune[*] points to locations in subscript 
- *     vector lsub[*]. For column i, xprune[i] denotes the point where 
- *     structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need 
- *     to be traversed for symbolic factorization.
- *
- *   marker[0:3*m-1]: marker[i] = j means that node i has been 
- *     reached when working on column j.
- *     Storage: relative to original row subscripts
- *     NOTE: There are 3 of them: marker/marker1 are used for panel dfs, 
- *           see spanel_dfs.c; marker2 is used for inner-factorization,
- *            see scolumn_dfs.c.
- *
- *   parent[0:m-1]: parent vector used during dfs
- *      Storage: relative to new row subscripts
- *
- *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs) 
- *     unexplored neighbor of i in lsub[*]
- *
- *   segrep[0:nseg-1]: contains the list of supernodal representatives
- *     in topological order of the dfs. A supernode representative is the 
- *     last column of a supernode.
- *      The maximum size of segrep[] is n.
- *
- *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a 
- *     supernodal representative r, repfnz[r] is the location of the first 
- *     nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
- *     indicates the supernode r has been explored.
- *     NOTE: There are W of them, each used for one column of a panel. 
- *
- *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below 
- *      the panel diagonal. These are filled in during spanel_dfs(), and are
- *      used later in the inner LU factorization within the panel.
- *     panel_lsub[]/dense[] pair forms the SPA data structure.
- *     NOTE: There are W of them.
- *
- *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
- *                NOTE: there are W of them.
- *
- *   tempv[0:*]: real temporary used for dense numeric kernels;
- *     The size of this array is defined by NUM_TEMPV() in ssp_defs.h.
- *
- */
-    /* Local working arrays */
-    NCPformat *Astore;
-    int       *iperm_r = NULL; /* inverse of perm_r; used when 
-                                  options->Fact == SamePattern_SameRowPerm */
-    int       *iperm_c; /* inverse of perm_c */
-    int       *iwork;
-    float    *swork;
-    int              *segrep, *repfnz, *parent, *xplore;
-    int              *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide 
SPA */
-    int              *xprune;
-    int              *marker;
-    float    *dense, *tempv;
-    int       *relax_end;
-    float    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int       *xsup, *supno;
-    int       *xlsub, *xlusup, *xusub;
-    int       nzlumax;
-    static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
-
-    /* Local scalars */
-    fact_t    fact = options->Fact;
-    double    diag_pivot_thresh = options->DiagPivotThresh;
-    int       pivrow;   /* pivotal row number in the original matrix A */
-    int       nseg1;   /* no of segments in U-column above panel row jcol */
-    int       nseg;    /* no of segments in each U-column */
-    register int jcol; 
-    register int kcol; /* end column of a relaxed snode */
-    register int icol;
-    register int i, k, jj, new_next, iinfo;
-    int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
-    int       w_def;   /* upper bound on panel width */
-    int       usepr, iperm_r_allocated = 0;
-    int       nnzL, nnzU;
-    int       *panel_histo = stat->panel_histo;
-    flops_t   *ops = stat->ops;
-
-    iinfo    = 0;
-    m        = A->nrow;
-    n        = A->ncol;
-    min_mn   = SUPERLU_MIN(m, n);
-    Astore   = A->Store;
-    a        = Astore->nzval;
-    asub     = Astore->rowind;
-    xa_begin = Astore->colbeg;
-    xa_end   = Astore->colend;
-
-    /* Allocate storage common to the factor routines */
-    *info = sLUMemInit(fact, work, lwork, m, n, Astore->nnz,
-                       panel_size, L, U, &Glu, &iwork, &swork);
-    if ( *info ) return;
-    
-    xsup    = Glu.xsup;
-    supno   = Glu.supno;
-    xlsub   = Glu.xlsub;
-    xlusup  = Glu.xlusup;
-    xusub   = Glu.xusub;
-    
-    SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
-            &repfnz, &panel_lsub, &xprune, &marker);
-    sSetRWork(m, panel_size, swork, &dense, &tempv);
-    
-    usepr = (fact == SamePattern_SameRowPerm);
-    if ( usepr ) {
-       /* Compute the inverse of perm_r */
-       iperm_r = (int *) intMalloc(m);
-       for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
-       iperm_r_allocated = 1;
-    }
-    iperm_c = (int *) intMalloc(n);
-    for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
-
-    /* Identify relaxed snodes */
-    relax_end = (int *) intMalloc(n);
-    if ( options->SymmetricMode == YES ) {
-        heap_relax_snode(n, etree, relax, marker, relax_end); 
-    } else {
-        relax_snode(n, etree, relax, marker, relax_end); 
-    }
-    
-    ifill (perm_r, m, EMPTY);
-    ifill (marker, m * NO_MARKER, EMPTY);
-    supno[0] = -1;
-    xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
-    w_def    = panel_size;
-
-    /* 
-     * Work on one "panel" at a time. A panel is one of the following: 
-     *    (a) a relaxed supernode at the bottom of the etree, or
-     *    (b) panel_size contiguous columns, defined by the user
-     */
-    for (jcol = 0; jcol < min_mn; ) {
-
-       if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
-           kcol = relax_end[jcol];       /* end of the relaxed snode */
-           panel_histo[kcol-jcol+1]++;
-
-           /* --------------------------------------
-            * Factorize the relaxed supernode(jcol:kcol) 
-            * -------------------------------------- */
-           /* Determine the union of the row structure of the snode */
-           if ( (*info = ssnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
-                                   xprune, marker, &Glu)) != 0 )
-               return;
-
-            nextu    = xusub[jcol];
-           nextlu   = xlusup[jcol];
-           jsupno   = supno[jcol];
-           fsupc    = xsup[jsupno];
-           new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
-           nzlumax = Glu.nzlumax;
-           while ( new_next > nzlumax ) {
-               if ( (*info = sLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) 
)
-                   return;
-           }
-    
-           for (icol = jcol; icol<= kcol; icol++) {
-               xusub[icol+1] = nextu;
-               
-               /* Scatter into SPA dense[*] */
-               for (k = xa_begin[icol]; k < xa_end[icol]; k++)
-                   dense[asub[k]] = a[k];
-
-               /* Numeric update within the snode */
-               ssnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);
-
-               if ( (*info = spivotL(icol, diag_pivot_thresh, &usepr, perm_r,
-                                     iperm_r, iperm_c, &pivrow, &Glu, stat)) )
-                   if ( iinfo == 0 ) iinfo = *info;
-               
-#ifdef DEBUG
-               sprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu);
-#endif
-
-           }
-
-           jcol = icol;
-
-       } else { /* Work on one panel of panel_size columns */
-           
-           /* Adjust panel_size so that a panel won't overlap with the next 
-            * relaxed snode.
-            */
-           panel_size = w_def;
-           for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) 
-               if ( relax_end[k] != EMPTY ) {
-                   panel_size = k - jcol;
-                   break;
-               }
-           if ( k == min_mn ) panel_size = min_mn - jcol;
-           panel_histo[panel_size]++;
-
-           /* symbolic factor on a panel of columns */
-           spanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
-                     dense, panel_lsub, segrep, repfnz, xprune,
-                     marker, parent, xplore, &Glu);
-           
-           /* numeric sup-panel updates in topological order */
-           spanel_bmod(m, panel_size, jcol, nseg1, dense,
-                       tempv, segrep, repfnz, &Glu, stat);
-           
-           /* Sparse LU within the panel, and below panel diagonal */
-           for ( jj = jcol; jj < jcol + panel_size; jj++) {
-               k = (jj - jcol) * m; /* column index for w-wide arrays */
-
-               nseg = nseg1;   /* Begin after all the panel segments */
-
-               if ((*info = scolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
-                                       segrep, &repfnz[k], xprune, marker,
-                                       parent, xplore, &Glu)) != 0) return;
-
-               /* Numeric updates */
-               if ((*info = scolumn_bmod(jj, (nseg - nseg1), &dense[k],
-                                        tempv, &segrep[nseg1], &repfnz[k],
-                                        jcol, &Glu, stat)) != 0) return;
-               
-               /* Copy the U-segments to ucol[*] */
-               if ((*info = scopy_to_ucol(jj, nseg, segrep, &repfnz[k],
-                                         perm_r, &dense[k], &Glu)) != 0)
-                   return;
-
-               if ( (*info = spivotL(jj, diag_pivot_thresh, &usepr, perm_r,
-                                     iperm_r, iperm_c, &pivrow, &Glu, stat)) )
-                   if ( iinfo == 0 ) iinfo = *info;
-
-               /* Prune columns (0:jj-1) using column jj */
-               spruneL(jj, perm_r, pivrow, nseg, segrep,
-                        &repfnz[k], xprune, &Glu);
-
-               /* Reset repfnz[] for this column */
-               resetrep_col (nseg, segrep, &repfnz[k]);
-               
-#ifdef DEBUG
-               sprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu);
-#endif
-
-           }
-
-           jcol += panel_size; /* Move to the next panel */
-
-       } /* else */
-
-    } /* for */
-
-    *info = iinfo;
-    
-    if ( m > n ) {
-       k = 0;
-        for (i = 0; i < m; ++i) 
-            if ( perm_r[i] == EMPTY ) {
-               perm_r[i] = n + k;
-               ++k;
-           }
-    }
-
-    countnz(min_mn, xprune, &nnzL, &nnzU, &Glu);
-    fixupL(min_mn, perm_r, &Glu);
-
-    sLUWorkFree(iwork, swork, &Glu); /* Free work space and compress storage */
-
-    if ( fact == SamePattern_SameRowPerm ) {
-        /* L and U structures may have changed due to possibly different
-          pivoting, even though the storage is available.
-          There could also be memory expansions, so the array locations
-           may have changed, */
-        ((SCformat *)L->Store)->nnz = nnzL;
-       ((SCformat *)L->Store)->nsuper = Glu.supno[n];
-       ((SCformat *)L->Store)->nzval = Glu.lusup;
-       ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
-       ((SCformat *)L->Store)->rowind = Glu.lsub;
-       ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
-       ((NCformat *)U->Store)->nnz = nnzU;
-       ((NCformat *)U->Store)->nzval = Glu.ucol;
-       ((NCformat *)U->Store)->rowind = Glu.usub;
-       ((NCformat *)U->Store)->colptr = Glu.xusub;
-    } else {
-        sCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, 
-                                Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
-                                Glu.xsup, SLU_SC, SLU_S, SLU_TRLU);
-       sCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, 
-                              Glu.usub, Glu.xusub, SLU_NC, SLU_S, SLU_TRU);
-    }
-    
-    ops[FACT] += ops[TRSV] + ops[GEMV];        
-    
-    if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
-    SUPERLU_FREE (iperm_c);
-    SUPERLU_FREE (relax_end);
-
-}
diff --git a/superlu/sgstrs.c b/superlu/sgstrs.c
deleted file mode 100644
index 7e2c6ecf..00000000
--- a/superlu/sgstrs.c
+++ /dev/null
@@ -1,331 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include "slu_sdefs.h"
-extern void strsm_();
-extern void sgemm_();
-
-
-/* 
- * Function prototypes 
- */
-void susolve(int, int, float*, float*);
-void slsolve(int, int, float*, float*);
-void smatvec(int, int, int, float*, float*, float*);
-
-
-void
-sgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
-        int *perm_c, int *perm_r, SuperMatrix *B,
-        SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * SGSTRS solves a system of linear equations A*X=B or A'*X=B
- * with A sparse and B dense, using the LU factorization computed by
- * SGSTRF.
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *
- * L       (input) SuperMatrix*
- *         The factor L from the factorization Pr*A*Pc=L*U as computed by
- *         sgstrf(). Use compressed row subscripts storage for supernodes,
- *         i.e., L has types: Stype = SLU_SC, Dtype = SLU_S, Mtype = SLU_TRLU.
- *
- * U       (input) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U as computed by
- *         sgstrf(). Use column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_S, Mtype = SLU_TRU.
- *
- * perm_c  (input) int*, dimension (L->ncol)
- *        Column permutation vector, which defines the 
- *         permutation matrix Pc; perm_c[i] = j means column i of A is 
- *         in position j in A*Pc.
- *
- * perm_r  (input) int*, dimension (L->nrow)
- *         Row permutation vector, which defines the permutation matrix Pr; 
- *         perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_S, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation 
count.
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *        = 0: successful exit
- *        < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-#endif
-    int      incx = 1, incy = 1;
-#ifdef USE_VENDOR_BLAS
-    float   alpha = 1.0, beta = 1.0;
-    float   *work_col;
-#endif
-    DNformat *Bstore;
-    float   *Bmat;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    float   *Lval, *Uval;
-    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
-    int      i, j, k, iptr, jcol, n, ldb, nrhs;
-    float   *work, *rhs_work, *soln;
-    flops_t  solve_ops;
-    void sprint_soln();
-
-    /* Test input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    ldb = Bstore->lda;
-    nrhs = B->ncol;
-    if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_S || L->Mtype != SLU_TRLU )
-       *info = -2;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-             U->Stype != SLU_NC || U->Dtype != SLU_S || U->Mtype != SLU_TRU )
-       *info = -3;
-    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_S || B->Mtype != SLU_GE )
-       *info = -6;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("sgstrs", &i);
-       return;
-    }
-
-    n = L->nrow;
-    work = floatCalloc(n * nrhs);
-    if ( !work ) ABORT("Malloc fails for local work[].");
-    soln = floatMalloc(n);
-    if ( !soln ) ABORT("Malloc fails for local soln[].");
-
-    Bmat = Bstore->nzval;
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-    
-    if ( trans == NOTRANS ) {
-       /* Permute right hand sides to form Pr*B */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       
-       /* Forward solve PLy=Pb. */
-       for (k = 0; k <= Lstore->nsuper; k++) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           nrow = nsupr - nsupc;
-
-           solve_ops += nsupc * (nsupc - 1) * nrhs;
-           solve_ops += 2 * nrow * nsupc * nrhs;
-           
-           if ( nsupc == 1 ) {
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   luptr = L_NZ_START(fsupc);
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       rhs_work[irow] -= rhs_work[fsupc] * Lval[luptr];
-                   }
-               }
-           } else {
-               luptr = L_NZ_START(fsupc);
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("N", strlen("N"));
-               ftcs3 = _cptofcd("U", strlen("U"));
-               STRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               
-               SGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-#else
-               strsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               
-               sgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-#endif
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   work_col = &work[j*n];
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       rhs_work[irow] -= work_col[i]; /* Scatter */
-                       work_col[i] = 0.0;
-                       iptr++;
-                   }
-               }
-#else          
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   slsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
-                   smatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
-                           &rhs_work[fsupc], &work[0] );
-
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       rhs_work[irow] -= work[i];
-                       work[i] = 0.0;
-                       iptr++;
-                   }
-               }
-#endif             
-           } /* else ... */
-       } /* for L-solve */
-
-#ifdef DEBUG
-       printf("After L-solve: y=\n");
-       sprint_soln(n, nrhs, Bmat);
-#endif
-
-       /*
-        * Back solve Ux=y.
-        */
-       for (k = Lstore->nsuper; k >= 0; k--) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           luptr = L_NZ_START(fsupc);
-
-           solve_ops += nsupc * (nsupc + 1) * nrhs;
-
-           if ( nsupc == 1 ) {
-               rhs_work = &Bmat[0];
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work[fsupc] /= Lval[luptr];
-                   rhs_work += ldb;
-               }
-           } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("U", strlen("U"));
-               ftcs3 = _cptofcd("N", strlen("N"));
-               STRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#else
-               strsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#endif
-#else          
-               for (j = 0; j < nrhs; j++)
-                   susolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
-#endif         
-           }
-
-           for (j = 0; j < nrhs; ++j) {
-               rhs_work = &Bmat[j*ldb];
-               for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
-                   solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
-                       irow = U_SUB(i);
-                       rhs_work[irow] -= rhs_work[jcol] * Uval[i];
-                   }
-               }
-           }
-           
-       } /* for U-solve */
-
-#ifdef DEBUG
-       printf("After U-solve: x=\n");
-       sprint_soln(n, nrhs, Bmat);
-#endif
-
-       /* Compute the final solution X := Pc*X. */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       
-        stat->ops[SOLVE] = solve_ops;
-
-    } else { /* Solve A'*X=B or CONJ(A)*X=B */
-       /* Permute right hand sides to form Pc'*B. */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-
-       stat->ops[SOLVE] = 0;
-       for (k = 0; k < nrhs; ++k) {
-           
-           /* Multiply by inv(U'). */
-           sp_strsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
-           
-           /* Multiply by inv(L'). */
-           sp_strsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
-           
-       }
-       /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-
-    }
-
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(soln);
-}
-
-/*
- * Diagnostic print of the solution vector 
- */
-void
-sprint_soln(int n, int nrhs, float *soln)
-{
-    int i;
-
-    for (i = 0; i < n; i++) 
-       printf("\t%d: %.4f\n", i, soln[i]);
-}
diff --git a/superlu/slacon.c b/superlu/slacon.c
deleted file mode 100644
index 27b7d40e..00000000
--- a/superlu/slacon.c
+++ /dev/null
@@ -1,249 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-#include <math.h>
-#include "slu_Cnames.h"
-
-int
-slacon_(int *n, float *v, float *x, int *isgn, float *est, int *kase)
-
-{
-/*
-    Purpose   
-    =======   
-
-    SLACON estimates the 1-norm of a square matrix A.   
-    Reverse communication is used for evaluating matrix-vector products. 
-  
-
-    Arguments   
-    =========   
-
-    N      (input) INT
-           The order of the matrix.  N >= 1.   
-
-    V      (workspace) FLOAT PRECISION array, dimension (N)   
-           On the final return, V = A*W,  where  EST = norm(V)/norm(W)   
-           (W is not returned).   
-
-    X      (input/output) FLOAT PRECISION array, dimension (N)   
-           On an intermediate return, X should be overwritten by   
-                 A * X,   if KASE=1,   
-                 A' * X,  if KASE=2,
-           and SLACON must be re-called with all the other parameters   
-           unchanged.   
-
-    ISGN   (workspace) INT array, dimension (N)
-
-    EST    (output) FLOAT PRECISION   
-           An estimate (a lower bound) for norm(A).   
-
-    KASE   (input/output) INT
-           On the initial call to SLACON, KASE should be 0.   
-           On an intermediate return, KASE will be 1 or 2, indicating   
-           whether X should be overwritten by A * X  or A' * X.   
-           On the final return from SLACON, KASE will again be 0.   
-
-    Further Details   
-    ======= =======   
-
-    Contributed by Nick Higham, University of Manchester.   
-    Originally named CONEST, dated March 16, 1988.   
-
-    Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of 
-    a real or complex matrix, with applications to condition estimation", 
-    ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.   
-    ===================================================================== 
-*/
-
-    /* Table of constant values */
-    int c__1 = 1;
-    float      zero = 0.0;
-    float      one = 1.0;
-    
-    /* Local variables */
-    static int iter;
-    static int jump, jlast;
-    static float altsgn, estold;
-    static int i, j;
-    float temp;
-#ifdef _CRAY
-    extern int ISAMAX(int *, float *, int *);
-    extern float SASUM(int *, float *, int *);
-    extern int SCOPY(int *, float *, int *, float *, int *);
-#else
-    extern int isamax_(int *, float *, int *);
-    extern float sasum_(int *, float *, int *);
-    extern int scopy_(int *, float *, int *, float *, int *);
-#endif
-#define d_sign(a, b) (b >= 0 ? fabs(a) : -fabs(a))    /* Copy sign */
-#define i_dnnt(a) \
-       ( a>=0 ? floor(a+.5) : -floor(.5-a) ) /* Round to nearest integer */
-
-    if ( *kase == 0 ) {
-       for (i = 0; i < *n; ++i) {
-           x[i] = 1. / (float) (*n);
-       }
-       *kase = 1;
-       jump = 1;
-       return 0;
-    }
-
-    switch (jump) {
-       case 1:  goto L20;
-       case 2:  goto L40;
-       case 3:  goto L70;
-       case 4:  goto L110;
-       case 5:  goto L140;
-    }
-
-    /*     ................ ENTRY   (JUMP = 1)   
-          FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X. */
-  L20:
-    if (*n == 1) {
-       v[0] = x[0];
-       *est = fabs(v[0]);
-       /*        ... QUIT */
-       goto L150;
-    }
-#ifdef _CRAY
-    *est = SASUM(n, x, &c__1);
-#else
-    *est = sasum_(n, x, &c__1);
-#endif
-
-    for (i = 0; i < *n; ++i) {
-       x[i] = d_sign(one, x[i]);
-       isgn[i] = i_dnnt(x[i]);
-    }
-    *kase = 2;
-    jump = 2;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 2)   
-          FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */
-L40:
-#ifdef _CRAY
-    j = ISAMAX(n, &x[0], &c__1);
-#else
-    j = isamax_(n, &x[0], &c__1);
-#endif
-    --j;
-    iter = 2;
-
-    /*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */
-L50:
-    for (i = 0; i < *n; ++i) x[i] = zero;
-    x[j] = one;
-    *kase = 1;
-    jump = 3;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 3)   
-          X HAS BEEN OVERWRITTEN BY A*X. */
-L70:
-#ifdef _CRAY
-    SCOPY(n, x, &c__1, v, &c__1);
-#else
-    scopy_(n, x, &c__1, v, &c__1);
-#endif
-    estold = *est;
-#ifdef _CRAY
-    *est = SASUM(n, v, &c__1);
-#else
-    *est = sasum_(n, v, &c__1);
-#endif
-
-    for (i = 0; i < *n; ++i)
-       if (i_dnnt(d_sign(one, x[i])) != isgn[i])
-           goto L90;
-
-    /*     REPEATED SIGN VECTOR DETECTED, HENCE ALGORITHM HAS CONVERGED. */
-    goto L120;
-
-L90:
-    /*     TEST FOR CYCLING. */
-    if (*est <= estold) goto L120;
-
-    for (i = 0; i < *n; ++i) {
-       x[i] = d_sign(one, x[i]);
-       isgn[i] = i_dnnt(x[i]);
-    }
-    *kase = 2;
-    jump = 4;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 4)   
-          X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X. */
-L110:
-    jlast = j;
-#ifdef _CRAY
-    j = ISAMAX(n, &x[0], &c__1);
-#else
-    j = isamax_(n, &x[0], &c__1);
-#endif
-    --j;
-    if (x[jlast] != fabs(x[j]) && iter < 5) {
-       ++iter;
-       goto L50;
-    }
-
-    /*     ITERATION COMPLETE.  FINAL STAGE. */
-L120:
-    altsgn = 1.;
-    for (i = 1; i <= *n; ++i) {
-       x[i-1] = altsgn * ((float)(i - 1) / (float)(*n - 1) + 1.);
-       altsgn = -altsgn;
-    }
-    *kase = 1;
-    jump = 5;
-    return 0;
-    
-    /*     ................ ENTRY   (JUMP = 5)   
-          X HAS BEEN OVERWRITTEN BY A*X. */
-L140:
-#ifdef _CRAY
-    temp = SASUM(n, x, &c__1) / (float)(*n * 3) * 2.;
-#else
-    temp = sasum_(n, x, &c__1) / (float)(*n * 3) * 2.;
-#endif
-    if (temp > *est) {
-#ifdef _CRAY
-       SCOPY(n, &x[0], &c__1, &v[0], &c__1);
-#else
-       scopy_(n, &x[0], &c__1, &v[0], &c__1);
-#endif
-       *est = temp;
-    }
-
-L150:
-    *kase = 0;
-    return 0;
-
-} /* slacon_ */
diff --git a/superlu/slamch.c b/superlu/slamch.c
deleted file mode 100644
index 08148e2d..00000000
--- a/superlu/slamch.c
+++ /dev/null
@@ -1,1023 +0,0 @@
-#include <stdio.h>
-#include "slu_Cnames.h"
-
-#define TRUE_ (1)
-#define FALSE_ (0)
-#define min(a,b) ((a) <= (b) ? (a) : (b))
-#define max(a,b) ((a) >= (b) ? (a) : (b))
-#define abs(x) ((x) >= 0 ? (x) : -(x))
-#define dabs(x) (double)abs(x)
-
-double slamch_(char *cmach)
-{
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Copyright (c) 1992-2013 The University of Tennessee and The University
-                        of Tennessee Research Foundation.  All rights
-                        reserved.
-       Copyright (c) 2000-2013 The University of California Berkeley. All
-                        rights reserved.
-       Copyright (c) 2006-2013 The University of Colorado Denver.  All rights
-                        reserved.
-
-       Redistribution and use in source and binary forms, with or without
-       modification, are permitted provided that the following conditions are
-       met:
-
-       - Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-
-       - Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer listed
-         in this license in the documentation and/or other materials
-         provided with the distribution.
-
-       - Neither the name of the copyright holders nor the names of its
-         contributors may be used to endorse or promote products derived from
-         this software without specific prior written permission.
-
-       The copyright holders provide no reassurances that the source code
-       provided does not infringe any patent, copyright, or any other
-       intellectual property rights of third parties.  The copyright holders
-       disclaim any liability to any recipient for claims brought against
-       recipient by any third party for infringement of that parties
-       intellectual property rights.
-
-       THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-       "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-       LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-       A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-       OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-       SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-       LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-       DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-       THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-       (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-       OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-    Purpose   
-    =======   
-
-    SLAMCH determines single precision machine parameters.   
-
-    Arguments   
-    =========   
-
-    CMACH   (input) CHARACTER*1   
-            Specifies the value to be returned by SLAMCH:   
-            = 'E' or 'e',   SLAMCH := eps   
-            = 'S' or 's ,   SLAMCH := sfmin   
-            = 'B' or 'b',   SLAMCH := base   
-            = 'P' or 'p',   SLAMCH := eps*base   
-            = 'N' or 'n',   SLAMCH := t   
-            = 'R' or 'r',   SLAMCH := rnd   
-            = 'M' or 'm',   SLAMCH := emin   
-            = 'U' or 'u',   SLAMCH := rmin   
-            = 'L' or 'l',   SLAMCH := emax   
-            = 'O' or 'o',   SLAMCH := rmax   
-
-            where   
-
-            eps   = relative machine precision   
-            sfmin = safe minimum, such that 1/sfmin does not overflow   
-            base  = base of the machine   
-            prec  = eps*base   
-            t     = number of (base) digits in the mantissa   
-            rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise   
-            emin  = minimum exponent before (gradual) underflow   
-            rmin  = underflow threshold - base**(emin-1)   
-            emax  = largest exponent before overflow   
-            rmax  = overflow threshold  - (base**emax)*(1-eps)   
-
-   ===================================================================== 
-*/
-/* >>Start of File<<   
-       Initialized data */
-    static int first = TRUE_;
-    /* System generated locals */
-    int i__1;
-    float ret_val;
-    /* Builtin functions */
-    double pow_ri(float *, int *);
-    /* Local variables */
-    static float base;
-    static int beta;
-    static float emin, prec, emax;
-    static int imin, imax;
-    static int lrnd;
-    static float rmin, rmax, t, rmach;
-    extern int lsame_(char *, char *);
-    static float small, sfmin;
-    extern /* Subroutine */ int slamc2_(int *, int *, int *, float 
-           *, int *, float *, int *, float *);
-    static int it;
-    static float rnd, eps;
-
-
-
-    if (first) {
-       first = FALSE_;
-       slamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax);
-       base = (float) beta;
-       t = (float) it;
-       if (lrnd) {
-           rnd = 1.f;
-           i__1 = 1 - it;
-           eps = pow_ri(&base, &i__1) / 2;
-       } else {
-           rnd = 0.f;
-           i__1 = 1 - it;
-           eps = pow_ri(&base, &i__1);
-       }
-       prec = eps * base;
-       emin = (float) imin;
-       emax = (float) imax;
-       sfmin = rmin;
-       small = 1.f / rmax;
-       if (small >= sfmin) {
-
-/*           Use SMALL plus a bit, to avoid the possibility of rou
-nding   
-             causing overflow when computing  1/sfmin. */
-
-           sfmin = small * (eps + 1.f);
-       }
-    }
-
-    if (lsame_(cmach, "E")) {
-       rmach = eps;
-    } else if (lsame_(cmach, "S")) {
-       rmach = sfmin;
-    } else if (lsame_(cmach, "B")) {
-       rmach = base;
-    } else if (lsame_(cmach, "P")) {
-       rmach = prec;
-    } else if (lsame_(cmach, "N")) {
-       rmach = t;
-    } else if (lsame_(cmach, "R")) {
-       rmach = rnd;
-    } else if (lsame_(cmach, "M")) {
-       rmach = emin;
-    } else if (lsame_(cmach, "U")) {
-       rmach = rmin;
-    } else if (lsame_(cmach, "L")) {
-       rmach = emax;
-    } else if (lsame_(cmach, "O")) {
-       rmach = rmax;
-    }
-
-    ret_val = rmach;
-    return ret_val;
-
-/*     End of SLAMCH */
-
-} /* slamch_ */
-
-
-/* Subroutine */ int slamc1_(int *beta, int *t, int *rnd, int 
-       *ieee1)
-{
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-
-
-    Purpose   
-    =======   
-
-    SLAMC1 determines the machine parameters given by BETA, T, RND, and   
-    IEEE1.   
-
-    Arguments   
-    =========   
-
-    BETA    (output) INT   
-            The base of the machine.   
-
-    T       (output) INT   
-            The number of ( BETA ) digits in the mantissa.   
-
-    RND     (output) INT   
-            Specifies whether proper rounding  ( RND = .TRUE. )  or   
-            chopping  ( RND = .FALSE. )  occurs in addition. This may not 
-  
-            be a reliable guide to the way in which the machine performs 
-  
-            its arithmetic.   
-
-    IEEE1   (output) INT   
-            Specifies whether rounding appears to be done in the IEEE   
-            'round to nearest' style.   
-
-    Further Details   
-    ===============   
-
-    The routine is based on the routine  ENVRON  by Malcolm and   
-    incorporates suggestions by Gentleman and Marovich. See   
-
-       Malcolm M. A. (1972) Algorithms to reveal properties of   
-          floating-point arithmetic. Comms. of the ACM, 15, 949-951.   
-
-       Gentleman W. M. and Marovich S. B. (1974) More on algorithms   
-          that reveal properties of floating point arithmetic units.   
-          Comms. of the ACM, 17, 276-277.   
-
-   ===================================================================== 
-*/
-    /* Initialized data */
-    static int first = TRUE_;
-    /* System generated locals */
-    float r__1, r__2;
-    /* Local variables */
-    static int lrnd;
-    static float a, b, c, f;
-    static int lbeta;
-    static float savec;
-    static int lieee1;
-    static float t1, t2;
-    extern double slamc3_(float *, float *);
-    static int lt;
-    static float one, qtr;
-
-
-
-    if (first) {
-       first = FALSE_;
-       one = 1.f;
-
-/*        LBETA,  LIEEE1,  LT and  LRND  are the  local values  of  BE
-TA,   
-          IEEE1, T and RND.   
-
-          Throughout this routine  we use the function  SLAMC3  to ens
-ure   
-          that relevant values are  stored and not held in registers, 
- or   
-          are not affected by optimizers.   
-
-          Compute  a = 2.0**m  with the  smallest positive integer m s
-uch   
-          that   
-
-             fl( a + 1.0 ) = a. */
-
-       a = 1.f;
-       c = 1.f;
-
-/* +       WHILE( C.EQ.ONE )LOOP */
-L10:
-       if (c == one) {
-           a *= 2;
-           c = slamc3_(&a, &one);
-           r__1 = -(double)a;
-           c = slamc3_(&c, &r__1);
-           goto L10;
-       }
-/* +       END WHILE   
-
-          Now compute  b = 2.0**m  with the smallest positive integer 
-m   
-          such that   
-
-             fl( a + b ) .gt. a. */
-
-       b = 1.f;
-       c = slamc3_(&a, &b);
-
-/* +       WHILE( C.EQ.A )LOOP */
-L20:
-       if (c == a) {
-           b *= 2;
-           c = slamc3_(&a, &b);
-           goto L20;
-       }
-/* +       END WHILE   
-
-          Now compute the base.  a and c  are neighbouring floating po
-int   
-          numbers  in the  interval  ( beta**t, beta**( t + 1 ) )  and
- so   
-          their difference is beta. Adding 0.25 to c is to ensure that
- it   
-          is truncated to beta and not ( beta - 1 ). */
-
-       qtr = one / 4;
-       savec = c;
-       r__1 = -(double)a;
-       c = slamc3_(&c, &r__1);
-       lbeta = c + qtr;
-
-/*        Now determine whether rounding or chopping occurs,  by addin
-g a   
-          bit  less  than  beta/2  and a  bit  more  than  beta/2  to 
- a. */
-
-       b = (float) lbeta;
-       r__1 = b / 2;
-       r__2 = -(double)b / 100;
-       f = slamc3_(&r__1, &r__2);
-       c = slamc3_(&f, &a);
-       if (c == a) {
-           lrnd = TRUE_;
-       } else {
-           lrnd = FALSE_;
-       }
-       r__1 = b / 2;
-       r__2 = b / 100;
-       f = slamc3_(&r__1, &r__2);
-       c = slamc3_(&f, &a);
-       if (lrnd && c == a) {
-           lrnd = FALSE_;
-       }
-
-/*        Try and decide whether rounding is done in the  IEEE  'round
- to   
-          nearest' style. B/2 is half a unit in the last place of the 
-two   
-          numbers A and SAVEC. Furthermore, A is even, i.e. has last  
-bit   
-          zero, and SAVEC is odd. Thus adding B/2 to A should not  cha
-nge   
-          A, but adding B/2 to SAVEC should change SAVEC. */
-
-       r__1 = b / 2;
-       t1 = slamc3_(&r__1, &a);
-       r__1 = b / 2;
-       t2 = slamc3_(&r__1, &savec);
-       lieee1 = t1 == a && t2 > savec && lrnd;
-
-/*        Now find  the  mantissa, t.  It should  be the  integer part
- of   
-          log to the base beta of a,  however it is safer to determine
-  t   
-          by powering.  So we find t as the smallest positive integer 
-for   
-          which   
-
-             fl( beta**t + 1.0 ) = 1.0. */
-
-       lt = 0;
-       a = 1.f;
-       c = 1.f;
-
-/* +       WHILE( C.EQ.ONE )LOOP */
-L30:
-       if (c == one) {
-           ++lt;
-           a *= lbeta;
-           c = slamc3_(&a, &one);
-           r__1 = -(double)a;
-           c = slamc3_(&c, &r__1);
-           goto L30;
-       }
-/* +       END WHILE */
-
-    }
-
-    *beta = lbeta;
-    *t = lt;
-    *rnd = lrnd;
-    *ieee1 = lieee1;
-    return 0;
-
-/*     End of SLAMC1 */
-
-} /* slamc1_ */
-
-
-/* Subroutine */ int slamc2_(int *beta, int *t, int *rnd, float *
-       eps, int *emin, float *rmin, int *emax, float *rmax)
-{
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-
-
-    Purpose   
-    =======   
-
-    SLAMC2 determines the machine parameters specified in its argument   
-    list.   
-
-    Arguments   
-    =========   
-
-    BETA    (output) INT   
-            The base of the machine.   
-
-    T       (output) INT   
-            The number of ( BETA ) digits in the mantissa.   
-
-    RND     (output) INT   
-            Specifies whether proper rounding  ( RND = .TRUE. )  or   
-            chopping  ( RND = .FALSE. )  occurs in addition. This may not 
-  
-            be a reliable guide to the way in which the machine performs 
-  
-            its arithmetic.   
-
-    EPS     (output) FLOAT   
-            The smallest positive number such that   
-
-               fl( 1.0 - EPS ) .LT. 1.0,   
-
-            where fl denotes the computed value.   
-
-    EMIN    (output) INT   
-            The minimum exponent before (gradual) underflow occurs.   
-
-    RMIN    (output) FLOAT   
-            The smallest normalized number for the machine, given by   
-            BASE**( EMIN - 1 ), where  BASE  is the floating point value 
-  
-            of BETA.   
-
-    EMAX    (output) INT   
-            The maximum exponent before overflow occurs.   
-
-    RMAX    (output) FLOAT   
-            The largest positive number for the machine, given by   
-            BASE**EMAX * ( 1 - EPS ), where  BASE  is the floating point 
-  
-            value of BETA.   
-
-    Further Details   
-    ===============   
-
-    The computation of  EPS  is based on a routine PARANOIA by   
-    W. Kahan of the University of California at Berkeley.   
-
-   ===================================================================== 
-*/
-    /* Table of constant values */
-    static int c__1 = 1;
-    
-    /* Initialized data */
-    static int first = TRUE_;
-    static int iwarn = FALSE_;
-    /* System generated locals */
-    int i__1;
-    float r__1, r__2, r__3, r__4, r__5;
-    /* Builtin functions */
-    double pow_ri(float *, int *);
-    /* Local variables */
-    static int ieee;
-    static float half;
-    static int lrnd;
-    static float leps, zero, a, b, c;
-    static int i, lbeta;
-    static float rbase;
-    static int lemin, lemax, gnmin;
-    static float small;
-    static int gpmin;
-    static float third, lrmin, lrmax, sixth;
-    static int lieee1;
-    extern /* Subroutine */ int slamc1_(int *, int *, int *, 
-           int *);
-    extern double slamc3_(float *, float *);
-    extern /* Subroutine */ int slamc4_(int *, float *, int *), 
-           slamc5_(int *, int *, int *, int *, int *, 
-           float *);
-    static int lt, ngnmin, ngpmin;
-    static float one, two;
-
-
-
-    if (first) {
-       first = FALSE_;
-       zero = 0.f;
-       one = 1.f;
-       two = 2.f;
-
-/*        LBETA, LT, LRND, LEPS, LEMIN and LRMIN  are the local values
- of   
-          BETA, T, RND, EPS, EMIN and RMIN.   
-
-          Throughout this routine  we use the function  SLAMC3  to ens
-ure   
-          that relevant values are stored  and not held in registers, 
- or   
-          are not affected by optimizers.   
-
-          SLAMC1 returns the parameters  LBETA, LT, LRND and LIEEE1. 
-*/
-
-       slamc1_(&lbeta, &lt, &lrnd, &lieee1);
-
-/*        Start to find EPS. */
-
-       b = (float) lbeta;
-       i__1 = -lt;
-       a = pow_ri(&b, &i__1);
-       leps = a;
-
-/*        Try some tricks to see whether or not this is the correct  E
-PS. */
-
-       b = two / 3;
-       half = one / 2;
-       r__1 = -(double)half;
-       sixth = slamc3_(&b, &r__1);
-       third = slamc3_(&sixth, &sixth);
-       r__1 = -(double)half;
-       b = slamc3_(&third, &r__1);
-       b = slamc3_(&b, &sixth);
-       b = dabs(b);
-       if (b < leps) {
-           b = leps;
-       }
-
-       leps = 1.f;
-
-/* +       WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */
-L10:
-       if (leps > b && b > zero) {
-           leps = b;
-           r__1 = half * leps;
-/* Computing 5th power */
-           r__3 = two, r__4 = r__3, r__3 *= r__3;
-/* Computing 2nd power */
-           r__5 = leps;
-           r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5);
-           c = slamc3_(&r__1, &r__2);
-           r__1 = -(double)c;
-           c = slamc3_(&half, &r__1);
-           b = slamc3_(&half, &c);
-           r__1 = -(double)b;
-           c = slamc3_(&half, &r__1);
-           b = slamc3_(&half, &c);
-           goto L10;
-       }
-/* +       END WHILE */
-
-       if (a < leps) {
-           leps = a;
-       }
-
-/*        Computation of EPS complete.   
-
-          Now find  EMIN.  Let A = + or - 1, and + or - (1 + BASE**(-3
-)).   
-          Keep dividing  A by BETA until (gradual) underflow occurs. T
-his   
-          is detected when we cannot recover the previous A. */
-
-       rbase = one / lbeta;
-       small = one;
-       for (i = 1; i <= 3; ++i) {
-           r__1 = small * rbase;
-           small = slamc3_(&r__1, &zero);
-/* L20: */
-       }
-       a = slamc3_(&one, &small);
-       slamc4_(&ngpmin, &one, &lbeta);
-       r__1 = -(double)one;
-       slamc4_(&ngnmin, &r__1, &lbeta);
-       slamc4_(&gpmin, &a, &lbeta);
-       r__1 = -(double)a;
-       slamc4_(&gnmin, &r__1, &lbeta);
-       ieee = FALSE_;
-
-       if (ngpmin == ngnmin && gpmin == gnmin) {
-           if (ngpmin == gpmin) {
-               lemin = ngpmin;
-/*            ( Non twos-complement machines, no gradual under
-flow;   
-                e.g.,  VAX ) */
-           } else if (gpmin - ngpmin == 3) {
-               lemin = ngpmin - 1 + lt;
-               ieee = TRUE_;
-/*            ( Non twos-complement machines, with gradual und
-erflow;   
-                e.g., IEEE standard followers ) */
-           } else {
-               lemin = min(ngpmin,gpmin);
-/*            ( A guess; no known machine ) */
-               iwarn = TRUE_;
-           }
-
-       } else if (ngpmin == gpmin && ngnmin == gnmin) {
-           if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) {
-               lemin = max(ngpmin,ngnmin);
-/*            ( Twos-complement machines, no gradual underflow
-;   
-                e.g., CYBER 205 ) */
-           } else {
-               lemin = min(ngpmin,ngnmin);
-/*            ( A guess; no known machine ) */
-               iwarn = TRUE_;
-           }
-
-       } else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 && gpmin == gnmin)
-                {
-           if (gpmin - min(ngpmin,ngnmin) == 3) {
-               lemin = max(ngpmin,ngnmin) - 1 + lt;
-/*            ( Twos-complement machines with gradual underflo
-w;   
-                no known machine ) */
-           } else {
-               lemin = min(ngpmin,ngnmin);
-/*            ( A guess; no known machine ) */
-               iwarn = TRUE_;
-           }
-
-       } else {
-/* Computing MIN */
-           i__1 = min(ngpmin,ngnmin), i__1 = min(i__1,gpmin);
-           lemin = min(i__1,gnmin);
-/*         ( A guess; no known machine ) */
-           iwarn = TRUE_;
-       }
-/* **   
-   Comment out this if block if EMIN is ok */
-       if (iwarn) {
-           first = TRUE_;
-           printf("\n\n WARNING. The value EMIN may be incorrect:- ");
-           printf("EMIN = %8i\n",lemin);
-           printf("If, after inspection, the value EMIN looks acceptable");
-            printf("please comment out \n the IF block as marked within the"); 
-            printf("code of routine SLAMC2, \n otherwise supply EMIN"); 
-            printf("explicitly.\n");
-       }
-/* **   
-
-          Assume IEEE arithmetic if we found denormalised  numbers abo
-ve,   
-          or if arithmetic seems to round in the  IEEE style,  determi
-ned   
-          in routine SLAMC1. A true IEEE machine should have both  thi
-ngs   
-          true; however, faulty machines may have one or the other. */
-
-       ieee = ieee || lieee1;
-
-/*        Compute  RMIN by successive division by  BETA. We could comp
-ute   
-          RMIN as BASE**( EMIN - 1 ),  but some machines underflow dur
-ing   
-          this computation. */
-
-       lrmin = 1.f;
-       i__1 = 1 - lemin;
-       for (i = 1; i <= 1-lemin; ++i) {
-           r__1 = lrmin * rbase;
-           lrmin = slamc3_(&r__1, &zero);
-/* L30: */
-       }
-
-/*        Finally, call SLAMC5 to compute EMAX and RMAX. */
-
-       slamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax);
-    }
-
-    *beta = lbeta;
-    *t = lt;
-    *rnd = lrnd;
-    *eps = leps;
-    *emin = lemin;
-    *rmin = lrmin;
-    *emax = lemax;
-    *rmax = lrmax;
-
-    return 0;
-
-
-/*     End of SLAMC2 */
-
-} /* slamc2_ */
-
-
-double slamc3_(float *a, float *b)
-{
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-
-
-    Purpose   
-    =======   
-
-    SLAMC3  is intended to force  A  and  B  to be stored prior to doing 
-  
-    the addition of  A  and  B ,  for use in situations where optimizers 
-  
-    might hold one of these in a register.   
-
-    Arguments   
-    =========   
-
-    A, B    (input) FLOAT   
-            The values A and B.   
-
-   ===================================================================== 
-*/
-/* >>Start of File<<   
-       System generated locals */
-  volatile float ret_val; /* [added volatile to avoid -O3 optimizations.. 
(julien pommier)] */
-
-
-
-    ret_val = *a + *b;
-
-    return ret_val;
-
-/*     End of SLAMC3 */
-
-} /* slamc3_ */
-
-
-/* Subroutine */ int slamc4_(int *emin, float *start, int *base)
-{
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-
-
-    Purpose   
-    =======   
-
-    SLAMC4 is a service routine for SLAMC2.   
-
-    Arguments   
-    =========   
-
-    EMIN    (output) EMIN   
-            The minimum exponent before (gradual) underflow, computed by 
-  
-            setting A = START and dividing by BASE until the previous A   
-            can not be recovered.   
-
-    START   (input) FLOAT   
-            The starting point for determining EMIN.   
-
-    BASE    (input) INT   
-            The base of the machine.   
-
-   ===================================================================== 
-*/
-    /* System generated locals */
-    int i__1;
-    float r__1;
-    /* Local variables */
-    static float zero, a;
-    static int i;
-    static float rbase, b1, b2, c1, c2, d1, d2;
-    extern double slamc3_(float *, float *);
-    static float one;
-
-
-
-    a = *start;
-    one = 1.f;
-    rbase = one / *base;
-    zero = 0.f;
-    *emin = 1;
-    r__1 = a * rbase;
-    b1 = slamc3_(&r__1, &zero);
-    c1 = a;
-    c2 = a;
-    d1 = a;
-    d2 = a;
-/* +    WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND.   
-      $       ( D1.EQ.A ).AND.( D2.EQ.A )      )LOOP */
-L10:
-    if (c1 == a && c2 == a && d1 == a && d2 == a) {
-       --(*emin);
-       a = b1;
-       r__1 = a / *base;
-       b1 = slamc3_(&r__1, &zero);
-       r__1 = b1 * *base;
-       c1 = slamc3_(&r__1, &zero);
-       d1 = zero;
-       i__1 = *base;
-       for (i = 1; i <= *base; ++i) {
-           d1 += b1;
-/* L20: */
-       }
-       r__1 = a * rbase;
-       b2 = slamc3_(&r__1, &zero);
-       r__1 = b2 / rbase;
-       c2 = slamc3_(&r__1, &zero);
-       d2 = zero;
-       i__1 = *base;
-       for (i = 1; i <= *base; ++i) {
-           d2 += b2;
-/* L30: */
-       }
-       goto L10;
-    }
-/* +    END WHILE */
-
-    return 0;
-
-/*     End of SLAMC4 */
-
-} /* slamc4_ */
-
-
-/* Subroutine */ int slamc5_(int *beta, int *p, int *emin, 
-       int *ieee, int *emax, float *rmax)
-{
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,   
-       Courant Institute, Argonne National Lab, and Rice University   
-       October 31, 1992   
-
-
-    Purpose   
-    =======   
-
-    SLAMC5 attempts to compute RMAX, the largest machine floating-point   
-    number, without overflow.  It assumes that EMAX + abs(EMIN) sum   
-    approximately to a power of 2.  It will fail on machines where this   
-    assumption does not hold, for example, the Cyber 205 (EMIN = -28625, 
-  
-    EMAX = 28718).  It will also fail if the value supplied for EMIN is   
-    too large (i.e. too close to zero), probably with overflow.   
-
-    Arguments   
-    =========   
-
-    BETA    (input) INT   
-            The base of floating-point arithmetic.   
-
-    P       (input) INT   
-            The number of base BETA digits in the mantissa of a   
-            floating-point value.   
-
-    EMIN    (input) INT   
-            The minimum exponent before (gradual) underflow.   
-
-    IEEE    (input) INT   
-            A logical flag specifying whether or not the arithmetic   
-            system is thought to comply with the IEEE standard.   
-
-    EMAX    (output) INT   
-            The largest exponent before overflow   
-
-    RMAX    (output) FLOAT   
-            The largest machine floating-point number.   
-
-   ===================================================================== 
-  
-
-
-       First compute LEXP and UEXP, two powers of 2 that bound   
-       abs(EMIN). We then assume that EMAX + abs(EMIN) will sum   
-       approximately to the bound that is closest to abs(EMIN).   
-       (EMAX is the exponent of the required number RMAX). */
-    /* Table of constant values */
-    static float c_b5 = 0.f;
-    
-    /* System generated locals */
-    int i__1;
-    float r__1;
-    /* Local variables */
-    static int lexp;
-    static float oldy;
-    static int uexp, i;
-    static float y, z;
-    static int nbits;
-    extern double slamc3_(float *, float *);
-    static float recbas;
-    static int exbits, expsum, try__;
-
-
-
-    lexp = 1;
-    exbits = 1;
-L10:
-    try__ = lexp << 1;
-    if (try__ <= -(*emin)) {
-       lexp = try__;
-       ++exbits;
-       goto L10;
-    }
-    if (lexp == -(*emin)) {
-       uexp = lexp;
-    } else {
-       uexp = try__;
-       ++exbits;
-    }
-
-/*     Now -LEXP is less than or equal to EMIN, and -UEXP is greater   
-       than or equal to EMIN. EXBITS is the number of bits needed to   
-       store the exponent. */
-
-    if (uexp + *emin > -lexp - *emin) {
-       expsum = lexp << 1;
-    } else {
-       expsum = uexp << 1;
-    }
-
-/*     EXPSUM is the exponent range, approximately equal to   
-       EMAX - EMIN + 1 . */
-
-    *emax = expsum + *emin - 1;
-    nbits = exbits + 1 + *p;
-
-/*     NBITS is the total number of bits needed to store a   
-       floating-point number. */
-
-    if (nbits % 2 == 1 && *beta == 2) {
-
-/*        Either there are an odd number of bits used to store a   
-          floating-point number, which is unlikely, or some bits are 
-  
-          not used in the representation of numbers, which is possible
-,   
-          (e.g. Cray machines) or the mantissa has an implicit bit,   
-          (e.g. IEEE machines, Dec Vax machines), which is perhaps the
-   
-          most likely. We have to assume the last alternative.   
-          If this is true, then we need to reduce EMAX by one because 
-  
-          there must be some way of representing zero in an implicit-b
-it   
-          system. On machines like Cray, we are reducing EMAX by one 
-  
-          unnecessarily. */
-
-       --(*emax);
-    }
-
-    if (*ieee) {
-
-/*        Assume we are on an IEEE machine which reserves one exponent
-   
-          for infinity and NaN. */
-
-       --(*emax);
-    }
-
-/*     Now create RMAX, the largest machine number, which should   
-       be equal to (1.0 - BETA**(-P)) * BETA**EMAX .   
-
-       First compute 1.0 - BETA**(-P), being careful that the   
-       result is less than 1.0 . */
-
-    recbas = 1.f / *beta;
-    z = *beta - 1.f;
-    y = 0.f;
-    i__1 = *p;
-    for (i = 1; i <= *p; ++i) {
-       z *= recbas;
-       if (y < 1.f) {
-           oldy = y;
-       }
-       y = slamc3_(&y, &z);
-/* L20: */
-    }
-    if (y >= 1.f) {
-       y = oldy;
-    }
-
-/*     Now multiply by BETA**EMAX to get RMAX. */
-
-    i__1 = *emax;
-    for (i = 1; i <= *emax; ++i) {
-       r__1 = y * *beta;
-       y = slamc3_(&r__1, &c_b5);
-/* L30: */
-    }
-
-    *rmax = y;
-    return 0;
-
-/*     End of SLAMC5 */
-
-} /* slamc5_ */
-
-
-double pow_ri(float *ap, int *bp)
-{
-double pow, x;
-int n;
-
-pow = 1;
-x = *ap;
-n = *bp;
-
-if(n != 0)
-       {
-       if(n < 0)
-               {
-               n = -n;
-               x = 1/x;
-               }
-       for( ; ; )
-               {
-               if(n & 01)
-                       pow *= x;
-               if(n >>= 1)
-                       x *= x;
-               else
-                       break;
-               }
-       }
-return(pow);
-}
diff --git a/superlu/slangs.c b/superlu/slangs.c
deleted file mode 100644
index f82368fc..00000000
--- a/superlu/slangs.c
+++ /dev/null
@@ -1,131 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-/*
- * File name:  slangs.c
- * History:     Modified from lapack routine SLANGE
- */
-#include <math.h>
-#include "slu_sdefs.h"
-
-float slangs(char *norm, SuperMatrix *A)
-{
-/* 
-    Purpose   
-    =======   
-
-    SLANGS returns the value of the one norm, or the Frobenius norm, or 
-    the infinity norm, or the element of largest absolute value of a 
-    real matrix A.   
-
-    Description   
-    ===========   
-
-    SLANGE returns the value   
-
-       SLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
-                (   
-                ( norm1(A),         NORM = '1', 'O' or 'o'   
-                (   
-                ( normI(A),         NORM = 'I' or 'i'   
-                (   
-                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'   
-
-    where  norm1  denotes the  one norm of a matrix (maximum column sum), 
-    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
-    normF  denotes the  Frobenius norm of a matrix (square root of sum of 
-    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   
-
-    Arguments   
-    =========   
-
-    NORM    (input) CHARACTER*1   
-            Specifies the value to be returned in SLANGE as described above.   
-    A       (input) SuperMatrix*
-            The M by N sparse matrix A. 
-
-   ===================================================================== 
-*/
-    
-    /* Local variables */
-    NCformat *Astore;
-    float   *Aval;
-    int      i, j, irow;
-    float   value, sum;
-    float   *rwork;
-
-    Astore = A->Store;
-    Aval   = Astore->nzval;
-    
-    if ( SUPERLU_MIN(A->nrow, A->ncol) == 0) {
-       value = 0.;
-       
-    } else if (lsame_(norm, "M")) {
-       /* Find max(abs(A(i,j))). */
-       value = 0.;
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++)
-               value = SUPERLU_MAX( value, fabs( Aval[i]) );
-       
-    } else if (lsame_(norm, "O") || *(unsigned char *)norm == '1') {
-       /* Find norm1(A). */
-       value = 0.;
-       for (j = 0; j < A->ncol; ++j) {
-           sum = 0.;
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) 
-               sum += fabs(Aval[i]);
-           value = SUPERLU_MAX(value,sum);
-       }
-       
-    } else if (lsame_(norm, "I")) {
-       /* Find normI(A). */
-       if ( !(rwork = (float *) SUPERLU_MALLOC(A->nrow * sizeof(float))) )
-           ABORT("SUPERLU_MALLOC fails for rwork.");
-       for (i = 0; i < A->nrow; ++i) rwork[i] = 0.;
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) {
-               irow = Astore->rowind[i];
-               rwork[irow] += fabs(Aval[i]);
-           }
-       value = 0.;
-       for (i = 0; i < A->nrow; ++i)
-           value = SUPERLU_MAX(value, rwork[i]);
-       
-       SUPERLU_FREE (rwork);
-       
-    } else if (lsame_(norm, "F") || lsame_(norm, "E")) {
-       /* Find normF(A). */
-       ABORT("Not implemented.");
-    } else
-       ABORT("Illegal norm specified.");
-
-    return (value);
-
-} /* slangs */
-
diff --git a/superlu/slaqgs.c b/superlu/slaqgs.c
deleted file mode 100644
index 083d6652..00000000
--- a/superlu/slaqgs.c
+++ /dev/null
@@ -1,157 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-/*
- * File name:  slaqgs.c
- * History:     Modified from LAPACK routine SLAQGE
- */
-#include <math.h>
-#include "slu_sdefs.h"
-
-void
-slaqgs(SuperMatrix *A, float *r, float *c, 
-       float rowcnd, float colcnd, float amax, char *equed)
-{
-/*
-    Purpose   
-    =======   
-
-    SLAQGS equilibrates a general sparse M by N matrix A using the row and   
-    scaling factors in the vectors R and C.   
-
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
-
-    Arguments   
-    =========   
-
-    A       (input/output) SuperMatrix*
-            On exit, the equilibrated matrix.  See EQUED for the form of 
-            the equilibrated matrix. The type of A can be:
-           Stype = NC; Dtype = SLU_S; Mtype = GE.
-           
-    R       (input) float*, dimension (A->nrow)
-            The row scale factors for A.
-           
-    C       (input) float*, dimension (A->ncol)
-            The column scale factors for A.
-           
-    ROWCND  (input) float
-            Ratio of the smallest R(i) to the largest R(i).
-           
-    COLCND  (input) float
-            Ratio of the smallest C(i) to the largest C(i).
-           
-    AMAX    (input) float
-            Absolute value of largest matrix entry.
-           
-    EQUED   (output) char*
-            Specifies the form of equilibration that was done.   
-            = 'N':  No equilibration   
-            = 'R':  Row equilibration, i.e., A has been premultiplied by  
-                    diag(R).   
-            = 'C':  Column equilibration, i.e., A has been postmultiplied  
-                    by diag(C).   
-            = 'B':  Both row and column equilibration, i.e., A has been
-                    replaced by diag(R) * A * diag(C).   
-
-    Internal Parameters   
-    ===================   
-
-    THRESH is a threshold value used to decide if row or column scaling   
-    should be done based on the ratio of the row or column scaling   
-    factors.  If ROWCND < THRESH, row scaling is done, and if   
-    COLCND < THRESH, column scaling is done.   
-
-    LARGE and SMALL are threshold values used to decide if row scaling   
-    should be done based on the absolute size of the largest matrix   
-    element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.   
-
-    ===================================================================== 
-*/
-
-#define THRESH    (0.1)
-    
-    /* Local variables */
-    NCformat *Astore;
-    float   *Aval;
-    int i, j, irow;
-    float large, small, cj;
-    extern double slamch_(char *);
-
-
-    /* Quick return if possible */
-    if (A->nrow <= 0 || A->ncol <= 0) {
-       *(unsigned char *)equed = 'N';
-       return;
-    }
-
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    
-    /* Initialize LARGE and SMALL. */
-    small = slamch_("Safe minimum") / slamch_("Precision");
-    large = 1. / small;
-
-    if (rowcnd >= THRESH && amax >= small && amax <= large) {
-       if (colcnd >= THRESH)
-           *(unsigned char *)equed = 'N';
-       else {
-           /* Column scaling */
-           for (j = 0; j < A->ncol; ++j) {
-               cj = c[j];
-               for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                   Aval[i] *= cj;
-                }
-           }
-           *(unsigned char *)equed = 'C';
-       }
-    } else if (colcnd >= THRESH) {
-       /* Row scaling, no column scaling */
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-               irow = Astore->rowind[i];
-               Aval[i] *= r[irow];
-           }
-       *(unsigned char *)equed = 'R';
-    } else {
-       /* Row and column scaling */
-       for (j = 0; j < A->ncol; ++j) {
-           cj = c[j];
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-               irow = Astore->rowind[i];
-               Aval[i] *= cj * r[irow];
-           }
-       }
-       *(unsigned char *)equed = 'B';
-    }
-
-    return;
-
-} /* slaqgs */
-
diff --git a/superlu/slu_Cnames.h b/superlu/slu_Cnames.h
deleted file mode 100644
index 7c8e7dd5..00000000
--- a/superlu/slu_Cnames.h
+++ /dev/null
@@ -1,356 +0,0 @@
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 1, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-#ifndef __SUPERLU_CNAMES /* allow multiple inclusions */
-#define __SUPERLU_CNAMES
-
-/*
- * These macros define how C routines will be called.  ADD_ assumes that
- * they will be called by fortran, which expects C routines to have an
- * underscore postfixed to the name (Suns, and the Intel expect this).
- * NOCHANGE indicates that fortran will be calling, and that it expects
- * the name called by fortran to be identical to that compiled by the C
- * (RS6K's do this).  UPCASE says it expects C routines called by fortran
- * to be in all upcase (CRAY wants this). 
- */
-
-#define ADD_       0
-#define ADD__      1
-#define NOCHANGE   2
-#define UPCASE     3
-#define C_CALL     4
-
-#ifdef UpCase
-#define F77_CALL_C UPCASE
-#endif
-
-#ifdef NoChange
-#define F77_CALL_C NOCHANGE
-#endif
-
-#ifdef Add_
-#define F77_CALL_C ADD_
-#endif
-
-#ifdef Add__
-#define F77_CALL_C ADD__
-#endif
-
-/* Default */
-#ifndef F77_CALL_C
-#define F77_CALL_C ADD_
-#endif
-
-
-#if (F77_CALL_C == ADD_)
-/*
- * These defines set up the naming scheme required to have a fortran 77
- * routine call a C routine
- * No redefinition necessary to have following Fortran to C interface:
- *           FORTRAN CALL               C DECLARATION
- *           call dgemm(...)           void dgemm_(...)
- *
- * This is the default.
- */
-
-#endif
-
-#if (F77_CALL_C == ADD__)
-/*
- * These defines set up the naming scheme required to have a fortran 77
- * routine call a C routine 
- * for following Fortran to C interface:
- *           FORTRAN CALL               C DECLARATION
- *           call dgemm(...)           void dgemm__(...)
- */
-/* BLAS */
-#define sasum_    sasum__
-#define isamax_   isamax__
-#define scopy_    scopy__
-#define sscal_    sscal__
-#define sger_     sger__
-#define snrm2_    snrm2__
-#define ssymv_    ssymv__
-#define sdot_     sdot__
-#define saxpy_    saxpy__
-#define ssyr2_    ssyr2__
-#define srot_     srot__
-#define sgemv_    sgemv__
-#define strsv_    strsv__
-#define sgemm_    sgemm__
-#define strsm_    strsm__
-
-#define dasum_    dasum__
-#define idamax_   idamax__
-#define dcopy_    dcopy__
-#define dscal_    dscal__
-#define dger_     dger__
-#define dnrm2_    dnrm2__
-#define dsymv_    dsymv__
-#define ddot_     ddot__
-#define daxpy_    daxpy__
-#define dsyr2_    dsyr2__
-#define drot_     drot__
-#define dgemv_    dgemv__
-#define dtrsv_    dtrsv__
-#define dgemm_    dgemm__
-#define dtrsm_    dtrsm__
-
-#define scasum_   scasum__
-#define icamax_   icamax__
-#define ccopy_    ccopy__
-#define cscal_    cscal__
-#define scnrm2_   scnrm2__
-#define caxpy_    caxpy__
-#define cgemv_    cgemv__
-#define ctrsv_    ctrsv__
-#define cgemm_    cgemm__
-#define ctrsm_    ctrsm__
-#define cgerc_    cgerc__
-#define chemv_    chemv__
-#define cher2_    cher2__
-
-#define dzasum_   dzasum__
-#define izamax_   izamax__
-#define zcopy_    zcopy__
-#define zscal_    zscal__
-#define dznrm2_   dznrm2__
-#define zaxpy_    zaxpy__
-#define zgemv_    zgemv__
-#define ztrsv_    ztrsv__
-#define zgemm_    zgemm__
-#define ztrsm_    ztrsm__
-#define zgerc_    zgerc__
-#define zhemv_    zhemv__
-#define zher2_    zher2__
-
-/* LAPACK */
-#define dlamch_   dlamch__
-#define slamch_   slamch__
-#define xerbla_   xerbla__
-#define lsame_    lsame__
-#define dlacon_   dlacon__
-#define slacon_   slacon__
-#define icmax1_   icmax1__
-#define scsum1_   scsum1__
-#define clacon_   clacon__
-#define dzsum1_   dzsum1__
-#define izmax1_   izmax1__
-#define zlacon_   zlacon__
-
-/* Fortran interface */
-#define c_bridge_dgssv_ c_bridge_dgssv__
-#define c_fortran_sgssv_ c_fortran_sgssv__
-#define c_fortran_dgssv_ c_fortran_dgssv__
-#define c_fortran_cgssv_ c_fortran_cgssv__
-#define c_fortran_zgssv_ c_fortran_zgssv__
-#endif
-
-#if (F77_CALL_C == UPCASE)
-/*
- * These defines set up the naming scheme required to have a fortran 77
- * routine call a C routine 
- * following Fortran to C interface:
- *           FORTRAN CALL               C DECLARATION
- *           call dgemm(...)           void DGEMM(...)
- */
-/* BLAS */
-#define sasum_    SASUM
-#define isamax_   ISAMAX
-#define scopy_    SCOPY
-#define sscal_    SSCAL
-#define sger_     SGER
-#define snrm2_    SNRM2
-#define ssymv_    SSYMV
-#define sdot_     SDOT
-#define saxpy_    SAXPY
-#define ssyr2_    SSYR2
-#define srot_     SROT
-#define sgemv_    SGEMV
-#define strsv_    STRSV
-#define sgemm_    SGEMM
-#define strsm_    STRSM
-
-#define dasum_    SASUM
-#define idamax_   ISAMAX
-#define dcopy_    SCOPY
-#define dscal_    SSCAL
-#define dger_     SGER
-#define dnrm2_    SNRM2
-#define dsymv_    SSYMV
-#define ddot_     SDOT
-#define daxpy_    SAXPY
-#define dsyr2_    SSYR2
-#define drot_     SROT
-#define dgemv_    SGEMV
-#define dtrsv_    STRSV
-#define dgemm_    SGEMM
-#define dtrsm_    STRSM
-
-#define scasum_   SCASUM
-#define icamax_   ICAMAX
-#define ccopy_    CCOPY
-#define cscal_    CSCAL
-#define scnrm2_   SCNRM2
-#define caxpy_    CAXPY
-#define cgemv_    CGEMV
-#define ctrsv_    CTRSV
-#define cgemm_    CGEMM
-#define ctrsm_    CTRSM
-#define cgerc_    CGERC
-#define chemv_    CHEMV
-#define cher2_    CHER2
-
-#define dzasum_   SCASUM
-#define izamax_   ICAMAX
-#define zcopy_    CCOPY
-#define zscal_    CSCAL
-#define dznrm2_   SCNRM2
-#define zaxpy_    CAXPY
-#define zgemv_    CGEMV
-#define ztrsv_    CTRSV
-#define zgemm_    CGEMM
-#define ztrsm_    CTRSM
-#define zgerc_    CGERC
-#define zhemv_    CHEMV
-#define zher2_    CHER2
-
-/* LAPACK */
-#define dlamch_   DLAMCH
-#define slamch_   SLAMCH
-#define xerbla_   XERBLA
-#define lsame_    LSAME
-#define dlacon_   DLACON
-#define slacon_   SLACON
-#define icmax1_   ICMAX1
-#define scsum1_   SCSUM1
-#define clacon_   CLACON
-#define dzsum1_   DZSUM1
-#define izmax1_   IZMAX1
-#define zlacon_   ZLACON
-
-/* Fortran interface */
-#define c_bridge_dgssv_ C_BRIDGE_DGSSV
-#define c_fortran_sgssv_ C_FORTRAN_SGSSV
-#define c_fortran_dgssv_ C_FORTRAN_DGSSV
-#define c_fortran_cgssv_ C_FORTRAN_CGSSV
-#define c_fortran_zgssv_ C_FORTRAN_ZGSSV
-#endif
-
-#if (F77_CALL_C == NOCHANGE)
-/*
- * These defines set up the naming scheme required to have a fortran 77
- * routine call a C routine 
- * for following Fortran to C interface:
- *           FORTRAN CALL               C DECLARATION
- *           call dgemm(...)           void dgemm(...)
- */
-/* BLAS */
-#define sasum_    sasum
-#define isamax_   isamax
-#define scopy_    scopy
-#define sscal_    sscal
-#define sger_     sger
-#define snrm2_    snrm2
-#define ssymv_    ssymv
-#define sdot_     sdot
-#define saxpy_    saxpy
-#define ssyr2_    ssyr2
-#define srot_     srot
-#define sgemv_    sgemv
-#define strsv_    strsv
-#define sgemm_    sgemm
-#define strsm_    strsm
-
-#define dasum_    dasum
-#define idamax_   idamax
-#define dcopy_    dcopy
-#define dscal_    dscal
-#define dger_     dger
-#define dnrm2_    dnrm2
-#define dsymv_    dsymv
-#define ddot_     ddot
-#define daxpy_    daxpy
-#define dsyr2_    dsyr2
-#define drot_     drot
-#define dgemv_    dgemv
-#define dtrsv_    dtrsv
-#define dgemm_    dgemm
-#define dtrsm_    dtrsm
-
-#define scasum_   scasum
-#define icamax_   icamax
-#define ccopy_    ccopy
-#define cscal_    cscal
-#define scnrm2_   scnrm2
-#define caxpy_    caxpy
-#define cgemv_    cgemv
-#define ctrsv_    ctrsv
-#define cgemm_    cgemm
-#define ctrsm_    ctrsm
-#define cgerc_    cgerc
-#define chemv_    chemv
-#define cher2_    cher2
-
-#define dzasum_   dzasum
-#define izamax_   izamax
-#define zcopy_    zcopy
-#define zscal_    zscal
-#define dznrm2_   dznrm2
-#define zaxpy_    zaxpy
-#define zgemv_    zgemv
-#define ztrsv_    ztrsv
-#define zgemm_    zgemm
-#define ztrsm_    ztrsm
-#define zgerc_    zgerc
-#define zhemv_    zhemv
-#define zher2_    zher2
-
-/* LAPACK */
-#define dlamch_   dlamch
-#define slamch_   slamch
-#define xerbla_   xerbla
-#define lsame_    lsame
-#define dlacon_   dlacon
-#define slacon_   slacon
-#define icmax1_   icmax1
-#define scsum1_   scsum1
-#define clacon_   clacon
-#define dzsum1_   dzsum1
-#define izmax1_   izmax1
-#define zlacon_   zlacon
-
-/* Fortran interface */
-#define c_bridge_dgssv_ c_bridge_dgssv
-#define c_fortran_sgssv_ c_fortran_sgssv
-#define c_fortran_dgssv_ c_fortran_dgssv
-#define c_fortran_cgssv_ c_fortran_cgssv
-#define c_fortran_zgssv_ c_fortran_zgssv
-#endif
-
-#endif /* __SUPERLU_CNAMES */
diff --git a/superlu/slu_cdefs.h b/superlu/slu_cdefs.h
deleted file mode 100644
index ade6a025..00000000
--- a/superlu/slu_cdefs.h
+++ /dev/null
@@ -1,246 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-#ifndef __SUPERLU_cSP_DEFS /* allow multiple inclusions */
-#define __SUPERLU_cSP_DEFS
-
-/*
- * File name:          csp_defs.h
- * Purpose:             Sparse matrix types and function prototypes
- * History:
- */
-
-#ifdef _CRAY
-#include <fortran.h>
-#include <string.h>
-#endif
-
-/* Define my integer type int_t */
-typedef int int_t; /* default */
-
-#include "slu_Cnames.h"
-#include "supermatrix.h"
-#include "slu_util.h"
-#include "slu_scomplex.h"
-
-
-/*
- * Global data structures used in LU factorization -
- * 
- *   nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
- *   (xsup,supno): supno[i] is the supernode no to which i belongs;
- *     xsup(s) points to the beginning of the s-th supernode.
- *     e.g.   supno 0 1 2 2 3 3 3 4 4 4 4 4   (n=12)
- *             xsup 0 1 2 4 7 12
- *     Note: dfs will be performed on supernode rep. relative to the new 
- *           row pivoting ordering
- *
- *   (xlsub,lsub): lsub[*] contains the compressed subscript of
- *     rectangular supernodes; xlsub[j] points to the starting
- *     location of the j-th column in lsub[*]. Note that xlsub 
- *     is indexed by column.
- *     Storage: original row subscripts
- *
- *      During the course of sparse LU factorization, we also use
- *     (xlsub,lsub) for the purpose of symmetric pruning. For each
- *     supernode {s,s+1,...,t=s+r} with first column s and last
- *     column t, the subscript set
- *             lsub[j], j=xlsub[s], .., xlsub[s+1]-1
- *     is the structure of column s (i.e. structure of this supernode).
- *     It is used for the storage of numerical values.
- *     Furthermore,
- *             lsub[j], j=xlsub[t], .., xlsub[t+1]-1
- *     is the structure of the last column t of this supernode.
- *     It is for the purpose of symmetric pruning. Therefore, the
- *     structural subscripts can be rearranged without making physical
- *     interchanges among the numerical values.
- *
- *     However, if the supernode has only one column, then we
- *     only keep one set of subscripts. For any subscript interchange
- *     performed, similar interchange must be done on the numerical
- *     values.
- *
- *     The last column structures (for pruning) will be removed
- *     after the numercial LU factorization phase.
- *
- *   (xlusup,lusup): lusup[*] contains the numerical values of the
- *     rectangular supernodes; xlusup[j] points to the starting
- *     location of the j-th column in storage vector lusup[*]
- *     Note: xlusup is indexed by column.
- *     Each rectangular supernode is stored by column-major
- *     scheme, consistent with Fortran 2-dim array storage.
- *
- *   (xusub,ucol,usub): ucol[*] stores the numerical values of
- *     U-columns outside the rectangular supernodes. The row
- *     subscript of nonzero ucol[k] is stored in usub[k].
- *     xusub[i] points to the starting location of column i in ucol.
- *     Storage: new row subscripts; that is subscripts of PA.
- */
-typedef struct {
-    int     *xsup;    /* supernode and column mapping */
-    int     *supno;   
-    int     *lsub;    /* compressed L subscripts */
-    int            *xlsub;
-    complex  *lusup;   /* L supernodes */
-    int     *xlusup;
-    complex  *ucol;    /* U columns */
-    int     *usub;
-    int            *xusub;
-    int     nzlmax;   /* current max size of lsub */
-    int     nzumax;   /*    "    "    "      ucol */
-    int     nzlumax;  /*    "    "    "     lusup */
-    int     n;        /* number of columns in the matrix */
-    LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
-} GlobalLU_t;
-
-typedef struct {
-    float for_lu;
-    float total_needed;
-    int   expansions;
-} mem_usage_t;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Driver routines */
-extern void
-cgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *,
-      SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *);
-extern void
-cgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,
-       char *, float *, float *, SuperMatrix *, SuperMatrix *,
-       void *, int, SuperMatrix *, SuperMatrix *,
-       float *, float *, float *, float *,
-       mem_usage_t *, SuperLUStat_t *, int *);
-
-/* Supernodal LU factor related */
-extern void
-cCreate_CompCol_Matrix(SuperMatrix *, int, int, int, complex *,
-                      int *, int *, Stype_t, Dtype_t, Mtype_t);
-extern void
-cCreate_CompRow_Matrix(SuperMatrix *, int, int, int, complex *,
-                      int *, int *, Stype_t, Dtype_t, Mtype_t);
-extern void
-cCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *);
-extern void
-cCreate_Dense_Matrix(SuperMatrix *, int, int, complex *, int,
-                    Stype_t, Dtype_t, Mtype_t);
-extern void
-cCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, complex *, 
-                        int *, int *, int *, int *, int *,
-                        Stype_t, Dtype_t, Mtype_t);
-extern void
-cCopy_Dense_Matrix(int, int, complex *, int, complex *, int);
-
-  // extern void    countnz (const int, int *, int *, int *, GlobalLU_t *);
-  // extern void    fixupL (const int, const int *, GlobalLU_t *);
-
-extern void    callocateA (int, int, complex **, int **, int **);
-extern void    cgstrf (superlu_options_t*, SuperMatrix*, float, 
-                       int, int, int*, void *, int, int *, int *, 
-                       SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *);
-extern int     csnode_dfs (const int, const int, const int *, const int *,
-                            const int *, int *, int *, GlobalLU_t *);
-extern int     csnode_bmod (const int, const int, const int, complex *,
-                              complex *, GlobalLU_t *, SuperLUStat_t*);
-extern void    cpanel_dfs (const int, const int, const int, SuperMatrix *,
-                          int *, int *, complex *, int *, int *, int *,
-                          int *, int *, int *, int *, GlobalLU_t *);
-extern void    cpanel_bmod (const int, const int, const int, const int,
-                           complex *, complex *, int *, int *,
-                          GlobalLU_t *, SuperLUStat_t*);
-extern int     ccolumn_dfs (const int, const int, int *, int *, int *, int *,
-                          int *, int *, int *, int *, int *, GlobalLU_t *);
-extern int     ccolumn_bmod (const int, const int, complex *,
-                          complex *, int *, int *, int,
-                           GlobalLU_t *, SuperLUStat_t*);
-extern int     ccopy_to_ucol (int, int, int *, int *, int *,
-                              complex *, GlobalLU_t *);         
-extern int     cpivotL (const int, const float, int *, int *, 
-                         int *, int *, int *, GlobalLU_t *, SuperLUStat_t*);
-extern void    cpruneL (const int, const int *, const int, const int,
-                         const int *, const int *, int *, GlobalLU_t *);
-extern void    creadmt (int *, int *, int *, complex **, int **, int **);
-extern void    cGenXtrue (int, int, complex *, int);
-extern void    cFillRHS (trans_t, int, complex *, int, SuperMatrix *,
-                         SuperMatrix *);
-extern void    cgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
-                        SuperMatrix *, SuperLUStat_t*, int *);
-
-
-/* Driver related */
-
-extern void    cgsequ (SuperMatrix *, float *, float *, float *,
-                       float *, float *, int *);
-extern void    claqgs (SuperMatrix *, float *, float *, float,
-                        float, float, char *);
-extern void    cgscon (char *, SuperMatrix *, SuperMatrix *, 
-                        float, float *, SuperLUStat_t*, int *);
-extern float   cPivotGrowth(int, SuperMatrix *, int *, 
-                            SuperMatrix *, SuperMatrix *);
-extern void    cgsrfs (trans_t, SuperMatrix *, SuperMatrix *,
-                       SuperMatrix *, int *, int *, char *, float *, 
-                       float *, SuperMatrix *, SuperMatrix *,
-                       float *, float *, SuperLUStat_t*, int *);
-
-extern int     sp_ctrsv (char *, char *, char *, SuperMatrix *,
-                       SuperMatrix *, complex *, SuperLUStat_t*, int *);
-extern int     sp_cgemv (char *, complex, SuperMatrix *, complex *,
-                       int, complex, complex *, int);
-
-extern int     sp_cgemm (char *, char *, int, int, int, complex,
-                       SuperMatrix *, complex *, int, complex, 
-                       complex *, int);
-
-/* Memory-related */
-extern int     cLUMemInit (fact_t, void *, int, int, int, int, int,
-                            SuperMatrix *, SuperMatrix *,
-                            GlobalLU_t *, int **, complex **);
-extern void    cSetRWork (int, int, complex *, complex **, complex **);
-extern void    cLUWorkFree (int *, complex *, GlobalLU_t *);
-extern int     cLUMemXpand (int, int, MemType, int *, GlobalLU_t *);
-
-extern complex  *complexMalloc(int);
-extern complex  *complexCalloc(int);
-extern float  *floatMalloc(int);
-extern float  *floatCalloc(int);
-extern int     cmemory_usage(const int, const int, const int, const int);
-extern int     cQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *);
-
-/* Auxiliary routines */
-extern void    creadhb(int *, int *, int *, complex **, int **, int **);
-extern void    cCompRow_to_CompCol(int, int, int, complex*, int*, int*,
-                                  complex **, int **, int **);
-extern void    cfill (complex *, int, complex);
-extern void    cinf_norm_error (int, SuperMatrix *, complex *);
-  // extern void    PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *,
-  //                    complex, complex, complex *, complex *, char *);
-
-/* Routines for debugging */
-extern void    cPrint_CompCol_Matrix(char *, SuperMatrix *);
-extern void    cPrint_SuperNode_Matrix(char *, SuperMatrix *);
-extern void    cPrint_Dense_Matrix(char *, SuperMatrix *);
-// extern void    print_lu_col(char *, int, int, int *, GlobalLU_t *);
-// extern void    check_tempv(int, complex *);
-
-#ifdef __cplusplus
-  }
-#endif
-
-#endif /* __SUPERLU_cSP_DEFS */
-
diff --git a/superlu/slu_dcomplex.h b/superlu/slu_dcomplex.h
deleted file mode 100644
index 68dc1ce6..00000000
--- a/superlu/slu_dcomplex.h
+++ /dev/null
@@ -1,93 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-#ifndef __SUPERLU_DCOMPLEX /* allow multiple inclusions */
-#define __SUPERLU_DCOMPLEX
-
-/* 
- * This header file is to be included in source files z*.c
- */
-#ifndef DCOMPLEX_INCLUDE
-#define DCOMPLEX_INCLUDE
-
-typedef struct { double r, i; } doublecomplex;
-
-
-/* Macro definitions */
-
-/* Complex Addition c = a + b */
-#define z_add(c, a, b) { (c)->r = (a)->r + (b)->r; \
-                        (c)->i = (a)->i + (b)->i; }
-
-/* Complex Subtraction c = a - b */
-#define z_sub(c, a, b) { (c)->r = (a)->r - (b)->r; \
-                        (c)->i = (a)->i - (b)->i; }
-
-/* Complex-Double Multiplication */
-#define zd_mult(c, a, b) { (c)->r = (a)->r * (b); \
-                           (c)->i = (a)->i * (b); }
-
-/* Complex-Complex Multiplication */
-#define zz_mult(c, a, b) { \
-       double cr, ci; \
-       cr = (a)->r * (b)->r - (a)->i * (b)->i; \
-       ci = (a)->i * (b)->r + (a)->r * (b)->i; \
-       (c)->r = cr; \
-       (c)->i = ci; \
-    }
-
-#define zz_conj(a, b) { \
-        (a)->r = (b)->r; \
-        (a)->i = -((b)->i); \
-    }
-
-/* Complex equality testing */
-#define z_eq(a, b)  ( (a)->r == (b)->r && (a)->i == (b)->i )
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Prototypes for functions in dcomplex.c */
-void z_div(doublecomplex *, doublecomplex *, doublecomplex *);
-double z_abs(doublecomplex *);     /* exact */
-double z_abs1(doublecomplex *);    /* approximate */
-void z_exp(doublecomplex *, doublecomplex *);
-void d_cnjg(doublecomplex *r, doublecomplex *z);
-double d_imag(doublecomplex *);
-
-
-#ifdef __cplusplus
-  }
-#endif
-
-#endif
-
-#endif  /* __SUPERLU_DCOMPLEX */
diff --git a/superlu/slu_ddefs.h b/superlu/slu_ddefs.h
deleted file mode 100644
index de3f7e84..00000000
--- a/superlu/slu_ddefs.h
+++ /dev/null
@@ -1,243 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-#ifndef __SUPERLU_dSP_DEFS /* allow multiple inclusions */
-#define __SUPERLU_dSP_DEFS
-
-/*
- * File name:          dsp_defs.h
- * Purpose:             Sparse matrix types and function prototypes
- * History:
- */
-
-#ifdef _CRAY
-#include <fortran.h>
-#include <string.h>
-#endif
-
-/* Define my integer type int_t */
-typedef int int_t; /* default */
-
-#include "slu_Cnames.h"
-#include "supermatrix.h"
-#include "slu_util.h"
-
-
-/*
- * Global data structures used in LU factorization -
- * 
- *   nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
- *   (xsup,supno): supno[i] is the supernode no to which i belongs;
- *     xsup(s) points to the beginning of the s-th supernode.
- *     e.g.   supno 0 1 2 2 3 3 3 4 4 4 4 4   (n=12)
- *             xsup 0 1 2 4 7 12
- *     Note: dfs will be performed on supernode rep. relative to the new 
- *           row pivoting ordering
- *
- *   (xlsub,lsub): lsub[*] contains the compressed subscript of
- *     rectangular supernodes; xlsub[j] points to the starting
- *     location of the j-th column in lsub[*]. Note that xlsub 
- *     is indexed by column.
- *     Storage: original row subscripts
- *
- *      During the course of sparse LU factorization, we also use
- *     (xlsub,lsub) for the purpose of symmetric pruning. For each
- *     supernode {s,s+1,...,t=s+r} with first column s and last
- *     column t, the subscript set
- *             lsub[j], j=xlsub[s], .., xlsub[s+1]-1
- *     is the structure of column s (i.e. structure of this supernode).
- *     It is used for the storage of numerical values.
- *     Furthermore,
- *             lsub[j], j=xlsub[t], .., xlsub[t+1]-1
- *     is the structure of the last column t of this supernode.
- *     It is for the purpose of symmetric pruning. Therefore, the
- *     structural subscripts can be rearranged without making physical
- *     interchanges among the numerical values.
- *
- *     However, if the supernode has only one column, then we
- *     only keep one set of subscripts. For any subscript interchange
- *     performed, similar interchange must be done on the numerical
- *     values.
- *
- *     The last column structures (for pruning) will be removed
- *     after the numercial LU factorization phase.
- *
- *   (xlusup,lusup): lusup[*] contains the numerical values of the
- *     rectangular supernodes; xlusup[j] points to the starting
- *     location of the j-th column in storage vector lusup[*]
- *     Note: xlusup is indexed by column.
- *     Each rectangular supernode is stored by column-major
- *     scheme, consistent with Fortran 2-dim array storage.
- *
- *   (xusub,ucol,usub): ucol[*] stores the numerical values of
- *     U-columns outside the rectangular supernodes. The row
- *     subscript of nonzero ucol[k] is stored in usub[k].
- *     xusub[i] points to the starting location of column i in ucol.
- *     Storage: new row subscripts; that is subscripts of PA.
- */
-typedef struct {
-    int     *xsup;    /* supernode and column mapping */
-    int     *supno;   
-    int     *lsub;    /* compressed L subscripts */
-    int            *xlsub;
-    double  *lusup;   /* L supernodes */
-    int     *xlusup;
-    double  *ucol;    /* U columns */
-    int     *usub;
-    int            *xusub;
-    int     nzlmax;   /* current max size of lsub */
-    int     nzumax;   /*    "    "    "      ucol */
-    int     nzlumax;  /*    "    "    "     lusup */
-    int     n;        /* number of columns in the matrix */
-    LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
-} GlobalLU_t;
-
-typedef struct {
-    float for_lu;
-    float total_needed;
-    int   expansions;
-} mem_usage_t;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Driver routines */
-extern void
-dgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *,
-      SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *);
-extern void
-dgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,
-       char *, double *, double *, SuperMatrix *, SuperMatrix *,
-       void *, int, SuperMatrix *, SuperMatrix *,
-       double *, double *, double *, double *,
-       mem_usage_t *, SuperLUStat_t *, int *);
-
-/* Supernodal LU factor related */
-extern void
-dCreate_CompCol_Matrix(SuperMatrix *, int, int, int, double *,
-                      int *, int *, Stype_t, Dtype_t, Mtype_t);
-extern void
-dCreate_CompRow_Matrix(SuperMatrix *, int, int, int, double *,
-                      int *, int *, Stype_t, Dtype_t, Mtype_t);
-extern void
-dCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *);
-extern void
-dCreate_Dense_Matrix(SuperMatrix *, int, int, double *, int,
-                    Stype_t, Dtype_t, Mtype_t);
-extern void
-dCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, double *, 
-                        int *, int *, int *, int *, int *,
-                        Stype_t, Dtype_t, Mtype_t);
-extern void
-dCopy_Dense_Matrix(int, int, double *, int, double *, int);
-
-extern void    countnz (const int, int *, int *, int *, GlobalLU_t *);
-extern void    fixupL (const int, const int *, GlobalLU_t *);
-
-extern void    dallocateA (int, int, double **, int **, int **);
-extern void    dgstrf (superlu_options_t*, SuperMatrix*, double, 
-                       int, int, int*, void *, int, int *, int *, 
-                       SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *);
-extern int     dsnode_dfs (const int, const int, const int *, const int *,
-                            const int *, int *, int *, GlobalLU_t *);
-extern int     dsnode_bmod (const int, const int, const int, double *,
-                              double *, GlobalLU_t *, SuperLUStat_t*);
-extern void    dpanel_dfs (const int, const int, const int, SuperMatrix *,
-                          int *, int *, double *, int *, int *, int *,
-                          int *, int *, int *, int *, GlobalLU_t *);
-extern void    dpanel_bmod (const int, const int, const int, const int,
-                           double *, double *, int *, int *,
-                          GlobalLU_t *, SuperLUStat_t*);
-extern int     dcolumn_dfs (const int, const int, int *, int *, int *, int *,
-                          int *, int *, int *, int *, int *, GlobalLU_t *);
-extern int     dcolumn_bmod (const int, const int, double *,
-                          double *, int *, int *, int,
-                           GlobalLU_t *, SuperLUStat_t*);
-extern int     dcopy_to_ucol (int, int, int *, int *, int *,
-                              double *, GlobalLU_t *);         
-extern int     dpivotL (const int, const double, int *, int *, 
-                         int *, int *, int *, GlobalLU_t *, SuperLUStat_t*);
-extern void    dpruneL (const int, const int *, const int, const int,
-                         const int *, const int *, int *, GlobalLU_t *);
-extern void    dreadmt (int *, int *, int *, double **, int **, int **);
-extern void    dGenXtrue (int, int, double *, int);
-extern void    dFillRHS (trans_t, int, double *, int, SuperMatrix *,
-                         SuperMatrix *);
-extern void    dgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
-                        SuperMatrix *, SuperLUStat_t*, int *);
-
-
-/* Driver related */
-
-extern void    dgsequ (SuperMatrix *, double *, double *, double *,
-                       double *, double *, int *);
-extern void    dlaqgs (SuperMatrix *, double *, double *, double,
-                        double, double, char *);
-extern void    dgscon (char *, SuperMatrix *, SuperMatrix *, 
-                        double, double *, SuperLUStat_t*, int *);
-extern double   dPivotGrowth(int, SuperMatrix *, int *, 
-                            SuperMatrix *, SuperMatrix *);
-extern void    dgsrfs (trans_t, SuperMatrix *, SuperMatrix *,
-                       SuperMatrix *, int *, int *, char *, double *, 
-                       double *, SuperMatrix *, SuperMatrix *,
-                       double *, double *, SuperLUStat_t*, int *);
-
-extern int     sp_dtrsv (char *, char *, char *, SuperMatrix *,
-                       SuperMatrix *, double *, SuperLUStat_t*, int *);
-extern int     sp_dgemv (char *, double, SuperMatrix *, double *,
-                       int, double, double *, int);
-
-extern int     sp_dgemm (char *, char *, int, int, int, double,
-                       SuperMatrix *, double *, int, double, 
-                       double *, int);
-
-/* Memory-related */
-extern int     dLUMemInit (fact_t, void *, int, int, int, int, int,
-                            SuperMatrix *, SuperMatrix *,
-                            GlobalLU_t *, int **, double **);
-extern void    dSetRWork (int, int, double *, double **, double **);
-extern void    dLUWorkFree (int *, double *, GlobalLU_t *);
-extern int     dLUMemXpand (int, int, MemType, int *, GlobalLU_t *);
-
-extern double  *doubleMalloc(int);
-extern double  *doubleCalloc(int);
-extern int     dmemory_usage(const int, const int, const int, const int);
-extern int     dQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *);
-
-/* Auxiliary routines */
-extern void    dreadhb(int *, int *, int *, double **, int **, int **);
-extern void    dCompRow_to_CompCol(int, int, int, double*, int*, int*,
-                                  double **, int **, int **);
-extern void    dfill (double *, int, double);
-extern void    dinf_norm_error (int, SuperMatrix *, double *);
-extern void    PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *,
-                        double, double, double *, double *, char *);
-
-/* Routines for debugging */
-extern void    dPrint_CompCol_Matrix(char *, SuperMatrix *);
-extern void    dPrint_SuperNode_Matrix(char *, SuperMatrix *);
-extern void    dPrint_Dense_Matrix(char *, SuperMatrix *);
-extern void    print_lu_col(char *, int, int, int *, GlobalLU_t *);
-extern void    check_tempv(int, double *);
-
-#ifdef __cplusplus
-  }
-#endif
-
-#endif /* __SUPERLU_dSP_DEFS */
-
diff --git a/superlu/slu_scomplex.h b/superlu/slu_scomplex.h
deleted file mode 100644
index 4f653b15..00000000
--- a/superlu/slu_scomplex.h
+++ /dev/null
@@ -1,93 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-#ifndef __SUPERLU_SCOMPLEX /* allow multiple inclusions */
-#define __SUPERLU_SCOMPLEX
-
-/* 
- * This header file is to be included in source files c*.c
- */
-#ifndef SCOMPLEX_INCLUDE
-#define SCOMPLEX_INCLUDE
-
-typedef struct { float r, i; } complex;
-
-
-/* Macro definitions */
-
-/* Complex Addition c = a + b */
-#define c_add(c, a, b) { (c)->r = (a)->r + (b)->r; \
-                        (c)->i = (a)->i + (b)->i; }
-
-/* Complex Subtraction c = a - b */
-#define c_sub(c, a, b) { (c)->r = (a)->r - (b)->r; \
-                        (c)->i = (a)->i - (b)->i; }
-
-/* Complex-Double Multiplication */
-#define cs_mult(c, a, b) { (c)->r = (a)->r * (b); \
-                           (c)->i = (a)->i * (b); }
-
-/* Complex-Complex Multiplication */
-#define cc_mult(c, a, b) { \
-       float cr, ci; \
-       cr = (a)->r * (b)->r - (a)->i * (b)->i; \
-       ci = (a)->i * (b)->r + (a)->r * (b)->i; \
-       (c)->r = cr; \
-       (c)->i = ci; \
-    }
-
-#define cc_conj(a, b) { \
-        (a)->r = (b)->r; \
-        (a)->i = -((b)->i); \
-    }
-
-/* Complex equality testing */
-#define c_eq(a, b)  ( (a)->r == (b)->r && (a)->i == (b)->i )
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Prototypes for functions in scomplex.c */
-void c_div(complex *, complex *, complex *);
-double c_abs(complex *);     /* exact */
-double c_abs1(complex *);    /* approximate */
-void c_exp(complex *, complex *);
-void r_cnjg(complex *, complex *);
-double r_imag(complex *);
-
-
-#ifdef __cplusplus
-  }
-#endif
-
-#endif
-
-#endif  /* __SUPERLU_SCOMPLEX */
diff --git a/superlu/slu_sdefs.h b/superlu/slu_sdefs.h
deleted file mode 100644
index 5177d5cf..00000000
--- a/superlu/slu_sdefs.h
+++ /dev/null
@@ -1,243 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-#ifndef __SUPERLU_sSP_DEFS /* allow multiple inclusions */
-#define __SUPERLU_sSP_DEFS
-
-/*
- * File name:          ssp_defs.h
- * Purpose:             Sparse matrix types and function prototypes
- * History:
- */
-
-#ifdef _CRAY
-#include <fortran.h>
-#include <string.h>
-#endif
-
-/* Define my integer type int_t */
-typedef int int_t; /* default */
-
-#include "slu_Cnames.h"
-#include "supermatrix.h"
-#include "slu_util.h"
-
-
-/*
- * Global data structures used in LU factorization -
- * 
- *   nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
- *   (xsup,supno): supno[i] is the supernode no to which i belongs;
- *     xsup(s) points to the beginning of the s-th supernode.
- *     e.g.   supno 0 1 2 2 3 3 3 4 4 4 4 4   (n=12)
- *             xsup 0 1 2 4 7 12
- *     Note: dfs will be performed on supernode rep. relative to the new 
- *           row pivoting ordering
- *
- *   (xlsub,lsub): lsub[*] contains the compressed subscript of
- *     rectangular supernodes; xlsub[j] points to the starting
- *     location of the j-th column in lsub[*]. Note that xlsub 
- *     is indexed by column.
- *     Storage: original row subscripts
- *
- *      During the course of sparse LU factorization, we also use
- *     (xlsub,lsub) for the purpose of symmetric pruning. For each
- *     supernode {s,s+1,...,t=s+r} with first column s and last
- *     column t, the subscript set
- *             lsub[j], j=xlsub[s], .., xlsub[s+1]-1
- *     is the structure of column s (i.e. structure of this supernode).
- *     It is used for the storage of numerical values.
- *     Furthermore,
- *             lsub[j], j=xlsub[t], .., xlsub[t+1]-1
- *     is the structure of the last column t of this supernode.
- *     It is for the purpose of symmetric pruning. Therefore, the
- *     structural subscripts can be rearranged without making physical
- *     interchanges among the numerical values.
- *
- *     However, if the supernode has only one column, then we
- *     only keep one set of subscripts. For any subscript interchange
- *     performed, similar interchange must be done on the numerical
- *     values.
- *
- *     The last column structures (for pruning) will be removed
- *     after the numercial LU factorization phase.
- *
- *   (xlusup,lusup): lusup[*] contains the numerical values of the
- *     rectangular supernodes; xlusup[j] points to the starting
- *     location of the j-th column in storage vector lusup[*]
- *     Note: xlusup is indexed by column.
- *     Each rectangular supernode is stored by column-major
- *     scheme, consistent with Fortran 2-dim array storage.
- *
- *   (xusub,ucol,usub): ucol[*] stores the numerical values of
- *     U-columns outside the rectangular supernodes. The row
- *     subscript of nonzero ucol[k] is stored in usub[k].
- *     xusub[i] points to the starting location of column i in ucol.
- *     Storage: new row subscripts; that is subscripts of PA.
- */
-typedef struct {
-    int     *xsup;    /* supernode and column mapping */
-    int     *supno;   
-    int     *lsub;    /* compressed L subscripts */
-    int            *xlsub;
-    float  *lusup;   /* L supernodes */
-    int     *xlusup;
-    float  *ucol;    /* U columns */
-    int     *usub;
-    int            *xusub;
-    int     nzlmax;   /* current max size of lsub */
-    int     nzumax;   /*    "    "    "      ucol */
-    int     nzlumax;  /*    "    "    "     lusup */
-    int     n;        /* number of columns in the matrix */
-    LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
-} GlobalLU_t;
-
-typedef struct {
-    float for_lu;
-    float total_needed;
-    int   expansions;
-} mem_usage_t;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Driver routines */
-extern void
-sgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *,
-      SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *);
-extern void
-sgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,
-       char *, float *, float *, SuperMatrix *, SuperMatrix *,
-       void *, int, SuperMatrix *, SuperMatrix *,
-       float *, float *, float *, float *,
-       mem_usage_t *, SuperLUStat_t *, int *);
-
-/* Supernodal LU factor related */
-extern void
-sCreate_CompCol_Matrix(SuperMatrix *, int, int, int, float *,
-                      int *, int *, Stype_t, Dtype_t, Mtype_t);
-extern void
-sCreate_CompRow_Matrix(SuperMatrix *, int, int, int, float *,
-                      int *, int *, Stype_t, Dtype_t, Mtype_t);
-extern void
-sCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *);
-extern void
-sCreate_Dense_Matrix(SuperMatrix *, int, int, float *, int,
-                    Stype_t, Dtype_t, Mtype_t);
-extern void
-sCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, float *, 
-                        int *, int *, int *, int *, int *,
-                        Stype_t, Dtype_t, Mtype_t);
-extern void
-sCopy_Dense_Matrix(int, int, float *, int, float *, int);
-
-  // extern void    countnz (const int, int *, int *, int *, GlobalLU_t *);
-  // extern void    fixupL (const int, const int *, GlobalLU_t *);
-
-extern void    sallocateA (int, int, float **, int **, int **);
-extern void    sgstrf (superlu_options_t*, SuperMatrix*, float, 
-                       int, int, int*, void *, int, int *, int *, 
-                       SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *);
-extern int     ssnode_dfs (const int, const int, const int *, const int *,
-                            const int *, int *, int *, GlobalLU_t *);
-extern int     ssnode_bmod (const int, const int, const int, float *,
-                              float *, GlobalLU_t *, SuperLUStat_t*);
-extern void    spanel_dfs (const int, const int, const int, SuperMatrix *,
-                          int *, int *, float *, int *, int *, int *,
-                          int *, int *, int *, int *, GlobalLU_t *);
-extern void    spanel_bmod (const int, const int, const int, const int,
-                           float *, float *, int *, int *,
-                          GlobalLU_t *, SuperLUStat_t*);
-extern int     scolumn_dfs (const int, const int, int *, int *, int *, int *,
-                          int *, int *, int *, int *, int *, GlobalLU_t *);
-extern int     scolumn_bmod (const int, const int, float *,
-                          float *, int *, int *, int,
-                           GlobalLU_t *, SuperLUStat_t*);
-extern int     scopy_to_ucol (int, int, int *, int *, int *,
-                              float *, GlobalLU_t *);         
-extern int     spivotL (const int, const float, int *, int *, 
-                         int *, int *, int *, GlobalLU_t *, SuperLUStat_t*);
-extern void    spruneL (const int, const int *, const int, const int,
-                         const int *, const int *, int *, GlobalLU_t *);
-extern void    sreadmt (int *, int *, int *, float **, int **, int **);
-extern void    sGenXtrue (int, int, float *, int);
-extern void    sFillRHS (trans_t, int, float *, int, SuperMatrix *,
-                         SuperMatrix *);
-extern void    sgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
-                        SuperMatrix *, SuperLUStat_t*, int *);
-
-
-/* Driver related */
-
-extern void    sgsequ (SuperMatrix *, float *, float *, float *,
-                       float *, float *, int *);
-extern void    slaqgs (SuperMatrix *, float *, float *, float,
-                        float, float, char *);
-extern void    sgscon (char *, SuperMatrix *, SuperMatrix *, 
-                        float, float *, SuperLUStat_t*, int *);
-extern float   sPivotGrowth(int, SuperMatrix *, int *, 
-                            SuperMatrix *, SuperMatrix *);
-extern void    sgsrfs (trans_t, SuperMatrix *, SuperMatrix *,
-                       SuperMatrix *, int *, int *, char *, float *, 
-                       float *, SuperMatrix *, SuperMatrix *,
-                       float *, float *, SuperLUStat_t*, int *);
-
-extern int     sp_strsv (char *, char *, char *, SuperMatrix *,
-                       SuperMatrix *, float *, SuperLUStat_t*, int *);
-extern int     sp_sgemv (char *, float, SuperMatrix *, float *,
-                       int, float, float *, int);
-
-extern int     sp_sgemm (char *, char *, int, int, int, float,
-                       SuperMatrix *, float *, int, float, 
-                       float *, int);
-
-/* Memory-related */
-extern int     sLUMemInit (fact_t, void *, int, int, int, int, int,
-                            SuperMatrix *, SuperMatrix *,
-                            GlobalLU_t *, int **, float **);
-extern void    sSetRWork (int, int, float *, float **, float **);
-extern void    sLUWorkFree (int *, float *, GlobalLU_t *);
-extern int     sLUMemXpand (int, int, MemType, int *, GlobalLU_t *);
-
-extern float  *floatMalloc(int);
-extern float  *floatCalloc(int);
-extern int     smemory_usage(const int, const int, const int, const int);
-extern int     sQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *);
-
-/* Auxiliary routines */
-extern void    sreadhb(int *, int *, int *, float **, int **, int **);
-extern void    sCompRow_to_CompCol(int, int, int, float*, int*, int*,
-                                  float **, int **, int **);
-extern void    sfill (float *, int, float);
-extern void    sinf_norm_error (int, SuperMatrix *, float *);
-  // extern void    PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *,
-  //                    float, float, float *, float *, char *);
-
-/* Routines for debugging */
-extern void    sPrint_CompCol_Matrix(char *, SuperMatrix *);
-extern void    sPrint_SuperNode_Matrix(char *, SuperMatrix *);
-extern void    sPrint_Dense_Matrix(char *, SuperMatrix *);
-// extern void    print_lu_col(char *, int, int, int *, GlobalLU_t *);
-// extern void    check_tempv(int, float *);
-
-#ifdef __cplusplus
-  }
-#endif
-
-#endif /* __SUPERLU_sSP_DEFS */
-
diff --git a/superlu/slu_util.h b/superlu/slu_util.h
deleted file mode 100644
index afed257a..00000000
--- a/superlu/slu_util.h
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#ifndef __SUPERLU_UTIL /* allow multiple inclusions */
-#define __SUPERLU_UTIL
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-/*
-#ifndef __STDC__
-#include <malloc.h>
-#endif
-*/
-#include <assert.h>
-
-/***********************************************************************
- * Macros
- ***********************************************************************/
-#define FIRSTCOL_OF_SNODE(i)   (xsup[i])
-/* No of marker arrays used in the symbolic factorization,
-   each of size n */
-#define NO_MARKER     3
-#define NUM_TEMPV(m,w,t,b)  ( SUPERLU_MAX(m, (t + b)*w) )
-
-#ifndef USER_ABORT
-#define USER_ABORT(msg) superlu_abort_and_exit(msg)
-#endif
-
-#define ABORT(err_msg) \
- { char msg[256];\
-   sprintf(msg,"%s at line %d in file %s\n",err_msg,__LINE__, __FILE__);\
-   USER_ABORT(msg); }
-
-
-#ifndef USER_MALLOC
-#if 1
-#define USER_MALLOC(size) superlu_malloc(size)
-#else
-/* The following may check out some uninitialized data */
-#define USER_MALLOC(size) memset (superlu_malloc(size), '\x0F', size)
-#endif
-#endif
-
-#define SUPERLU_MALLOC(size) USER_MALLOC(size)
-
-#ifndef USER_FREE
-#define USER_FREE(addr) superlu_free(addr)
-#endif
-
-#define SUPERLU_FREE(addr) USER_FREE(addr)
-
-#define CHECK_MALLOC(where) {                 \
-    extern int superlu_malloc_total;        \
-    printf("%s: malloc_total %d Bytes\n",     \
-          where, superlu_malloc_total); \
-}
-
-#define SUPERLU_MAX(x, y)      ( (x) > (y) ? (x) : (y) )
-#define SUPERLU_MIN(x, y)      ( (x) < (y) ? (x) : (y) )
-
-/***********************************************************************
- * Constants 
- ***********************************************************************/
-#define EMPTY  (-1)
-/*#define NO   (-1)*/
-#define FALSE  0
-#define TRUE   1
-
-/***********************************************************************
- * Enumerate types
- ***********************************************************************/
-typedef enum {NO, YES}                                          yes_no_t;
-typedef enum {DOFACT, SamePattern, SamePattern_SameRowPerm, FACTORED} fact_t;
-typedef enum {NOROWPERM, LargeDiag, MY_PERMR}                   rowperm_t;
-typedef enum {NATURAL, MMD_ATA, MMD_AT_PLUS_A, COLAMD, MY_PERMC}colperm_t;
-typedef enum {NOTRANS, TRANS, CONJ}                             trans_t;
-typedef enum {NOEQUIL, ROW, COL, BOTH}                          DiagScale_t;
-typedef enum {NOREFINE, SINGLE=1, DOUBLE, EXTRA}                IterRefine_t;
-typedef enum {LUSUP, UCOL, LSUB, USUB}                          MemType;
-typedef enum {HEAD, TAIL}                                       stack_end_t;
-typedef enum {SYSTEM, USER}                                     LU_space_t;
-
-/* 
- * The following enumerate type is used by the statistics variable 
- * to keep track of flop count and time spent at various stages.
- *
- * Note that not all of the fields are disjoint.
- */
-typedef enum {
-    COLPERM, /* find a column ordering that minimizes fills */
-    RELAX,   /* find artificial supernodes */
-    ETREE,   /* compute column etree */
-    EQUIL,   /* equilibrate the original matrix */
-    FACT,    /* perform LU factorization */
-    RCOND,   /* estimate reciprocal condition number */
-    SOLVE,   /* forward and back solves */
-    REFINE,  /* perform iterative refinement */
-    TRSV,    /* fraction of FACT spent in xTRSV */
-    GEMV,    /* fraction of FACT spent in xGEMV */
-    FERR,    /* estimate error bounds after iterative refinement */
-    NPHASES  /* total number of phases */
-} PhaseType;
-
-
-/***********************************************************************
- * Type definitions
- ***********************************************************************/
-typedef float    flops_t;
-typedef unsigned char Logical;
-
-/* 
- *-- This contains the options used to control the solve process.
- *
- * Fact   (fact_t)
- *        Specifies whether or not the factored form of the matrix
- *        A is supplied on entry, and if not, how the matrix A should
- *        be factorizaed.
- *        = DOFACT: The matrix A will be factorized from scratch, and the
- *             factors will be stored in L and U.
- *        = SamePattern: The matrix A will be factorized assuming
- *             that a factorization of a matrix with the same sparsity
- *             pattern was performed prior to this one. Therefore, this
- *             factorization will reuse column permutation vector 
- *             ScalePermstruct->perm_c and the column elimination tree
- *             LUstruct->etree.
- *        = SamePattern_SameRowPerm: The matrix A will be factorized
- *             assuming that a factorization of a matrix with the same
- *             sparsity        pattern and similar numerical values was 
performed
- *             prior to this one. Therefore, this factorization will reuse
- *             both row and column scaling factors R and C, both row and
- *             column permutation vectors perm_r and perm_c, and the
- *             data structure set up from the previous symbolic factorization.
- *        = FACTORED: On entry, L, U, perm_r and perm_c contain the 
- *              factored form of A. If DiagScale is not NOEQUIL, the matrix
- *              A has been equilibrated with scaling factors R and C.
- *
- * Equil  (yes_no_t)
- *        Specifies whether to equilibrate the system (scale A's row and
- *        columns to have unit norm).
- *
- * ColPerm (colperm_t)
- *        Specifies what type of column permutation to use to reduce fill.
- *        = NATURAL: use the natural ordering 
- *        = MMD_ATA: use minimum degree ordering on structure of A'*A
- *        = MMD_AT_PLUS_A: use minimum degree ordering on structure of A'+A
- *        = COLAMD: use approximate minimum degree column ordering
- *        = MY_PERMC: use the ordering specified in ScalePermstruct->perm_c[]
- *         
- * Trans  (trans_t)
- *        Specifies the form of the system of equations:
- *        = NOTRANS: A * X = B        (No transpose)
- *        = TRANS:   A**T * X = B     (Transpose)
- *        = CONJ:    A**H * X = B     (Transpose)
- *
- * IterRefine (IterRefine_t)
- *        Specifies whether to perform iterative refinement.
- *        = NO: no iterative refinement
- *        = WorkingPrec: perform iterative refinement in working precision
- *        = ExtraPrec: perform iterative refinement in extra precision
- *
- * PrintStat (yes_no_t)
- *        Specifies whether to print the solver's statistics.
- *
- * DiagPivotThresh (double, in [0.0, 1.0]) (only for sequential SuperLU)
- *        Specifies the threshold used for a diagonal entry to be an
- *        acceptable pivot.
- *
- * PivotGrowth (yes_no_t)
- *        Specifies whether to compute the reciprocal pivot growth.
- *
- * ConditionNumber (ues_no_t)
- *        Specifies whether to compute the reciprocal condition number.
- *
- * RowPerm (rowperm_t) (only for SuperLU_DIST)
- *        Specifies whether to permute rows of the original matrix.
- *        = NO: not to permute the rows
- *        = LargeDiag: make the diagonal large relative to the off-diagonal
- *        = MY_PERMR: use the permutation given in ScalePermstruct->perm_r[]
- *           
- * ReplaceTinyPivot (yes_no_t) (only for SuperLU_DIST)
- *        Specifies whether to replace the tiny diagonals by
- *        sqrt(epsilon)*||A|| during LU factorization.
- *
- * SolveInitialized (yes_no_t) (only for SuperLU_DIST)
- *        Specifies whether the initialization has been performed to the
- *        triangular solve.
- *
- * RefineInitialized (yes_no_t) (only for SuperLU_DIST)
- *        Specifies whether the initialization has been performed to the
- *        sparse matrix-vector multiplication routine needed in iterative
- *        refinement.
- */
-typedef struct {
-    fact_t        Fact;
-    yes_no_t      Equil;
-    colperm_t     ColPerm;
-    trans_t       Trans;
-    IterRefine_t  IterRefine;
-    yes_no_t      PrintStat;
-    yes_no_t      SymmetricMode;
-    double        DiagPivotThresh;
-    yes_no_t      PivotGrowth;
-    yes_no_t      ConditionNumber;
-    rowperm_t     RowPerm;
-    yes_no_t      ReplaceTinyPivot;
-    yes_no_t      SolveInitialized;
-    yes_no_t      RefineInitialized;
-} superlu_options_t;
-
-typedef struct {
-    int     *panel_histo; /* histogram of panel size distribution */
-    double  *utime;       /* running time at various phases */
-    flops_t *ops;         /* operation count at various phases */
-    int     TinyPivots;   /* number of tiny pivots */
-    int     RefineSteps;  /* number of iterative refinement steps */
-} SuperLUStat_t;
-
-
-/***********************************************************************
- * Prototypes
- ***********************************************************************/
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-extern void    Destroy_SuperMatrix_Store(SuperMatrix *);
-extern void    Destroy_CompCol_Matrix(SuperMatrix *);
-extern void    Destroy_CompRow_Matrix(SuperMatrix *);
-extern void    Destroy_SuperNode_Matrix(SuperMatrix *);
-extern void    Destroy_CompCol_Permuted(SuperMatrix *);
-extern void    Destroy_Dense_Matrix(SuperMatrix *);
-extern void    get_perm_c(int, SuperMatrix *, int *);
-extern void    set_default_options(superlu_options_t *options);
-extern void    sp_preorder (superlu_options_t *, SuperMatrix*, int*, int*,
-                           SuperMatrix*);
-extern void    superlu_abort_and_exit(char*);
-extern void    *superlu_malloc (size_t);
-extern int     *intMalloc (int);
-extern int     *intCalloc (int);
-extern void    superlu_free (void*);
-extern void    SetIWork (int, int, int, int *, int **, int **, int **,
-                         int **, int **, int **, int **);
-extern int     sp_coletree (int *, int *, int *, int, int, int *);
-extern void    relax_snode (const int, int *, const int, int *, int *);
-extern void    heap_relax_snode (const int, int *, const int, int *, int *);
-extern void    resetrep_col (const int, const int *, int *);
-extern int     spcoletree (int *, int *, int *, int, int, int *);
-extern int     *TreePostorder (int, int *);
-extern double  SuperLU_timer_ ();
-extern int     sp_ienv (int);
-extern int     lsame_ (char *, char *);
-extern int     xerbla_ (char *, int *);
-extern void    ifill (int *, int, int);
-extern void    snode_profile (int, int *);
-extern void    super_stats (int, int *);
-extern void    PrintSumm (char *, int, int, int);
-extern void    StatInit(SuperLUStat_t *);
-extern void    StatPrint (SuperLUStat_t *);
-extern void    StatFree(SuperLUStat_t *);
-extern void    print_panel_seg(int, int, int, int, int *, int *);
-extern void    check_repfnz(int, int, int, int *);
-
-#ifdef __cplusplus
-  }
-#endif
-
-#endif /* __SUPERLU_UTIL */
diff --git a/superlu/slu_zdefs.h b/superlu/slu_zdefs.h
deleted file mode 100644
index 2659d6a4..00000000
--- a/superlu/slu_zdefs.h
+++ /dev/null
@@ -1,246 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-#ifndef __SUPERLU_zSP_DEFS /* allow multiple inclusions */
-#define __SUPERLU_zSP_DEFS
-
-/*
- * File name:          zsp_defs.h
- * Purpose:             Sparse matrix types and function prototypes
- * History:
- */
-
-#ifdef _CRAY
-#include <fortran.h>
-#include <string.h>
-#endif
-
-/* Define my integer type int_t */
-typedef int int_t; /* default */
-
-#include "slu_Cnames.h"
-#include "supermatrix.h"
-#include "slu_util.h"
-#include "slu_dcomplex.h"
-
-
-/*
- * Global data structures used in LU factorization -
- * 
- *   nsuper: #supernodes = nsuper + 1, numbered [0, nsuper].
- *   (xsup,supno): supno[i] is the supernode no to which i belongs;
- *     xsup(s) points to the beginning of the s-th supernode.
- *     e.g.   supno 0 1 2 2 3 3 3 4 4 4 4 4   (n=12)
- *             xsup 0 1 2 4 7 12
- *     Note: dfs will be performed on supernode rep. relative to the new 
- *           row pivoting ordering
- *
- *   (xlsub,lsub): lsub[*] contains the compressed subscript of
- *     rectangular supernodes; xlsub[j] points to the starting
- *     location of the j-th column in lsub[*]. Note that xlsub 
- *     is indexed by column.
- *     Storage: original row subscripts
- *
- *      During the course of sparse LU factorization, we also use
- *     (xlsub,lsub) for the purpose of symmetric pruning. For each
- *     supernode {s,s+1,...,t=s+r} with first column s and last
- *     column t, the subscript set
- *             lsub[j], j=xlsub[s], .., xlsub[s+1]-1
- *     is the structure of column s (i.e. structure of this supernode).
- *     It is used for the storage of numerical values.
- *     Furthermore,
- *             lsub[j], j=xlsub[t], .., xlsub[t+1]-1
- *     is the structure of the last column t of this supernode.
- *     It is for the purpose of symmetric pruning. Therefore, the
- *     structural subscripts can be rearranged without making physical
- *     interchanges among the numerical values.
- *
- *     However, if the supernode has only one column, then we
- *     only keep one set of subscripts. For any subscript interchange
- *     performed, similar interchange must be done on the numerical
- *     values.
- *
- *     The last column structures (for pruning) will be removed
- *     after the numercial LU factorization phase.
- *
- *   (xlusup,lusup): lusup[*] contains the numerical values of the
- *     rectangular supernodes; xlusup[j] points to the starting
- *     location of the j-th column in storage vector lusup[*]
- *     Note: xlusup is indexed by column.
- *     Each rectangular supernode is stored by column-major
- *     scheme, consistent with Fortran 2-dim array storage.
- *
- *   (xusub,ucol,usub): ucol[*] stores the numerical values of
- *     U-columns outside the rectangular supernodes. The row
- *     subscript of nonzero ucol[k] is stored in usub[k].
- *     xusub[i] points to the starting location of column i in ucol.
- *     Storage: new row subscripts; that is subscripts of PA.
- */
-typedef struct {
-    int     *xsup;    /* supernode and column mapping */
-    int     *supno;   
-    int     *lsub;    /* compressed L subscripts */
-    int            *xlsub;
-    doublecomplex  *lusup;   /* L supernodes */
-    int     *xlusup;
-    doublecomplex  *ucol;    /* U columns */
-    int     *usub;
-    int            *xusub;
-    int     nzlmax;   /* current max size of lsub */
-    int     nzumax;   /*    "    "    "      ucol */
-    int     nzlumax;  /*    "    "    "     lusup */
-    int     n;        /* number of columns in the matrix */
-    LU_space_t MemModel; /* 0 - system malloc'd; 1 - user provided */
-} GlobalLU_t;
-
-typedef struct {
-    float for_lu;
-    float total_needed;
-    int   expansions;
-} mem_usage_t;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Driver routines */
-extern void
-zgssv(superlu_options_t *, SuperMatrix *, int *, int *, SuperMatrix *,
-      SuperMatrix *, SuperMatrix *, SuperLUStat_t *, int *);
-extern void
-zgssvx(superlu_options_t *, SuperMatrix *, int *, int *, int *,
-       char *, double *, double *, SuperMatrix *, SuperMatrix *,
-       void *, int, SuperMatrix *, SuperMatrix *,
-       double *, double *, double *, double *,
-       mem_usage_t *, SuperLUStat_t *, int *);
-
-/* Supernodal LU factor related */
-extern void
-zCreate_CompCol_Matrix(SuperMatrix *, int, int, int, doublecomplex *,
-                      int *, int *, Stype_t, Dtype_t, Mtype_t);
-extern void
-zCreate_CompRow_Matrix(SuperMatrix *, int, int, int, doublecomplex *,
-                      int *, int *, Stype_t, Dtype_t, Mtype_t);
-extern void
-zCopy_CompCol_Matrix(SuperMatrix *, SuperMatrix *);
-extern void
-zCreate_Dense_Matrix(SuperMatrix *, int, int, doublecomplex *, int,
-                    Stype_t, Dtype_t, Mtype_t);
-extern void
-zCreate_SuperNode_Matrix(SuperMatrix *, int, int, int, doublecomplex *, 
-                        int *, int *, int *, int *, int *,
-                        Stype_t, Dtype_t, Mtype_t);
-extern void
-zCopy_Dense_Matrix(int, int, doublecomplex *, int, doublecomplex *, int);
-
-  // extern void    countnz (const int, int *, int *, int *, GlobalLU_t *);
-  // extern void    fixupL (const int, const int *, GlobalLU_t *);
-
-extern void    zallocateA (int, int, doublecomplex **, int **, int **);
-extern void    zgstrf (superlu_options_t*, SuperMatrix*, double, 
-                       int, int, int*, void *, int, int *, int *, 
-                       SuperMatrix *, SuperMatrix *, SuperLUStat_t*, int *);
-extern int     zsnode_dfs (const int, const int, const int *, const int *,
-                            const int *, int *, int *, GlobalLU_t *);
-extern int     zsnode_bmod (const int, const int, const int, doublecomplex *,
-                              doublecomplex *, GlobalLU_t *, SuperLUStat_t*);
-extern void    zpanel_dfs (const int, const int, const int, SuperMatrix *,
-                          int *, int *, doublecomplex *, int *, int *, int *,
-                          int *, int *, int *, int *, GlobalLU_t *);
-extern void    zpanel_bmod (const int, const int, const int, const int,
-                           doublecomplex *, doublecomplex *, int *, int *,
-                          GlobalLU_t *, SuperLUStat_t*);
-extern int     zcolumn_dfs (const int, const int, int *, int *, int *, int *,
-                          int *, int *, int *, int *, int *, GlobalLU_t *);
-extern int     zcolumn_bmod (const int, const int, doublecomplex *,
-                          doublecomplex *, int *, int *, int,
-                           GlobalLU_t *, SuperLUStat_t*);
-extern int     zcopy_to_ucol (int, int, int *, int *, int *,
-                              doublecomplex *, GlobalLU_t *);         
-extern int     zpivotL (const int, const double, int *, int *, 
-                         int *, int *, int *, GlobalLU_t *, SuperLUStat_t*);
-extern void    zpruneL (const int, const int *, const int, const int,
-                         const int *, const int *, int *, GlobalLU_t *);
-extern void    zreadmt (int *, int *, int *, doublecomplex **, int **, int **);
-extern void    zGenXtrue (int, int, doublecomplex *, int);
-extern void    zFillRHS (trans_t, int, doublecomplex *, int, SuperMatrix *,
-                         SuperMatrix *);
-extern void    zgstrs (trans_t, SuperMatrix *, SuperMatrix *, int *, int *,
-                        SuperMatrix *, SuperLUStat_t*, int *);
-
-
-/* Driver related */
-
-extern void    zgsequ (SuperMatrix *, double *, double *, double *,
-                       double *, double *, int *);
-extern void    zlaqgs (SuperMatrix *, double *, double *, double,
-                        double, double, char *);
-extern void    zgscon (char *, SuperMatrix *, SuperMatrix *, 
-                        double, double *, SuperLUStat_t*, int *);
-extern double   zPivotGrowth(int, SuperMatrix *, int *, 
-                            SuperMatrix *, SuperMatrix *);
-extern void    zgsrfs (trans_t, SuperMatrix *, SuperMatrix *,
-                       SuperMatrix *, int *, int *, char *, double *, 
-                       double *, SuperMatrix *, SuperMatrix *,
-                       double *, double *, SuperLUStat_t*, int *);
-
-extern int     sp_ztrsv (char *, char *, char *, SuperMatrix *,
-                       SuperMatrix *, doublecomplex *, SuperLUStat_t*, int *);
-extern int     sp_zgemv (char *, doublecomplex, SuperMatrix *, doublecomplex *,
-                       int, doublecomplex, doublecomplex *, int);
-
-extern int     sp_zgemm (char *, char *, int, int, int, doublecomplex,
-                       SuperMatrix *, doublecomplex *, int, doublecomplex, 
-                       doublecomplex *, int);
-
-/* Memory-related */
-extern int     zLUMemInit (fact_t, void *, int, int, int, int, int,
-                            SuperMatrix *, SuperMatrix *,
-                            GlobalLU_t *, int **, doublecomplex **);
-extern void    zSetRWork (int, int, doublecomplex *, doublecomplex **, 
doublecomplex **);
-extern void    zLUWorkFree (int *, doublecomplex *, GlobalLU_t *);
-extern int     zLUMemXpand (int, int, MemType, int *, GlobalLU_t *);
-
-extern doublecomplex  *doublecomplexMalloc(int);
-extern doublecomplex  *doublecomplexCalloc(int);
-extern double  *doubleMalloc(int);
-extern double  *doubleCalloc(int);
-extern int     zmemory_usage(const int, const int, const int, const int);
-extern int     zQuerySpace (SuperMatrix *, SuperMatrix *, mem_usage_t *);
-
-/* Auxiliary routines */
-extern void    zreadhb(int *, int *, int *, doublecomplex **, int **, int **);
-extern void    zCompRow_to_CompCol(int, int, int, doublecomplex*, int*, int*,
-                                  doublecomplex **, int **, int **);
-extern void    zfill (doublecomplex *, int, doublecomplex);
-extern void    zinf_norm_error (int, SuperMatrix *, doublecomplex *);
-  // extern void    PrintPerf (SuperMatrix *, SuperMatrix *, mem_usage_t *,
-  //                    doublecomplex, doublecomplex, doublecomplex *, 
doublecomplex *, char *);
-
-/* Routines for debugging */
-extern void    zPrint_CompCol_Matrix(char *, SuperMatrix *);
-extern void    zPrint_SuperNode_Matrix(char *, SuperMatrix *);
-extern void    zPrint_Dense_Matrix(char *, SuperMatrix *);
-// extern void    print_lu_col(char *, int, int, int *, GlobalLU_t *);
-// extern void    check_tempv(int, doublecomplex *);
-
-#ifdef __cplusplus
-  }
-#endif
-
-#endif /* __SUPERLU_zSP_DEFS */
-
diff --git a/superlu/smemory.c b/superlu/smemory.c
deleted file mode 100644
index 0f2fbda4..00000000
--- a/superlu/smemory.c
+++ /dev/null
@@ -1,689 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-#include "slu_sdefs.h"
-
-/* Constants */
-#define NO_MEMTYPE  4      /* 0: lusup;
-                             1: ucol;
-                             2: lsub;
-                             3: usub */
-#define GluIntArray(n)   (5 * (n) + 5)
-
-/* Internal prototypes */
-void  *sexpand (int *, MemType,int, int, GlobalLU_t *);
-int   sLUWorkInit (int, int, int, int **, float **, LU_space_t);
-void  copy_mem_float (int, void *, void *);
-void  sStackCompress (GlobalLU_t *);
-void  sSetupSpace (void *, int, LU_space_t *);
-void  *suser_malloc (int, int);
-void  suser_free (int, int);
-
-/* External prototypes (in memory.c - prec-indep) */
-extern void    copy_mem_int    (int, void *, void *);
-extern void    user_bcopy      (char *, char *, int);
-
-/* Headers for 4 types of dynamatically managed memory */
-typedef struct e_node {
-    int size;      /* length of the memory that has been used */
-    void *mem;     /* pointer to the new malloc'd store */
-} ExpHeader;
-
-typedef struct {
-    int  size;
-    int  used;
-    int  top1;  /* grow upward, relative to &array[0] */
-    int  top2;  /* grow downward */
-    void *array;
-} LU_stack_t;
-
-/* Variables local to this file */
-static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */
-static LU_stack_t stack;
-static int no_expand;
-
-/* Macros to manipulate stack */
-#define StackFull(x)         ( x + stack.used >= stack.size )
-#define NotDoubleAlign(addr) ( (long int)addr & 7 )
-#define DoubleAlign(addr)    ( ((long int)addr + 7) & ~7L )
-#define TempSpace(m, w)      ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
-                             (w + 1) * m * sizeof(float) )
-#define Reduce(alpha)        ((alpha + 1) / 2)  /* i.e. (alpha-1)/2 + 1 */
-
-
-
-
-/*
- * Setup the memory model to be used for factorization.
- *    lwork = 0: use system malloc;
- *    lwork > 0: use user-supplied work[] space.
- */
-void sSetupSpace(void *work, int lwork, LU_space_t *MemModel)
-{
-    if ( lwork == 0 ) {
-       *MemModel = SYSTEM; /* malloc/free */
-    } else if ( lwork > 0 ) {
-       *MemModel = USER;   /* user provided space */
-       stack.used = 0;
-       stack.top1 = 0;
-       stack.top2 = (lwork/4)*4; /* must be word addressable */
-       stack.size = stack.top2;
-       stack.array = (void *) work;
-    }
-}
-
-
-
-void *suser_malloc(int bytes, int which_end)
-{
-    void *buf;
-    
-    if ( StackFull(bytes) ) return (NULL);
-
-    if ( which_end == HEAD ) {
-       buf = (char*) stack.array + stack.top1;
-       stack.top1 += bytes;
-    } else {
-       stack.top2 -= bytes;
-       buf = (char*) stack.array + stack.top2;
-    }
-    
-    stack.used += bytes;
-    return buf;
-}
-
-
-void suser_free(int bytes, int which_end)
-{
-    if ( which_end == HEAD ) {
-       stack.top1 -= bytes;
-    } else {
-       stack.top2 += bytes;
-    }
-    stack.used -= bytes;
-}
-
-
-
-/*
- * mem_usage consists of the following fields:
- *    - for_lu (float)
- *      The amount of space used in bytes for the L\U data structures.
- *    - total_needed (float)
- *      The amount of space needed in bytes to perform factorization.
- *    - expansions (int)
- *      Number of memory expansions during the LU factorization.
- */
-int sQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
-{
-    SCformat *Lstore;
-    NCformat *Ustore;
-    register int n, iword, dword, panel_size = sp_ienv(1);
-
-    Lstore = L->Store;
-    Ustore = U->Store;
-    n = L->ncol;
-    iword = sizeof(int);
-    dword = sizeof(float);
-
-    /* For LU factors */
-    mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
-                                dword + Lstore->rowind_colptr[n] * iword );
-    mem_usage->for_lu += (float)( (n + 1) * iword +
-                                Ustore->colptr[n] * (dword + iword) );
-
-    /* Working storage to support factorization */
-    mem_usage->total_needed = mem_usage->for_lu +
-       (float)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
-               (panel_size + 1) * n * dword );
-
-    mem_usage->expansions = --no_expand;
-
-    return 0;
-} /* sQuerySpace */
-
-/*
- * Allocate storage for the data structures common to all factor routines.
- * For those unpredictable size, make a guess as FILL * nnz(A).
- * Return value:
- *     If lwork = -1, return the estimated amount of space required, plus n;
- *     otherwise, return the amount of space actually allocated when
- *     memory allocation failure occurred.
- */
-int
-sLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
-         int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu,
-         int **iwork, float **dwork)
-{
-    int      info, iword, dword;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    int      *xsup, *supno;
-    int      *lsub, *xlsub;
-    float   *lusup;
-    int      *xlusup;
-    float   *ucol;
-    int      *usub, *xusub;
-    int      nzlmax, nzumax, nzlumax;
-    int      FILL = sp_ienv(6);
-    
-    Glu->n    = n;
-    no_expand = 0;
-    iword     = sizeof(int);
-    dword     = sizeof(float);
-
-    if ( !expanders )  
-        expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader));
-    if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
-    
-    if ( fact != SamePattern_SameRowPerm ) {
-       /* Guess for L\U factors */
-       nzumax = nzlumax = FILL * annz;
-       nzlmax = SUPERLU_MAX(1, FILL/4.) * annz;
-
-       if ( lwork == -1 ) {
-           return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
-                   + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
-        } else {
-           sSetupSpace(work, lwork, &Glu->MemModel);
-       }
-       
-#if ( PRNTlevel >= 1 )
-       printf("sLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n", 
-              FILL, nzlmax, nzumax);
-       fflush(stdout);
-#endif 
-       
-       /* Integer pointers for L\U factors */
-       if ( Glu->MemModel == SYSTEM ) {
-           xsup   = intMalloc(n+1);
-           supno  = intMalloc(n+1);
-           xlsub  = intMalloc(n+1);
-           xlusup = intMalloc(n+1);
-           xusub  = intMalloc(n+1);
-       } else {
-           xsup   = (int *)suser_malloc((n+1) * iword, HEAD);
-           supno  = (int *)suser_malloc((n+1) * iword, HEAD);
-           xlsub  = (int *)suser_malloc((n+1) * iword, HEAD);
-           xlusup = (int *)suser_malloc((n+1) * iword, HEAD);
-           xusub  = (int *)suser_malloc((n+1) * iword, HEAD);
-       }
-
-       lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
-       ucol  = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
-       lsub  = (int *)    sexpand( &nzlmax, LSUB, 0, 0, Glu );
-       usub  = (int *)    sexpand( &nzumax, USUB, 0, 1, Glu );
-
-       while ( !lusup || !ucol || !lsub || !usub ) {
-           if ( Glu->MemModel == SYSTEM ) {
-               SUPERLU_FREE(lusup); 
-               SUPERLU_FREE(ucol); 
-               SUPERLU_FREE(lsub); 
-               SUPERLU_FREE(usub);
-           } else {
-               suser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD);
-           }
-           nzlumax /= 2;
-           nzumax /= 2;
-           nzlmax /= 2;
-           if ( nzlumax < annz ) {
-               printf("Not enough memory to perform factorization.\n");
-               return (smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
-           }
-#if ( PRNTlevel >= 1)
-           printf("sLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", 
-                  nzlmax, nzumax);
-           fflush(stdout);
-#endif
-           lusup = (float *) sexpand( &nzlumax, LUSUP, 0, 0, Glu );
-           ucol  = (float *) sexpand( &nzumax, UCOL, 0, 0, Glu );
-           lsub  = (int *)    sexpand( &nzlmax, LSUB, 0, 0, Glu );
-           usub  = (int *)    sexpand( &nzumax, USUB, 0, 1, Glu );
-       }
-       
-    } else {
-       /* fact == SamePattern_SameRowPerm */
-       Lstore   = L->Store;
-       Ustore   = U->Store;
-       xsup     = Lstore->sup_to_col;
-       supno    = Lstore->col_to_sup;
-       xlsub    = Lstore->rowind_colptr;
-       xlusup   = Lstore->nzval_colptr;
-       xusub    = Ustore->colptr;
-       nzlmax   = Glu->nzlmax;    /* max from previous factorization */
-       nzumax   = Glu->nzumax;
-       nzlumax  = Glu->nzlumax;
-       
-       if ( lwork == -1 ) {
-           return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
-                   + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
-        } else if ( lwork == 0 ) {
-           Glu->MemModel = SYSTEM;
-       } else {
-           Glu->MemModel = USER;
-           stack.top2 = (lwork/4)*4; /* must be word-addressable */
-           stack.size = stack.top2;
-       }
-       
-       lsub  = expanders[LSUB].mem  = Lstore->rowind;
-       lusup = expanders[LUSUP].mem = Lstore->nzval;
-       usub  = expanders[USUB].mem  = Ustore->rowind;
-       ucol  = expanders[UCOL].mem  = Ustore->nzval;;
-       expanders[LSUB].size         = nzlmax;
-       expanders[LUSUP].size        = nzlumax;
-       expanders[USUB].size         = nzumax;
-       expanders[UCOL].size         = nzumax;  
-    }
-
-    Glu->xsup    = xsup;
-    Glu->supno   = supno;
-    Glu->lsub    = lsub;
-    Glu->xlsub   = xlsub;
-    Glu->lusup   = lusup;
-    Glu->xlusup  = xlusup;
-    Glu->ucol    = ucol;
-    Glu->usub    = usub;
-    Glu->xusub   = xusub;
-    Glu->nzlmax  = nzlmax;
-    Glu->nzumax  = nzumax;
-    Glu->nzlumax = nzlumax;
-    
-    info = sLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel);
-    if ( info )
-       return ( info + smemory_usage(nzlmax, nzumax, nzlumax, n) + n);
-    
-    ++no_expand;
-    return 0;
-    
-} /* sLUMemInit */
-
-/* Allocate known working storage. Returns 0 if success, otherwise
-   returns the number of bytes allocated so far when failure occurred. */
-int
-sLUWorkInit(int m, int n, int panel_size, int **iworkptr, 
-            float **dworkptr, LU_space_t MemModel)
-{
-    int    isize, dsize, extra;
-    float *old_ptr;
-    int    maxsuper = sp_ienv(3),
-           rowblk   = sp_ienv(4);
-
-    isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
-    dsize = (m * panel_size +
-            NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(float);
-    
-    if ( MemModel == SYSTEM ) 
-       *iworkptr = (int *) intCalloc(isize/sizeof(int));
-    else
-       *iworkptr = (int *) suser_malloc(isize, TAIL);
-    if ( ! *iworkptr ) {
-       fprintf(stderr, "sLUWorkInit: malloc fails for local iworkptr[]\n");
-       return (isize + n);
-    }
-
-    if ( MemModel == SYSTEM )
-       *dworkptr = (float *) SUPERLU_MALLOC(dsize);
-    else {
-       *dworkptr = (float *) suser_malloc(dsize, TAIL);
-       if ( NotDoubleAlign(*dworkptr) ) {
-           old_ptr = *dworkptr;
-           *dworkptr = (float*) DoubleAlign(*dworkptr);
-           *dworkptr = (float*) ((double*)*dworkptr - 1);
-           extra = (char*)old_ptr - (char*)*dworkptr;
-#ifdef DEBUG       
-           printf("sLUWorkInit: not aligned, extra %d\n", extra);
-#endif     
-           stack.top2 -= extra;
-           stack.used += extra;
-       }
-    }
-    if ( ! *dworkptr ) {
-       fprintf(stderr, "malloc fails for local dworkptr[].");
-       return (isize + dsize + n);
-    }
-       
-    return 0;
-}
-
-
-/*
- * Set up pointers for real working arrays.
- */
-void
-sSetRWork(int m, int panel_size, float *dworkptr,
-        float **dense, float **tempv)
-{
-    float zero = 0.0;
-
-    int maxsuper = sp_ienv(3),
-        rowblk   = sp_ienv(4);
-    *dense = dworkptr;
-    *tempv = *dense + panel_size*m;
-    sfill (*dense, m * panel_size, zero);
-    sfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);     
-}
-       
-/*
- * Free the working storage used by factor routines.
- */
-void sLUWorkFree(int *iwork, float *dwork, GlobalLU_t *Glu)
-{
-    if ( Glu->MemModel == SYSTEM ) {
-       SUPERLU_FREE (iwork);
-       SUPERLU_FREE (dwork);
-    } else {
-       stack.used -= (stack.size - stack.top2);
-       stack.top2 = stack.size;
-/*     sStackCompress(Glu);  */
-    }
-    
-    SUPERLU_FREE (expanders);  
-    expanders = 0;
-}
-
-/* Expand the data structures for L and U during the factorization.
- * Return value:   0 - successful return
- *               > 0 - number of bytes allocated when run out of space
- */
-int
-sLUMemXpand(int jcol,
-          int next,          /* number of elements currently in the factors */
-          MemType mem_type,  /* which type of memory to expand  */
-          int *maxlen,       /* modified - maximum length of a data structure 
*/
-          GlobalLU_t *Glu    /* modified - global LU data structures */
-          )
-{
-    void   *new_mem;
-    
-#ifdef DEBUG    
-    printf("sLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n",
-          jcol, next, *maxlen, mem_type);
-#endif    
-
-    if (mem_type == USUB) 
-       new_mem = sexpand(maxlen, mem_type, next, 1, Glu);
-    else
-       new_mem = sexpand(maxlen, mem_type, next, 0, Glu);
-    
-    if ( !new_mem ) {
-       int    nzlmax  = Glu->nzlmax;
-       int    nzumax  = Glu->nzumax;
-       int    nzlumax = Glu->nzlumax;
-       fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
-       return (smemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
-    }
-
-    switch ( mem_type ) {
-      case LUSUP:
-       Glu->lusup   = (float *) new_mem;
-       Glu->nzlumax = *maxlen;
-       break;
-      case UCOL:
-       Glu->ucol   = (float *) new_mem;
-       Glu->nzumax = *maxlen;
-       break;
-      case LSUB:
-       Glu->lsub   = (int *) new_mem;
-       Glu->nzlmax = *maxlen;
-       break;
-      case USUB:
-       Glu->usub   = (int *) new_mem;
-       Glu->nzumax = *maxlen;
-       break;
-    }
-    
-    return 0;
-    
-}
-
-
-
-void
-copy_mem_float(int howmany, void *old, void *new)
-{
-    register int i;
-    float *dold = old;
-    float *dnew = new;
-    for (i = 0; i < howmany; i++) dnew[i] = dold[i];
-}
-
-/*
- * Expand the existing storage to accommodate more fill-ins.
- */
-void
-*sexpand (
-        int *prev_len,   /* length used from previous call */
-        MemType type,    /* which part of the memory to expand */
-        int len_to_copy, /* size of the memory to be copied to new store */
-        int keep_prev,   /* = 1: use prev_len;
-                            = 0: compute new_len to expand */
-        GlobalLU_t *Glu  /* modified - global LU data structures */
-       )
-{
-    float    EXPAND = 1.5;
-    float    alpha;
-    void     *new_mem, *old_mem;
-    int      new_len, tries, lword, extra, bytes_to_copy;
-
-    alpha = EXPAND;
-
-    if ( no_expand == 0 || keep_prev ) /* First time allocate requested */
-        new_len = *prev_len;
-    else {
-       new_len = alpha * *prev_len;
-    }
-    
-    if ( type == LSUB || type == USUB ) lword = sizeof(int);
-    else lword = sizeof(float);
-
-    if ( Glu->MemModel == SYSTEM ) {
-       new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
-       if ( no_expand != 0 ) {
-           tries = 0;
-           if ( keep_prev ) {
-               if ( !new_mem ) return (NULL);
-           } else {
-               while ( !new_mem ) {
-                   if ( ++tries > 10 ) return (NULL);
-                   alpha = Reduce(alpha);
-                   new_len = alpha * *prev_len;
-                   new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
-               }
-           }
-           if ( type == LSUB || type == USUB ) {
-               copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
-           } else {
-               copy_mem_float(len_to_copy, expanders[type].mem, new_mem);
-           }
-           SUPERLU_FREE (expanders[type].mem);
-       }
-       expanders[type].mem = (void *) new_mem;
-       
-    } else { /* MemModel == USER */
-       if ( no_expand == 0 ) {
-           new_mem = suser_malloc(new_len * lword, HEAD);
-           if ( NotDoubleAlign(new_mem) &&
-               (type == LUSUP || type == UCOL) ) {
-               old_mem = new_mem;
-               new_mem = (void *)DoubleAlign(new_mem);
-               extra = (char*)new_mem - (char*)old_mem;
-#ifdef DEBUG           
-               printf("expand(): not aligned, extra %d\n", extra);
-#endif         
-               stack.top1 += extra;
-               stack.used += extra;
-           }
-           expanders[type].mem = (void *) new_mem;
-       }
-       else {
-           tries = 0;
-           extra = (new_len - *prev_len) * lword;
-           if ( keep_prev ) {
-               if ( StackFull(extra) ) return (NULL);
-           } else {
-               while ( StackFull(extra) ) {
-                   if ( ++tries > 10 ) return (NULL);
-                   alpha = Reduce(alpha);
-                   new_len = alpha * *prev_len;
-                   extra = (new_len - *prev_len) * lword;          
-               }
-           }
-
-           if ( type != USUB ) {
-               new_mem = (void*)((char*)expanders[type + 1].mem + extra);
-               bytes_to_copy = (char*)stack.array + stack.top1
-                   - (char*)expanders[type + 1].mem;
-               user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
-
-               if ( type < USUB ) {
-                   Glu->usub = expanders[USUB].mem =
-                       (void*)((char*)expanders[USUB].mem + extra);
-               }
-               if ( type < LSUB ) {
-                   Glu->lsub = expanders[LSUB].mem =
-                       (void*)((char*)expanders[LSUB].mem + extra);
-               }
-               if ( type < UCOL ) {
-                   Glu->ucol = expanders[UCOL].mem =
-                       (void*)((char*)expanders[UCOL].mem + extra);
-               }
-               stack.top1 += extra;
-               stack.used += extra;
-               if ( type == UCOL ) {
-                   stack.top1 += extra;   /* Add same amount for USUB */
-                   stack.used += extra;
-               }
-               
-           } /* if ... */
-
-       } /* else ... */
-    }
-
-    expanders[type].size = new_len;
-    *prev_len = new_len;
-    if ( no_expand ) ++no_expand;
-    
-    return (void *) expanders[type].mem;
-    
-} /* sexpand */
-
-
-/*
- * Compress the work[] array to remove fragmentation.
- */
-void
-sStackCompress(GlobalLU_t *Glu)
-{
-    register int iword, dword, ndim;
-    char    *last, *fragment;
-    int      *ifrom, *ito;
-    float   *dfrom, *dto;
-    int      *xlsub, *lsub, *xusub, *usub, *xlusup;
-    float   *ucol, *lusup;
-    
-    iword = sizeof(int);
-    dword = sizeof(float);
-    ndim = Glu->n;
-
-    xlsub  = Glu->xlsub;
-    lsub   = Glu->lsub;
-    xusub  = Glu->xusub;
-    usub   = Glu->usub;
-    xlusup = Glu->xlusup;
-    ucol   = Glu->ucol;
-    lusup  = Glu->lusup;
-    
-    dfrom = ucol;
-    dto = (float *)((char*)lusup + xlusup[ndim] * dword);
-    copy_mem_float(xusub[ndim], dfrom, dto);
-    ucol = dto;
-
-    ifrom = lsub;
-    ito = (int *) ((char*)ucol + xusub[ndim] * iword);
-    copy_mem_int(xlsub[ndim], ifrom, ito);
-    lsub = ito;
-    
-    ifrom = usub;
-    ito = (int *) ((char*)lsub + xlsub[ndim] * iword);
-    copy_mem_int(xusub[ndim], ifrom, ito);
-    usub = ito;
-    
-    last = (char*)usub + xusub[ndim] * iword;
-    fragment = (char*) (((char*)stack.array + stack.top1) - last);
-    stack.used -= (long int) fragment;
-    stack.top1 -= (long int) fragment;
-
-    Glu->ucol = ucol;
-    Glu->lsub = lsub;
-    Glu->usub = usub;
-    
-#ifdef DEBUG
-    printf("sStackCompress: fragment %d\n", fragment);
-    /* for (last = 0; last < ndim; ++last)
-       print_lu_col("After compress:", last, 0);*/
-#endif    
-    
-}
-
-/*
- * Allocate storage for original matrix A
- */
-void
-sallocateA(int n, int nnz, float **a, int **asub, int **xa)
-{
-    *a    = (float *) floatMalloc(nnz);
-    *asub = (int *) intMalloc(nnz);
-    *xa   = (int *) intMalloc(n+1);
-}
-
-
-float *floatMalloc(int n)
-{
-    float *buf;
-    buf = (float *) SUPERLU_MALLOC((size_t)n * sizeof(float)); 
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC failed for buf in floatMalloc()\n");
-    }
-    return (buf);
-}
-
-float *floatCalloc(int n)
-{
-    float *buf;
-    register int i;
-    float zero = 0.0;
-    buf = (float *) SUPERLU_MALLOC((size_t)n * sizeof(float));
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC failed for buf in floatCalloc()\n");
-    }
-    for (i = 0; i < n; ++i) buf[i] = zero;
-    return (buf);
-}
-
-
-int smemory_usage(const int nzlmax, const int nzumax, 
-                 const int nzlumax, const int n)
-{
-    register int iword, dword;
-
-    iword   = sizeof(int);
-    dword   = sizeof(float);
-    
-    return (10 * n * iword +
-           nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
-
-}
diff --git a/superlu/smyblas2.c b/superlu/smyblas2.c
deleted file mode 100644
index 8e9bb09a..00000000
--- a/superlu/smyblas2.c
+++ /dev/null
@@ -1,245 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-/*
- * File name:          smyblas2.c
- * Purpose:
- *     Level 2 BLAS operations: solves and matvec, written in C.
- * Note:
- *     This is only used when the system lacks an efficient BLAS library.
- */
-
-/*
- * Solves a dense UNIT lower triangular system. The unit lower 
- * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 
- * The solution will be returned in the rhs vector.
- */
-void slsolve ( int ldm, int ncol, float *M, float *rhs )
-{
-    int k;
-    float x0, x1, x2, x3, x4, x5, x6, x7;
-    float *M0;
-    register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
-    register int firstcol = 0;
-
-    M0 = &M[0];
-
-    while ( firstcol < ncol - 7 ) { /* Do 8 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-      Mki2 = Mki1 + ldm + 1;
-      Mki3 = Mki2 + ldm + 1;
-      Mki4 = Mki3 + ldm + 1;
-      Mki5 = Mki4 + ldm + 1;
-      Mki6 = Mki5 + ldm + 1;
-      Mki7 = Mki6 + ldm + 1;
-
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-      x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
-      x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
-      x4 = rhs[firstcol+4] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-                          - x3 * *Mki3++;
-      x5 = rhs[firstcol+5] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-                          - x3 * *Mki3++ - x4 * *Mki4++;
-      x6 = rhs[firstcol+6] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-                          - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++;
-      x7 = rhs[firstcol+7] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++
-                          - x3 * *Mki3++ - x4 * *Mki4++ - x5 * *Mki5++
-                          - x6 * *Mki6++;
-
-      rhs[++firstcol] = x1;
-      rhs[++firstcol] = x2;
-      rhs[++firstcol] = x3;
-      rhs[++firstcol] = x4;
-      rhs[++firstcol] = x5;
-      rhs[++firstcol] = x6;
-      rhs[++firstcol] = x7;
-      ++firstcol;
-    
-      for (k = firstcol; k < ncol; k++)
-       rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
-                       - x2 * *Mki2++ - x3 * *Mki3++
-                        - x4 * *Mki4++ - x5 * *Mki5++
-                       - x6 * *Mki6++ - x7 * *Mki7++;
- 
-      M0 += 8 * ldm + 8;
-    }
-
-    while ( firstcol < ncol - 3 ) { /* Do 4 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-      Mki2 = Mki1 + ldm + 1;
-      Mki3 = Mki2 + ldm + 1;
-
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-      x2 = rhs[firstcol+2] - x0 * *Mki0++ - x1 * *Mki1++;
-      x3 = rhs[firstcol+3] - x0 * *Mki0++ - x1 * *Mki1++ - x2 * *Mki2++;
-
-      rhs[++firstcol] = x1;
-      rhs[++firstcol] = x2;
-      rhs[++firstcol] = x3;
-      ++firstcol;
-    
-      for (k = firstcol; k < ncol; k++)
-       rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++
-                       - x2 * *Mki2++ - x3 * *Mki3++;
- 
-      M0 += 4 * ldm + 4;
-    }
-
-    if ( firstcol < ncol - 1 ) { /* Do 2 columns */
-      Mki0 = M0 + 1;
-      Mki1 = Mki0 + ldm + 1;
-
-      x0 = rhs[firstcol];
-      x1 = rhs[firstcol+1] - x0 * *Mki0++;
-
-      rhs[++firstcol] = x1;
-      ++firstcol;
-    
-      for (k = firstcol; k < ncol; k++)
-       rhs[k] = rhs[k] - x0 * *Mki0++ - x1 * *Mki1++;
- 
-    }
-    
-}
-
-/*
- * Solves a dense upper triangular system. The upper triangular matrix is
- * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
- * in the rhs vector.
- */
-void
-susolve ( ldm, ncol, M, rhs )
-int ldm;       /* in */
-int ncol;      /* in */
-float *M;      /* in */
-float *rhs;    /* modified */
-{
-    float xj;
-    int jcol, j, irow;
-
-    jcol = ncol - 1;
-
-    for (j = 0; j < ncol; j++) {
-
-       xj = rhs[jcol] / M[jcol + jcol*ldm];            /* M(jcol, jcol) */
-       rhs[jcol] = xj;
-       
-       for (irow = 0; irow < jcol; irow++)
-           rhs[irow] -= xj * M[irow + jcol*ldm];       /* M(irow, jcol) */
-
-       jcol--;
-
-    }
-}
-
-
-/*
- * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
- * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
- */
-void smatvec ( ldm, nrow, ncol, M, vec, Mxvec )
-
-int ldm;       /* in -- leading dimension of M */
-int nrow;      /* in */ 
-int ncol;      /* in */
-float *M;      /* in */
-float *vec;    /* in */
-float *Mxvec;  /* in/out */
-
-{
-    float vi0, vi1, vi2, vi3, vi4, vi5, vi6, vi7;
-    float *M0;
-    register float *Mki0, *Mki1, *Mki2, *Mki3, *Mki4, *Mki5, *Mki6, *Mki7;
-    register int firstcol = 0;
-    int k;
-
-    M0 = &M[0];
-    while ( firstcol < ncol - 7 ) {    /* Do 8 columns */
-
-       Mki0 = M0;
-       Mki1 = Mki0 + ldm;
-        Mki2 = Mki1 + ldm;
-        Mki3 = Mki2 + ldm;
-       Mki4 = Mki3 + ldm;
-       Mki5 = Mki4 + ldm;
-       Mki6 = Mki5 + ldm;
-       Mki7 = Mki6 + ldm;
-
-       vi0 = vec[firstcol++];
-       vi1 = vec[firstcol++];
-       vi2 = vec[firstcol++];
-       vi3 = vec[firstcol++];  
-       vi4 = vec[firstcol++];
-       vi5 = vec[firstcol++];
-       vi6 = vec[firstcol++];
-       vi7 = vec[firstcol++];  
-
-       for (k = 0; k < nrow; k++) 
-           Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
-                     + vi2 * *Mki2++ + vi3 * *Mki3++ 
-                     + vi4 * *Mki4++ + vi5 * *Mki5++
-                     + vi6 * *Mki6++ + vi7 * *Mki7++;
-
-       M0 += 8 * ldm;
-    }
-
-    while ( firstcol < ncol - 3 ) {    /* Do 4 columns */
-
-       Mki0 = M0;
-       Mki1 = Mki0 + ldm;
-       Mki2 = Mki1 + ldm;
-       Mki3 = Mki2 + ldm;
-
-       vi0 = vec[firstcol++];
-       vi1 = vec[firstcol++];
-       vi2 = vec[firstcol++];
-       vi3 = vec[firstcol++];  
-       for (k = 0; k < nrow; k++) 
-           Mxvec[k] += vi0 * *Mki0++ + vi1 * *Mki1++
-                     + vi2 * *Mki2++ + vi3 * *Mki3++ ;
-
-       M0 += 4 * ldm;
-    }
-
-    while ( firstcol < ncol ) {                /* Do 1 column */
-
-       Mki0 = M0;
-       vi0 = vec[firstcol++];
-       for (k = 0; k < nrow; k++)
-           Mxvec[k] += vi0 * *Mki0++;
-
-       M0 += ldm;
-    }
-       
-}
-
diff --git a/superlu/sp_coletree.c b/superlu/sp_coletree.c
deleted file mode 100644
index 48487085..00000000
--- a/superlu/sp_coletree.c
+++ /dev/null
@@ -1,354 +0,0 @@
-
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-/*  Elimination tree computation and layout routines */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_ddefs.h"
-
-/* 
- *  Implementation of disjoint set union routines.
- *  Elements are integers in 0..n-1, and the 
- *  names of the sets themselves are of type int.
- *  
- *  Calls are:
- *  initialize_disjoint_sets (n) initial call.
- *  s = make_set (i)             returns a set containing only i.
- *  s = link (t, u)             returns s = t union u, destroying t and u.
- *  s = find (i)                return name of set containing i.
- *  finalize_disjoint_sets      final call.
- *
- *  This implementation uses path compression but not weighted union.
- *  See Tarjan's book for details.
- *  John Gilbert, CMI, 1987.
- *
- *  Implemented path-halving by XSL 07/05/95.
- */
-
-static int     *pp;            /* parent array for sets */
-
-static 
-int *mxCallocInt(int n)
-{
-    register int i;
-    int *buf;
-
-    buf = (int *) SUPERLU_MALLOC( n * sizeof(int) );
-    if ( !buf ) {
-         ABORT("SUPERLU_MALLOC fails for buf in mxCallocInt()");
-       }
-    for (i = 0; i < n; i++) buf[i] = 0;
-    return (buf);
-}
-      
-static
-void initialize_disjoint_sets (
-       int n
-       )
-{
-       pp = mxCallocInt(n);
-}
-
-
-static
-int make_set (
-       int i
-       )
-{
-       pp[i] = i;
-       return i;
-}
-
-
-static
-int link (
-       int s,
-       int t
-       )
-{
-       pp[s] = t;
-       return t;
-}
-
-
-/* PATH HALVING */
-static
-int find (int i)
-{
-    register int p, gp;
-    
-    p = pp[i];
-    gp = pp[p];
-    while (gp != p) {
-       pp[i] = gp;
-       i = gp;
-       p = pp[i];
-       gp = pp[p];
-    }
-    return (p);
-}
-
-#if 0
-/* PATH COMPRESSION */
-static
-int find (
-       int i
-       )
-{
-       if (pp[i] != i) 
-               pp[i] = find (pp[i]);
-       return pp[i];
-}
-#endif
-
-static
-void finalize_disjoint_sets (
-       void
-       )
-{
-       SUPERLU_FREE(pp);
-}
-
-
-/*
- *      Find the elimination tree for A'*A.
- *      This uses something similar to Liu's algorithm. 
- *      It runs in time O(nz(A)*log n) and does not form A'*A.
- *
- *      Input:
- *        Sparse matrix A.  Numeric values are ignored, so any
- *        explicit zeros are treated as nonzero.
- *      Output:
- *        Integer array of parents representing the elimination
- *        tree of the symbolic product A'*A.  Each vertex is a
- *        column of A, and nc means a root of the elimination forest.
- *
- *      John R. Gilbert, Xerox, 10 Dec 1990
- *      Based on code by JRG dated 1987, 1988, and 1990.
- */
-
-/*
- * Nonsymmetric elimination tree
- */
-int
-sp_coletree(
-           int *acolst, int *acolend, /* column start and end past 1 */
-           int *arow,                 /* row indices of A */
-           int nr, int nc,            /* dimension of A */
-           int *parent                /* parent in elim tree */
-           )
-{
-       int     *root;                  /* root of subtee of etree      */
-       int     *firstcol;              /* first nonzero col in each row*/
-       int     rset, cset;             
-       int     row, col;
-       int     rroot;
-       int     p;
-
-       root = mxCallocInt (nc);
-       initialize_disjoint_sets (nc);
-
-       /* Compute firstcol[row] = first nonzero column in row */
-
-       firstcol = mxCallocInt (nr);
-       for (row = 0; row < nr; firstcol[row++] = nc);
-       for (col = 0; col < nc; col++) 
-               for (p = acolst[col]; p < acolend[col]; p++) {
-                       row = arow[p];
-                       firstcol[row] = SUPERLU_MIN(firstcol[row], col);
-               }
-
-       /* Compute etree by Liu's algorithm for symmetric matrices,
-           except use (firstcol[r],c) in place of an edge (r,c) of A.
-          Thus each row clique in A'*A is replaced by a star
-          centered at its first vertex, which has the same fill. */
-
-       for (col = 0; col < nc; col++) {
-               cset = make_set (col);
-               root[cset] = col;
-               parent[col] = nc; /* Matlab */
-               for (p = acolst[col]; p < acolend[col]; p++) {
-                       row = firstcol[arow[p]];
-                       if (row >= col) continue;
-                       rset = find (row);
-                       rroot = root[rset];
-                       if (rroot != col) {
-                               parent[rroot] = col;
-                               cset = link (cset, rset);
-                               root[cset] = col;
-                       }
-               }
-       }
-
-       SUPERLU_FREE (root);
-       SUPERLU_FREE (firstcol);
-       finalize_disjoint_sets ();
-       return 0;
-}
-
-/*
- *  q = TreePostorder (n, p);
- *
- *     Postorder a tree.
- *     Input:
- *       p is a vector of parent pointers for a forest whose
- *        vertices are the integers 0 to n-1; p[root]==n.
- *     Output:
- *       q is a vector indexed by 0..n-1 such that q[i] is the
- *       i-th vertex in a postorder numbering of the tree.
- *
- *        ( 2/7/95 modified by X.Li:
- *          q is a vector indexed by 0:n-1 such that vertex i is the
- *          q[i]-th vertex in a postorder numbering of the tree.
- *          That is, this is the inverse of the previous q. )
- *
- *     In the child structure, lower-numbered children are represented
- *     first, so that a tree which is already numbered in postorder
- *     will not have its order changed.
- *    
- *  Written by John Gilbert, Xerox, 10 Dec 1990.
- *  Based on code written by John Gilbert at CMI in 1987.
- */
-
-static int     *first_kid, *next_kid;  /* Linked list of children.     */
-static int     *post, postnum;
-
-static
-/*
- * Depth-first search from vertex v.
- */
-void etdfs (
-       int     v
-       )
-{
-       int     w;
-
-       for (w = first_kid[v]; w != -1; w = next_kid[w]) {
-               etdfs (w);
-       }
-       /* post[postnum++] = v; in Matlab */
-       post[v] = postnum++;    /* Modified by X.Li on 2/14/95 */
-}
-
-
-/*
- * Post order a tree
- */
-int *TreePostorder(
-       int n,
-       int *parent
-)
-{
-       int     v, dad;
-
-       /* Allocate storage for working arrays and results      */
-       first_kid =     mxCallocInt (n+1);
-       next_kid  =     mxCallocInt (n+1);
-       post      =     mxCallocInt (n+1);
-
-       /* Set up structure describing children */
-       for (v = 0; v <= n; first_kid[v++] = -1);
-       for (v = n-1; v >= 0; v--) {
-               dad = parent[v];
-               next_kid[v] = first_kid[dad];
-               first_kid[dad] = v;
-       }
-
-       /* Depth-first search from dummy root vertex #n */
-       postnum = 0;
-       etdfs (n);
-
-       SUPERLU_FREE (first_kid);
-       SUPERLU_FREE (next_kid);
-       return post;
-}
-
-
-/*
- *      p = spsymetree (A);
- *
- *      Find the elimination tree for symmetric matrix A.
- *      This uses Liu's algorithm, and runs in time O(nz*log n).
- *
- *      Input:
- *        Square sparse matrix A.  No check is made for symmetry;
- *        elements below and on the diagonal are ignored.
- *        Numeric values are ignored, so any explicit zeros are 
- *        treated as nonzero.
- *      Output:
- *        Integer array of parents representing the etree, with n
- *        meaning a root of the elimination forest.
- *      Note:  
- *        This routine uses only the upper triangle, while sparse
- *        Cholesky (as in spchol.c) uses only the lower.  Matlab's
- *        dense Cholesky uses only the upper.  This routine could
- *        be modified to use the lower triangle either by transposing
- *        the matrix or by traversing it by rows with auxiliary
- *        pointer and link arrays.
- *
- *      John R. Gilbert, Xerox, 10 Dec 1990
- *      Based on code by JRG dated 1987, 1988, and 1990.
- *      Modified by X.S. Li, November 1999.
- */
-
-/*
- * Symmetric elimination tree
- */
-int
-sp_symetree(
-           int *acolst, int *acolend, /* column starts and ends past 1 */
-           int *arow,            /* row indices of A */
-           int n,                /* dimension of A */
-           int *parent     /* parent in elim tree */
-           )
-{
-       int     *root;              /* root of subtree of etree         */
-       int     rset, cset;             
-       int     row, col;
-       int     rroot;
-       int     p;
-
-       root = mxCallocInt (n);
-       initialize_disjoint_sets (n);
-
-       for (col = 0; col < n; col++) {
-               cset = make_set (col);
-               root[cset] = col;
-               parent[col] = n; /* Matlab */
-               for (p = acolst[col]; p < acolend[col]; p++) {
-                       row = arow[p];
-                       if (row >= col) continue;
-                       rset = find (row);
-                       rroot = root[rset];
-                       if (rroot != col) {
-                               parent[rroot] = col;
-                               cset = link (cset, rset);
-                               root[cset] = col;
-                       }
-               }
-       }
-       SUPERLU_FREE (root);
-       finalize_disjoint_sets ();
-       return 0;
-} /* SP_SYMETREE */
diff --git a/superlu/sp_ienv.c b/superlu/sp_ienv.c
deleted file mode 100644
index 66a854df..00000000
--- a/superlu/sp_ienv.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-/*
- * File name:          sp_ienv.c
- * History:             Modified from lapack routine ILAENV
- */
-#include "slu_Cnames.h"
-extern void xerbla_();
-
-int
-sp_ienv(int ispec)
-{
-/*
-    Purpose   
-    =======   
-
-    sp_ienv() is inquired to choose machine-dependent parameters for the
-    local environment. See ISPEC for a description of the parameters.   
-
-    This version provides a set of parameters which should give good,   
-    but not optimal, performance on many of the currently available   
-    computers.  Users are encouraged to modify this subroutine to set   
-    the tuning parameters for their particular machine using the option   
-    and problem size information in the arguments.   
-
-    Arguments   
-    =========   
-
-    ISPEC   (input) int
-            Specifies the parameter to be returned as the value of SP_IENV.   
-            = 1: the panel size w; a panel consists of w consecutive
-                columns of matrix A in the process of Gaussian elimination.
-                The best value depends on machine's cache characters.
-            = 2: the relaxation parameter relax; if the number of
-                nodes (columns) in a subtree of the elimination tree is less
-                than relax, this subtree is considered as one supernode,
-                regardless of their row structures.
-            = 3: the maximum size for a supernode;
-           = 4: the minimum row dimension for 2-D blocking to be used;
-           = 5: the minimum column dimension for 2-D blocking to be used;
-           = 6: the estimated fills factor for L and U, compared with A;
-           
-   (SP_IENV) (output) int
-            >= 0: the value of the parameter specified by ISPEC   
-            < 0:  if SP_IENV = -k, the k-th argument had an illegal value. 
-  
-    ===================================================================== 
-*/
-    int i;
-
-    switch (ispec) {
-       case 1: return (10);
-       case 2: return (5);
-       case 3: return (100);
-       case 4: return (200);
-       case 5: return (40);
-        case 6: return (20);
-    }
-
-    /* Invalid value for ISPEC */
-    i = 1;
-    xerbla_("sp_ienv", &i);
-    return 0;
-
-} /* sp_ienv_ */
-
diff --git a/superlu/sp_preorder.c b/superlu/sp_preorder.c
deleted file mode 100644
index cd1a5264..00000000
--- a/superlu/sp_preorder.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-#include "slu_ddefs.h"
-
-void
-sp_preorder(superlu_options_t *options,  SuperMatrix *A, int *perm_c, 
-           int *etree, SuperMatrix *AC)
-{
-/*
- * Purpose
- * =======
- *
- * sp_preorder() permutes the columns of the original matrix. It performs
- * the following steps:
- *
- *    1. Apply column permutation perm_c[] to A's column pointers to form AC;
- *
- *    2. If options->Fact = DOFACT, then
- *       (1) Compute column elimination tree etree[] of AC'AC;
- *       (2) Post order etree[] to get a postordered elimination tree etree[],
- *           and a postorder permutation post[];
- *       (3) Apply post[] permutation to columns of AC;
- *       (4) Overwrite perm_c[] with the product perm_c * post.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         Specifies whether or not the elimination tree will be re-used.
- *         If options->Fact == DOFACT, this means first time factor A, 
- *         etree is computed, postered, and output.
- *         Otherwise, re-factor A, etree is input, unchanged on exit.
- *
- * A       (input) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of the linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = NC or SLU_NCP; Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- * perm_c  (input/output) int*
- *        Column permutation vector of size A->ncol, which defines the 
- *         permutation matrix Pc; perm_c[i] = j means column i of A is 
- *         in position j in A*Pc.
- *         If options->Fact == DOFACT, perm_c is both input and output.
- *         On output, it is changed according to a postorder of etree.
- *         Otherwise, perm_c is input.
- *
- * etree   (input/output) int*
- *         Elimination tree of Pc'*A'*A*Pc, dimension A->ncol.
- *         If options->Fact == DOFACT, etree is an output argument,
- *         otherwise it is an input argument.
- *         Note: etree is a vector of parent pointers for a forest whose
- *         vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *
- * AC      (output) SuperMatrix*
- *         The resulting matrix after applied the column permutation
- *         perm_c[] to matrix A. The type of AC can be:
- *         Stype = SLU_NCP; Dtype = A->Dtype; Mtype = SLU_GE.
- *
- */
-
-    NCformat  *Astore;
-    NCPformat *ACstore;
-    int       *iwork, *post;
-    register  int n, i;
-
-    n = A->ncol;
-    
-    /* Apply column permutation perm_c to A's column pointers so to
-       obtain NCP format in AC = A*Pc.  */
-    AC->Stype       = SLU_NCP;
-    AC->Dtype       = A->Dtype;
-    AC->Mtype       = A->Mtype;
-    AC->nrow        = A->nrow;
-    AC->ncol        = A->ncol;
-    Astore          = A->Store;
-    ACstore = AC->Store = (void *) SUPERLU_MALLOC( sizeof(NCPformat) );
-    if ( !ACstore ) ABORT("SUPERLU_MALLOC fails for ACstore");
-    ACstore->nnz    = Astore->nnz;
-    ACstore->nzval  = Astore->nzval;
-    ACstore->rowind = Astore->rowind;
-    ACstore->colbeg = (int*) SUPERLU_MALLOC(n*sizeof(int));
-    if ( !(ACstore->colbeg) ) ABORT("SUPERLU_MALLOC fails for 
ACstore->colbeg");
-    ACstore->colend = (int*) SUPERLU_MALLOC(n*sizeof(int));
-    if ( !(ACstore->colend) ) ABORT("SUPERLU_MALLOC fails for 
ACstore->colend");
-
-#ifdef DEBUG
-    print_int_vec("pre_order:", n, perm_c);
-    check_perm("Initial perm_c", n, perm_c);
-#endif      
-
-    for (i = 0; i < n; i++) {
-       ACstore->colbeg[perm_c[i]] = Astore->colptr[i]; 
-       ACstore->colend[perm_c[i]] = Astore->colptr[i+1];
-    }
-       
-    if ( options->Fact == DOFACT ) {
-#undef ETREE_ATplusA
-#ifdef ETREE_ATplusA
-        /*--------------------------------------------
-         COMPUTE THE ETREE OF Pc*(A'+A)*Pc'.
-         --------------------------------------------*/
-        int *b_colptr, *b_rowind, bnz, j;
-       int *c_colbeg, *c_colend;
-
-        /*printf("Use etree(A'+A)\n");*/
-
-       /* Form B = A + A'. */
-       at_plus_a(n, Astore->nnz, Astore->colptr, Astore->rowind,
-                 &bnz, &b_colptr, &b_rowind);
-
-       /* Form C = Pc*B*Pc'. */
-       c_colbeg = (int*) SUPERLU_MALLOC(2*n*sizeof(int));
-       c_colend = c_colbeg + n;
-       if (!c_colbeg ) ABORT("SUPERLU_MALLOC fails for c_colbeg/c_colend");
-       for (i = 0; i < n; i++) {
-           c_colbeg[perm_c[i]] = b_colptr[i]; 
-           c_colend[perm_c[i]] = b_colptr[i+1];
-       }
-       for (j = 0; j < n; ++j) {
-           for (i = c_colbeg[j]; i < c_colend[j]; ++i) {
-               b_rowind[i] = perm_c[b_rowind[i]];
-           }
-       }
-
-       /* Compute etree of C. */
-       sp_symetree(c_colbeg, c_colend, b_rowind, n, etree);
-
-       SUPERLU_FREE(b_colptr);
-       if ( bnz ) SUPERLU_FREE(b_rowind);
-       SUPERLU_FREE(c_colbeg);
-       
-#else
-        /*--------------------------------------------
-         COMPUTE THE COLUMN ELIMINATION TREE.
-         --------------------------------------------*/
-       sp_coletree(ACstore->colbeg, ACstore->colend, ACstore->rowind,
-                   A->nrow, A->ncol, etree);
-#endif
-#ifdef DEBUG   
-       print_int_vec("etree:", n, etree);
-#endif 
-       
-       /* In symmetric mode, do not do postorder here. */
-       if ( options->SymmetricMode == NO ) {
-           /* Post order etree */
-           post = (int *) TreePostorder(n, etree);
-           /* for (i = 0; i < n+1; ++i) inv_post[post[i]] = i;
-              iwork = post; */
-
-#ifdef DEBUG
-           print_int_vec("post:", n+1, post);
-           check_perm("post", n, post);        
-#endif 
-           iwork = (int*) SUPERLU_MALLOC((n+1)*sizeof(int)); 
-           if ( !iwork ) ABORT("SUPERLU_MALLOC fails for iwork[]");
-
-           /* Renumber etree in postorder */
-           for (i = 0; i < n; ++i) iwork[post[i]] = post[etree[i]];
-           for (i = 0; i < n; ++i) etree[i] = iwork[i];
-
-#ifdef DEBUG   
-           print_int_vec("postorder etree:", n, etree);
-#endif
-       
-           /* Postmultiply A*Pc by post[] */
-           for (i = 0; i < n; ++i) iwork[post[i]] = ACstore->colbeg[i];
-           for (i = 0; i < n; ++i) ACstore->colbeg[i] = iwork[i];
-           for (i = 0; i < n; ++i) iwork[post[i]] = ACstore->colend[i];
-           for (i = 0; i < n; ++i) ACstore->colend[i] = iwork[i];
-
-           for (i = 0; i < n; ++i)
-               iwork[i] = post[perm_c[i]];  /* product of perm_c and post */
-           for (i = 0; i < n; ++i) perm_c[i] = iwork[i];
-
-#ifdef DEBUG
-           print_int_vec("Pc*post:", n, perm_c);
-           check_perm("final perm_c", n, perm_c);      
-#endif
-           SUPERLU_FREE (post);
-           SUPERLU_FREE (iwork);
-       } /* end postordering */
-
-    } /* if options->Fact == DOFACT ... */
-
-}
-
-int check_perm(char *what, int n, int *perm)
-{
-    register int i;
-    int          *marker;
-    marker = (int *) calloc(n, sizeof(int));
-
-    for (i = 0; i < n; ++i) {
-       if ( marker[perm[i]] == 1 || perm[i] >= n ) {
-           printf("%s: Not a valid PERM[%d] = %d\n", what, i, perm[i]);
-           ABORT("check_perm");
-       } else {
-           marker[perm[i]] = 1;
-       }
-    }
-
-    SUPERLU_FREE(marker);
-    return 0;
-}
diff --git a/superlu/spanel_bmod.c b/superlu/spanel_bmod.c
deleted file mode 100644
index 91bbb738..00000000
--- a/superlu/spanel_bmod.c
+++ /dev/null
@@ -1,462 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_sdefs.h"
-extern void strsv_();
-extern void sgemv_();
-
-
-/* 
- * Function prototypes 
- */
-void slsolve(int, int, float *, float *);
-void smatvec(int, int, int, float *, float *, float *);
-extern void scheck_tempv();
-
-void
-spanel_bmod (
-           const int  m,          /* in - number of rows in the matrix */
-           const int  w,          /* in */
-           const int  jcol,       /* in */
-           const int  nseg,       /* in */
-           float     *dense,     /* out, of size n by w */
-           float     *tempv,     /* working array */
-           int        *segrep,    /* in */
-           int        *repfnz,    /* in, of size n by w */
-           GlobalLU_t *Glu,       /* modified */
-           SuperLUStat_t *stat    /* output */
-           )
-{
-/* 
- * Purpose
- * =======
- *
- *    Performs numeric block updates (sup-panel) in topological order.
- *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
- *    Special processing on the supernodal portion of L\U[*,j]
- *
- *    Before entering this routine, the original nonzeros in the panel 
- *    were already copied into the spa[m,w].
- *
- *    Updated/Output parameters-
- *     dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned 
- *      collectively in the m-by-w vector dense[*]. 
- *
- */
-
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-         ftcs2 = _cptofcd("N", strlen("N")),
-         ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    int          incx = 1, incy = 1;
-    float       alpha, beta;
-#endif
-
-    register int k, ksub;
-    int          fsupc, nsupc, nsupr, nrow;
-    int          krep, krep_ind;
-    float       ukj, ukj1, ukj2;
-    int          luptr, luptr1, luptr2;
-    int          segsze;
-    int          block_nrow;  /* no of rows in a block row */
-    register int lptr;       /* Points to the row subscripts of a supernode */
-    int          kfnz, irow, no_zeros; 
-    register int isub, isub1, i;
-    register int jj;         /* Index through each column in the panel */
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    float       *lusup;
-    int          *xlusup;
-    int          *repfnz_col; /* repfnz[] for a column in the panel */
-    float       *dense_col;  /* dense[] for a column in the panel */
-    float       *tempv1;             /* Used in 1-D update */
-    float       *TriTmp, *MatvecTmp; /* used in 2-D update */
-    float      zero = 0.0;
-    float      one = 1.0;
-    register int ldaTmp;
-    register int r_ind, r_hi;
-    static   int first = 1, maxsuper, rowblk, colblk;
-    flops_t  *ops = stat->ops;
-    
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    
-    if ( first ) {
-       maxsuper = sp_ienv(3);
-       rowblk   = sp_ienv(4);
-       colblk   = sp_ienv(5);
-       first = 0;
-    }
-    ldaTmp = maxsuper + rowblk;
-
-    /* 
-     * For each nonz supernode segment of U[*,j] in topological order 
-     */
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */
-
-       /* krep = representative of current k-th supernode
-        * fsupc = first supernodal column
-        * nsupc = no of columns in a supernode
-        * nsupr = no of rows in a supernode
-        */
-        krep = segrep[k--];
-       fsupc = xsup[supno[krep]];
-       nsupc = krep - fsupc + 1;
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];
-       nrow = nsupr - nsupc;
-       lptr = xlsub[fsupc];
-       krep_ind = lptr + nsupc - 1;
-
-       repfnz_col = repfnz;
-       dense_col = dense;
-       
-       if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */
-
-           TriTmp = tempv;
-       
-           /* Sequence through each column in panel -- triangular solves */
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {
-
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
-           
-               segsze = krep - kfnz + 1;
-               luptr = xlusup[fsupc];
-
-               ops[TRSV] += segsze * (segsze - 1);
-               ops[GEMV] += 2 * nrow * segsze;
-       
-               /* Case 1: Update U-segment of size 1 -- col-col update */
-               if ( segsze == 1 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc;
-
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
-                       irow = lsub[i];
-                       dense_col[irow] -= ukj * lusup[luptr];
-                       ++luptr;
-                   }
-
-               } else if ( segsze <= 3 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   ukj1 = dense_col[lsub[krep_ind - 1]];
-                   luptr += nsupr*(nsupc-1) + nsupc-1;
-                   luptr1 = luptr - nsupr;
-
-                   if ( segsze == 2 ) {
-                       ukj -= ukj1 * lusup[luptr1];
-                       dense_col[lsub[krep_ind]] = ukj;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           luptr++; luptr1++;
-                           dense_col[irow] -= (ukj*lusup[luptr]
-                                               + ukj1*lusup[luptr1]);
-                       }
-                   } else {
-                       ukj2 = dense_col[lsub[krep_ind - 2]];
-                       luptr2 = luptr1 - nsupr;
-                       ukj1 -= ukj2 * lusup[luptr2-1];
-                       ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
-                       dense_col[lsub[krep_ind]] = ukj;
-                       dense_col[lsub[krep_ind-1]] = ukj1;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           luptr++; luptr1++; luptr2++;
-                           dense_col[irow] -= ( ukj*lusup[luptr]
-                             + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
-                       }
-                   }
-
-               } else  {       /* segsze >= 4 */
-                   
-                   /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
-                      holds the result of triangular solves.    */
-                   no_zeros = kfnz - fsupc;
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; ++i) {
-                       irow = lsub[isub];
-                       TriTmp[i] = dense_col[irow]; /* Gather */
-                       ++isub;
-                   }
-                   
-                   /* start effective triangle */
-                   luptr += nsupr * no_zeros + no_zeros;
-
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-                   STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                          &nsupr, TriTmp, &incx );
-#else
-                   strsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                          &nsupr, TriTmp, &incx );
-#endif
-#else          
-                   slsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
-#endif
-                   
-
-               } /* else ... */
-           
-           }  /* for jj ... end tri-solves */
-
-           /* Block row updates; push all the way into dense[*] block */
-           for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {
-               
-               r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
-               block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
-               luptr = xlusup[fsupc] + nsupc + r_ind;
-               isub1 = lptr + nsupc + r_ind;
-               
-               repfnz_col = repfnz;
-               TriTmp = tempv;
-               dense_col = dense;
-               
-               /* Sequence through each column in panel -- matrix-vector */
-               for (jj = jcol; jj < jcol + w; jj++,
-                    repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
-                   
-                   kfnz = repfnz_col[krep];
-                   if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
-                   
-                   segsze = krep - kfnz + 1;
-                   if ( segsze <= 3 ) continue;   /* skip unrolled cases */
-                   
-                   /* Perform a block update, and scatter the result of
-                      matrix-vector to dense[].                 */
-                   no_zeros = kfnz - fsupc;
-                   luptr1 = luptr + nsupr * no_zeros;
-                   MatvecTmp = &TriTmp[maxsuper];
-                   
-#ifdef USE_VENDOR_BLAS
-                   alpha = one; 
-                    beta = zero;
-#ifdef _CRAY
-                   SGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1], 
-                          &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
-#else
-                   sgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1], 
-                          &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
-#endif
-#else
-                   smatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
-                          TriTmp, MatvecTmp);
-#endif
-                   
-                   /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
-                    * such that MatvecTmp[*] can be re-used for the
-                    * the next blok row update. dense[] will be copied into 
-                    * global store after the whole panel has been finished.
-                    */
-                   isub = isub1;
-                   for (i = 0; i < block_nrow; i++) {
-                       irow = lsub[isub];
-                       dense_col[irow] -= MatvecTmp[i];
-                       MatvecTmp[i] = zero;
-                       ++isub;
-                   }
-                   
-               } /* for jj ... */
-               
-           } /* for each block row ... */
-           
-           /* Scatter the triangular solves into SPA dense[*] */
-           repfnz_col = repfnz;
-           TriTmp = tempv;
-           dense_col = dense;
-           
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
-               
-               segsze = krep - kfnz + 1;
-               if ( segsze <= 3 ) continue; /* skip unrolled cases */
-               
-               no_zeros = kfnz - fsupc;                
-               isub = lptr + no_zeros;
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   dense_col[irow] = TriTmp[i];
-                   TriTmp[i] = zero;
-                   ++isub;
-               }
-               
-           } /* for jj ... */
-           
-       } else { /* 1-D block modification */
-           
-           
-           /* Sequence through each column in the panel */
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m) {
-               
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
-               
-               segsze = krep - kfnz + 1;
-               luptr = xlusup[fsupc];
-
-               ops[TRSV] += segsze * (segsze - 1);
-               ops[GEMV] += 2 * nrow * segsze;
-               
-               /* Case 1: Update U-segment of size 1 -- col-col update */
-               if ( segsze == 1 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc;
-
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
-                       irow = lsub[i];
-                       dense_col[irow] -= ukj * lusup[luptr];
-                       ++luptr;
-                   }
-
-               } else if ( segsze <= 3 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc-1;
-                   ukj1 = dense_col[lsub[krep_ind - 1]];
-                   luptr1 = luptr - nsupr;
-
-                   if ( segsze == 2 ) {
-                       ukj -= ukj1 * lusup[luptr1];
-                       dense_col[lsub[krep_ind]] = ukj;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           ++luptr;  ++luptr1;
-                           dense_col[irow] -= (ukj*lusup[luptr]
-                                               + ukj1*lusup[luptr1]);
-                       }
-                   } else {
-                       ukj2 = dense_col[lsub[krep_ind - 2]];
-                       luptr2 = luptr1 - nsupr;
-                       ukj1 -= ukj2 * lusup[luptr2-1];
-                       ukj = ukj - ukj1*lusup[luptr1] - ukj2*lusup[luptr2];
-                       dense_col[lsub[krep_ind]] = ukj;
-                       dense_col[lsub[krep_ind-1]] = ukj1;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           ++luptr; ++luptr1; ++luptr2;
-                           dense_col[irow] -= ( ukj*lusup[luptr]
-                             + ukj1*lusup[luptr1] + ukj2*lusup[luptr2] );
-                       }
-                   }
-
-               } else  { /* segsze >= 4 */
-                   /* 
-                    * Perform a triangular solve and block update,
-                    * then scatter the result of sup-col update to dense[].
-                    */
-                   no_zeros = kfnz - fsupc;
-                   
-                   /* Copy U[*,j] segment from dense[*] to tempv[*]: 
-                    *    The result of triangular solve is in tempv[*];
-                    *    The result of matrix vector update is in dense_col[*]
-                    */
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; ++i) {
-                       irow = lsub[isub];
-                       tempv[i] = dense_col[irow]; /* Gather */
-                       ++isub;
-                   }
-                   
-                   /* start effective triangle */
-                   luptr += nsupr * no_zeros + no_zeros;
-                   
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-                   STRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                          &nsupr, tempv, &incx );
-#else
-                   strsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                          &nsupr, tempv, &incx );
-#endif
-                   
-                   luptr += segsze;    /* Dense matrix-vector */
-                   tempv1 = &tempv[segsze];
-                    alpha = one;
-                    beta = zero;
-#ifdef _CRAY
-                   SGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
-                          &nsupr, tempv, &incx, &beta, tempv1, &incy );
-#else
-                   sgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
-                          &nsupr, tempv, &incx, &beta, tempv1, &incy );
-#endif
-#else
-                   slsolve ( nsupr, segsze, &lusup[luptr], tempv );
-                   
-                   luptr += segsze;        /* Dense matrix-vector */
-                   tempv1 = &tempv[segsze];
-                   smatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
-#endif
-                   
-                   /* Scatter tempv[*] into SPA dense[*] temporarily, such
-                    * that tempv[*] can be used for the triangular solve of
-                    * the next column of the panel. They will be copied into 
-                    * ucol[*] after the whole panel has been finished.
-                    */
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; i++) {
-                       irow = lsub[isub];
-                       dense_col[irow] = tempv[i];
-                       tempv[i] = zero;
-                       isub++;
-                   }
-                   
-                   /* Scatter the update from tempv1[*] into SPA dense[*] */
-                   /* Start dense rectangular L */
-                   for (i = 0; i < nrow; i++) {
-                       irow = lsub[isub];
-                       dense_col[irow] -= tempv1[i];
-                       tempv1[i] = zero;
-                       ++isub; 
-                   }
-                   
-               } /* else segsze>=4 ... */
-               
-           } /* for each column in the panel... */
-           
-       } /* else 1-D update ... */
-
-    } /* for each updating supernode ... */
-
-}
-
-
-
diff --git a/superlu/spanel_dfs.c b/superlu/spanel_dfs.c
deleted file mode 100644
index d32af402..00000000
--- a/superlu/spanel_dfs.c
+++ /dev/null
@@ -1,256 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_sdefs.h"
-
-void
-spanel_dfs (
-          const int  m,           /* in - number of rows in the matrix */
-          const int  w,           /* in */
-          const int  jcol,        /* in */
-          SuperMatrix *A,       /* in - original matrix */
-          int        *perm_r,     /* in */
-          int        *nseg,       /* out */
-          float     *dense,      /* out */
-          int        *panel_lsub, /* out */
-          int        *segrep,     /* out */
-          int        *repfnz,     /* out */
-          int        *xprune,     /* out */
-          int        *marker,     /* out */     
-          int        *parent,     /* working array */
-          int        *xplore,     /* working array */
-          GlobalLU_t *Glu         /* modified */
-          )
-{
-/*
- * Purpose
- * =======
- *
- *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
- *
- *   A supernode representative is the last column of a supernode.
- *   The nonzeros in U[*,j] are segments that end at supernodal
- *   representatives.
- *
- *   The routine returns one list of the supernodal representatives
- *   in topological order of the dfs that generates them. This list is
- *   a superset of the topological order of each individual column within
- *   the panel. 
- *   The location of the first nonzero in each supernodal segment
- *   (supernodal entry location) is also returned. Each column has a 
- *   separate list for this purpose.
- *
- *   Two marker arrays are used for dfs:
- *     marker[i] == jj, if i was visited during dfs of current column jj;
- *     marker1[i] >= jcol, if i was visited by earlier columns in this panel;
- *
- *   marker: A-row --> A-row/col (0/1)
- *   repfnz: SuperA-col --> PA-row
- *   parent: SuperA-col --> SuperA-col
- *   xplore: SuperA-col --> index to L-structure
- *
- */
-    NCPformat *Astore;
-    float    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int              krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
-    int       k, krow, kmark, kperm;
-    int       xdfs, maxdfs, kpar;
-    int       jj;         /* index through each column in the panel */
-    int       *marker1;           /* marker1[jj] >= jcol if vertex jj was 
visited 
-                             by a previous column within this panel.   */
-    int       *repfnz_col; /* start of each column in the panel */
-    float    *dense_col;  /* start of each column in the panel */
-    int       nextl_col;   /* next available position in panel_lsub[*,jj] */
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-
-    /* Initialize pointers */
-    Astore     = A->Store;
-    a          = Astore->nzval;
-    asub       = Astore->rowind;
-    xa_begin   = Astore->colbeg;
-    xa_end     = Astore->colend;
-    marker1    = marker + m;
-    repfnz_col = repfnz;
-    dense_col  = dense;
-    *nseg      = 0;
-    xsup       = Glu->xsup;
-    supno      = Glu->supno;
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-
-    /* For each column in the panel */
-    for (jj = jcol; jj < jcol + w; jj++) {
-       nextl_col = (jj - jcol) * m;
-
-#ifdef CHK_DFS
-       printf("\npanel col %d: ", jj);
-#endif
-
-       /* For each nonz in A[*,jj] do dfs */
-       for (k = xa_begin[jj]; k < xa_end[jj]; k++) {
-           krow = asub[k];
-            dense_col[krow] = a[k];
-           kmark = marker[krow];       
-           if ( kmark == jj ) 
-               continue;     /* krow visited before, go to the next nonzero */
-
-           /* For each unmarked nbr krow of jj
-            * krow is in L: place it in structure of L[*,jj]
-            */
-           marker[krow] = jj;
-           kperm = perm_r[krow];
-           
-           if ( kperm == EMPTY ) {
-               panel_lsub[nextl_col++] = krow; /* krow is indexed into A */
-           }
-           /* 
-            * krow is in U: if its supernode-rep krep
-            * has been explored, update repfnz[*]
-            */
-           else {
-               
-               krep = xsup[supno[kperm]+1] - 1;
-               myfnz = repfnz_col[krep];
-               
-#ifdef CHK_DFS
-               printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, 
kperm);
-#endif
-               if ( myfnz != EMPTY ) { /* Representative visited before */
-                   if ( myfnz > kperm ) repfnz_col[krep] = kperm;
-                   /* continue; */
-               }
-               else {
-                   /* Otherwise, perform dfs starting at krep */
-                   oldrep = EMPTY;
-                   parent[krep] = oldrep;
-                   repfnz_col[krep] = kperm;
-                   xdfs = xlsub[krep];
-                   maxdfs = xprune[krep];
-                   
-#ifdef CHK_DFS 
-                   printf("  xdfs %d, maxdfs %d: ", xdfs, maxdfs);
-                   for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                   printf("\n");
-#endif
-                   do {
-                       /* 
-                        * For each unmarked kchild of krep 
-                        */
-                       while ( xdfs < maxdfs ) {
-                           
-                           kchild = lsub[xdfs];
-                           xdfs++;
-                           chmark = marker[kchild];
-                           
-                           if ( chmark != jj ) { /* Not reached yet */
-                               marker[kchild] = jj;
-                               chperm = perm_r[kchild];
-                             
-                               /* Case kchild is in L: place it in L[*,j] */
-                               if ( chperm == EMPTY ) {
-                                   panel_lsub[nextl_col++] = kchild;
-                               } 
-                               /* Case kchild is in U: 
-                                *   chrep = its supernode-rep. If its rep has 
-                                *   been explored, update its repfnz[*]
-                                */
-                               else {
-                                   
-                                   chrep = xsup[supno[chperm]+1] - 1;
-                                   myfnz = repfnz_col[chrep];
-#ifdef CHK_DFS
-                                   printf("chrep %d,myfnz %d,perm_r[%d] 
%d\n",chrep,myfnz,kchild,chperm);
-#endif
-                                   if ( myfnz != EMPTY ) { /* Visited before */
-                                       if ( myfnz > chperm )
-                                           repfnz_col[chrep] = chperm;
-                                   }
-                                   else {
-                                       /* Cont. dfs at snode-rep of kchild */
-                                       xplore[krep] = xdfs;    
-                                       oldrep = krep;
-                                       krep = chrep; /* Go deeper down G(L) */
-                                       parent[krep] = oldrep;
-                                       repfnz_col[krep] = chperm;
-                                       xdfs = xlsub[krep];     
-                                       maxdfs = xprune[krep];
-#ifdef CHK_DFS 
-                                       printf("  xdfs %d, maxdfs %d: ", xdfs, 
maxdfs);
-                                       for (i = xdfs; i < maxdfs; i++) 
printf(" %d", lsub[i]); 
-                                       printf("\n");
-#endif
-                                   } /* else */
-                                 
-                               } /* else */
-                             
-                           } /* if... */
-                           
-                       } /* while xdfs < maxdfs */
-                       
-                       /* krow has no more unexplored nbrs:
-                        *    Place snode-rep krep in postorder DFS, if this 
-                        *    segment is seen for the first time. (Note that
-                        *    "repfnz[krep]" may change later.)
-                        *    Backtrack dfs to its parent.
-                        */
-                       if ( marker1[krep] < jcol ) {
-                           segrep[*nseg] = krep;
-                           ++(*nseg);
-                           marker1[krep] = jj;
-                       }
-                       
-                       kpar = parent[krep]; /* Pop stack, mimic recursion */
-                       if ( kpar == EMPTY ) break; /* dfs done */
-                       krep = kpar;
-                       xdfs = xplore[krep];
-                       maxdfs = xprune[krep];
-                       
-#ifdef CHK_DFS 
-                       printf("  pop stack: krep %d,xdfs %d,maxdfs %d: ", 
krep,xdfs,maxdfs);
-                       for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                       printf("\n");
-#endif
-                   } while ( kpar != EMPTY ); /* do-while - until empty stack 
*/
-                   
-               } /* else */
-               
-           } /* else */
-           
-       } /* for each nonz in A[*,jj] */
-       
-       repfnz_col += m;    /* Move to next column */
-        dense_col += m;
-       
-    } /* for jj ... */
-    
-}
diff --git a/superlu/spivotL.c b/superlu/spivotL.c
deleted file mode 100644
index ee66dbce..00000000
--- a/superlu/spivotL.c
+++ /dev/null
@@ -1,182 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include "slu_sdefs.h"
-
-#undef DEBUG
-
-int
-spivotL(
-        const int  jcol,     /* in */
-        const float u,      /* in - diagonal pivoting threshold */
-        int        *usepr,   /* re-use the pivot sequence given by 
perm_r/iperm_r */
-        int        *perm_r,  /* may be modified */
-        int        *iperm_r, /* in - inverse of perm_r */
-        int        *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */
-        int        *pivrow,  /* out */
-        GlobalLU_t *Glu,     /* modified - global LU data structures */
-       SuperLUStat_t *stat  /* output */
-       )
-{
-/*
- * Purpose
- * =======
- *   Performs the numerical pivoting on the current column of L,
- *   and the CDIV operation.
- *
- *   Pivot policy:
- *   (1) Compute thresh = u * max_(i>=j) abs(A_ij);
- *   (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
- *           pivot row = k;
- *       ELSE IF abs(A_jj) >= thresh THEN
- *           pivot row = j;
- *       ELSE
- *           pivot row = m;
- * 
- *   Note: If you absolutely want to use a given pivot order, then set u=0.0.
- *
- *   Return value: 0      success;
- *                 i > 0  U(i,i) is exactly zero.
- *
- */
-    int          fsupc;            /* first column in the supernode */
-    int          nsupc;            /* no of columns in the supernode */
-    int          nsupr;     /* no of rows in the supernode */
-    int          lptr;     /* points to the starting subscript of the 
supernode */
-    int          pivptr, old_pivptr, diag, diagind;
-    float       pivmax, rtemp, thresh;
-    float       temp;
-    float       *lu_sup_ptr; 
-    float       *lu_col_ptr;
-    int          *lsub_ptr;
-    int          isub, icol, k, itemp;
-    int          *lsub, *xlsub;
-    float       *lusup;
-    int          *xlusup;
-    flops_t      *ops = stat->ops;
-
-    /* Initialize pointers */
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    lusup      = Glu->lusup;
-    xlusup     = Glu->xlusup;
-    fsupc      = (Glu->xsup)[(Glu->supno)[jcol]];
-    nsupc      = jcol - fsupc;         /* excluding jcol; nsupc >= 0 */
-    lptr       = xlsub[fsupc];
-    nsupr      = xlsub[fsupc+1] - lptr;
-    lu_sup_ptr = &lusup[xlusup[fsupc]];        /* start of the current 
supernode */
-    lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */
-    lsub_ptr   = &lsub[lptr];  /* start of row indices of the supernode */
-
-#ifdef DEBUG
-if ( jcol == MIN_COL ) {
-    printf("Before cdiv: col %d\n", jcol);
-    for (k = nsupc; k < nsupr; k++) 
-       printf("  lu[%d] %f\n", lsub_ptr[k], lu_col_ptr[k]);
-}
-#endif
-    
-    /* Determine the largest abs numerical value for partial pivoting;
-       Also search for user-specified pivot, and diagonal element. */
-    if ( *usepr ) *pivrow = iperm_r[jcol];
-    diagind = iperm_c[jcol];
-    pivmax = 0.0;
-    pivptr = nsupc;
-    diag = EMPTY;
-    old_pivptr = nsupc;
-    for (isub = nsupc; isub < nsupr; ++isub) {
-       rtemp = fabs (lu_col_ptr[isub]);
-       if ( rtemp > pivmax ) {
-           pivmax = rtemp;
-           pivptr = isub;
-       }
-       if ( *usepr && lsub_ptr[isub] == *pivrow ) old_pivptr = isub;
-       if ( lsub_ptr[isub] == diagind ) diag = isub;
-    }
-
-    /* Test for singularity */
-    if ( pivmax == 0.0 ) {
-       *pivrow = lsub_ptr[pivptr];
-       perm_r[*pivrow] = jcol;
-       *usepr = 0;
-       return (jcol+1);
-    }
-
-    thresh = u * pivmax;
-    
-    /* Choose appropriate pivotal element by our policy. */
-    if ( *usepr ) {
-        rtemp = fabs (lu_col_ptr[old_pivptr]);
-       if ( rtemp != 0.0 && rtemp >= thresh )
-           pivptr = old_pivptr;
-       else
-           *usepr = 0;
-    }
-    if ( *usepr == 0 ) {
-       /* Use diagonal pivot? */
-       if ( diag >= 0 ) { /* diagonal exists */
-           rtemp = fabs (lu_col_ptr[diag]);
-           if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
-        }
-       *pivrow = lsub_ptr[pivptr];
-    }
-    
-    /* Record pivot row */
-    perm_r[*pivrow] = jcol;
-    
-    /* Interchange row subscripts */
-    if ( pivptr != nsupc ) {
-       itemp = lsub_ptr[pivptr];
-       lsub_ptr[pivptr] = lsub_ptr[nsupc];
-       lsub_ptr[nsupc] = itemp;
-
-       /* Interchange numerical values as well, for the whole snode, such 
-        * that L is indexed the same way as A.
-        */
-       for (icol = 0; icol <= nsupc; icol++) {
-           itemp = pivptr + icol * nsupr;
-           temp = lu_sup_ptr[itemp];
-           lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
-           lu_sup_ptr[nsupc + icol*nsupr] = temp;
-       }
-    } /* if */
-
-    /* cdiv operation */
-    ops[FACT] += nsupr - nsupc;
-
-    temp = 1.0 / lu_col_ptr[nsupc];
-    for (k = nsupc+1; k < nsupr; k++) 
-       lu_col_ptr[k] *= temp;
-
-    return 0;
-}
-
diff --git a/superlu/spivotgrowth.c b/superlu/spivotgrowth.c
deleted file mode 100644
index 05a24463..00000000
--- a/superlu/spivotgrowth.c
+++ /dev/null
@@ -1,129 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include <math.h>
-#include "slu_sdefs.h"
-
-float
-sPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, 
-             SuperMatrix *L, SuperMatrix *U)
-{
-/*
- * Purpose
- * =======
- *
- * Compute the reciprocal pivot growth factor of the leading ncols columns
- * of the matrix, using the formula:
- *     min_j ( max_i(abs(A_ij)) / max_i(abs(U_ij)) )
- *
- * Arguments
- * =========
- *
- * ncols    (input) int
- *          The number of columns of matrices A, L and U.
- *
- * A        (input) SuperMatrix*
- *         Original matrix A, permuted by columns, of dimension
- *          (A->nrow, A->ncol). The type of A can be:
- *          Stype = NC; Dtype = SLU_S; Mtype = GE.
- *
- * L        (output) SuperMatrix*
- *          The factor L from the factorization Pr*A=L*U; use compressed row 
- *          subscripts storage for supernodes, i.e., L has type: 
- *          Stype = SC; Dtype = SLU_S; Mtype = TRLU.
- *
- * U        (output) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *          storage scheme, i.e., U has types: Stype = NC;
- *          Dtype = SLU_S; Mtype = TRU.
- *
- */
-    NCformat *Astore;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    float  *Aval, *Lval, *Uval;
-    int      fsupc, nsupr, luptr, nz_in_U;
-    int      i, j, k, oldcol;
-    int      *inv_perm_c;
-    float   rpg, maxaj, maxuj;
-    extern   double slamch_(char *);
-    float   smlnum;
-    float   *luval;
-   
-    /* Get machine constants. */
-    smlnum = slamch_("S");
-    rpg = 1. / smlnum;
-
-    Astore = A->Store;
-    Lstore = L->Store;
-    Ustore = U->Store;
-    Aval = Astore->nzval;
-    Lval = Lstore->nzval;
-    Uval = Ustore->nzval;
-    
-    inv_perm_c = (int *) SUPERLU_MALLOC(A->ncol*sizeof(int));
-    for (j = 0; j < A->ncol; ++j) inv_perm_c[perm_c[j]] = j;
-
-    for (k = 0; k <= Lstore->nsuper; ++k) {
-       fsupc = L_FST_SUPC(k);
-       nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-       luptr = L_NZ_START(fsupc);
-       luval = &Lval[luptr];
-       nz_in_U = 1;
-       
-       for (j = fsupc; j < L_FST_SUPC(k+1) && j < ncols; ++j) {
-           maxaj = 0.;
-            oldcol = inv_perm_c[j];
-           for (i = Astore->colptr[oldcol]; i < Astore->colptr[oldcol+1]; ++i)
-               maxaj = SUPERLU_MAX( maxaj, fabs(Aval[i]) );
-       
-           maxuj = 0.;
-           for (i = Ustore->colptr[j]; i < Ustore->colptr[j+1]; i++)
-               maxuj = SUPERLU_MAX( maxuj, fabs(Uval[i]) );
-           
-           /* Supernode */
-           for (i = 0; i < nz_in_U; ++i)
-               maxuj = SUPERLU_MAX( maxuj, fabs(luval[i]) );
-
-           ++nz_in_U;
-           luval += nsupr;
-
-           if ( maxuj == 0. )
-               rpg = SUPERLU_MIN( rpg, 1.);
-           else
-               rpg = SUPERLU_MIN( rpg, maxaj / maxuj );
-       }
-       
-       if ( j >= ncols ) break;
-    }
-
-    SUPERLU_FREE(inv_perm_c);
-    return (rpg);
-}
diff --git a/superlu/spruneL.c b/superlu/spruneL.c
deleted file mode 100644
index 0d5755fb..00000000
--- a/superlu/spruneL.c
+++ /dev/null
@@ -1,156 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_sdefs.h"
-
-void
-spruneL(
-       const int  jcol,             /* in */
-       const int  *perm_r,   /* in */
-       const int  pivrow,    /* in */
-       const int  nseg,             /* in */
-       const int  *segrep,   /* in */
-       const int  *repfnz,   /* in */
-       int        *xprune,   /* out */
-       GlobalLU_t *Glu       /* modified - global LU data structures */
-       )
-{
-/*
- * Purpose
- * =======
- *   Prunes the L-structure of supernodes whose L-structure
- *   contains the current pivot row "pivrow"
- *
- */
-    float     utemp;
-    int        jsupno, irep, irep1, kmin, kmax, krow, movnum;
-    int        i, ktemp, minloc, maxloc;
-    int        do_prune; /* logical variable */
-    int        *xsup, *supno;
-    int        *lsub, *xlsub;
-    float     *lusup;
-    int        *xlusup;
-
-    xsup       = Glu->xsup;
-    supno      = Glu->supno;
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    lusup      = Glu->lusup;
-    xlusup     = Glu->xlusup;
-    
-    /*
-     * For each supernode-rep irep in U[*,j]
-     */
-    jsupno = supno[jcol];
-    for (i = 0; i < nseg; i++) {
-
-       irep = segrep[i];
-       irep1 = irep + 1;
-       do_prune = FALSE;
-
-       /* Don't prune with a zero U-segment */
-       if ( repfnz[irep] == EMPTY )
-               continue;
-
-       /* If a snode overlaps with the next panel, then the U-segment 
-        * is fragmented into two parts -- irep and irep1. We should let
-        * pruning occur at the rep-column in irep1's snode. 
-        */
-       if ( supno[irep] == supno[irep1] )      /* Don't prune */
-               continue;
-
-       /*
-        * If it has not been pruned & it has a nonz in row L[pivrow,i]
-        */
-       if ( supno[irep] != jsupno ) {
-           if ( xprune[irep] >= xlsub[irep1] ) {
-               kmin = xlsub[irep];
-               kmax = xlsub[irep1] - 1;
-               for (krow = kmin; krow <= kmax; krow++) 
-                   if ( lsub[krow] == pivrow ) {
-                       do_prune = TRUE;
-                       break;
-                   }
-           }
-           
-           if ( do_prune ) {
-
-               /* Do a quicksort-type partition
-                * movnum=TRUE means that the num values have to be exchanged.
-                */
-               movnum = FALSE;
-               if ( irep == xsup[supno[irep]] ) /* Snode of size 1 */
-                       movnum = TRUE;
-
-               while ( kmin <= kmax ) {
-
-                   if ( perm_r[lsub[kmax]] == EMPTY ) 
-                       kmax--;
-                   else if ( perm_r[lsub[kmin]] != EMPTY )
-                       kmin++;
-                   else { /* kmin below pivrow, and kmax above pivrow: 
-                           *   interchange the two subscripts
-                           */
-                       ktemp = lsub[kmin];
-                       lsub[kmin] = lsub[kmax];
-                       lsub[kmax] = ktemp;
-
-                       /* If the supernode has only one column, then we
-                        * only keep one set of subscripts. For any subscript 
-                        * interchange performed, similar interchange must be 
-                        * done on the numerical values.
-                        */
-                       if ( movnum ) {
-                           minloc = xlusup[irep] + (kmin - xlsub[irep]);
-                           maxloc = xlusup[irep] + (kmax - xlsub[irep]);
-                           utemp = lusup[minloc];
-                           lusup[minloc] = lusup[maxloc];
-                           lusup[maxloc] = utemp;
-                       }
-
-                       kmin++;
-                       kmax--;
-
-                   }
-
-               } /* while */
-
-               xprune[irep] = kmin;    /* Pruning */
-
-#ifdef CHK_PRUNE
-       printf("    After spruneL(),using col %d:  xprune[%d] = %d\n", 
-                       jcol, irep, kmin);
-#endif
-           } /* if do_prune */
-
-       } /* if */
-
-    } /* for each U-segment... */
-}
diff --git a/superlu/sreadhb.c b/superlu/sreadhb.c
deleted file mode 100644
index 938f30a3..00000000
--- a/superlu/sreadhb.c
+++ /dev/null
@@ -1,276 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_sdefs.h"
-
-
-/* Eat up the rest of the current line */
-int sDumpLine(FILE *fp)
-{
-    register int c;
-    while ((c = fgetc(fp)) != '\n') ;
-    return 0;
-}
-
-int sParseIntFormat(char *buf, int *num, int *size)
-{
-    char *tmp;
-
-    tmp = buf;
-    while (*tmp++ != '(') ;
-    sscanf(tmp, "%d", num);
-    while (*tmp != 'I' && *tmp != 'i') ++tmp;
-    ++tmp;
-    sscanf(tmp, "%d", size);
-    return 0;
-}
-
-int sParseFloatFormat(char *buf, int *num, int *size)
-{
-    char *tmp, *period;
-    
-    tmp = buf;
-    while (*tmp++ != '(') ;
-    *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
-    while (*tmp != 'E' && *tmp != 'e' && *tmp != 'D' && *tmp != 'd'
-          && *tmp != 'F' && *tmp != 'f') {
-        /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the
-           num picked up refers to P, which should be skipped. */
-        if (*tmp=='p' || *tmp=='P') {
-           ++tmp;
-           *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
-        } else {
-           ++tmp;
-        }
-    }
-    ++tmp;
-    period = tmp;
-    while (*period != '.' && *period != ')') ++period ;
-    *period = '\0';
-    *size = atoi(tmp); /*sscanf(tmp, "%2d", size);*/
-
-    return 0;
-}
-
-int sReadVector(FILE *fp, int n, int *where, int perline, int persize)
-{
-    register int i, j, item;
-    char tmp, buf[100], *dummy;
-    
-    i = 0;
-    while (i < n) {
-       dummy = fgets(buf, 100, fp);    /* read a line at a time */
-       for (j=0; j<perline && i<n; j++) {
-           tmp = buf[(j+1)*persize];     /* save the char at that place */
-           buf[(j+1)*persize] = 0;       /* null terminate */
-           item = atoi(&buf[j*persize]); 
-           buf[(j+1)*persize] = tmp;     /* recover the char at that place */
-           where[i++] = item - 1;
-       }
-    }
-
-    return 0;
-}
-
-int sReadValues(FILE *fp, int n, float *destination, int perline, int persize)
-{
-    register int i, j, k, s;
-    char tmp, buf[100], *dummy;
-    
-    i = 0;
-    while (i < n) {
-       dummy = fgets(buf, 100, fp);    /* read a line at a time */
-       for (j=0; j<perline && i<n; j++) {
-           tmp = buf[(j+1)*persize];     /* save the char at that place */
-           buf[(j+1)*persize] = 0;       /* null terminate */
-           s = j*persize;
-           for (k = 0; k < persize; ++k) /* No D_ format in C */
-               if ( buf[s+k] == 'D' || buf[s+k] == 'd' ) buf[s+k] = 'E';
-           destination[i++] = atof(&buf[s]);
-           buf[(j+1)*persize] = tmp;     /* recover the char at that place */
-       }
-    }
-
-    return 0;
-}
-
-
-
-void
-sreadhb(int *nrow, int *ncol, int *nonz,
-       float **nzval, int **rowind, int **colptr)
-{
-/* 
- * Purpose
- * =======
- * 
- * Read a FLOAT PRECISION matrix stored in Harwell-Boeing format 
- * as described below.
- * 
- * Line 1 (A72,A8) 
- *     Col. 1 - 72   Title (TITLE) 
- *     Col. 73 - 80  Key (KEY) 
- * 
- * Line 2 (5I14) 
- *     Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
- *     Col. 15 - 28  Number of lines for pointers (PTRCRD) 
- *     Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
- *     Col. 43 - 56  Number of lines for numerical values (VALCRD) 
- *     Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
- *                    (including starting guesses and solution vectors 
- *                    if present) 
- *                   (zero indicates no right-hand side data is present) 
- *
- * Line 3 (A3, 11X, 4I14) 
- *     Col. 1 - 3    Matrix type (see below) (MXTYPE) 
- *     Col. 15 - 28  Number of rows (or variables) (NROW) 
- *     Col. 29 - 42  Number of columns (or elements) (NCOL) 
- *     Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
- *                   (equal to number of entries for assembled matrices) 
- *     Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
- *                   (zero in the case of assembled matrices) 
- * Line 4 (2A16, 2A20) 
- *     Col. 1 - 16   Format for pointers (PTRFMT) 
- *     Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
- *     Col. 33 - 52  Format for numerical values of coefficient matrix 
(VALFMT) 
- *     Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
- *
- * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
- *     Col. 1        Right-hand side type: 
- *                       F for full storage or M for same format as matrix 
- *     Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
- *     Col. 3        X if an exact solution vector(s) is supplied. 
- *     Col. 15 - 28  Number of right-hand sides (NRHS) 
- *     Col. 29 - 42  Number of row indices (NRHSIX) 
- *                   (ignored in case of unassembled matrices) 
- *
- * The three character type field on line 3 describes the matrix type. 
- * The following table lists the permitted values for each of the three 
- * characters. As an example of the type field, RSA denotes that the matrix 
- * is real, symmetric, and assembled. 
- *
- * First Character: 
- *     R Real matrix 
- *     C Complex matrix 
- *     P Pattern only (no numerical values supplied) 
- *
- * Second Character: 
- *     S Symmetric 
- *     U Unsymmetric 
- *     H Hermitian 
- *     Z Skew symmetric 
- *     R Rectangular 
- *
- * Third Character: 
- *     A Assembled 
- *     E Elemental matrices (unassembled) 
- *
- */
-
-  register int i, numer_lines = 0, rhscrd = 0, dummy;
-    int tmp, colnum, colsize, rownum, rowsize, valnum, valsize;
-    char buf[100], type[4], key[10], *dummyc;
-    FILE *fp;
-
-    fp = stdin;
-
-    /* Line 1 */
-    dummyc = fgets(buf, 100, fp);
-    fputs(buf, stdout);
-#if 0
-    dummy = fscanf(fp, "%72c", buf); buf[72] = 0;
-    printf("Title: %s", buf);
-    dummy += fscanf(fp, "%8c", key);  key[8] = 0;
-    printf("Key: %s\n", key);
-    sDumpLine(fp);
-#endif
-
-    /* Line 2 */
-    for (i=0; i<5; i++) {
-       dummy += fscanf(fp, "%14c", buf); buf[14] = 0;
-       sscanf(buf, "%d", &tmp);
-       if (i == 3) numer_lines = tmp;
-       if (i == 4 && tmp) rhscrd = tmp;
-    }
-    sDumpLine(fp);
-
-    /* Line 3 */
-    dummy += fscanf(fp, "%3c", type);
-    dummy += fscanf(fp, "%11c", buf); /* pad */
-    type[3] = 0;
-#ifdef DEBUG
-    printf("Matrix type %s\n", type);
-#endif
-    
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", nrow);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", ncol);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", nonz);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", &tmp);
-    
-    if (tmp != 0)
-         printf("This is not an assembled matrix!\n");
-    if (*nrow != *ncol)
-       printf("Matrix is not square.\n");
-    sDumpLine(fp);
-
-    /* Allocate storage for the three arrays ( nzval, rowind, colptr ) */
-    sallocateA(*ncol, *nonz, nzval, rowind, colptr);
-
-    /* Line 4: format statement */
-    dummy += fscanf(fp, "%16c", buf);
-    sParseIntFormat(buf, &colnum, &colsize);
-    dummy += fscanf(fp, "%16c", buf);
-    sParseIntFormat(buf, &rownum, &rowsize);
-    dummy += fscanf(fp, "%20c", buf);
-    sParseFloatFormat(buf, &valnum, &valsize);
-    dummy += fscanf(fp, "%20c", buf);
-    sDumpLine(fp);
-
-    /* Line 5: right-hand side */    
-    if ( rhscrd ) sDumpLine(fp); /* skip RHSFMT */
-    
-#ifdef DEBUG
-    printf("%d rows, %d nonzeros\n", *nrow, *nonz);
-    printf("colnum %d, colsize %d\n", colnum, colsize);
-    printf("rownum %d, rowsize %d\n", rownum, rowsize);
-    printf("valnum %d, valsize %d\n", valnum, valsize);
-#endif
-    
-    sReadVector(fp, *ncol+1, *colptr, colnum, colsize);
-    sReadVector(fp, *nonz, *rowind, rownum, rowsize);
-    if ( numer_lines ) {
-        sReadValues(fp, *nonz, *nzval, valnum, valsize);
-    }
-    
-    fclose(fp);
-
-}
-
diff --git a/superlu/ssnode_bmod.c b/superlu/ssnode_bmod.c
deleted file mode 100644
index a476dd5f..00000000
--- a/superlu/ssnode_bmod.c
+++ /dev/null
@@ -1,115 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include "slu_sdefs.h"
-extern void strsv_();
-extern void sgemv_();
-
-
-/*
- * Performs numeric block updates within the relaxed snode. 
- */
-int
-ssnode_bmod (
-           const int  jcol,      /* in */
-           const int  jsupno,    /* in */
-           const int  fsupc,     /* in */
-           float     *dense,    /* in */
-           float     *tempv,    /* working array */
-           GlobalLU_t *Glu,      /* modified */
-           SuperLUStat_t *stat   /* output */
-           )
-{
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-        ftcs2 = _cptofcd("N", strlen("N")),
-        ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    int            incx = 1, incy = 1;
-    float         alpha = -1.0, beta = 1.0;
-#endif
-
-    int            luptr, nsupc, nsupr, nrow;
-    int            isub, irow, i, iptr; 
-    register int   ufirst, nextlu;
-    int            *lsub, *xlsub;
-    float         *lusup;
-    int            *xlusup;
-    flops_t *ops = stat->ops;
-
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-
-    nextlu = xlusup[jcol];
-    
-    /*
-     * Process the supernodal portion of L\U[*,j]
-     */
-    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
-       irow = lsub[isub];
-       lusup[nextlu] = dense[irow];
-       dense[irow] = 0;
-       ++nextlu;
-    }
-
-    xlusup[jcol + 1] = nextlu; /* Initialize xlusup for next column */
-    
-    if ( fsupc < jcol ) {
-
-       luptr = xlusup[fsupc];
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];
-       nsupc = jcol - fsupc;   /* Excluding jcol */
-       ufirst = xlusup[jcol];  /* Points to the beginning of column
-                                  jcol in supernode L\U(jsupno). */
-       nrow = nsupr - nsupc;
-
-       ops[TRSV] += nsupc * (nsupc - 1);
-       ops[GEMV] += 2 * nrow * nsupc;
-
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-       STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, 
-             &lusup[ufirst], &incx );
-       SGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
-               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-#else
-       strsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, 
-             &lusup[ufirst], &incx );
-       sgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
-               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-#endif
-#else
-       slsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
-       smatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 
-                       &lusup[ufirst], &tempv[0] );
-
-        /* Scatter tempv[*] into lusup[*] */
-       iptr = ufirst + nsupc;
-       for (i = 0; i < nrow; i++) {
-           lusup[iptr++] -= tempv[i];
-           tempv[i] = 0.0;
-       }
-#endif
-
-    }
-
-    return 0;
-}
diff --git a/superlu/ssnode_dfs.c b/superlu/ssnode_dfs.c
deleted file mode 100644
index b1536c25..00000000
--- a/superlu/ssnode_dfs.c
+++ /dev/null
@@ -1,113 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_sdefs.h"
-
-int
-ssnode_dfs (
-          const int  jcol,         /* in - start of the supernode */
-          const int  kcol,         /* in - end of the supernode */
-          const int  *asub,        /* in */
-          const int  *xa_begin,    /* in */
-          const int  *xa_end,      /* in */
-          int        *xprune,      /* out */
-          int        *marker,      /* modified */
-          GlobalLU_t *Glu          /* modified */
-          )
-{
-/* Purpose
- * =======
- *    ssnode_dfs() - Determine the union of the row structures of those 
- *    columns within the relaxed snode.
- *    Note: The relaxed snodes are leaves of the supernodal etree, therefore, 
- *    the portion outside the rectangular supernode must be zero.
- *
- * Return value
- * ============
- *     0   success;
- *    >0   number of bytes allocated when run out of memory.
- *
- */
-    register int i, k, ifrom, ito, nextl, new_next;
-    int          nsuper, krow, kmark, mem_error;
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    int          nzlmax;
-    
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    nzlmax  = Glu->nzlmax;
-
-    nsuper = ++supno[jcol];    /* Next available supernode number */
-    nextl = xlsub[jcol];
-
-    for (i = jcol; i <= kcol; i++) {
-       /* For each nonzero in A[*,i] */
-       for (k = xa_begin[i]; k < xa_end[i]; k++) {     
-           krow = asub[k];
-           kmark = marker[krow];
-           if ( kmark != kcol ) { /* First time visit krow */
-               marker[krow] = kcol;
-               lsub[nextl++] = krow;
-               if ( nextl >= nzlmax ) {
-                   if ( mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, 
Glu) )
-                       return (mem_error);
-                   lsub = Glu->lsub;
-               }
-           }
-       }
-       supno[i] = nsuper;
-    }
-
-    /* Supernode > 1, then make a copy of the subscripts for pruning */
-    if ( jcol < kcol ) {
-       new_next = nextl + (nextl - xlsub[jcol]);
-       while ( new_next > nzlmax ) {
-           if ( mem_error = sLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-               return (mem_error);
-           lsub = Glu->lsub;
-       }
-       ito = nextl;
-       for (ifrom = xlsub[jcol]; ifrom < nextl; )
-           lsub[ito++] = lsub[ifrom++];        
-        for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl;
-       nextl = ito;
-    }
-
-    xsup[nsuper+1] = kcol + 1;
-    supno[kcol+1]  = nsuper;
-    xprune[kcol]   = nextl;
-    xlsub[kcol+1]  = nextl;
-
-    return 0;
-}
-
diff --git a/superlu/ssp_blas2.c b/superlu/ssp_blas2.c
deleted file mode 100644
index 0e14c00b..00000000
--- a/superlu/ssp_blas2.c
+++ /dev/null
@@ -1,481 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
- * File name:          ssp_blas2.c
- * Purpose:            Sparse BLAS 2, using some dense BLAS 2 operations.
- */
-
-#include "slu_sdefs.h"
-extern void strsv_();
-extern void sgemv_();
-
-/* 
- * Function prototypes 
- */
-void susolve(int, int, float*, float*);
-void slsolve(int, int, float*, float*);
-void smatvec(int, int, int, float*, float*, float*);
-
-
-int
-sp_strsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
-         SuperMatrix *U, float *x, SuperLUStat_t *stat, int *info)
-{
-/*
- *   Purpose
- *   =======
- *
- *   sp_strsv() solves one of the systems of equations   
- *       A*x = b,   or   A'*x = b,
- *   where b and x are n element vectors and A is a sparse unit , or   
- *   non-unit, upper or lower triangular matrix.   
- *   No test for singularity or near-singularity is included in this   
- *   routine. Such tests must be performed before calling this routine.   
- *
- *   Parameters   
- *   ==========   
- *
- *   uplo   - (input) char*
- *            On entry, uplo specifies whether the matrix is an upper or   
- *             lower triangular matrix as follows:   
- *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
- *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
- *
- *   trans  - (input) char*
- *             On entry, trans specifies the equations to be solved as   
- *             follows:   
- *                trans = 'N' or 'n'   A*x = b.   
- *                trans = 'T' or 't'   A'*x = b.
- *                trans = 'C' or 'c'   A'*x = b.   
- *
- *   diag   - (input) char*
- *             On entry, diag specifies whether or not A is unit   
- *             triangular as follows:   
- *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
- *                diag = 'N' or 'n'   A is not assumed to be unit   
- *                                    triangular.   
- *          
- *   L       - (input) SuperMatrix*
- *            The factor L from the factorization Pr*A*Pc=L*U. Use
- *             compressed row subscripts storage for supernodes,
- *             i.e., L has types: Stype = SC, Dtype = SLU_S, Mtype = TRLU.
- *
- *   U       - (input) SuperMatrix*
- *             The factor U from the factorization Pr*A*Pc=L*U.
- *             U has types: Stype = NC, Dtype = SLU_S, Mtype = TRU.
- *    
- *   x       - (input/output) float*
- *             Before entry, the incremented array X must contain the n   
- *             element right-hand side vector b. On exit, X is overwritten 
- *             with the solution vector x.
- *
- *   info    - (output) int*
- *             If *info = -i, the i-th argument had an illegal value.
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-        ftcs2 = _cptofcd("N", strlen("N")),
-        ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    SCformat *Lstore;
-    NCformat *Ustore;
-    float   *Lval, *Uval;
-    int incx = 1, incy = 1;
-    float alpha = 1.0, beta = 1.0;
-    int nrow;
-    int fsupc, nsupr, nsupc, luptr, istart, irow;
-    int i, k, iptr, jcol;
-    float *work;
-    flops_t solve_ops;
-
-    /* Test the input parameters */
-    *info = 0;
-    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
-    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && 
-              !lsame_(trans, "C")) *info = -2;
-    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("sp_strsv", &i);
-       return 0;
-    }
-
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-
-    if ( !(work = floatCalloc(L->nrow)) )
-       ABORT("Malloc fails for work in sp_strsv().");
-    
-    if ( lsame_(trans, "N") ) {        /* Form x := inv(A)*x. */
-       
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := inv(L)*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               nrow = nsupr - nsupc;
-
-               solve_ops += nsupc * (nsupc - 1);
-               solve_ops += 2 * nrow * nsupc;
-
-               if ( nsupc == 1 ) {
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       x[irow] -= x[fsupc] * Lval[luptr];
-                   }
-               } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-                   STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-               
-                   SGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-                       &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#else
-                   strsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-               
-                   sgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-                       &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#endif
-#else
-                   slsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
-               
-                   smatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
-                             &x[fsupc], &work[0] );
-#endif         
-               
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; ++i, ++iptr) {
-                       irow = L_SUB(iptr);
-                       x[irow] -= work[i];     /* Scatter */
-                       work[i] = 0.0;
-
-                   }
-               }
-           } /* for k ... */
-           
-       } else {
-           /* Form x := inv(U)*x */
-           
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = Lstore->nsuper; k >= 0; k--) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               
-               solve_ops += nsupc * (nsupc + 1);
-
-               if ( nsupc == 1 ) {
-                   x[fsupc] /= Lval[luptr];
-                   for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
-                       irow = U_SUB(i);
-                       x[irow] -= x[fsupc] * Uval[i];
-                   }
-               } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-                   STRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
-                      &x[fsupc], &incx);
-#else
-                   strsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#endif
-#else          
-                   susolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
-#endif         
-
-                   for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                       solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                       for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
-                               i++) {
-                           irow = U_SUB(i);
-                           x[irow] -= x[jcol] * Uval[i];
-                       }
-                    }
-               }
-           } /* for k ... */
-           
-       }
-    } else { /* Form x := inv(A')*x */
-       
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := inv(L')*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = Lstore->nsuper; k >= 0; --k) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-
-               solve_ops += 2 * (nsupr - nsupc) * nsupc;
-
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   iptr = istart + nsupc;
-                   for (i = L_NZ_START(jcol) + nsupc; 
-                               i < L_NZ_START(jcol+1); i++) {
-                       irow = L_SUB(iptr);
-                       x[jcol] -= x[irow] * Lval[i];
-                       iptr++;
-                   }
-               }
-               
-               if ( nsupc > 1 ) {
-                   solve_ops += nsupc * (nsupc - 1);
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("L", strlen("L"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("U", strlen("U"));
-                   STRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-#else
-                   strsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-#endif
-               }
-           }
-       } else {
-           /* Form x := inv(U')*x */
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   solve_ops += 2*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
-                       irow = U_SUB(i);
-                       x[jcol] -= x[irow] * Uval[i];
-                   }
-               }
-
-               solve_ops += nsupc * (nsupc + 1);
-
-               if ( nsupc == 1 ) {
-                   x[fsupc] /= Lval[luptr];
-               } else {
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("U", strlen("U"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("N", strlen("N"));
-                   STRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#else
-                   strsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#endif
-               }
-           } /* for k ... */
-       }
-    }
-
-    stat->ops[SOLVE] += solve_ops;
-    SUPERLU_FREE(work);
-    return 0;
-}
-
-
-
-
-int
-sp_sgemv(char *trans, float alpha, SuperMatrix *A, float *x, 
-        int incx, float beta, float *y, int incy)
-{
-/*  Purpose   
-    =======   
-
-    sp_sgemv()  performs one of the matrix-vector operations   
-       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
-    where alpha and beta are scalars, x and y are vectors and A is a
-    sparse A->nrow by A->ncol matrix.   
-
-    Parameters   
-    ==========   
-
-    TRANS  - (input) char*
-             On entry, TRANS specifies the operation to be performed as   
-             follows:   
-                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
-                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
-                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
-
-    ALPHA  - (input) float
-             On entry, ALPHA specifies the scalar alpha.   
-
-    A      - (input) SuperMatrix*
-             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
-             Currently, the type of A can be:
-                 Stype = NC or NCP; Dtype = SLU_S; Mtype = GE. 
-             In the future, more general A can be handled.
-
-    X      - (input) float*, array of DIMENSION at least   
-             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
-             Before entry, the incremented array X must contain the   
-             vector x.   
-
-    INCX   - (input) int
-             On entry, INCX specifies the increment for the elements of   
-             X. INCX must not be zero.   
-
-    BETA   - (input) float
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then Y need not be set on input.   
-
-    Y      - (output) float*,  array of DIMENSION at least   
-             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
-             Before entry with BETA non-zero, the incremented array Y   
-             must contain the vector y. On exit, Y is overwritten by the 
-             updated vector y.
-            
-    INCY   - (input) int
-             On entry, INCY specifies the increment for the elements of   
-             Y. INCY must not be zero.   
-
-    ==== Sparse Level 2 Blas routine.   
-*/
-
-    /* Local variables */
-    NCformat *Astore;
-    float   *Aval;
-    int info;
-    float temp;
-    int lenx, leny, i, j, irow;
-    int iy, jx, jy, kx, ky;
-    int notran;
-
-    notran = lsame_(trans, "N");
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    
-    /* Test the input parameters */
-    info = 0;
-    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
-    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
-    else if (incx == 0) info = 5;
-    else if (incy == 0)        info = 8;
-    if (info != 0) {
-       xerbla_("sp_sgemv ", &info);
-       return 0;
-    }
-
-    /* Quick return if possible. */
-    if (A->nrow == 0 || A->ncol == 0 || (alpha == 0. && beta == 1.))
-       return 0;
-
-    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
-       up the start points in  X  and  Y. */
-    if (lsame_(trans, "N")) {
-       lenx = A->ncol;
-       leny = A->nrow;
-    } else {
-       lenx = A->nrow;
-       leny = A->ncol;
-    }
-    if (incx > 0) kx = 0;
-    else kx =  - (lenx - 1) * incx;
-    if (incy > 0) ky = 0;
-    else ky =  - (leny - 1) * incy;
-
-    /* Start the operations. In this version the elements of A are   
-       accessed sequentially with one pass through A. */
-    /* First form  y := beta*y. */
-    if (beta != 1.) {
-       if (incy == 1) {
-           if (beta == 0.)
-               for (i = 0; i < leny; ++i) y[i] = 0.;
-           else
-               for (i = 0; i < leny; ++i) y[i] = beta * y[i];
-       } else {
-           iy = ky;
-           if (beta == 0.)
-               for (i = 0; i < leny; ++i) {
-                   y[iy] = 0.;
-                   iy += incy;
-               }
-           else
-               for (i = 0; i < leny; ++i) {
-                   y[iy] = beta * y[iy];
-                   iy += incy;
-               }
-       }
-    }
-    
-    if (alpha == 0.) return 0;
-
-    if ( notran ) {
-       /* Form  y := alpha*A*x + y. */
-       jx = kx;
-       if (incy == 1) {
-           for (j = 0; j < A->ncol; ++j) {
-               if (x[jx] != 0.) {
-                   temp = alpha * x[jx];
-                   for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                       irow = Astore->rowind[i];
-                       y[irow] += temp * Aval[i];
-                   }
-               }
-               jx += incx;
-           }
-       } else {
-           ABORT("Not implemented.");
-       }
-    } else {
-       /* Form  y := alpha*A'*x + y. */
-       jy = ky;
-       if (incx == 1) {
-           for (j = 0; j < A->ncol; ++j) {
-               temp = 0.;
-               for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                   irow = Astore->rowind[i];
-                   temp += Aval[i] * x[irow];
-               }
-               y[jy] += alpha * temp;
-               jy += incy;
-           }
-       } else {
-           ABORT("Not implemented.");
-       }
-    }
-    return 0;
-} /* sp_sgemv */
-
-
-
diff --git a/superlu/ssp_blas3.c b/superlu/ssp_blas3.c
deleted file mode 100644
index f958ac85..00000000
--- a/superlu/ssp_blas3.c
+++ /dev/null
@@ -1,140 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-/*
- * File name:          sp_blas3.c
- * Purpose:            Sparse BLAS3, using some dense BLAS3 operations.
- */
-
-#include "slu_sdefs.h"
-
-int
-sp_sgemm(char *transa, char *transb, int m, int n, int k, 
-         float alpha, SuperMatrix *A, float *b, int ldb, 
-         float beta, float *c, int ldc)
-{
-/*  Purpose   
-    =======   
-
-    sp_s performs one of the matrix-matrix operations   
-
-       C := alpha*op( A )*op( B ) + beta*C,   
-
-    where  op( X ) is one of 
-
-       op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
-
-    alpha and beta are scalars, and A, B and C are matrices, with op( A ) 
-    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. 
-  
-
-    Parameters   
-    ==========   
-
-    TRANSA - (input) char*
-             On entry, TRANSA specifies the form of op( A ) to be used in 
-             the matrix multiplication as follows:   
-                TRANSA = 'N' or 'n',  op( A ) = A.   
-                TRANSA = 'T' or 't',  op( A ) = A'.   
-                TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).   
-             Unchanged on exit.   
-
-    TRANSB - (input) char*
-             On entry, TRANSB specifies the form of op( B ) to be used in 
-             the matrix multiplication as follows:   
-                TRANSB = 'N' or 'n',  op( B ) = B.   
-                TRANSB = 'T' or 't',  op( B ) = B'.   
-                TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).   
-             Unchanged on exit.   
-
-    M      - (input) int   
-             On entry,  M  specifies  the number of rows of the matrix 
-            op( A ) and of the matrix C.  M must be at least zero. 
-            Unchanged on exit.   
-
-    N      - (input) int
-             On entry,  N specifies the number of columns of the matrix 
-            op( B ) and the number of columns of the matrix C. N must be 
-            at least zero.
-            Unchanged on exit.   
-
-    K      - (input) int
-             On entry, K specifies the number of columns of the matrix 
-            op( A ) and the number of rows of the matrix op( B ). K must 
-            be at least  zero.   
-             Unchanged on exit.
-            
-    ALPHA  - (input) float
-             On entry, ALPHA specifies the scalar alpha.   
-
-    A      - (input) SuperMatrix*
-             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
-             Currently, the type of A can be:
-                 Stype = NC or NCP; Dtype = SLU_S; Mtype = GE. 
-             In the future, more general A can be handled.
-
-    B      - FLOAT PRECISION array of DIMENSION ( LDB, kb ), where kb is 
-             n when TRANSB = 'N' or 'n',  and is  k otherwise.   
-             Before entry with  TRANSB = 'N' or 'n',  the leading k by n 
-             part of the array B must contain the matrix B, otherwise 
-             the leading n by k part of the array B must contain the 
-             matrix B.   
-             Unchanged on exit.   
-
-    LDB    - (input) int
-             On entry, LDB specifies the first dimension of B as declared 
-             in the calling (sub) program. LDB must be at least max( 1, n ).  
-             Unchanged on exit.   
-
-    BETA   - (input) float
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then C need not be set on input.   
-
-    C      - FLOAT PRECISION array of DIMENSION ( LDC, n ).   
-             Before entry, the leading m by n part of the array C must 
-             contain the matrix C,  except when beta is zero, in which 
-             case C need not be set on entry.   
-             On exit, the array C is overwritten by the m by n matrix 
-            ( alpha*op( A )*B + beta*C ).   
-
-    LDC    - (input) int
-             On entry, LDC specifies the first dimension of C as declared 
-             in the calling (sub)program. LDC must be at least max(1,m).   
-             Unchanged on exit.   
-
-    ==== Sparse Level 3 Blas routine.   
-*/
-    int    incx = 1, incy = 1;
-    int    j;
-
-    for (j = 0; j < n; ++j) {
-       sp_sgemv(transa, alpha, A, &b[ldb*j], incx, beta, &c[ldc*j], incy);
-    }
-    return 0;    
-}
diff --git a/superlu/superlu_timer.c b/superlu/superlu_timer.c
deleted file mode 100644
index dedf19e9..00000000
--- a/superlu/superlu_timer.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-/* 
- * Purpose
- * ======= 
- *     Returns the time in seconds used by the process.
- *
- * Note: the timer function call is machine dependent. Use conditional
- *       compilation to choose the appropriate function.
- *
- */
-#ifdef _MSC_VER
-#define NO_TIMER
-#endif
-
-#ifdef SUN 
-/*
- *     It uses the system call gethrtime(3C), which is accurate to 
- *     nanoseconds. 
-*/
-#include <sys/time.h>
- 
-double SuperLU_timer_() {
-    return ( (double)gethrtime() / 1e9 );
-}
-
-#else
-
-#ifndef NO_TIMER
-#include <sys/types.h>
-#include <sys/times.h>
-#include <time.h>
-#include <sys/time.h>
-#endif
-
-#ifndef CLK_TCK
-#define CLK_TCK 60
-#endif
-
-double SuperLU_timer_()
-{
-#ifdef NO_TIMER
-    /* no sys/times.h on WIN32 */
-    double tmp;
-    tmp = 0.0;
-#else
-    struct tms use;
-    double tmp;
-    times(&use);
-    tmp = use.tms_utime;
-    tmp += use.tms_stime;
-#endif
-    return (double)(tmp) / CLK_TCK;
-}
-
-#endif
-
diff --git a/superlu/supermatrix.h b/superlu/supermatrix.h
deleted file mode 100644
index c3dd640b..00000000
--- a/superlu/supermatrix.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-#ifndef __SUPERLU_SUPERMATRIX /* allow multiple inclusions */
-#define __SUPERLU_SUPERMATRIX
-
-/********************************************
- * The matrix types are defined as follows. *
- ********************************************/
-typedef enum {
-    SLU_NC,    /* column-wise, no supernode */
-    SLU_NR,    /* row-wize, no supernode */
-    SLU_SC,    /* column-wise, supernode */
-    SLU_SR,    /* row-wise, supernode */
-    SLU_NCP,   /* column-wise, column-permuted, no supernode 
-                  (The consecutive columns of nonzeros, after permutation,
-                  may not be stored  contiguously.) */
-    SLU_DN     /* Fortran style column-wise storage for dense matrix */
-} Stype_t;
-
-typedef enum {
-    SLU_S,     /* single */
-    SLU_D,     /* double */
-    SLU_C,     /* single complex */
-    SLU_Z      /* double complex */
-} Dtype_t;
-
-typedef enum {
-    SLU_GE,    /* general */
-    SLU_TRLU,  /* lower triangular, unit diagonal */
-    SLU_TRUU,  /* upper triangular, unit diagonal */
-    SLU_TRL,   /* lower triangular */
-    SLU_TRU,   /* upper triangular */
-    SLU_SYL,   /* symmetric, store lower half */
-    SLU_SYU,   /* symmetric, store upper half */
-    SLU_HEL,   /* Hermitian, store lower half */
-    SLU_HEU    /* Hermitian, store upper half */
-} Mtype_t;
-
-typedef struct {
-       Stype_t Stype; /* Storage type: interprets the storage structure 
-                         pointed to by *Store. */
-       Dtype_t Dtype; /* Data type. */
-       Mtype_t Mtype; /* Matrix type: describes the mathematical property of 
-                         the matrix. */
-       int_t  nrow;   /* number of rows */
-       int_t  ncol;   /* number of columns */
-       void *Store;   /* pointer to the actual storage of the matrix */
-} SuperMatrix;
-
-/***********************************************
- * The storage schemes are defined as follows. *
- ***********************************************/
-
-/* Stype == NC (Also known as Harwell-Boeing sparse matrix format) */
-typedef struct {
-    int_t  nnz;            /* number of nonzeros in the matrix */
-    void   *nzval;  /* pointer to array of nonzero values, packed by column */
-    int_t  *rowind; /* pointer to array of row indices of the nonzeros */
-    int_t  *colptr; /* pointer to array of beginning of columns in nzval[] 
-                      and rowind[]  */
-                    /* Note:
-                      Zero-based indexing is used;
-                      colptr[] has ncol+1 entries, the last one pointing
-                      beyond the last column, so that colptr[ncol] = nnz. */
-} NCformat;
-
-/* Stype == NR (Also known as row compressed storage (RCS). */
-typedef struct {
-    int_t nnz;    /* number of nonzeros in the matrix */
-    void  *nzval;  /* pointer to array of nonzero values, packed by row */
-    int_t *colind; /* pointer to array of column indices of the nonzeros */
-    int_t *rowptr; /* pointer to array of beginning of rows in nzval[] 
-                      and colind[]  */
-                   /* Note:
-                     Zero-based indexing is used;
-                     nzval[] and colind[] are of the same length, nnz;
-                     rowptr[] has nrow+1 entries, the last one pointing
-                     beyond the last column, so that rowptr[nrow] = nnz. */
-} NRformat;
-
-/* Stype == SC */
-typedef struct {
-  int_t  nnz;       /* number of nonzeros in the matrix */
-  int_t  nsuper;     /* number of supernodes, minus 1 */
-  void *nzval;       /* pointer to array of nonzero values, packed by column */
-  int_t *nzval_colptr;/* pointer to array of beginning of columns in nzval[] */
-  int_t *rowind;     /* pointer to array of compressed row indices of 
-                       rectangular supernodes */
-  int_t *rowind_colptr;/* pointer to array of beginning of columns in rowind[] 
*/
-  int_t *col_to_sup; /* col_to_sup[j] is the supernode number to which column 
-                       j belongs; mapping from column to supernode number. */
-  int_t *sup_to_col; /* sup_to_col[s] points to the start of the s-th 
-                       supernode; mapping from supernode number to column.
-                       e.g.: col_to_sup: 0 1 2 2 3 3 3 4 4 4 4 4 4 (ncol=12)
-                             sup_to_col: 0 1 2 4 7 12           (nsuper=4) */
-                     /* Note:
-                       Zero-based indexing is used;
-                       nzval_colptr[], rowind_colptr[], col_to_sup and
-                       sup_to_col[] have ncol+1 entries, the last one
-                       pointing beyond the last column.
-                       For col_to_sup[], only the first ncol entries are
-                       defined. For sup_to_col[], only the first nsuper+2
-                       entries are defined. */
-} SCformat;
-
-/* Stype == NCP */
-typedef struct {
-    int_t nnz;   /* number of nonzeros in the matrix */
-    void *nzval;  /* pointer to array of nonzero values, packed by column */
-    int_t *rowind;/* pointer to array of row indices of the nonzeros */
-                 /* Note: nzval[]/rowind[] always have the same length */
-    int_t *colbeg;/* colbeg[j] points to the beginning of column j in nzval[] 
-                     and rowind[]  */
-    int_t *colend;/* colend[j] points to one past the last element of column
-                    j in nzval[] and rowind[]  */
-                 /* Note:
-                    Zero-based indexing is used;
-                    The consecutive columns of the nonzeros may not be 
-                    contiguous in storage, because the matrix has been 
-                    postmultiplied by a column permutation matrix. */
-} NCPformat;
-
-/* Stype == DN */
-typedef struct {
-    int_t lda;    /* leading dimension */
-    void *nzval;  /* array of size lda*ncol to represent a dense matrix */
-} DNformat;
-
-
-
-/*********************************************************
- * Macros used for easy access of sparse matrix entries. *
- *********************************************************/
-#define L_SUB_START(col)     ( Lstore->rowind_colptr[col] )
-#define L_SUB(ptr)           ( Lstore->rowind[ptr] )
-#define L_NZ_START(col)      ( Lstore->nzval_colptr[col] )
-#define L_FST_SUPC(superno)  ( Lstore->sup_to_col[superno] )
-#define U_NZ_START(col)      ( Ustore->colptr[col] )
-#define U_SUB(ptr)           ( Ustore->rowind[ptr] )
-
-#ifdef __cplusplus
-extern "C" 
-#endif
-int handle_getfem_callback(); /* this one is in ../src/getfem_superlu.cc */
-
-#endif  /* __SUPERLU_SUPERMATRIX */
diff --git a/superlu/sutil.c b/superlu/sutil.c
deleted file mode 100644
index 09b51d49..00000000
--- a/superlu/sutil.c
+++ /dev/null
@@ -1,478 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include <math.h>
-#include "slu_sdefs.h"
-
-void
-sCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-                      float *nzval, int *rowind, int *colptr,
-                      Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-{
-    NCformat *Astore;
-
-    A->Stype = stype;
-    A->Dtype = dtype;
-    A->Mtype = mtype;
-    A->nrow = m;
-    A->ncol = n;
-    A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) );
-    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
-    Astore = A->Store;
-    Astore->nnz = nnz;
-    Astore->nzval = nzval;
-    Astore->rowind = rowind;
-    Astore->colptr = colptr;
-}
-
-void
-sCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-                      float *nzval, int *colind, int *rowptr,
-                      Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-{
-    NRformat *Astore;
-
-    A->Stype = stype;
-    A->Dtype = dtype;
-    A->Mtype = mtype;
-    A->nrow = m;
-    A->ncol = n;
-    A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) );
-    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
-    Astore = A->Store;
-    Astore->nnz = nnz;
-    Astore->nzval = nzval;
-    Astore->colind = colind;
-    Astore->rowptr = rowptr;
-}
-
-/* Copy matrix A into matrix B. */
-void
-sCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
-{
-    NCformat *Astore, *Bstore;
-    int      ncol, nnz, i;
-
-    B->Stype = A->Stype;
-    B->Dtype = A->Dtype;
-    B->Mtype = A->Mtype;
-    B->nrow  = A->nrow;;
-    B->ncol  = ncol = A->ncol;
-    Astore   = (NCformat *) A->Store;
-    Bstore   = (NCformat *) B->Store;
-    Bstore->nnz = nnz = Astore->nnz;
-    for (i = 0; i < nnz; ++i)
-       ((float *)Bstore->nzval)[i] = ((float *)Astore->nzval)[i];
-    for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
-    for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
-}
-
-
-void
-sCreate_Dense_Matrix(SuperMatrix *X, int m, int n, float *x, int ldx,
-                   Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-{
-    DNformat    *Xstore;
-    
-    X->Stype = stype;
-    X->Dtype = dtype;
-    X->Mtype = mtype;
-    X->nrow = m;
-    X->ncol = n;
-    X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
-    if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
-    Xstore = (DNformat *) X->Store;
-    Xstore->lda = ldx;
-    Xstore->nzval = (float *) x;
-}
-
-void
-sCopy_Dense_Matrix(int M, int N, float *X, int ldx,
-                       float *Y, int ldy)
-{
-/*
- *
- *  Purpose
- *  =======
- *
- *  Copies a two-dimensional matrix X to another matrix Y.
- */
-    int    i, j;
-    
-    for (j = 0; j < N; ++j)
-        for (i = 0; i < M; ++i)
-            Y[i + j*ldy] = X[i + j*ldx];
-}
-
-void
-sCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz, 
-                       float *nzval, int *nzval_colptr, int *rowind,
-                       int *rowind_colptr, int *col_to_sup, int *sup_to_col,
-                       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-{
-    SCformat *Lstore;
-
-    L->Stype = stype;
-    L->Dtype = dtype;
-    L->Mtype = mtype;
-    L->nrow = m;
-    L->ncol = n;
-    L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) );
-    if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store");
-    Lstore = L->Store;
-    Lstore->nnz = nnz;
-    Lstore->nsuper = col_to_sup[n];
-    Lstore->nzval = nzval;
-    Lstore->nzval_colptr = nzval_colptr;
-    Lstore->rowind = rowind;
-    Lstore->rowind_colptr = rowind_colptr;
-    Lstore->col_to_sup = col_to_sup;
-    Lstore->sup_to_col = sup_to_col;
-
-}
-
-
-/*
- * Convert a row compressed storage into a column compressed storage.
- */
-void
-sCompRow_to_CompCol(int m, int n, int nnz, 
-                   float *a, int *colind, int *rowptr,
-                   float **at, int **rowind, int **colptr)
-{
-    register int i, j, col, relpos;
-    int *marker;
-
-    /* Allocate storage for another copy of the matrix. */
-    *at = (float *) floatMalloc(nnz);
-    *rowind = (int *) intMalloc(nnz);
-    *colptr = (int *) intMalloc(n+1);
-    marker = (int *) intCalloc(n);
-    
-    /* Get counts of each column of A, and set up column pointers */
-    for (i = 0; i < m; ++i)
-       for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
-    (*colptr)[0] = 0;
-    for (j = 0; j < n; ++j) {
-       (*colptr)[j+1] = (*colptr)[j] + marker[j];
-       marker[j] = (*colptr)[j];
-    }
-
-    /* Transfer the matrix into the compressed column storage. */
-    for (i = 0; i < m; ++i) {
-       for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
-           col = colind[j];
-           relpos = marker[col];
-           (*rowind)[relpos] = i;
-           (*at)[relpos] = a[j];
-           ++marker[col];
-       }
-    }
-
-    SUPERLU_FREE(marker);
-}
-
-
-void
-sPrint_CompCol_Matrix(char *what, SuperMatrix *A)
-{
-    NCformat     *Astore;
-    register int i,n;
-    float       *dp;
-    
-    printf("\nCompCol matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    n = A->ncol;
-    Astore = (NCformat *) A->Store;
-    dp = (float *) Astore->nzval;
-    printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
-    printf("nzval: ");
-    for (i = 0; i < Astore->colptr[n]; ++i) printf("%f  ", dp[i]);
-    printf("\nrowind: ");
-    for (i = 0; i < Astore->colptr[n]; ++i) printf("%d  ", Astore->rowind[i]);
-    printf("\ncolptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->colptr[i]);
-    printf("\n");
-    fflush(stdout);
-}
-
-void
-sPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
-{
-    SCformat     *Astore;
-    register int i, j, k, c, d, n, nsup;
-    float       *dp;
-    int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
-    
-    printf("\nSuperNode matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    n = A->ncol;
-    Astore = (SCformat *) A->Store;
-    dp = (float *) Astore->nzval;
-    col_to_sup = Astore->col_to_sup;
-    sup_to_col = Astore->sup_to_col;
-    rowind_colptr = Astore->rowind_colptr;
-    rowind = Astore->rowind;
-    printf("nrow %d, ncol %d, nnz %d, nsuper %d\n", 
-          A->nrow,A->ncol,Astore->nnz,Astore->nsuper);
-    printf("nzval:\n");
-    for (k = 0; k <= Astore->nsuper; ++k) {
-      c = sup_to_col[k];
-      nsup = sup_to_col[k+1] - c;
-      for (j = c; j < c + nsup; ++j) {
-       d = Astore->nzval_colptr[j];
-       for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
-         printf("%d\t%d\t%e\n", rowind[i], j, dp[d++]);
-       }
-      }
-    }
-#if 0
-    for (i = 0; i < Astore->nzval_colptr[n]; ++i) printf("%f  ", dp[i]);
-#endif
-    printf("\nnzval_colptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->nzval_colptr[i]);
-    printf("\nrowind: ");
-    for (i = 0; i < Astore->rowind_colptr[n]; ++i) 
-        printf("%d  ", Astore->rowind[i]);
-    printf("\nrowind_colptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->rowind_colptr[i]);
-    printf("\ncol_to_sup: ");
-    for (i = 0; i < n; ++i) printf("%d  ", col_to_sup[i]);
-    printf("\nsup_to_col: ");
-    for (i = 0; i <= Astore->nsuper+1; ++i) 
-        printf("%d  ", sup_to_col[i]);
-    printf("\n");
-    fflush(stdout);
-}
-
-void
-sPrint_Dense_Matrix(char *what, SuperMatrix *A)
-{
-    DNformat     *Astore;
-    register int i, j, lda = Astore->lda;
-    float       *dp;
-    
-    printf("\nDense matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    Astore = (DNformat *) A->Store;
-    dp = (float *) Astore->nzval;
-    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda);
-    printf("\nnzval: ");
-    for (j = 0; j < A->ncol; ++j) {
-        for (i = 0; i < A->nrow; ++i) printf("%f  ", dp[i + j*lda]);
-        printf("\n");
-    }
-    printf("\n");
-    fflush(stdout);
-}
-
-/*
- * Diagnostic print of column "jcol" in the U/L factor.
- */
-void
-sprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
-{
-    int     i, k, fsupc;
-    int     *xsup, *supno;
-    int     *xlsub, *lsub;
-    float  *lusup;
-    int     *xlusup;
-    float  *ucol;
-    int     *usub, *xusub;
-
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    
-    printf("%s", msg);
-    printf("col %d: pivrow %d, supno %d, xprune %d\n", 
-          jcol, pivrow, supno[jcol], xprune[jcol]);
-    
-    printf("\tU-col:\n");
-    for (i = xusub[jcol]; i < xusub[jcol+1]; i++)
-       printf("\t%d%10.4f\n", usub[i], ucol[i]);
-    printf("\tL-col in rectangular snode:\n");
-    fsupc = xsup[supno[jcol]]; /* first col of the snode */
-    i = xlsub[fsupc];
-    k = xlusup[jcol];
-    while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) {
-       printf("\t%d\t%10.4f\n", lsub[i], lusup[k]);
-       i++; k++;
-    }
-    fflush(stdout);
-}
-
-
-/*
- * Check whether tempv[] == 0. This should be true before and after 
- * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". 
- */
-void scheck_tempv(int n, float *tempv)
-{
-    int i;
-       
-    for (i = 0; i < n; i++) {
-       if (tempv[i] != 0.0) 
-       {
-           fprintf(stderr,"tempv[%d] = %f\n", i,tempv[i]);
-           ABORT("scheck_tempv");
-       }
-    }
-}
-
-
-void
-sGenXtrue(int n, int nrhs, float *x, int ldx)
-{
-    int  i, j;
-    for (j = 0; j < nrhs; ++j)
-       for (i = 0; i < n; ++i) {
-           x[i + j*ldx] = 1.0;/* + (float)(i+1.)/n;*/
-       }
-}
-
-/*
- * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
- */
-void
-sFillRHS(trans_t trans, int nrhs, float *x, int ldx,
-         SuperMatrix *A, SuperMatrix *B)
-{
-    NCformat *Astore;
-    float   *Aval;
-    DNformat *Bstore;
-    float   *rhs;
-    float one = 1.0;
-    float zero = 0.0;
-    int      ldc;
-    char transc[1];
-
-    Astore = A->Store;
-    Aval   = (float *) Astore->nzval;
-    Bstore = B->Store;
-    rhs    = Bstore->nzval;
-    ldc    = Bstore->lda;
-    
-    if ( trans == NOTRANS ) *(unsigned char *)transc = 'N';
-    else *(unsigned char *)transc = 'T';
-
-    sp_sgemm(transc, "N", A->nrow, nrhs, A->ncol, one, A,
-            x, ldx, zero, rhs, ldc);
-
-}
-
-/* 
- * Fills a float precision array with a given value.
- */
-void 
-sfill(float *a, int alen, float dval)
-{
-    register int i;
-    for (i = 0; i < alen; i++) a[i] = dval;
-}
-
-
-
-/* 
- * Check the inf-norm of the error vector 
- */
-void sinf_norm_error(int nrhs, SuperMatrix *X, float *xtrue)
-{
-    DNformat *Xstore;
-    float err, xnorm;
-    float *Xmat, *soln_work;
-    int i, j;
-
-    Xstore = X->Store;
-    Xmat = Xstore->nzval;
-
-    for (j = 0; j < nrhs; j++) {
-      soln_work = &Xmat[j*Xstore->lda];
-      err = xnorm = 0.0;
-      for (i = 0; i < X->nrow; i++) {
-       err = SUPERLU_MAX(err, fabs(soln_work[i] - xtrue[i]));
-       xnorm = SUPERLU_MAX(xnorm, fabs(soln_work[i]));
-      }
-      err = err / xnorm;
-      printf("||X - Xtrue||/||X|| = %e\n", err);
-    }
-}
-
-
-
-/* Print performance of the code. */
-void
-sPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
-           float rpg, float rcond, float *ferr,
-           float *berr, char *equed, SuperLUStat_t *stat)
-{
-    SCformat *Lstore;
-    NCformat *Ustore;
-    double   *utime;
-    flops_t  *ops;
-    
-    utime = stat->utime;
-    ops   = stat->ops;
-    
-    if ( utime[FACT] != 0. )
-       printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
-              ops[FACT]*1e-6/utime[FACT]);
-    printf("Identify relaxed snodes    = %8.2f\n", utime[RELAX]);
-    if ( utime[SOLVE] != 0. )
-       printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE],
-              ops[SOLVE]*1e-6/utime[SOLVE]);
-    
-    Lstore = (SCformat *) L->Store;
-    Ustore = (NCformat *) U->Store;
-    printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz);
-    printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz);
-    printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
-       
-    printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
-          mem_usage->for_lu/1e6, mem_usage->total_needed/1e6,
-          mem_usage->expansions);
-       
-    printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n");
-    printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n",
-          utime[FACT], ops[FACT]*1e-6/utime[FACT],
-          utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE],
-          utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]);
-    
-    printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n");
-    printf("NUM:\t%e\t%e\t%e\t%e\t%s\n",
-          rpg, rcond, ferr[0], berr[0], equed);
-    
-}
-
-
-
-
-int print_float_vec(char *what, int n, float *vec)
-{
-    int i;
-    printf("%s: n %d\n", what, n);
-    for (i = 0; i < n; ++i) printf("%d\t%f\n", i, vec[i]);
-    return 0;
-}
-
diff --git a/superlu/util.c b/superlu/util.c
deleted file mode 100644
index c803162b..00000000
--- a/superlu/util.c
+++ /dev/null
@@ -1,405 +0,0 @@
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include <math.h>
-#include "slu_ddefs.h"
-
-/* 
- * Global statistics variale
- */
-
-void superlu_abort_and_exit(char* msg)
-{
-  fprintf(stderr, "%s",  msg);
-  exit (-1);
-}
-
-/*
- * Set the default values for the options argument.
- */
-void set_default_options(superlu_options_t *options)
-{
-    options->Fact = DOFACT;
-    options->Equil = YES;
-    options->ColPerm = COLAMD;
-    options->DiagPivotThresh = 1.0;
-    options->Trans = NOTRANS;
-    options->IterRefine = NOREFINE;
-    options->SymmetricMode = NO;
-    options->PivotGrowth = NO;
-    options->ConditionNumber = NO;
-    options->PrintStat = YES;
-}
-
-/*
- * Print the options setting.
- */
-void print_options(superlu_options_t *options)
-{
-    printf(".. options:\n");
-    printf("\tFact\t %8d\n", options->Fact);
-    printf("\tEquil\t %8d\n", options->Equil);
-    printf("\tColPerm\t %8d\n", options->ColPerm);
-    printf("\tDiagPivotThresh %8.4f\n", options->DiagPivotThresh);
-    printf("\tTrans\t %8d\n", options->Trans);
-    printf("\tIterRefine\t%4d\n", options->IterRefine);
-    printf("\tSymmetricMode\t%4d\n", options->SymmetricMode);
-    printf("\tPivotGrowth\t%4d\n", options->PivotGrowth);
-    printf("\tConditionNumber\t%4d\n", options->ConditionNumber);
-    printf("..\n");
-}
-
-/* Deallocate the structure pointing to the actual storage of the matrix. */
-void
-Destroy_SuperMatrix_Store(SuperMatrix *A)
-{
-    SUPERLU_FREE ( A->Store );
-}
-
-void
-Destroy_CompCol_Matrix(SuperMatrix *A)
-{
-    SUPERLU_FREE( ((NCformat *)A->Store)->rowind );
-    SUPERLU_FREE( ((NCformat *)A->Store)->colptr );
-    SUPERLU_FREE( ((NCformat *)A->Store)->nzval );
-    SUPERLU_FREE( A->Store );
-}
-
-void
-Destroy_CompRow_Matrix(SuperMatrix *A)
-{
-    SUPERLU_FREE( ((NRformat *)A->Store)->colind );
-    SUPERLU_FREE( ((NRformat *)A->Store)->rowptr );
-    SUPERLU_FREE( ((NRformat *)A->Store)->nzval );
-    SUPERLU_FREE( A->Store );
-}
-
-void
-Destroy_SuperNode_Matrix(SuperMatrix *A)
-{
-    SUPERLU_FREE ( ((SCformat *)A->Store)->rowind );
-    SUPERLU_FREE ( ((SCformat *)A->Store)->rowind_colptr );
-    SUPERLU_FREE ( ((SCformat *)A->Store)->nzval );
-    SUPERLU_FREE ( ((SCformat *)A->Store)->nzval_colptr );
-    SUPERLU_FREE ( ((SCformat *)A->Store)->col_to_sup );
-    SUPERLU_FREE ( ((SCformat *)A->Store)->sup_to_col );
-    SUPERLU_FREE ( A->Store );
-}
-
-/* A is of type Stype==NCP */
-void
-Destroy_CompCol_Permuted(SuperMatrix *A)
-{
-    SUPERLU_FREE ( ((NCPformat *)A->Store)->colbeg );
-    SUPERLU_FREE ( ((NCPformat *)A->Store)->colend );
-    SUPERLU_FREE ( A->Store );
-}
-
-/* A is of type Stype==DN */
-void
-Destroy_Dense_Matrix(SuperMatrix *A)
-{
-    DNformat* Astore = A->Store;
-    SUPERLU_FREE (Astore->nzval);
-    SUPERLU_FREE ( A->Store );
-}
-
-/*
- * Reset repfnz[] for the current column 
- */
-void
-resetrep_col (const int nseg, const int *segrep, int *repfnz)
-{
-    int i, irep;
-    
-    for (i = 0; i < nseg; i++) {
-       irep = segrep[i];
-       repfnz[irep] = EMPTY;
-    }
-}
-
-
-/*
- * Count the total number of nonzeros in factors L and U,  and in the 
- * symmetrically reduced L. 
- */
-void
-countnz(const int n, int *xprune, int *nnzL, int *nnzU, GlobalLU_t *Glu)
-{
-    int          nsuper, fsupc, i, j;
-    int          nnzL0, jlen, irep;
-    int          *xsup, *xlsub;
-
-    xsup   = Glu->xsup;
-    xlsub  = Glu->xlsub;
-    *nnzL  = 0;
-    *nnzU  = (Glu->xusub)[n];
-    nnzL0  = 0;
-    nsuper = (Glu->supno)[n];
-
-    if ( n <= 0 ) return;
-
-    /* 
-     * For each supernode
-     */
-    for (i = 0; i <= nsuper; i++) {
-       fsupc = xsup[i];
-       jlen = xlsub[fsupc+1] - xlsub[fsupc];
-
-       for (j = fsupc; j < xsup[i+1]; j++) {
-           *nnzL += jlen;
-           *nnzU += j - fsupc + 1;
-           jlen--;
-       }
-       irep = xsup[i+1] - 1;
-       nnzL0 += xprune[irep] - xlsub[irep];
-    }
-    
-    /* printf("\tNo of nonzeros in symm-reduced L = %d\n", nnzL0);*/
-}
-
-
-
-/*
- * Fix up the data storage lsub for L-subscripts. It removes the subscript
- * sets for structural pruning,        and applies permuation to the remaining
- * subscripts.
- */
-void
-fixupL(const int n, const int *perm_r, GlobalLU_t *Glu)
-{
-    register int nsuper, fsupc, nextl, i, j, k, jstrt;
-    int          *xsup, *lsub, *xlsub;
-
-    if ( n <= 1 ) return;
-
-    xsup   = Glu->xsup;
-    lsub   = Glu->lsub;
-    xlsub  = Glu->xlsub;
-    nextl  = 0;
-    nsuper = (Glu->supno)[n];
-    
-    /* 
-     * For each supernode ...
-     */
-    for (i = 0; i <= nsuper; i++) {
-       fsupc = xsup[i];
-       jstrt = xlsub[fsupc];
-       xlsub[fsupc] = nextl;
-       for (j = jstrt; j < xlsub[fsupc+1]; j++) {
-           lsub[nextl] = perm_r[lsub[j]]; /* Now indexed into P*A */
-           nextl++;
-       }
-       for (k = fsupc+1; k < xsup[i+1]; k++) 
-               xlsub[k] = nextl;       /* Other columns in supernode i */
-
-    }
-
-    xlsub[n] = nextl;
-}
-
-
-/*
- * Diagnostic print of segment info after panel_dfs().
- */
-void print_panel_seg(int n, int w, int jcol, int nseg, 
-                    int *segrep, int *repfnz)
-{
-    int j, k;
-    
-    for (j = jcol; j < jcol+w; j++) {
-       printf("\tcol %d:\n", j);
-       for (k = 0; k < nseg; k++)
-           printf("\t\tseg %d, segrep %d, repfnz %d\n", k, 
-                       segrep[k], repfnz[(j-jcol)*n + segrep[k]]);
-    }
-
-}
-
-
-void
-StatInit(SuperLUStat_t *stat)
-{
-    register int i, w, panel_size, relax;
-
-    panel_size = sp_ienv(1);
-    relax = sp_ienv(2);
-    w = SUPERLU_MAX(panel_size, relax);
-    stat->panel_histo = intCalloc(w+1);
-    stat->utime = (double *) SUPERLU_MALLOC(NPHASES * sizeof(double));
-    if (!stat->utime) ABORT("SUPERLU_MALLOC fails for stat->utime");
-    stat->ops = (flops_t *) SUPERLU_MALLOC(NPHASES * sizeof(flops_t));
-    if (!stat->ops) ABORT("SUPERLU_MALLOC fails for stat->ops");
-    for (i = 0; i < NPHASES; ++i) {
-        stat->utime[i] = 0.;
-        stat->ops[i] = 0.;
-    }
-}
-
-
-void
-StatPrint(SuperLUStat_t *stat)
-{
-    double         *utime;
-    flops_t        *ops;
-
-    utime = stat->utime;
-    ops   = stat->ops;
-    printf("Factor time  = %8.2f\n", utime[FACT]);
-    if ( utime[FACT] != 0.0 )
-      printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
-            ops[FACT]*1e-6/utime[FACT]);
-
-    printf("Solve time   = %8.2f\n", utime[SOLVE]);
-    if ( utime[SOLVE] != 0.0 )
-      printf("Solve flops = %e\tMflops = %8.2f\n", ops[SOLVE],
-            ops[SOLVE]*1e-6/utime[SOLVE]);
-
-}
-
-
-void
-StatFree(SuperLUStat_t *stat)
-{
-    SUPERLU_FREE(stat->panel_histo);
-    SUPERLU_FREE(stat->utime);
-    SUPERLU_FREE(stat->ops);
-}
-
-
-flops_t
-LUFactFlops(SuperLUStat_t *stat)
-{
-    return (stat->ops[FACT]);
-}
-
-flops_t
-LUSolveFlops(SuperLUStat_t *stat)
-{
-    return (stat->ops[SOLVE]);
-}
-
-
-
-
-
-/* 
- * Fills an integer array with a given value.
- */
-void ifill(int *a, int alen, int ival)
-{
-    register int i;
-    for (i = 0; i < alen; i++) a[i] = ival;
-}
-
-
-
-/* 
- * Get the statistics of the supernodes 
- */
-#define NBUCKS 10
-static         int     max_sup_size;
-
-void super_stats(int nsuper, int *xsup)
-{
-    register int nsup1 = 0;
-    int          i, isize, whichb, bl, bh;
-    int          bucket[NBUCKS];
-
-    max_sup_size = 0;
-
-    for (i = 0; i <= nsuper; i++) {
-       isize = xsup[i+1] - xsup[i];
-       if ( isize == 1 ) nsup1++;
-       if ( max_sup_size < isize ) max_sup_size = isize;       
-    }
-
-    printf("    Supernode statistics:\n\tno of super = %d\n", nsuper+1);
-    printf("\tmax supernode size = %d\n", max_sup_size);
-    printf("\tno of size 1 supernodes = %d\n", nsup1);
-
-    /* Histogram of the supernode sizes */
-    ifill (bucket, NBUCKS, 0);
-
-    for (i = 0; i <= nsuper; i++) {
-        isize = xsup[i+1] - xsup[i];
-        whichb = (float) isize / max_sup_size * NBUCKS;
-        if (whichb >= NBUCKS) whichb = NBUCKS - 1;
-        bucket[whichb]++;
-    }
-    
-    printf("\tHistogram of supernode sizes:\n");
-    for (i = 0; i < NBUCKS; i++) {
-        bl = (float) i * max_sup_size / NBUCKS;
-        bh = (float) (i+1) * max_sup_size / NBUCKS;
-        printf("\tsnode: %d-%d\t\t%d\n", bl+1, bh, bucket[i]);
-    }
-
-}
-
-
-float SpaSize(int n, int np, float sum_npw)
-{
-    return (sum_npw*8 + np*8 + n*4)/1024.;
-}
-
-float DenseSize(int n, float sum_nw)
-{
-    return (sum_nw*8 + n*8)/1024.;;
-}
-
-
-
-/*
- * Check whether repfnz[] == EMPTY after reset.
- */
-void check_repfnz(int n, int w, int jcol, int *repfnz)
-{
-    int jj, k;
-
-    for (jj = jcol; jj < jcol+w; jj++) 
-       for (k = 0; k < n; k++)
-           if ( repfnz[(jj-jcol)*n + k] != EMPTY ) {
-               fprintf(stderr, "col %d, repfnz_col[%d] = %d\n", jj,
-                       k, repfnz[(jj-jcol)*n + k]);
-               ABORT("check_repfnz");
-           }
-}
-
-
-/* Print a summary of the testing results. */
-void
-PrintSumm(char *type, int nfail, int nrun, int nerrs)
-{
-    if ( nfail > 0 )
-       printf("%3s driver: %d out of %d tests failed to pass the threshold\n",
-              type, nfail, nrun);
-    else
-       printf("All tests for %3s driver passed the threshold (%6d tests 
run)\n", type, nrun);
-
-    if ( nerrs > 0 )
-       printf("%6d error messages recorded\n", nerrs);
-}
-
-
-int print_int_vec(char *what, int n, int *vec)
-{
-    int i;
-    printf("%s\n", what);
-    for (i = 0; i < n; ++i) printf("%d\t%d\n", i, vec[i]);
-    return 0;
-}
diff --git a/superlu/xerbla.c b/superlu/xerbla.c
deleted file mode 100644
index c7995cb9..00000000
--- a/superlu/xerbla.c
+++ /dev/null
@@ -1,83 +0,0 @@
-#include <stdio.h>
-#include "slu_Cnames.h"
-
-/* Subroutine */ int xerbla_(char *srname, int *info)
-{
-/*  -- LAPACK auxiliary routine (version 2.0) --   
-       Copyright (c) 1992-2013 The University of Tennessee and The University
-                        of Tennessee Research Foundation.  All rights
-                        reserved.
-       Copyright (c) 2000-2013 The University of California Berkeley. All
-                        rights reserved.
-       Copyright (c) 2006-2013 The University of Colorado Denver.  All rights
-                        reserved.
-
-       Redistribution and use in source and binary forms, with or without
-       modification, are permitted provided that the following conditions are
-       met:
-
-       - Redistributions of source code must retain the above copyright
-         notice, this list of conditions and the following disclaimer.
-
-       - Redistributions in binary form must reproduce the above copyright
-         notice, this list of conditions and the following disclaimer listed
-         in this license in the documentation and/or other materials
-         provided with the distribution.
-
-       - Neither the name of the copyright holders nor the names of its
-         contributors may be used to endorse or promote products derived from
-         this software without specific prior written permission.
-
-       The copyright holders provide no reassurances that the source code
-       provided does not infringe any patent, copyright, or any other
-       intellectual property rights of third parties.  The copyright holders
-       disclaim any liability to any recipient for claims brought against
-       recipient by any third party for infringement of that parties
-       intellectual property rights.
-
-       THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-       "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-       LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-       A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-       OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-       SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-       LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-       DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-       THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-       (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-       OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
-
-    Purpose   
-    =======   
-
-    XERBLA  is an error handler for the LAPACK routines.   
-    It is called by an LAPACK routine if an input parameter has an   
-    invalid value.  A message is printed and execution stops.   
-
-    Installers may consider modifying the STOP statement in order to   
-    call system-specific exception-handling facilities.   
-
-    Arguments   
-    =========   
-
-    SRNAME  (input) CHARACTER*6   
-            The name of the routine which called XERBLA.   
-
-    INFO    (input) INT   
-            The position of the invalid parameter in the parameter list   
-
-            of the calling routine.   
-
-   ===================================================================== 
-*/
-
-    printf("** On entry to %6s, parameter number %2d had an illegal value\n",
-               srname, *info);
-
-/*     End of XERBLA */
-
-    return 0;
-} /* xerbla_ */
-
diff --git a/superlu/zcolumn_bmod.c b/superlu/zcolumn_bmod.c
deleted file mode 100644
index f5168ba5..00000000
--- a/superlu/zcolumn_bmod.c
+++ /dev/null
@@ -1,363 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_zdefs.h"
-extern void ztrsv_();
-extern void zgemv_();
-
-
-/* 
- * Function prototypes 
- */
-void zusolve(int, int, doublecomplex*, doublecomplex*);
-void zlsolve(int, int, doublecomplex*, doublecomplex*);
-void zmatvec(int, int, int, doublecomplex*, doublecomplex*, doublecomplex*);
-
-
-
-/* Return value:   0 - successful return
- *               > 0 - number of bytes allocated when run out of space
- */
-int
-zcolumn_bmod (
-            const int  jcol,     /* in */
-            const int  nseg,     /* in */
-            doublecomplex     *dense,    /* in */
-            doublecomplex     *tempv,    /* working array */
-            int        *segrep,  /* in */
-            int        *repfnz,  /* in */
-            int        fpanelc,  /* in -- first column in the current panel */
-            GlobalLU_t *Glu,     /* modified */
-            SuperLUStat_t *stat  /* output */
-            )
-{
-/*
- * Purpose:
- * ========
- *    Performs numeric block updates (sup-col) in topological order.
- *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
- *    Special processing on the supernodal portion of L\U[*,j]
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-         ftcs2 = _cptofcd("N", strlen("N")),
-         ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    int         incx = 1, incy = 1;
-    doublecomplex      alpha, beta;
-    
-    /* krep = representative of current k-th supernode
-     * fsupc = first supernodal column
-     * nsupc = no of columns in supernode
-     * nsupr = no of rows in supernode (used as leading dimension)
-     * luptr = location of supernodal LU-block in storage
-     * kfnz = first nonz in the k-th supernodal segment
-     * no_zeros = no of leading zeros in a supernodal U-segment
-     */
-    doublecomplex       ukj, ukj1, ukj2;
-    int          luptr, luptr1, luptr2;
-    int          fsupc, nsupc, nsupr, segsze;
-    int          nrow;   /* No of rows in the matrix of matrix-vector */
-    int          jcolp1, jsupno, k, ksub, krep, krep_ind, ksupno;
-    register int lptr, kfnz, isub, irow, i;
-    register int no_zeros, new_next; 
-    int          ufirst, nextlu;
-    int          fst_col; /* First column within small LU update */
-    int          d_fsupc; /* Distance between the first column of the current
-                            panel and the first column of the current snode. */
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    doublecomplex       *lusup;
-    int          *xlusup;
-    int          nzlumax;
-    doublecomplex       *tempv1;
-    doublecomplex      zero = {0.0, 0.0};
-    doublecomplex      one = {1.0, 0.0};
-    doublecomplex      none = {-1.0, 0.0};
-    doublecomplex       comp_temp, comp_temp1;
-    int          mem_error;
-    flops_t      *ops = stat->ops;
-
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    nzlumax = Glu->nzlumax;
-    jcolp1 = jcol + 1;
-    jsupno = supno[jcol];
-    
-    /* 
-     * For each nonz supernode segment of U[*,j] in topological order 
-     */
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-
-       krep = segrep[k];
-       k--;
-       ksupno = supno[krep];
-       if ( jsupno != ksupno ) { /* Outside the rectangular supernode */
-
-           fsupc = xsup[ksupno];
-           fst_col = SUPERLU_MAX ( fsupc, fpanelc );
-
-           /* Distance from the current supernode to the current panel; 
-              d_fsupc=0 if fsupc > fpanelc. */
-           d_fsupc = fst_col - fsupc; 
-
-           luptr = xlusup[fst_col] + d_fsupc;
-           lptr = xlsub[fsupc] + d_fsupc;
-
-           kfnz = repfnz[krep];
-           kfnz = SUPERLU_MAX ( kfnz, fpanelc );
-
-           segsze = krep - kfnz + 1;
-           nsupc = krep - fst_col + 1;
-           nsupr = xlsub[fsupc+1] - xlsub[fsupc];      /* Leading dimension */
-           nrow = nsupr - d_fsupc - nsupc;
-           krep_ind = lptr + nsupc - 1;
-
-           ops[TRSV] += 4 * segsze * (segsze - 1);
-           ops[GEMV] += 8 * nrow * segsze;
-
-
-
-           /* 
-            * Case 1: Update U-segment of size 1 -- col-col update 
-            */
-           if ( segsze == 1 ) {
-               ukj = dense[lsub[krep_ind]];
-               luptr += nsupr*(nsupc-1) + nsupc;
-
-               for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                   irow = lsub[i];
-                   zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                   z_sub(&dense[irow], &dense[irow], &comp_temp);
-                   luptr++;
-               }
-
-           } else if ( segsze <= 3 ) {
-               ukj = dense[lsub[krep_ind]];
-               luptr += nsupr*(nsupc-1) + nsupc-1;
-               ukj1 = dense[lsub[krep_ind - 1]];
-               luptr1 = luptr - nsupr;
-
-               if ( segsze == 2 ) { /* Case 2: 2cols-col update */
-                   zz_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                   z_sub(&ukj, &ukj, &comp_temp);
-                   dense[lsub[krep_ind]] = ukj;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                       irow = lsub[i];
-                       luptr++;
-                       luptr1++;
-                       zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                       zz_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                       z_add(&comp_temp, &comp_temp, &comp_temp1);
-                       z_sub(&dense[irow], &dense[irow], &comp_temp);
-                   }
-               } else { /* Case 3: 3cols-col update */
-                   ukj2 = dense[lsub[krep_ind - 2]];
-                   luptr2 = luptr1 - nsupr;
-                   zz_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
-                   z_sub(&ukj1, &ukj1, &comp_temp);
-
-                   zz_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                   zz_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                   z_add(&comp_temp, &comp_temp, &comp_temp1);
-                   z_sub(&ukj, &ukj, &comp_temp);
-
-                   dense[lsub[krep_ind]] = ukj;
-                   dense[lsub[krep_ind-1]] = ukj1;
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                       irow = lsub[i];
-                       luptr++;
-                       luptr1++;
-                       luptr2++;
-                       zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                       zz_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                       z_add(&comp_temp, &comp_temp, &comp_temp1);
-                       zz_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                       z_add(&comp_temp, &comp_temp, &comp_temp1);
-                       z_sub(&dense[irow], &dense[irow], &comp_temp);
-                   }
-               }
-
-
-           } else {
-               /*
-                * Case: sup-col update
-                * Perform a triangular solve and block update,
-                * then scatter the result of sup-col update to dense
-                */
-
-               no_zeros = kfnz - fst_col;
-
-               /* Copy U[*,j] segment from dense[*] to tempv[*] */
-               isub = lptr + no_zeros;
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   tempv[i] = dense[irow];
-                   ++isub; 
-               }
-
-               /* Dense triangular solve -- start effective triangle */
-               luptr += nsupr * no_zeros + no_zeros; 
-               
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-               CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                      &nsupr, tempv, &incx );
-#else          
-               ztrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                      &nsupr, tempv, &incx );
-#endif         
-               luptr += segsze;  /* Dense matrix-vector */
-               tempv1 = &tempv[segsze];
-                alpha = one;
-                beta = zero;
-#ifdef _CRAY
-               CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
-                      &nsupr, tempv, &incx, &beta, tempv1, &incy );
-#else
-               zgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
-                      &nsupr, tempv, &incx, &beta, tempv1, &incy );
-#endif
-#else
-               zlsolve ( nsupr, segsze, &lusup[luptr], tempv );
-
-               luptr += segsze;  /* Dense matrix-vector */
-               tempv1 = &tempv[segsze];
-               zmatvec (nsupr, nrow , segsze, &lusup[luptr], tempv, tempv1);
-#endif
-               
-               
-                /* Scatter tempv[] into SPA dense[] as a temporary storage */
-                isub = lptr + no_zeros;
-                for (i = 0; i < segsze; i++) {
-                    irow = lsub[isub];
-                    dense[irow] = tempv[i];
-                    tempv[i] = zero;
-                    ++isub;
-                }
-
-               /* Scatter tempv1[] into SPA dense[] */
-               for (i = 0; i < nrow; i++) {
-                   irow = lsub[isub];
-                   z_sub(&dense[irow], &dense[irow], &tempv1[i]);
-                   tempv1[i] = zero;
-                   ++isub;
-               }
-           }
-           
-       } /* if jsupno ... */
-
-    } /* for each segment... */
-
-    /*
-     * Process the supernodal portion of L\U[*,j]
-     */
-    nextlu = xlusup[jcol];
-    fsupc = xsup[jsupno];
-
-    /* Copy the SPA dense into L\U[*,j] */
-    new_next = nextlu + xlsub[fsupc+1] - xlsub[fsupc];
-    while ( new_next > nzlumax ) {
-       if (mem_error = zLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, Glu))
-           return (mem_error);
-       lusup = Glu->lusup;
-       lsub = Glu->lsub;
-    }
-
-    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
-       irow = lsub[isub];
-       lusup[nextlu] = dense[irow];
-        dense[irow] = zero;
-       ++nextlu;
-    }
-
-    xlusup[jcolp1] = nextlu;   /* Close L\U[*,jcol] */
-
-    /* For more updates within the panel (also within the current supernode), 
-     * should start from the first column of the panel, or the first column 
-     * of the supernode, whichever is bigger. There are 2 cases:
-     *    1) fsupc < fpanelc, then fst_col := fpanelc
-     *    2) fsupc >= fpanelc, then fst_col := fsupc
-     */
-    fst_col = SUPERLU_MAX ( fsupc, fpanelc );
-
-    if ( fst_col < jcol ) {
-
-       /* Distance between the current supernode and the current panel.
-          d_fsupc=0 if fsupc >= fpanelc. */
-       d_fsupc = fst_col - fsupc;
-
-       lptr = xlsub[fsupc] + d_fsupc;
-       luptr = xlusup[fst_col] + d_fsupc;
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];  /* Leading dimension */
-       nsupc = jcol - fst_col; /* Excluding jcol */
-       nrow = nsupr - d_fsupc - nsupc;
-
-       /* Points to the beginning of jcol in snode L\U(jsupno) */
-       ufirst = xlusup[jcol] + d_fsupc;        
-
-       ops[TRSV] += 4 * nsupc * (nsupc - 1);
-       ops[GEMV] += 8 * nrow * nsupc;
-       
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-       CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], 
-              &nsupr, &lusup[ufirst], &incx );
-#else
-       ztrsv_( "L", "N", "U", &nsupc, &lusup[luptr], 
-              &nsupr, &lusup[ufirst], &incx );
-#endif
-       
-       alpha = none; beta = one; /* y := beta*y + alpha*A*x */
-
-#ifdef _CRAY
-       CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
-              &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-#else
-       zgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr,
-              &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-#endif
-#else
-       zlsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
-
-       zmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc],
-               &lusup[ufirst], tempv );
-       
-        /* Copy updates from tempv[*] into lusup[*] */
-       isub = ufirst + nsupc;
-       for (i = 0; i < nrow; i++) {
-           z_sub(&lusup[isub], &lusup[isub], &tempv[i]);
-           tempv[i] = zero;
-           ++isub;
-       }
-
-#endif
-       
-       
-    } /* if fst_col < jcol ... */ 
-
-    return 0;
-}
diff --git a/superlu/zcolumn_dfs.c b/superlu/zcolumn_dfs.c
deleted file mode 100644
index 0c7f5a07..00000000
--- a/superlu/zcolumn_dfs.c
+++ /dev/null
@@ -1,266 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include "slu_zdefs.h"
-
-/* What type of supernodes we want */
-#define T2_SUPER
-
-int
-zcolumn_dfs(
-          const int  m,         /* in - number of rows in the matrix */
-          const int  jcol,      /* in */
-          int        *perm_r,   /* in */
-          int        *nseg,     /* modified - with new segments appended */
-          int        *lsub_col, /* in - defines the RHS vector to start the 
dfs */
-          int        *segrep,   /* modified - with new segments appended */
-          int        *repfnz,   /* modified */
-          int        *xprune,   /* modified */
-          int        *marker,   /* modified */
-          int        *parent,   /* working array */
-          int        *xplore,   /* working array */
-          GlobalLU_t *Glu       /* modified */
-          )
-{
-/* 
- * Purpose
- * =======
- *   "column_dfs" performs a symbolic factorization on column jcol, and
- *   decide the supernode boundary.
- *
- *   This routine does not use numeric values, but only use the RHS 
- *   row indices to start the dfs.
- *
- *   A supernode representative is the last column of a supernode.
- *   The nonzeros in U[*,j] are segments that end at supernodal
- *   representatives. The routine returns a list of such supernodal 
- *   representatives in topological order of the dfs that generates them.
- *   The location of the first nonzero in each such supernodal segment
- *   (supernodal entry location) is also returned.
- *
- * Local parameters
- * ================
- *   nseg: no of segments in current U[*,j]
- *   jsuper: jsuper=EMPTY if column j does not belong to the same
- *     supernode as j-1. Otherwise, jsuper=nsuper.
- *
- *   marker2: A-row --> A-row/col (0/1)
- *   repfnz: SuperA-col --> PA-row
- *   parent: SuperA-col --> SuperA-col
- *   xplore: SuperA-col --> index to L-structure
- *
- * Return value
- * ============
- *     0  success;
- *   > 0  number of bytes allocated when run out of space.
- *
- */
-    int     jcolp1, jcolm1, jsuper, nsuper, nextl;
-    int     k, krep, krow, kmark, kperm;
-    int     *marker2;           /* Used for small panel LU */
-    int            fsupc;              /* First column of a snode */
-    int     myfnz;             /* First nonz column of a U-segment */
-    int            chperm, chmark, chrep, kchild;
-    int     xdfs, maxdfs, kpar, oldrep;
-    int     jptr, jm1ptr;
-    int     ito, ifrom, istop; /* Used to compress row subscripts */
-    int     mem_error;
-    int     *xsup, *supno, *lsub, *xlsub;
-    int     nzlmax;
-    static  int  first = 1, maxsuper;
-    
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    nzlmax  = Glu->nzlmax;
-
-    if ( first ) {
-       maxsuper = sp_ienv(3);
-       first = 0;
-    }
-    jcolp1  = jcol + 1;
-    jcolm1  = jcol - 1;
-    nsuper  = supno[jcol];
-    jsuper  = nsuper;
-    nextl   = xlsub[jcol];
-    marker2 = &marker[2*m];
-
-
-    /* For each nonzero in A[*,jcol] do dfs */
-    for (k = 0; lsub_col[k] != EMPTY; k++) {
-
-       krow = lsub_col[k];
-       lsub_col[k] = EMPTY;
-       kmark = marker2[krow];          
-
-       /* krow was visited before, go to the next nonz */
-        if ( kmark == jcol ) continue; 
-
-       /* For each unmarked nbr krow of jcol
-        *      krow is in L: place it in structure of L[*,jcol]
-        */
-       marker2[krow] = jcol;
-       kperm = perm_r[krow];
-
-       if ( kperm == EMPTY ) {
-           lsub[nextl++] = krow;       /* krow is indexed into A */
-           if ( nextl >= nzlmax ) {
-               if ( mem_error = zLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-                   return (mem_error);
-               lsub = Glu->lsub;
-           }
-            if ( kmark != jcolm1 ) jsuper = EMPTY;/* Row index subset testing 
*/
-       } else {
-           /*  krow is in U: if its supernode-rep krep
-            *  has been explored, update repfnz[*]
-            */
-           krep = xsup[supno[kperm]+1] - 1;
-           myfnz = repfnz[krep];
-
-           if ( myfnz != EMPTY ) {     /* Visited before */
-               if ( myfnz > kperm ) repfnz[krep] = kperm;
-               /* continue; */
-           }
-           else {
-               /* Otherwise, perform dfs starting at krep */
-               oldrep = EMPTY;
-               parent[krep] = oldrep;
-               repfnz[krep] = kperm;
-               xdfs = xlsub[krep];
-               maxdfs = xprune[krep];
-
-               do {
-                   /* 
-                    * For each unmarked kchild of krep 
-                    */
-                   while ( xdfs < maxdfs ) {
-
-                       kchild = lsub[xdfs];
-                       xdfs++;
-                       chmark = marker2[kchild];
-
-                       if ( chmark != jcol ) { /* Not reached yet */
-                           marker2[kchild] = jcol;
-                           chperm = perm_r[kchild];
-
-                           /* Case kchild is in L: place it in L[*,k] */
-                           if ( chperm == EMPTY ) {
-                               lsub[nextl++] = kchild;
-                               if ( nextl >= nzlmax ) {
-                                   if ( mem_error =
-                                        
zLUMemXpand(jcol,nextl,LSUB,&nzlmax,Glu) )
-                                       return (mem_error);
-                                   lsub = Glu->lsub;
-                               }
-                               if ( chmark != jcolm1 ) jsuper = EMPTY;
-                           } else {
-                               /* Case kchild is in U: 
-                                *   chrep = its supernode-rep. If its rep has 
-                                *   been explored, update its repfnz[*]
-                                */
-                               chrep = xsup[supno[chperm]+1] - 1;
-                               myfnz = repfnz[chrep];
-                               if ( myfnz != EMPTY ) { /* Visited before */
-                                   if ( myfnz > chperm )
-                                       repfnz[chrep] = chperm;
-                               } else {
-                                   /* Continue dfs at super-rep of kchild */
-                                   xplore[krep] = xdfs;        
-                                   oldrep = krep;
-                                   krep = chrep; /* Go deeper down G(L^t) */
-                                   parent[krep] = oldrep;
-                                   repfnz[krep] = chperm;
-                                   xdfs = xlsub[krep];     
-                                   maxdfs = xprune[krep];
-                               } /* else */
-
-                          } /* else */
-
-                       } /* if */
-
-                   } /* while */
-
-                   /* krow has no more unexplored nbrs;
-                    *    place supernode-rep krep in postorder DFS.
-                    *    backtrack dfs to its parent
-                    */
-                   segrep[*nseg] = krep;
-                   ++(*nseg);
-                   kpar = parent[krep]; /* Pop from stack, mimic recursion */
-                   if ( kpar == EMPTY ) break; /* dfs done */
-                   krep = kpar;
-                   xdfs = xplore[krep];
-                   maxdfs = xprune[krep];
-
-               } while ( kpar != EMPTY );      /* Until empty stack */
-
-           } /* else */
-
-       } /* else */
-
-    } /* for each nonzero ... */
-
-    /* Check to see if j belongs in the same supernode as j-1 */
-    if ( jcol == 0 ) { /* Do nothing for column 0 */
-       nsuper = supno[0] = 0;
-    } else {
-       fsupc = xsup[nsuper];
-       jptr = xlsub[jcol];     /* Not compressed yet */
-       jm1ptr = xlsub[jcolm1];
-
-#ifdef T2_SUPER
-       if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = EMPTY;
-#endif
-       /* Make sure the number of columns in a supernode doesn't
-          exceed threshold. */
-       if ( jcol - fsupc >= maxsuper ) jsuper = EMPTY;
-
-       /* If jcol starts a new supernode, reclaim storage space in
-        * lsub from the previous supernode. Note we only store
-        * the subscript set of the first and last columns of
-        * a supernode. (first for num values, last for pruning)
-        */
-       if ( jsuper == EMPTY ) {        /* starts a new supernode */
-           if ( (fsupc < jcolm1-1) ) { /* >= 3 columns in nsuper */
-#ifdef CHK_COMPRESS
-               printf("  Compress lsub[] at super %d-%d\n", fsupc, jcolm1);
-#endif
-               ito = xlsub[fsupc+1];
-               xlsub[jcolm1] = ito;
-               istop = ito + jptr - jm1ptr;
-               xprune[jcolm1] = istop; /* Initialize xprune[jcol-1] */
-               xlsub[jcol] = istop;
-               for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
-                   lsub[ito] = lsub[ifrom];
-               nextl = ito;            /* = istop + length(jcol) */
-           }
-           nsuper++;
-           supno[jcol] = nsuper;
-       } /* if a new supernode */
-
-    }  /* else: jcol > 0 */ 
-    
-    /* Tidy up the pointers before exit */
-    xsup[nsuper+1] = jcolp1;
-    supno[jcolp1]  = nsuper;
-    xprune[jcol]   = nextl;    /* Initialize upper bound for pruning */
-    xlsub[jcolp1]  = nextl;
-
-    return 0;
-}
diff --git a/superlu/zcopy_to_ucol.c b/superlu/zcopy_to_ucol.c
deleted file mode 100644
index 375a50bc..00000000
--- a/superlu/zcopy_to_ucol.c
+++ /dev/null
@@ -1,112 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_zdefs.h"
-
-int
-zcopy_to_ucol(
-             int        jcol,    /* in */
-             int        nseg,    /* in */
-             int        *segrep,  /* in */
-             int        *repfnz,  /* in */
-             int        *perm_r,  /* in */
-             doublecomplex     *dense,   /* modified - reset to zero on return 
*/
-             GlobalLU_t *Glu      /* modified */
-             )
-{
-/* 
- * Gather from SPA dense[*] to global ucol[*].
- */
-    int ksub, krep, ksupno;
-    int i, k, kfnz, segsze;
-    int fsupc, isub, irow;
-    int jsupno, nextu;
-    int new_next, mem_error;
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-    doublecomplex    *ucol;
-    int       *usub, *xusub;
-    int       nzumax;
-
-    doublecomplex zero = {0.0, 0.0};
-
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    nzumax  = Glu->nzumax;
-    
-    jsupno = supno[jcol];
-    nextu  = xusub[jcol];
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) {
-       krep = segrep[k--];
-       ksupno = supno[krep];
-
-       if ( ksupno != jsupno ) { /* Should go into ucol[] */
-           kfnz = repfnz[krep];
-           if ( kfnz != EMPTY ) {      /* Nonzero U-segment */
-
-               fsupc = xsup[ksupno];
-               isub = xlsub[fsupc] + kfnz - fsupc;
-               segsze = krep - kfnz + 1;
-
-               new_next = nextu + segsze;
-               while ( new_next > nzumax ) {
-                   if (mem_error = zLUMemXpand(jcol, nextu, UCOL, &nzumax, 
Glu))
-                       return (mem_error);
-                   ucol = Glu->ucol;
-                   if (mem_error = zLUMemXpand(jcol, nextu, USUB, &nzumax, 
Glu))
-                       return (mem_error);
-                   usub = Glu->usub;
-                   lsub = Glu->lsub;
-               }
-               
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   usub[nextu] = perm_r[irow];
-                   ucol[nextu] = dense[irow];
-                   dense[irow] = zero;
-                   nextu++;
-                   isub++;
-               } 
-
-           }
-
-       }
-
-    } /* for each segment... */
-
-    xusub[jcol + 1] = nextu;      /* Close U[*,jcol] */
-    return 0;
-}
diff --git a/superlu/zgscon.c b/superlu/zgscon.c
deleted file mode 100644
index 2014c9c0..00000000
--- a/superlu/zgscon.c
+++ /dev/null
@@ -1,152 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
- * File name:  zgscon.c
- * History:     Modified from lapack routines ZGECON.
- */
-#include <math.h>
-#include "slu_zdefs.h"
-
-void
-zgscon(char *norm, SuperMatrix *L, SuperMatrix *U,
-       double anorm, double *rcond, SuperLUStat_t *stat, int *info)
-{
-/*
-    Purpose   
-    =======   
-
-    ZGSCON estimates the reciprocal of the condition number of a general 
-    real matrix A, in either the 1-norm or the infinity-norm, using   
-    the LU factorization computed by ZGETRF.   
-
-    An estimate is obtained for norm(inv(A)), and the reciprocal of the   
-    condition number is computed as   
-       RCOND = 1 / ( norm(A) * norm(inv(A)) ).   
-
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
- 
-    Arguments   
-    =========   
-
-    NORM    (input) char*
-            Specifies whether the 1-norm condition number or the   
-            infinity-norm condition number is required:   
-            = '1' or 'O':  1-norm;   
-            = 'I':         Infinity-norm.
-           
-    L       (input) SuperMatrix*
-            The factor L from the factorization Pr*A*Pc=L*U as computed by
-            zgstrf(). Use compressed row subscripts storage for supernodes,
-            i.e., L has types: Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
- 
-    U       (input) SuperMatrix*
-            The factor U from the factorization Pr*A*Pc=L*U as computed by
-            zgstrf(). Use column-wise storage scheme, i.e., U has types:
-            Stype = SLU_NC, Dtype = SLU_Z, Mtype = TRU.
-           
-    ANORM   (input) double
-            If NORM = '1' or 'O', the 1-norm of the original matrix A.   
-            If NORM = 'I', the infinity-norm of the original matrix A.
-           
-    RCOND   (output) double*
-            The reciprocal of the condition number of the matrix A,   
-            computed as RCOND = 1/(norm(A) * norm(inv(A))).
-           
-    INFO    (output) int*
-            = 0:  successful exit   
-            < 0:  if INFO = -i, the i-th argument had an illegal value   
-
-    ===================================================================== 
-*/
-
-    /* Local variables */
-    int    kase, kase1, onenrm, i;
-    double ainvnm;
-    doublecomplex *work;
-    extern int zrscl_(int *, doublecomplex *, doublecomplex *, int *);
-
-    extern int zlacon_(int *, doublecomplex *, doublecomplex *, double *, int 
*);
-
-    
-    /* Test the input parameters. */
-    *info = 0;
-    onenrm = *(unsigned char *)norm == '1' || lsame_(norm, "O");
-    if (! onenrm && ! lsame_(norm, "I")) *info = -1;
-    else if (L->nrow < 0 || L->nrow != L->ncol ||
-             L->Stype != SLU_SC || L->Dtype != SLU_Z || L->Mtype != SLU_TRLU)
-        *info = -2;
-    else if (U->nrow < 0 || U->nrow != U->ncol ||
-             U->Stype != SLU_NC || U->Dtype != SLU_Z || U->Mtype != SLU_TRU) 
-       *info = -3;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("zgscon", &i);
-       return;
-    }
-
-    /* Quick return if possible */
-    *rcond = 0.;
-    if ( L->nrow == 0 || U->nrow == 0) {
-       *rcond = 1.;
-       return;
-    }
-
-    work = doublecomplexCalloc( 3*L->nrow );
-
-
-    if ( !work )
-       ABORT("Malloc fails for work arrays in zgscon.");
-    
-    /* Estimate the norm of inv(A). */
-    ainvnm = 0.;
-    if ( onenrm ) kase1 = 1;
-    else kase1 = 2;
-    kase = 0;
-
-    do {
-       zlacon_(&L->nrow, &work[L->nrow], &work[0], &ainvnm, &kase);
-
-       if (kase == 0) break;
-
-       if (kase == kase1) {
-           /* Multiply by inv(L). */
-           sp_ztrsv("L", "No trans", "Unit", L, U, &work[0], stat, info);
-
-           /* Multiply by inv(U). */
-           sp_ztrsv("U", "No trans", "Non-unit", L, U, &work[0], stat, info);
-           
-       } else {
-
-           /* Multiply by inv(U'). */
-           sp_ztrsv("U", "Transpose", "Non-unit", L, U, &work[0], stat, info);
-
-           /* Multiply by inv(L'). */
-           sp_ztrsv("L", "Transpose", "Unit", L, U, &work[0], stat, info);
-           
-       }
-
-    } while ( kase != 0 );
-
-    /* Compute the estimate of the reciprocal condition number. */
-    if (ainvnm != 0.) *rcond = (1. / ainvnm) / anorm;
-
-    SUPERLU_FREE (work);
-    return;
-
-} /* zgscon */
-
diff --git a/superlu/zgsequ.c b/superlu/zgsequ.c
deleted file mode 100644
index 4f9f5207..00000000
--- a/superlu/zgsequ.c
+++ /dev/null
@@ -1,205 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-/*
- * File name:  zgsequ.c
- * History:     Modified from LAPACK routine ZGEEQU
- */
-#include <math.h>
-#include "slu_zdefs.h"
-
-void
-zgsequ(SuperMatrix *A, double *r, double *c, double *rowcnd,
-       double *colcnd, double *amax, int *info)
-{
-/*    
-    Purpose   
-    =======   
-
-    ZGSEQU computes row and column scalings intended to equilibrate an   
-    M-by-N sparse matrix A and reduce its condition number. R returns the row
-    scale factors and C the column scale factors, chosen to try to make   
-    the largest element in each row and column of the matrix B with   
-    elements B(i,j)=R(i)*A(i,j)*C(j) have absolute value 1.   
-
-    R(i) and C(j) are restricted to be between SMLNUM = smallest safe   
-    number and BIGNUM = largest safe number.  Use of these scaling   
-    factors is not guaranteed to reduce the condition number of A but   
-    works well in practice.   
-
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
- 
-    Arguments   
-    =========   
-
-    A       (input) SuperMatrix*
-            The matrix of dimension (A->nrow, A->ncol) whose equilibration
-            factors are to be computed. The type of A can be:
-            Stype = SLU_NC; Dtype = SLU_Z; Mtype = SLU_GE.
-           
-    R       (output) double*, size A->nrow
-            If INFO = 0 or INFO > M, R contains the row scale factors   
-            for A.
-           
-    C       (output) double*, size A->ncol
-            If INFO = 0,  C contains the column scale factors for A.
-           
-    ROWCND  (output) double*
-            If INFO = 0 or INFO > M, ROWCND contains the ratio of the   
-            smallest R(i) to the largest R(i).  If ROWCND >= 0.1 and   
-            AMAX is neither too large nor too small, it is not worth   
-            scaling by R.
-           
-    COLCND  (output) double*
-            If INFO = 0, COLCND contains the ratio of the smallest   
-            C(i) to the largest C(i).  If COLCND >= 0.1, it is not   
-            worth scaling by C.
-           
-    AMAX    (output) double*
-            Absolute value of largest matrix element.  If AMAX is very   
-            close to overflow or very close to underflow, the matrix   
-            should be scaled.
-           
-    INFO    (output) int*
-            = 0:  successful exit   
-            < 0:  if INFO = -i, the i-th argument had an illegal value   
-            > 0:  if INFO = i,  and i is   
-                  <= A->nrow:  the i-th row of A is exactly zero   
-                  >  A->ncol:  the (i-M)-th column of A is exactly zero   
-
-    ===================================================================== 
-*/
-
-    /* Local variables */
-    NCformat *Astore;
-    doublecomplex   *Aval;
-    int i, j, irow;
-    double rcmin, rcmax;
-    double bignum, smlnum;
-    extern double dlamch_(char *);
-    
-    /* Test the input parameters. */
-    *info = 0;
-    if ( A->nrow < 0 || A->ncol < 0 ||
-        A->Stype != SLU_NC || A->Dtype != SLU_Z || A->Mtype != SLU_GE )
-       *info = -1;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("zgsequ", &i);
-       return;
-    }
-
-    /* Quick return if possible */
-    if ( A->nrow == 0 || A->ncol == 0 ) {
-       *rowcnd = 1.;
-       *colcnd = 1.;
-       *amax = 0.;
-       return;
-    }
-
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    
-    /* Get machine constants. */
-    smlnum = dlamch_("S");
-    bignum = 1. / smlnum;
-
-    /* Compute row scale factors. */
-    for (i = 0; i < A->nrow; ++i) r[i] = 0.;
-
-    /* Find the maximum element in each row. */
-    for (j = 0; j < A->ncol; ++j)
-       for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-           irow = Astore->rowind[i];
-           r[irow] = SUPERLU_MAX( r[irow], z_abs1(&Aval[i]) );
-       }
-
-    /* Find the maximum and minimum scale factors. */
-    rcmin = bignum;
-    rcmax = 0.;
-    for (i = 0; i < A->nrow; ++i) {
-       rcmax = SUPERLU_MAX(rcmax, r[i]);
-       rcmin = SUPERLU_MIN(rcmin, r[i]);
-    }
-    *amax = rcmax;
-
-    if (rcmin == 0.) {
-       /* Find the first zero scale factor and return an error code. */
-       for (i = 0; i < A->nrow; ++i)
-           if (r[i] == 0.) {
-               *info = i + 1;
-               return;
-           }
-    } else {
-       /* Invert the scale factors. */
-       for (i = 0; i < A->nrow; ++i)
-           r[i] = 1. / SUPERLU_MIN( SUPERLU_MAX( r[i], smlnum ), bignum );
-       /* Compute ROWCND = min(R(I)) / max(R(I)) */
-       *rowcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
-    }
-
-    /* Compute column scale factors */
-    for (j = 0; j < A->ncol; ++j) c[j] = 0.;
-
-    /* Find the maximum element in each column, assuming the row
-       scalings computed above. */
-    for (j = 0; j < A->ncol; ++j)
-       for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-           irow = Astore->rowind[i];
-           c[j] = SUPERLU_MAX( c[j], z_abs1(&Aval[i]) * r[irow] );
-       }
-
-    /* Find the maximum and minimum scale factors. */
-    rcmin = bignum;
-    rcmax = 0.;
-    for (j = 0; j < A->ncol; ++j) {
-       rcmax = SUPERLU_MAX(rcmax, c[j]);
-       rcmin = SUPERLU_MIN(rcmin, c[j]);
-    }
-
-    if (rcmin == 0.) {
-       /* Find the first zero scale factor and return an error code. */
-       for (j = 0; j < A->ncol; ++j)
-           if ( c[j] == 0. ) {
-               *info = A->nrow + j + 1;
-               return;
-           }
-    } else {
-       /* Invert the scale factors. */
-       for (j = 0; j < A->ncol; ++j)
-           c[j] = 1. / SUPERLU_MIN( SUPERLU_MAX( c[j], smlnum ), bignum);
-       /* Compute COLCND = min(C(J)) / max(C(J)) */
-       *colcnd = SUPERLU_MAX( rcmin, smlnum ) / SUPERLU_MIN( rcmax, bignum );
-    }
-
-    return;
-
-} /* zgsequ */
-
-
diff --git a/superlu/zgsrfs.c b/superlu/zgsrfs.c
deleted file mode 100644
index e87c3616..00000000
--- a/superlu/zgsrfs.c
+++ /dev/null
@@ -1,456 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
- * File name:  zgsrfs.c
- * History:     Modified from lapack routine ZGERFS
- */
-#include <math.h>
-#include "slu_zdefs.h"
-
-void
-zgsrfs(trans_t trans, SuperMatrix *A, SuperMatrix *L, SuperMatrix *U,
-       int *perm_c, int *perm_r, char *equed, double *R, double *C,
-       SuperMatrix *B, SuperMatrix *X, double *ferr, double *berr,
-       SuperLUStat_t *stat, int *info)
-{
-/*
- *   Purpose   
- *   =======   
- *
- *   ZGSRFS improves the computed solution to a system of linear   
- *   equations and provides error bounds and backward error estimates for 
- *   the solution.   
- *
- *   If equilibration was performed, the system becomes:
- *           (diag(R)*A_original*diag(C)) * X = diag(R)*B_original.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- *   Arguments   
- *   =========   
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *   
- *   A       (input) SuperMatrix*
- *           The original matrix A in the system, or the scaled A if
- *           equilibration was done. The type of A can be:
- *           Stype = SLU_NC, Dtype = SLU_Z, Mtype = SLU_GE.
- *    
- *   L       (input) SuperMatrix*
- *          The factor L from the factorization Pr*A*Pc=L*U. Use
- *           compressed row subscripts storage for supernodes, 
- *           i.e., L has types: Stype = SLU_SC, Dtype = SLU_Z, Mtype = 
SLU_TRLU.
- * 
- *   U       (input) SuperMatrix*
- *           The factor U from the factorization Pr*A*Pc=L*U as computed by
- *           zgstrf(). Use column-wise storage scheme, 
- *           i.e., U has types: Stype = SLU_NC, Dtype = SLU_Z, Mtype = SLU_TRU.
- *
- *   perm_c  (input) int*, dimension (A->ncol)
- *          Column permutation vector, which defines the 
- *           permutation matrix Pc; perm_c[i] = j means column i of A is 
- *           in position j in A*Pc.
- *
- *   perm_r  (input) int*, dimension (A->nrow)
- *           Row permutation vector, which defines the permutation matrix Pr;
- *           perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- *   equed   (input) Specifies the form of equilibration that was done.
- *           = 'N': No equilibration.
- *           = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
- *           = 'C': Column equilibration, i.e., A was postmultiplied by
- *                  diag(C).
- *           = 'B': Both row and column equilibration, i.e., A was replaced 
- *                  by diag(R)*A*diag(C).
- *
- *   R       (input) double*, dimension (A->nrow)
- *           The row scale factors for A.
- *           If equed = 'R' or 'B', A is premultiplied by diag(R).
- *           If equed = 'N' or 'C', R is not accessed.
- * 
- *   C       (input) double*, dimension (A->ncol)
- *           The column scale factors for A.
- *           If equed = 'C' or 'B', A is postmultiplied by diag(C).
- *           If equed = 'N' or 'R', C is not accessed.
- *
- *   B       (input) SuperMatrix*
- *           B has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE.
- *           The right hand side matrix B.
- *           if equed = 'R' or 'B', B is premultiplied by diag(R).
- *
- *   X       (input/output) SuperMatrix*
- *           X has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE.
- *           On entry, the solution matrix X, as computed by zgstrs().
- *           On exit, the improved solution matrix X.
- *           if *equed = 'C' or 'B', X should be premultiplied by diag(C)
- *               in order to obtain the solution to the original system.
- *
- *   FERR    (output) double*, dimension (B->ncol)   
- *           The estimated forward error bound for each solution vector   
- *           X(j) (the j-th column of the solution matrix X).   
- *           If XTRUE is the true solution corresponding to X(j), FERR(j) 
- *           is an estimated upper bound for the magnitude of the largest 
- *           element in (X(j) - XTRUE) divided by the magnitude of the   
- *           largest element in X(j).  The estimate is as reliable as   
- *           the estimate for RCOND, and is almost always a slight   
- *           overestimate of the true error.
- *
- *   BERR    (output) double*, dimension (B->ncol)   
- *           The componentwise relative backward error of each solution   
- *           vector X(j) (i.e., the smallest relative change in   
- *           any element of A or B that makes X(j) an exact solution).
- *
- *   stat     (output) SuperLUStat_t*
- *            Record the statistics on runtime and floating-point operation 
count.
- *            See util.h for the definition of 'SuperLUStat_t'.
- *
- *   info    (output) int*   
- *           = 0:  successful exit   
- *            < 0:  if INFO = -i, the i-th argument had an illegal value   
- *
- *    Internal Parameters   
- *    ===================   
- *
- *    ITMAX is the maximum number of steps of iterative refinement.   
- *
- */  
-
-#define ITMAX 5
-    
-    /* Table of constant values */
-    int    ione = 1;
-    doublecomplex ndone = {-1., 0.};
-    doublecomplex done = {1., 0.};
-    
-    /* Local variables */
-    NCformat *Astore;
-    doublecomplex   *Aval;
-    SuperMatrix Bjcol;
-    DNformat *Bstore, *Xstore, *Bjcol_store;
-    doublecomplex   *Bmat, *Xmat, *Bptr, *Xptr;
-    int      kase;
-    double   safe1, safe2;
-    int      i, j, k, irow, nz, count, notran, rowequ, colequ;
-    int      ldb, ldx, nrhs;
-    double   s, xk, lstres, eps, safmin;
-    char     transc[1];
-    trans_t  transt;
-    doublecomplex   *work;
-    double   *rwork;
-    int      *iwork;
-    extern double dlamch_(char *);
-    extern int zlacon_(int *, doublecomplex *, doublecomplex *, double *, int 
*);
-#ifdef _CRAY
-    extern int CCOPY(int *, doublecomplex *, int *, doublecomplex *, int *);
-    extern int CSAXPY(int *, doublecomplex *, doublecomplex *, int *, 
doublecomplex *, int *);
-#else
-    extern int zcopy_(int *, doublecomplex *, int *, doublecomplex *, int *);
-    extern int zaxpy_(int *, doublecomplex *, doublecomplex *, int *, 
doublecomplex *, int *);
-#endif
-
-    Astore = A->Store;
-    Aval   = Astore->nzval;
-    Bstore = B->Store;
-    Xstore = X->Store;
-    Bmat   = Bstore->nzval;
-    Xmat   = Xstore->nzval;
-    ldb    = Bstore->lda;
-    ldx    = Xstore->lda;
-    nrhs   = B->ncol;
-    
-    /* Test the input parameters */
-    *info = 0;
-    notran = (trans == NOTRANS);
-    if ( !notran && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-             A->Stype != SLU_NC || A->Dtype != SLU_Z || A->Mtype != SLU_GE )
-       *info = -2;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_Z || L->Mtype != SLU_TRLU )
-       *info = -3;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-             U->Stype != SLU_NC || U->Dtype != SLU_Z || U->Mtype != SLU_TRU )
-       *info = -4;
-    else if ( ldb < SUPERLU_MAX(0, A->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_Z || B->Mtype != SLU_GE )
-        *info = -10;
-    else if ( ldx < SUPERLU_MAX(0, A->nrow) ||
-             X->Stype != SLU_DN || X->Dtype != SLU_Z || X->Mtype != SLU_GE )
-       *info = -11;
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("zgsrfs", &i);
-       return;
-    }
-
-    /* Quick return if possible */
-    if ( A->nrow == 0 || nrhs == 0) {
-       for (j = 0; j < nrhs; ++j) {
-           ferr[j] = 0.;
-           berr[j] = 0.;
-       }
-       return;
-    }
-
-    rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-    colequ = lsame_(equed, "C") || lsame_(equed, "B");
-    
-    /* Allocate working space */
-    work = doublecomplexMalloc(2*A->nrow);
-    rwork = (double *) SUPERLU_MALLOC( A->nrow * sizeof(double) );
-    iwork = intMalloc(A->nrow);
-    if ( !work || !rwork || !iwork ) 
-        ABORT("Malloc fails for work/rwork/iwork.");
-    
-    if ( notran ) {
-       *(unsigned char *)transc = 'N';
-        transt = TRANS;
-    } else {
-       *(unsigned char *)transc = 'T';
-       transt = NOTRANS;
-    }
-
-    /* NZ = maximum number of nonzero elements in each row of A, plus 1 */
-    nz     = A->ncol + 1;
-    eps    = dlamch_("Epsilon");
-    safmin = dlamch_("Safe minimum");
-    safe1  = nz * safmin;
-    safe2  = safe1 / eps;
-
-    /* Compute the number of nonzeros in each row (or column) of A */
-    for (i = 0; i < A->nrow; ++i) iwork[i] = 0;
-    if ( notran ) {
-       for (k = 0; k < A->ncol; ++k)
-           for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) 
-               ++iwork[Astore->rowind[i]];
-    } else {
-       for (k = 0; k < A->ncol; ++k)
-           iwork[k] = Astore->colptr[k+1] - Astore->colptr[k];
-    }  
-
-    /* Copy one column of RHS B into Bjcol. */
-    Bjcol.Stype = B->Stype;
-    Bjcol.Dtype = B->Dtype;
-    Bjcol.Mtype = B->Mtype;
-    Bjcol.nrow  = B->nrow;
-    Bjcol.ncol  = 1;
-    Bjcol.Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
-    if ( !Bjcol.Store ) ABORT("SUPERLU_MALLOC fails for Bjcol.Store");
-    Bjcol_store = Bjcol.Store;
-    Bjcol_store->lda = ldb;
-    Bjcol_store->nzval = work; /* address aliasing */
-       
-    /* Do for each right hand side ... */
-    for (j = 0; j < nrhs; ++j) {
-       count = 0;
-       lstres = 3.;
-       Bptr = &Bmat[j*ldb];
-       Xptr = &Xmat[j*ldx];
-
-       while (1) { /* Loop until stopping criterion is satisfied. */
-
-           /* Compute residual R = B - op(A) * X,   
-              where op(A) = A, A**T, or A**H, depending on TRANS. */
-           
-#ifdef _CRAY
-           CCOPY(&A->nrow, Bptr, &ione, work, &ione);
-#else
-           zcopy_(&A->nrow, Bptr, &ione, work, &ione);
-#endif
-           sp_zgemv(transc, ndone, A, Xptr, ione, done, work, ione);
-
-           /* Compute componentwise relative backward error from formula 
-              max(i) ( abs(R(i)) / ( abs(op(A))*abs(X) + abs(B) )(i) )   
-              where abs(Z) is the componentwise absolute value of the matrix
-              or vector Z.  If the i-th component of the denominator is less
-              than SAFE2, then SAFE1 is added to the i-th component of the   
-              numerator and denominator before dividing. */
-
-           for (i = 0; i < A->nrow; ++i) rwork[i] = z_abs1( &Bptr[i] );
-           
-           /* Compute abs(op(A))*abs(X) + abs(B). */
-           if (notran) {
-               for (k = 0; k < A->ncol; ++k) {
-                   xk = z_abs1( &Xptr[k] );
-                   for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
-                       rwork[Astore->rowind[i]] += z_abs1(&Aval[i]) * xk;
-               }
-           } else {
-               for (k = 0; k < A->ncol; ++k) {
-                   s = 0.;
-                   for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
-                       irow = Astore->rowind[i];
-                       s += z_abs1(&Aval[i]) * z_abs1(&Xptr[irow]);
-                   }
-                   rwork[k] += s;
-               }
-           }
-           s = 0.;
-           for (i = 0; i < A->nrow; ++i) {
-               if (rwork[i] > safe2)
-                   s = SUPERLU_MAX( s, z_abs1(&work[i]) / rwork[i] );
-               else
-                   s = SUPERLU_MAX( s, (z_abs1(&work[i]) + safe1) / 
-                               (rwork[i] + safe1) );
-           }
-           berr[j] = s;
-
-           /* Test stopping criterion. Continue iterating if   
-              1) The residual BERR(J) is larger than machine epsilon, and   
-              2) BERR(J) decreased by at least a factor of 2 during the   
-                 last iteration, and   
-              3) At most ITMAX iterations tried. */
-
-           if (berr[j] > eps && berr[j] * 2. <= lstres && count < ITMAX) {
-               /* Update solution and try again. */
-               zgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               
-#ifdef _CRAY
-               CAXPY(&A->nrow, &done, work, &ione,
-                      &Xmat[j*ldx], &ione);
-#else
-               zaxpy_(&A->nrow, &done, work, &ione,
-                      &Xmat[j*ldx], &ione);
-#endif
-               lstres = berr[j];
-               ++count;
-           } else {
-               break;
-           }
-        
-       } /* end while */
-
-       stat->RefineSteps = count;
-
-       /* Bound error from formula:
-          norm(X - XTRUE) / norm(X) .le. FERR = norm( abs(inv(op(A)))*   
-          ( abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) / norm(X)   
-          where   
-            norm(Z) is the magnitude of the largest component of Z   
-            inv(op(A)) is the inverse of op(A)   
-            abs(Z) is the componentwise absolute value of the matrix or
-              vector Z   
-            NZ is the maximum number of nonzeros in any row of A, plus 1   
-            EPS is machine epsilon   
-
-          The i-th component of abs(R)+NZ*EPS*(abs(op(A))*abs(X)+abs(B))   
-          is incremented by SAFE1 if the i-th component of   
-          abs(op(A))*abs(X) + abs(B) is less than SAFE2.   
-
-          Use ZLACON to estimate the infinity-norm of the matrix   
-             inv(op(A)) * diag(W),   
-          where W = abs(R) + NZ*EPS*( abs(op(A))*abs(X)+abs(B) ))) */
-       
-       for (i = 0; i < A->nrow; ++i) rwork[i] = z_abs1( &Bptr[i] );
-       
-       /* Compute abs(op(A))*abs(X) + abs(B). */
-       if ( notran ) {
-           for (k = 0; k < A->ncol; ++k) {
-               xk = z_abs1( &Xptr[k] );
-               for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i)
-                   rwork[Astore->rowind[i]] += z_abs1(&Aval[i]) * xk;
-           }
-       } else {
-           for (k = 0; k < A->ncol; ++k) {
-               s = 0.;
-               for (i = Astore->colptr[k]; i < Astore->colptr[k+1]; ++i) {
-                   irow = Astore->rowind[i];
-                   xk = z_abs1( &Xptr[irow] );
-                   s += z_abs1(&Aval[i]) * xk;
-               }
-               rwork[k] += s;
-           }
-       }
-       
-       for (i = 0; i < A->nrow; ++i)
-           if (rwork[i] > safe2)
-               rwork[i] = z_abs(&work[i]) + (iwork[i]+1)*eps*rwork[i];
-           else
-               rwork[i] = z_abs(&work[i])+(iwork[i]+1)*eps*rwork[i]+safe1;
-       kase = 0;
-
-       do {
-           zlacon_(&A->nrow, &work[A->nrow], work,
-                   &ferr[j], &kase);
-           if (kase == 0) break;
-
-           if (kase == 1) {
-               /* Multiply by diag(W)*inv(op(A)**T)*(diag(C) or diag(R)). */
-               if ( notran && colequ )
-                   for (i = 0; i < A->ncol; ++i) {
-                       zd_mult(&work[i], &work[i], C[i]);
-                   }
-               else if ( !notran && rowequ )
-                   for (i = 0; i < A->nrow; ++i) {
-                       zd_mult(&work[i], &work[i], R[i]);
-                    }
-
-               zgstrs (transt, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               
-               for (i = 0; i < A->nrow; ++i) {
-                   zd_mult(&work[i], &work[i], rwork[i]);
-               }
-           } else {
-               /* Multiply by (diag(C) or diag(R))*inv(op(A))*diag(W). */
-               for (i = 0; i < A->nrow; ++i) {
-                   zd_mult(&work[i], &work[i], rwork[i]);
-               }
-               
-               zgstrs (trans, L, U, perm_c, perm_r, &Bjcol, stat, info);
-               
-               if ( notran && colequ )
-                   for (i = 0; i < A->ncol; ++i) {
-                       zd_mult(&work[i], &work[i], C[i]);
-                   }
-               else if ( !notran && rowequ )
-                   for (i = 0; i < A->ncol; ++i) {
-                       zd_mult(&work[i], &work[i], R[i]);  
-                   }
-           }
-           
-       } while ( kase != 0 );
-
-       /* Normalize error. */
-       lstres = 0.;
-       if ( notran && colequ ) {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, C[i] * z_abs1( &Xptr[i]) );
-       } else if ( !notran && rowequ ) {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, R[i] * z_abs1( &Xptr[i]) );
-       } else {
-           for (i = 0; i < A->nrow; ++i)
-               lstres = SUPERLU_MAX( lstres, z_abs1( &Xptr[i]) );
-       }
-       if ( lstres != 0. )
-           ferr[j] /= lstres;
-
-    } /* for each RHS j ... */
-    
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(rwork);
-    SUPERLU_FREE(iwork);
-    SUPERLU_FREE(Bjcol.Store);
-
-    return;
-
-} /* zgsrfs */
diff --git a/superlu/zgssv.c b/superlu/zgssv.c
deleted file mode 100644
index 73bf9a86..00000000
--- a/superlu/zgssv.c
+++ /dev/null
@@ -1,230 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-#include "slu_zdefs.h"
-
-void
-zgssv(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
-      SuperMatrix *L, SuperMatrix *U, SuperMatrix *B,
-      SuperLUStat_t *stat, int *info )
-{
-/*
- * Purpose
- * =======
- *
- * ZGSSV solves the system of linear equations A*X=B, using the
- * LU factorization from ZGSTRF. It performs the following steps:
- *
- *   1. If A is stored column-wise (A->Stype = SLU_NC):
- *
- *      1.1. Permute the columns of A, forming A*Pc, where Pc
- *           is a permutation matrix. For more details of this step, 
- *           see sp_preorder.c.
- *
- *      1.2. Factor A as Pr*A*Pc=L*U with the permutation Pr determined
- *           by Gaussian elimination with partial pivoting.
- *           L is unit lower triangular with offdiagonal entries
- *           bounded by 1 in magnitude, and U is upper triangular.
- *
- *      1.3. Solve the system of equations A*X=B using the factored
- *           form of A.
- *
- *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the
- *      above algorithm to the transpose of A:
- *
- *      2.1. Permute columns of transpose(A) (rows of A),
- *           forming transpose(A)*Pc, where Pc is a permutation matrix. 
- *           For more details of this step, see sp_preorder.c.
- *
- *      2.2. Factor A as Pr*transpose(A)*Pc=L*U with the permutation Pr
- *           determined by Gaussian elimination with partial pivoting.
- *           L is unit lower triangular with offdiagonal entries
- *           bounded by 1 in magnitude, and U is upper triangular.
- *
- *      2.3. Solve the system of equations A*X=B using the factored
- *           form of A.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- * 
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed and how the
- *         system will be solved.
- *
- * A       (input) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = SLU_NC or SLU_NR; Dtype = SLU_Z; Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- * perm_c  (input/output) int*
- *         If A->Stype = SLU_NC, column permutation vector of size A->ncol
- *         which defines the permutation matrix Pc; perm_c[i] = j means 
- *         column i of A is in position j in A*Pc.
- *         If A->Stype = SLU_NR, column permutation vector of size A->nrow
- *         which describes permutation of columns of transpose(A) 
- *         (rows of A) as described above.
- * 
- *         If options->ColPerm = MY_PERMC or options->Fact = SamePattern or
- *            options->Fact = SamePattern_SameRowPerm, it is an input argument.
- *            On exit, perm_c may be overwritten by the product of the input
- *            perm_c and a permutation that postorders the elimination tree
- *            of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
- *            is already in postorder.
- *         Otherwise, it is an output argument.
- * 
- * perm_r  (input/output) int*
- *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
- *         which defines the permutation matrix Pr, and is determined 
- *         by partial pivoting.  perm_r[i] = j means row i of A is in 
- *         position j in Pr*A.
- *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
- *         determines permutation of rows of transpose(A)
- *         (columns of A) as described above.
- *
- *         If options->RowPerm = MY_PERMR or
- *            options->Fact = SamePattern_SameRowPerm, perm_r is an
- *            input argument.
- *         otherwise it is an output argument.
- *
- * L       (output) SuperMatrix*
- *         The factor L from the factorization 
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses compressed row subscripts storage for supernodes, i.e.,
- *         L has types: Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
- *         
- * U       (output) SuperMatrix*
- *        The factor U from the factorization 
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_Z, Mtype = SLU_TRU.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *        = 0: successful exit
- *         > 0: if info = i, and i is
- *             <= A->ncol: U(i,i) is exactly zero. The factorization has
- *                been completed, but the factor U is exactly singular,
- *                so the solution could not be computed.
- *             > A->ncol: number of bytes allocated when memory allocation
- *                failure occurred, plus A->ncol.
- *   
- */
-    DNformat *Bstore;
-    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
-    SuperMatrix AC; /* Matrix postmultiplied by Pc */
-    int      lwork = 0, *etree, i;
-    
-    /* Set default values for some parameters */
-    double   drop_tol = 0.;
-    int      panel_size;     /* panel size */
-    int      relax;          /* no of columns in a relaxed snodes */
-    int      permc_spec;
-    trans_t  trans = NOTRANS;
-    double   *utime;
-    double   t;        /* Temporary time */
-
-    /* Test the input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    if ( options->Fact != DOFACT ) *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-        (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
-        A->Dtype != SLU_Z || A->Mtype != SLU_GE )
-       *info = -2;
-    else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
-       B->Stype != SLU_DN || B->Dtype != SLU_Z || B->Mtype != SLU_GE )
-       *info = -7;
-    if ( *info != 0 ) {
-       i = -(*info);
-       xerbla_("zgssv", &i);
-       return;
-    }
-
-    utime = stat->utime;
-
-    /* Convert A to SLU_NC format when necessary. */
-    if ( A->Stype == SLU_NR ) {
-       NRformat *Astore = A->Store;
-       AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-       zCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
-                              Astore->nzval, Astore->colind, Astore->rowptr,
-                              SLU_NC, A->Dtype, A->Mtype);
-       trans = TRANS;
-    } else {
-        if ( A->Stype == SLU_NC ) AA = A;
-    }
-
-    t = SuperLU_timer_();
-    /*
-     * Get column permutation vector perm_c[], according to permc_spec:
-     *   permc_spec = NATURAL:  natural ordering 
-     *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
-     *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
-     *   permc_spec = COLAMD:   approximate minimum degree column ordering
-     *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
-     */
-    permc_spec = options->ColPerm;
-    if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
-      get_perm_c(permc_spec, AA, perm_c);
-    utime[COLPERM] = SuperLU_timer_() - t;
-
-    etree = intMalloc(A->ncol);
-
-    t = SuperLU_timer_();
-    sp_preorder(options, AA, perm_c, etree, &AC);
-    utime[ETREE] = SuperLU_timer_() - t;
-
-    panel_size = sp_ienv(1);
-    relax = sp_ienv(2);
-
-    /*printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
-         relax, panel_size, sp_ienv(3), sp_ienv(4));*/
-    t = SuperLU_timer_(); 
-    /* Compute the LU factorization of A. */
-    zgstrf(options, &AC, drop_tol, relax, panel_size,
-          etree, NULL, lwork, perm_c, perm_r, L, U, stat, info);
-    utime[FACT] = SuperLU_timer_() - t;
-
-    t = SuperLU_timer_();
-    if ( *info == 0 ) {
-        /* Solve the system A*X=B, overwriting B with X. */
-        zgstrs (trans, L, U, perm_c, perm_r, B, stat, info);
-    }
-    utime[SOLVE] = SuperLU_timer_() - t;
-
-    SUPERLU_FREE (etree);
-    Destroy_CompCol_Permuted(&AC);
-    if ( A->Stype == SLU_NR ) {
-       Destroy_SuperMatrix_Store(AA);
-       SUPERLU_FREE(AA);
-    }
-
-}
diff --git a/superlu/zgssvx.c b/superlu/zgssvx.c
deleted file mode 100644
index 09979b5e..00000000
--- a/superlu/zgssvx.c
+++ /dev/null
@@ -1,623 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-#include "slu_zdefs.h"
-
-void
-zgssvx(superlu_options_t *options, SuperMatrix *A, int *perm_c, int *perm_r,
-       int *etree, char *equed, double *R, double *C,
-       SuperMatrix *L, SuperMatrix *U, void *work, int lwork,
-       SuperMatrix *B, SuperMatrix *X, double *recip_pivot_growth, 
-       double *rcond, double *ferr, double *berr, 
-       mem_usage_t *mem_usage, SuperLUStat_t *stat, int *info )
-{
-/*
- * Purpose
- * =======
- *
- * ZGSSVX solves the system of linear equations A*X=B or A'*X=B, using
- * the LU factorization from zgstrf(). Error bounds on the solution and
- * a condition estimate are also provided. It performs the following steps:
- *
- *   1. If A is stored column-wise (A->Stype = SLU_NC):
- *  
- *      1.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A is
- *           overwritten by diag(R)*A*diag(C) and B by diag(R)*B
- *           (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans
- *           = TRANS or CONJ).
- *
- *      1.2. Permute columns of A, forming A*Pc, where Pc is a permutation
- *           matrix that usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      1.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the matrix A (after equilibration if options->Equil = YES)
- *           as Pr*A*Pc = L*U, with Pr determined by partial pivoting.
- *
- *      1.4. Compute the reciprocal pivot growth factor.
- *
- *      1.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form of 
- *           A is used to estimate the condition number of the matrix A. If
- *           the reciprocal of the condition number is less than machine
- *           precision, info = A->ncol+1 is returned as a warning, but the
- *           routine still goes on to solve for X and computes error bounds
- *           as described below.
- *
- *      1.6. The system of equations is solved for X using the factored form
- *           of A.
- *
- *      1.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      1.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R)
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   2. If A is stored row-wise (A->Stype = SLU_NR), apply the above algorithm
- *      to the transpose of A:
- *
- *      2.1. If options->Equil = YES, scaling factors are computed to
- *           equilibrate the system:
- *           options->Trans = NOTRANS:
- *               diag(R)*A*diag(C) *inv(diag(C))*X = diag(R)*B
- *           options->Trans = TRANS:
- *               (diag(R)*A*diag(C))**T *inv(diag(R))*X = diag(C)*B
- *           options->Trans = CONJ:
- *               (diag(R)*A*diag(C))**H *inv(diag(R))*X = diag(C)*B
- *           Whether or not the system will be equilibrated depends on the
- *           scaling of the matrix A, but if equilibration is used, A' is
- *           overwritten by diag(R)*A'*diag(C) and B by diag(R)*B 
- *           (if trans='N') or diag(C)*B (if trans = 'T' or 'C').
- *
- *      2.2. Permute columns of transpose(A) (rows of A), 
- *           forming transpose(A)*Pc, where Pc is a permutation matrix that 
- *           usually preserves sparsity.
- *           For more details of this step, see sp_preorder.c.
- *
- *      2.3. If options->Fact != FACTORED, the LU decomposition is used to
- *           factor the transpose(A) (after equilibration if 
- *           options->Fact = YES) as Pr*transpose(A)*Pc = L*U with the
- *           permutation Pr determined by partial pivoting.
- *
- *      2.4. Compute the reciprocal pivot growth factor.
- *
- *      2.5. If some U(i,i) = 0, so that U is exactly singular, then the
- *           routine returns with info = i. Otherwise, the factored form 
- *           of transpose(A) is used to estimate the condition number of the
- *           matrix A. If the reciprocal of the condition number
- *           is less than machine precision, info = A->nrow+1 is returned as
- *           a warning, but the routine still goes on to solve for X and
- *           computes error bounds as described below.
- *
- *      2.6. The system of equations is solved for X using the factored form
- *           of transpose(A).
- *
- *      2.7. If options->IterRefine != NOREFINE, iterative refinement is
- *           applied to improve the computed solution matrix and calculate
- *           error bounds and backward error estimates for it.
- *
- *      2.8. If equilibration was used, the matrix X is premultiplied by
- *           diag(C) (if options->Trans = NOTRANS) or diag(R) 
- *           (if options->Trans = TRANS or CONJ) so that it solves the
- *           original system before equilibration.
- *
- *   See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed and how the
- *         system will be solved.
- *
- * A       (input/output) SuperMatrix*
- *         Matrix A in A*X=B, of dimension (A->nrow, A->ncol). The number
- *         of the linear equations is A->nrow. Currently, the type of A can be:
- *         Stype = SLU_NC or SLU_NR, Dtype = SLU_D, Mtype = SLU_GE.
- *         In the future, more general A may be handled.
- *
- *         On entry, If options->Fact = FACTORED and equed is not 'N', 
- *         then A must have been equilibrated by the scaling factors in
- *         R and/or C.  
- *         On exit, A is not modified if options->Equil = NO, or if 
- *         options->Equil = YES but equed = 'N' on exit.
- *         Otherwise, if options->Equil = YES and equed is not 'N',
- *         A is scaled as follows:
- *         If A->Stype = SLU_NC:
- *           equed = 'R':  A := diag(R) * A
- *           equed = 'C':  A := A * diag(C)
- *           equed = 'B':  A := diag(R) * A * diag(C).
- *         If A->Stype = SLU_NR:
- *           equed = 'R':  transpose(A) := diag(R) * transpose(A)
- *           equed = 'C':  transpose(A) := transpose(A) * diag(C)
- *           equed = 'B':  transpose(A) := diag(R) * transpose(A) * diag(C).
- *
- * perm_c  (input/output) int*
- *        If A->Stype = SLU_NC, Column permutation vector of size A->ncol,
- *         which defines the permutation matrix Pc; perm_c[i] = j means
- *         column i of A is in position j in A*Pc.
- *         On exit, perm_c may be overwritten by the product of the input
- *         perm_c and a permutation that postorders the elimination tree
- *         of Pc'*A'*A*Pc; perm_c is not changed if the elimination tree
- *         is already in postorder.
- *
- *         If A->Stype = SLU_NR, column permutation vector of size A->nrow,
- *         which describes permutation of columns of transpose(A) 
- *         (rows of A) as described above.
- * 
- * perm_r  (input/output) int*
- *         If A->Stype = SLU_NC, row permutation vector of size A->nrow, 
- *         which defines the permutation matrix Pr, and is determined
- *         by partial pivoting.  perm_r[i] = j means row i of A is in 
- *         position j in Pr*A.
- *
- *         If A->Stype = SLU_NR, permutation vector of size A->ncol, which
- *         determines permutation of rows of transpose(A)
- *         (columns of A) as described above.
- *
- *         If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *         will try to use the input perm_r, unless a certain threshold
- *         criterion is violated. In that case, perm_r is overwritten by a
- *         new permutation determined by partial pivoting or diagonal
- *         threshold pivoting.
- *         Otherwise, perm_r is output argument.
- * 
- * etree   (input/output) int*,  dimension (A->ncol)
- *         Elimination tree of Pc'*A'*A*Pc.
- *         If options->Fact != FACTORED and options->Fact != DOFACT,
- *         etree is an input argument, otherwise it is an output argument.
- *         Note: etree is a vector of parent pointers for a forest whose
- *         vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *
- * equed   (input/output) char*
- *         Specifies the form of equilibration that was done.
- *         = 'N': No equilibration.
- *         = 'R': Row equilibration, i.e., A was premultiplied by diag(R).
- *         = 'C': Column equilibration, i.e., A was postmultiplied by diag(C).
- *         = 'B': Both row and column equilibration, i.e., A was replaced 
- *                by diag(R)*A*diag(C).
- *         If options->Fact = FACTORED, equed is an input argument,
- *         otherwise it is an output argument.
- *
- * R       (input/output) double*, dimension (A->nrow)
- *         The row scale factors for A or transpose(A).
- *         If equed = 'R' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the left by diag(R).
- *         If equed = 'N' or 'C', R is not accessed.
- *         If options->Fact = FACTORED, R is an input argument,
- *             otherwise, R is output.
- *         If options->zFact = FACTORED and equed = 'R' or 'B', each element
- *             of R must be positive.
- * 
- * C       (input/output) double*, dimension (A->ncol)
- *         The column scale factors for A or transpose(A).
- *         If equed = 'C' or 'B', A (if A->Stype = SLU_NC) or transpose(A)
- *             (if A->Stype = SLU_NR) is multiplied on the right by diag(C).
- *         If equed = 'N' or 'R', C is not accessed.
- *         If options->Fact = FACTORED, C is an input argument,
- *             otherwise, C is output.
- *         If options->Fact = FACTORED and equed = 'C' or 'B', each element
- *             of C must be positive.
- *         
- * L       (output) SuperMatrix*
- *        The factor L from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype SLU_= NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses compressed row subscripts storage for supernodes, i.e.,
- *         L has types: Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
- *
- * U       (output) SuperMatrix*
- *        The factor U from the factorization
- *             Pr*A*Pc=L*U              (if A->Stype = SLU_NC) or
- *             Pr*transpose(A)*Pc=L*U   (if A->Stype = SLU_NR).
- *         Uses column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_Z, Mtype = SLU_TRU.
- *
- * work    (workspace/output) void*, size (lwork) (in bytes)
- *         User supplied workspace, should be large enough
- *         to hold data structures for factors L and U.
- *         On exit, if fact is not 'F', L and U point to this array.
- *
- * lwork   (input) int
- *         Specifies the size of work array in bytes.
- *         = 0:  allocate space internally by system malloc;
- *         > 0:  use user-supplied work array of length lwork in bytes,
- *               returns error if space runs out.
- *         = -1: the routine guesses the amount of space needed without
- *               performing the factorization, and returns it in
- *               mem_usage->total_needed; no other side effects.
- *
- *         See argument 'mem_usage' for memory usage statistics.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         If B->ncol = 0, only LU decomposition is performed, the triangular
- *                         solve is skipped.
- *         On exit,
- *            if equed = 'N', B is not modified; otherwise
- *            if A->Stype = SLU_NC:
- *               if options->Trans = NOTRANS and equed = 'R' or 'B',
- *                  B is overwritten by diag(R)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'C' of 'B',
- *                  B is overwritten by diag(C)*B;
- *            if A->Stype = SLU_NR:
- *               if options->Trans = NOTRANS and equed = 'C' or 'B',
- *                  B is overwritten by diag(C)*B;
- *               if options->Trans = TRANS or CONJ and equed = 'R' of 'B',
- *                  B is overwritten by diag(R)*B.
- *
- * X       (output) SuperMatrix*
- *         X has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE. 
- *         If info = 0 or info = A->ncol+1, X contains the solution matrix
- *         to the original system of equations. Note that A and B are modified
- *         on exit if equed is not 'N', and the solution to the equilibrated
- *         system is inv(diag(C))*X if options->Trans = NOTRANS and
- *         equed = 'C' or 'B', or inv(diag(R))*X if options->Trans = 'T' or 'C'
- *         and equed = 'R' or 'B'.
- *
- * recip_pivot_growth (output) double*
- *         The reciprocal pivot growth factor max_j( norm(A_j)/norm(U_j) ).
- *         The infinity norm is used. If recip_pivot_growth is much less
- *         than 1, the stability of the LU factorization could be poor.
- *
- * rcond   (output) double*
- *         The estimate of the reciprocal condition number of the matrix A
- *         after equilibration (if done). If rcond is less than the machine
- *         precision (in particular, if rcond = 0), the matrix is singular
- *         to working precision. This condition is indicated by a return
- *         code of info > 0.
- *
- * FERR    (output) double*, dimension (B->ncol)   
- *         The estimated forward error bound for each solution vector   
- *         X(j) (the j-th column of the solution matrix X).   
- *         If XTRUE is the true solution corresponding to X(j), FERR(j) 
- *         is an estimated upper bound for the magnitude of the largest 
- *         element in (X(j) - XTRUE) divided by the magnitude of the   
- *         largest element in X(j).  The estimate is as reliable as   
- *         the estimate for RCOND, and is almost always a slight   
- *         overestimate of the true error.
- *         If options->IterRefine = NOREFINE, ferr = 1.0.
- *
- * BERR    (output) double*, dimension (B->ncol)
- *         The componentwise relative backward error of each solution   
- *         vector X(j) (i.e., the smallest relative change in   
- *         any element of A or B that makes X(j) an exact solution).
- *         If options->IterRefine = NOREFINE, berr = 1.0.
- *
- * mem_usage (output) mem_usage_t*
- *         Record the memory usage statistics, consisting of following fields:
- *         - for_lu (float)
- *           The amount of space used in bytes for L\U data structures.
- *         - total_needed (float)
- *           The amount of space needed in bytes to perform factorization.
- *         - expansions (int)
- *           The number of memory expansions during the LU factorization.
- *
- * stat   (output) SuperLUStat_t*
- *        Record the statistics on runtime and floating-point operation count.
- *        See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *         = 0: successful exit   
- *         < 0: if info = -i, the i-th argument had an illegal value   
- *         > 0: if info = i, and i is   
- *              <= A->ncol: U(i,i) is exactly zero. The factorization has   
- *                    been completed, but the factor U is exactly   
- *                    singular, so the solution and error bounds   
- *                    could not be computed.   
- *              = A->ncol+1: U is nonsingular, but RCOND is less than machine
- *                    precision, meaning that the matrix is singular to
- *                    working precision. Nevertheless, the solution and
- *                    error bounds are computed because there are a number
- *                    of situations where the computed solution can be more
- *                    accurate than the value of RCOND would suggest.   
- *              > A->ncol+1: number of bytes allocated when memory allocation
- *                    failure occurred, plus A->ncol.
- *
- */
-
-    DNformat  *Bstore, *Xstore;
-    doublecomplex    *Bmat, *Xmat;
-    int       ldb, ldx, nrhs;
-    SuperMatrix *AA;/* A in SLU_NC format used by the factorization routine.*/
-    SuperMatrix AC; /* Matrix postmultiplied by Pc */
-    int       colequ, equil, nofact, notran, rowequ, permc_spec;
-    trans_t   trant;
-    char      norm[1];
-    int       i, j, info1;
-    double    amax, anorm, bignum, smlnum, colcnd, rowcnd, rcmax, rcmin;
-    int       relax, panel_size;
-    double    diag_pivot_thresh, drop_tol;
-    double    t0;      /* temporary time */
-    double    *utime;
-
-    /* External functions */
-    extern double zlangs(char *, SuperMatrix *);
-    extern double dlamch_(char *);
-
-    Bstore = B->Store;
-    Xstore = X->Store;
-    Bmat   = Bstore->nzval;
-    Xmat   = Xstore->nzval;
-    ldb    = Bstore->lda;
-    ldx    = Xstore->lda;
-    nrhs   = B->ncol;
-
-    *info = 0;
-    nofact = (options->Fact != FACTORED);
-    equil = (options->Equil == YES);
-    notran = (options->Trans == NOTRANS);
-    if ( nofact ) {
-       *(unsigned char *)equed = 'N';
-       rowequ = FALSE;
-       colequ = FALSE;
-    } else {
-       rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-       colequ = lsame_(equed, "C") || lsame_(equed, "B");
-       smlnum = dlamch_("Safe minimum");
-       bignum = 1. / smlnum;
-    }
-
-#if 0
-printf("dgssvx: Fact=%4d, Trans=%4d, equed=%c\n",
-       options->Fact, options->Trans, *equed);
-#endif
-
-    /* Test the input parameters */
-    if (!nofact && options->Fact != DOFACT && options->Fact != SamePattern &&
-       options->Fact != SamePattern_SameRowPerm &&
-       !notran && options->Trans != TRANS && options->Trans != CONJ &&
-       !equil && options->Equil != NO)
-       *info = -1;
-    else if ( A->nrow != A->ncol || A->nrow < 0 ||
-             (A->Stype != SLU_NC && A->Stype != SLU_NR) ||
-             A->Dtype != SLU_Z || A->Mtype != SLU_GE )
-       *info = -2;
-    else if (options->Fact == FACTORED &&
-            !(rowequ || colequ || lsame_(equed, "N")))
-       *info = -6;
-    else {
-       if (rowequ) {
-           rcmin = bignum;
-           rcmax = 0.;
-           for (j = 0; j < A->nrow; ++j) {
-               rcmin = SUPERLU_MIN(rcmin, R[j]);
-               rcmax = SUPERLU_MAX(rcmax, R[j]);
-           }
-           if (rcmin <= 0.) *info = -7;
-           else if ( A->nrow > 0)
-               rowcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-           else rowcnd = 1.;
-       }
-       if (colequ && *info == 0) {
-           rcmin = bignum;
-           rcmax = 0.;
-           for (j = 0; j < A->nrow; ++j) {
-               rcmin = SUPERLU_MIN(rcmin, C[j]);
-               rcmax = SUPERLU_MAX(rcmax, C[j]);
-           }
-           if (rcmin <= 0.) *info = -8;
-           else if (A->nrow > 0)
-               colcnd = SUPERLU_MAX(rcmin,smlnum) / SUPERLU_MIN(rcmax,bignum);
-           else colcnd = 1.;
-       }
-       if (*info == 0) {
-           if ( lwork < -1 ) *info = -12;
-           else if ( B->ncol < 0 || Bstore->lda < SUPERLU_MAX(0, A->nrow) ||
-                     B->Stype != SLU_DN || B->Dtype != SLU_Z || 
-                     B->Mtype != SLU_GE )
-               *info = -13;
-           else if ( X->ncol < 0 || Xstore->lda < SUPERLU_MAX(0, A->nrow) ||
-                     (B->ncol != 0 && B->ncol != X->ncol) ||
-                      X->Stype != SLU_DN ||
-                     X->Dtype != SLU_Z || X->Mtype != SLU_GE )
-               *info = -14;
-       }
-    }
-    if (*info != 0) {
-       i = -(*info);
-       xerbla_("zgssvx", &i);
-       return;
-    }
-    
-    /* Initialization for factor parameters */
-    panel_size = sp_ienv(1);
-    relax      = sp_ienv(2);
-    diag_pivot_thresh = options->DiagPivotThresh;
-    drop_tol   = 0.0;
-
-    utime = stat->utime;
-    
-    /* Convert A to SLU_NC format when necessary. */
-    if ( A->Stype == SLU_NR ) {
-       NRformat *Astore = A->Store;
-       AA = (SuperMatrix *) SUPERLU_MALLOC( sizeof(SuperMatrix) );
-       zCreate_CompCol_Matrix(AA, A->ncol, A->nrow, Astore->nnz, 
-                              Astore->nzval, Astore->colind, Astore->rowptr,
-                              SLU_NC, A->Dtype, A->Mtype);
-       if ( notran ) { /* Reverse the transpose argument. */
-           trant = TRANS;
-           notran = 0;
-       } else {
-           trant = NOTRANS;
-           notran = 1;
-       }
-    } else { /* A->Stype == SLU_NC */
-       trant = options->Trans;
-       AA = A;
-    }
-
-    if ( nofact && equil ) {
-       t0 = SuperLU_timer_();
-       /* Compute row and column scalings to equilibrate the matrix A. */
-       zgsequ(AA, R, C, &rowcnd, &colcnd, &amax, &info1);
-       
-       if ( info1 == 0 ) {
-           /* Equilibrate matrix A. */
-           zlaqgs(AA, R, C, rowcnd, colcnd, amax, equed);
-           rowequ = lsame_(equed, "R") || lsame_(equed, "B");
-           colequ = lsame_(equed, "C") || lsame_(equed, "B");
-       }
-       utime[EQUIL] = SuperLU_timer_() - t0;
-    }
-
-    if ( nrhs > 0 ) {
-        /* Scale the right hand side if equilibration was performed. */
-        if ( notran ) {
-           if ( rowequ ) {
-               for (j = 0; j < nrhs; ++j)
-                   for (i = 0; i < A->nrow; ++i) {
-                        zd_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], R[i]);
-                   }
-           }
-        } else if ( colequ ) {
-           for (j = 0; j < nrhs; ++j)
-               for (i = 0; i < A->nrow; ++i) {
-                    zd_mult(&Bmat[i+j*ldb], &Bmat[i+j*ldb], C[i]);
-               }
-        }
-    }
-
-    if ( nofact ) {
-       
-        t0 = SuperLU_timer_();
-       /*
-        * Gnet column permutation vector perm_c[], according to permc_spec:
-        *   permc_spec = NATURAL:  natural ordering 
-        *   permc_spec = MMD_AT_PLUS_A: minimum degree on structure of A'+A
-        *   permc_spec = MMD_ATA:  minimum degree on structure of A'*A
-        *   permc_spec = COLAMD:   approximate minimum degree column ordering
-        *   permc_spec = MY_PERMC: the ordering already supplied in perm_c[]
-        */
-       permc_spec = options->ColPerm;
-       if ( permc_spec != MY_PERMC && options->Fact == DOFACT )
-            get_perm_c(permc_spec, AA, perm_c);
-       utime[COLPERM] = SuperLU_timer_() - t0;
-
-       t0 = SuperLU_timer_();
-       sp_preorder(options, AA, perm_c, etree, &AC);
-       utime[ETREE] = SuperLU_timer_() - t0;
-    
-/*     printf("Factor PA = LU ... relax %d\tw %d\tmaxsuper %d\trowblk %d\n", 
-              relax, panel_size, sp_ienv(3), sp_ienv(4));
-       fflush(stdout); */
-       
-       /* Compute the LU factorization of A*Pc. */
-       t0 = SuperLU_timer_();
-       zgstrf(options, &AC, drop_tol, relax, panel_size,
-              etree, work, lwork, perm_c, perm_r, L, U, stat, info);
-       utime[FACT] = SuperLU_timer_() - t0;
-       
-       if ( lwork == -1 ) {
-           mem_usage->total_needed = *info - A->ncol;
-           return;
-       }
-    }
-
-    if ( options->PivotGrowth ) {
-        if ( *info > 0 ) {
-           if ( *info <= A->ncol ) {
-               /* Compute the reciprocal pivot growth factor of the leading
-                  rank-deficient *info columns of A. */
-               *recip_pivot_growth = zPivotGrowth(*info, AA, perm_c, L, U);
-           }
-           return;
-        }
-
-        /* Compute the reciprocal pivot growth factor *recip_pivot_growth. */
-        *recip_pivot_growth = zPivotGrowth(A->ncol, AA, perm_c, L, U);
-    }
-
-    if ( options->ConditionNumber ) {
-        /* Estimate the reciprocal of the condition number of A. */
-        t0 = SuperLU_timer_();
-        if ( notran ) {
-           *(unsigned char *)norm = '1';
-        } else {
-           *(unsigned char *)norm = 'I';
-        }
-        anorm = zlangs(norm, AA);
-        zgscon(norm, L, U, anorm, rcond, stat, info);
-        utime[RCOND] = SuperLU_timer_() - t0;
-    }
-    
-    if ( nrhs > 0 ) {
-        /* Compute the solution matrix X. */
-        for (j = 0; j < nrhs; j++)  /* Save a copy of the right hand sides */
-            for (i = 0; i < B->nrow; i++)
-               Xmat[i + j*ldx] = Bmat[i + j*ldb];
-    
-        t0 = SuperLU_timer_();
-        zgstrs (trant, L, U, perm_c, perm_r, X, stat, info);
-        utime[SOLVE] = SuperLU_timer_() - t0;
-    
-        /* Use iterative refinement to improve the computed solution and 
compute
-           error bounds and backward error estimates for it. */
-        t0 = SuperLU_timer_();
-        if ( options->IterRefine != NOREFINE ) {
-            zgsrfs(trant, AA, L, U, perm_c, perm_r, equed, R, C, B,
-                   X, ferr, berr, stat, info);
-        } else {
-            for (j = 0; j < nrhs; ++j) ferr[j] = berr[j] = 1.0;
-        }
-        utime[REFINE] = SuperLU_timer_() - t0;
-
-        /* Transform the solution matrix X to a solution of the original 
system. */
-        if ( notran ) {
-           if ( colequ ) {
-               for (j = 0; j < nrhs; ++j)
-                   for (i = 0; i < A->nrow; ++i) {
-                        zd_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], C[i]);
-                   }
-           }
-        } else if ( rowequ ) {
-           for (j = 0; j < nrhs; ++j)
-               for (i = 0; i < A->nrow; ++i) {
-                    zd_mult(&Xmat[i+j*ldx], &Xmat[i+j*ldx], R[i]);
-                }
-        }
-    } /* end if nrhs > 0 */
-
-    if ( options->ConditionNumber ) {
-        /* Set INFO = A->ncol+1 if the matrix is singular to working 
precision. */
-        if ( *rcond < dlamch_("E") ) *info = A->ncol + 1;
-    }
-
-    if ( nofact ) {
-        zQuerySpace(L, U, mem_usage);
-        Destroy_CompCol_Permuted(&AC);
-    }
-    if ( A->Stype == SLU_NR ) {
-       Destroy_SuperMatrix_Store(AA);
-       SUPERLU_FREE(AA);
-    }
-
-}
diff --git a/superlu/zgstrf.c b/superlu/zgstrf.c
deleted file mode 100644
index 1aaec38a..00000000
--- a/superlu/zgstrf.c
+++ /dev/null
@@ -1,432 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include "slu_zdefs.h"
-extern void countnz();
-extern void fixupL();
-
-void
-zgstrf (superlu_options_t *options, SuperMatrix *A, double drop_tol,
-        int relax, int panel_size, int *etree, void *work, int lwork,
-        int *perm_c, int *perm_r, SuperMatrix *L, SuperMatrix *U,
-        SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * ZGSTRF computes an LU factorization of a general sparse m-by-n
- * matrix A using partial pivoting with row interchanges.
- * The factorization has the form
- *     Pr * A = L * U
- * where Pr is a row permutation matrix, L is lower triangular with unit
- * diagonal elements (lower trapezoidal if A->nrow > A->ncol), and U is upper 
- * triangular (upper trapezoidal if A->nrow < A->ncol).
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * options (input) superlu_options_t*
- *         The structure defines the input parameters to control
- *         how the LU decomposition will be performed.
- *
- * A        (input) SuperMatrix*
- *         Original matrix A, permuted by columns, of dimension
- *          (A->nrow, A->ncol). The type of A can be:
- *          Stype = SLU_NCP; Dtype = SLU_Z; Mtype = SLU_GE.
- *
- * drop_tol (input) double (NOT IMPLEMENTED)
- *         Drop tolerance parameter. At step j of the Gaussian elimination,
- *          if abs(A_ij)/(max_i abs(A_ij)) < drop_tol, drop entry A_ij.
- *          0 <= drop_tol <= 1. The default value of drop_tol is 0.
- *
- * relax    (input) int
- *          To control degree of relaxing supernodes. If the number
- *          of nodes (columns) in a subtree of the elimination tree is less
- *          than relax, this subtree is considered as one supernode,
- *          regardless of the row structures of those columns.
- *
- * panel_size (input) int
- *          A panel consists of at most panel_size consecutive columns.
- *
- * etree    (input) int*, dimension (A->ncol)
- *          Elimination tree of A'*A.
- *          Note: etree is a vector of parent pointers for a forest whose
- *          vertices are the integers 0 to A->ncol-1; etree[root]==A->ncol.
- *          On input, the columns of A should be permuted so that the
- *          etree is in a certain postorder.
- *
- * work     (input/output) void*, size (lwork) (in bytes)
- *          User-supplied work space and space for the output data structures.
- *          Not referenced if lwork = 0;
- *
- * lwork   (input) int
- *         Specifies the size of work array in bytes.
- *         = 0:  allocate space internally by system malloc;
- *         > 0:  use user-supplied work array of length lwork in bytes,
- *               returns error if space runs out.
- *         = -1: the routine guesses the amount of space needed without
- *               performing the factorization, and returns it in
- *               *info; no other side effects.
- *
- * perm_c   (input) int*, dimension (A->ncol)
- *         Column permutation vector, which defines the 
- *          permutation matrix Pc; perm_c[i] = j means column i of A is 
- *          in position j in A*Pc.
- *          When searching for diagonal, perm_c[*] is applied to the
- *          row subscripts of A, so that diagonal threshold pivoting
- *          can find the diagonal of A, rather than that of A*Pc.
- *
- * perm_r   (input/output) int*, dimension (A->nrow)
- *          Row permutation vector which defines the permutation matrix Pr,
- *          perm_r[i] = j means row i of A is in position j in Pr*A.
- *          If options->Fact = SamePattern_SameRowPerm, the pivoting routine
- *             will try to use the input perm_r, unless a certain threshold
- *             criterion is violated. In that case, perm_r is overwritten by
- *             a new permutation determined by partial pivoting or diagonal
- *             threshold pivoting.
- *          Otherwise, perm_r is output argument;
- *
- * L        (output) SuperMatrix*
- *          The factor L from the factorization Pr*A=L*U; use compressed row 
- *          subscripts storage for supernodes, i.e., L has type: 
- *          Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
- *
- * U        (output) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *          storage scheme, i.e., U has types: Stype = SLU_NC, 
- *          Dtype = SLU_Z, Mtype = SLU_TRU.
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation 
count.
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info     (output) int*
- *          = 0: successful exit
- *          < 0: if info = -i, the i-th argument had an illegal value
- *          > 0: if info = i, and i is
- *             <= A->ncol: U(i,i) is exactly zero. The factorization has
- *                been completed, but the factor U is exactly singular,
- *                and division by zero will occur if it is used to solve a
- *                system of equations.
- *             > A->ncol: number of bytes allocated when memory allocation
- *                failure occurred, plus A->ncol. If lwork = -1, it is
- *                the estimated amount of space needed, plus A->ncol.
- *
- * ======================================================================
- *
- * Local Working Arrays: 
- * ======================
- *   m = number of rows in the matrix
- *   n = number of columns in the matrix
- *
- *   xprune[0:n-1]: xprune[*] points to locations in subscript 
- *     vector lsub[*]. For column i, xprune[i] denotes the point where 
- *     structural pruning begins. I.e. only xlsub[i],..,xprune[i]-1 need 
- *     to be traversed for symbolic factorization.
- *
- *   marker[0:3*m-1]: marker[i] = j means that node i has been 
- *     reached when working on column j.
- *     Storage: relative to original row subscripts
- *     NOTE: There are 3 of them: marker/marker1 are used for panel dfs, 
- *           see zpanel_dfs.c; marker2 is used for inner-factorization,
- *            see zcolumn_dfs.c.
- *
- *   parent[0:m-1]: parent vector used during dfs
- *      Storage: relative to new row subscripts
- *
- *   xplore[0:m-1]: xplore[i] gives the location of the next (dfs) 
- *     unexplored neighbor of i in lsub[*]
- *
- *   segrep[0:nseg-1]: contains the list of supernodal representatives
- *     in topological order of the dfs. A supernode representative is the 
- *     last column of a supernode.
- *      The maximum size of segrep[] is n.
- *
- *   repfnz[0:W*m-1]: for a nonzero segment U[*,j] that ends at a 
- *     supernodal representative r, repfnz[r] is the location of the first 
- *     nonzero in this segment.  It is also used during the dfs: repfnz[r]>0
- *     indicates the supernode r has been explored.
- *     NOTE: There are W of them, each used for one column of a panel. 
- *
- *   panel_lsub[0:W*m-1]: temporary for the nonzeros row indices below 
- *      the panel diagonal. These are filled in during zpanel_dfs(), and are
- *      used later in the inner LU factorization within the panel.
- *     panel_lsub[]/dense[] pair forms the SPA data structure.
- *     NOTE: There are W of them.
- *
- *   dense[0:W*m-1]: sparse accumulating (SPA) vector for intermediate values;
- *                NOTE: there are W of them.
- *
- *   tempv[0:*]: real temporary used for dense numeric kernels;
- *     The size of this array is defined by NUM_TEMPV() in zsp_defs.h.
- *
- */
-    /* Local working arrays */
-    NCPformat *Astore;
-    int       *iperm_r = NULL; /* inverse of perm_r; used when 
-                                  options->Fact == SamePattern_SameRowPerm */
-    int       *iperm_c; /* inverse of perm_c */
-    int       *iwork;
-    doublecomplex    *zwork;
-    int              *segrep, *repfnz, *parent, *xplore;
-    int              *panel_lsub; /* dense[]/panel_lsub[] pair forms a w-wide 
SPA */
-    int              *xprune;
-    int              *marker;
-    doublecomplex    *dense, *tempv;
-    int       *relax_end;
-    doublecomplex    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int       *xsup, *supno;
-    int       *xlsub, *xlusup, *xusub;
-    int       nzlumax;
-    static GlobalLU_t Glu; /* persistent to facilitate multiple factors. */
-
-    /* Local scalars */
-    fact_t    fact = options->Fact;
-    double    diag_pivot_thresh = options->DiagPivotThresh;
-    int       pivrow;   /* pivotal row number in the original matrix A */
-    int       nseg1;   /* no of segments in U-column above panel row jcol */
-    int       nseg;    /* no of segments in each U-column */
-    register int jcol; 
-    register int kcol; /* end column of a relaxed snode */
-    register int icol;
-    register int i, k, jj, new_next, iinfo;
-    int       m, n, min_mn, jsupno, fsupc, nextlu, nextu;
-    int       w_def;   /* upper bound on panel width */
-    int       usepr, iperm_r_allocated = 0;
-    int       nnzL, nnzU;
-    int       *panel_histo = stat->panel_histo;
-    flops_t   *ops = stat->ops;
-
-    iinfo    = 0;
-    m        = A->nrow;
-    n        = A->ncol;
-    min_mn   = SUPERLU_MIN(m, n);
-    Astore   = A->Store;
-    a        = Astore->nzval;
-    asub     = Astore->rowind;
-    xa_begin = Astore->colbeg;
-    xa_end   = Astore->colend;
-
-    /* Allocate storage common to the factor routines */
-    *info = zLUMemInit(fact, work, lwork, m, n, Astore->nnz,
-                       panel_size, L, U, &Glu, &iwork, &zwork);
-    if ( *info ) return;
-    
-    xsup    = Glu.xsup;
-    supno   = Glu.supno;
-    xlsub   = Glu.xlsub;
-    xlusup  = Glu.xlusup;
-    xusub   = Glu.xusub;
-    
-    SetIWork(m, n, panel_size, iwork, &segrep, &parent, &xplore,
-            &repfnz, &panel_lsub, &xprune, &marker);
-    zSetRWork(m, panel_size, zwork, &dense, &tempv);
-    
-    usepr = (fact == SamePattern_SameRowPerm);
-    if ( usepr ) {
-       /* Compute the inverse of perm_r */
-       iperm_r = (int *) intMalloc(m);
-       for (k = 0; k < m; ++k) iperm_r[perm_r[k]] = k;
-       iperm_r_allocated = 1;
-    }
-    iperm_c = (int *) intMalloc(n);
-    for (k = 0; k < n; ++k) iperm_c[perm_c[k]] = k;
-
-    /* Identify relaxed snodes */
-    relax_end = (int *) intMalloc(n);
-    if ( options->SymmetricMode == YES ) {
-        heap_relax_snode(n, etree, relax, marker, relax_end); 
-    } else {
-        relax_snode(n, etree, relax, marker, relax_end); 
-    }
-    
-    ifill (perm_r, m, EMPTY);
-    ifill (marker, m * NO_MARKER, EMPTY);
-    supno[0] = -1;
-    xsup[0]  = xlsub[0] = xusub[0] = xlusup[0] = 0;
-    w_def    = panel_size;
-
-    /* 
-     * Work on one "panel" at a time. A panel is one of the following: 
-     *    (a) a relaxed supernode at the bottom of the etree, or
-     *    (b) panel_size contiguous columns, defined by the user
-     */
-    for (jcol = 0; jcol < min_mn; ) {
-
-       if ( relax_end[jcol] != EMPTY ) { /* start of a relaxed snode */
-           kcol = relax_end[jcol];       /* end of the relaxed snode */
-           panel_histo[kcol-jcol+1]++;
-
-           /* --------------------------------------
-            * Factorize the relaxed supernode(jcol:kcol) 
-            * -------------------------------------- */
-           /* Determine the union of the row structure of the snode */
-           if ( (*info = zsnode_dfs(jcol, kcol, asub, xa_begin, xa_end,
-                                   xprune, marker, &Glu)) != 0 )
-               return;
-
-            nextu    = xusub[jcol];
-           nextlu   = xlusup[jcol];
-           jsupno   = supno[jcol];
-           fsupc    = xsup[jsupno];
-           new_next = nextlu + (xlsub[fsupc+1]-xlsub[fsupc])*(kcol-jcol+1);
-           nzlumax = Glu.nzlumax;
-           while ( new_next > nzlumax ) {
-               if ( (*info = zLUMemXpand(jcol, nextlu, LUSUP, &nzlumax, &Glu)) 
)
-                   return;
-           }
-    
-           for (icol = jcol; icol<= kcol; icol++) {
-               xusub[icol+1] = nextu;
-               
-               /* Scatter into SPA dense[*] */
-               for (k = xa_begin[icol]; k < xa_end[icol]; k++)
-                   dense[asub[k]] = a[k];
-
-               /* Numeric update within the snode */
-               zsnode_bmod(icol, jsupno, fsupc, dense, tempv, &Glu, stat);
-
-               if ( (*info = zpivotL(icol, diag_pivot_thresh, &usepr, perm_r,
-                                     iperm_r, iperm_c, &pivrow, &Glu, stat)) )
-                   if ( iinfo == 0 ) iinfo = *info;
-               
-#ifdef DEBUG
-               zprint_lu_col("[1]: ", icol, pivrow, xprune, &Glu);
-#endif
-
-           }
-
-           jcol = icol;
-
-       } else { /* Work on one panel of panel_size columns */
-           
-           /* Adjust panel_size so that a panel won't overlap with the next 
-            * relaxed snode.
-            */
-           panel_size = w_def;
-           for (k = jcol + 1; k < SUPERLU_MIN(jcol+panel_size, min_mn); k++) 
-               if ( relax_end[k] != EMPTY ) {
-                   panel_size = k - jcol;
-                   break;
-               }
-           if ( k == min_mn ) panel_size = min_mn - jcol;
-           panel_histo[panel_size]++;
-
-           /* symbolic factor on a panel of columns */
-           zpanel_dfs(m, panel_size, jcol, A, perm_r, &nseg1,
-                     dense, panel_lsub, segrep, repfnz, xprune,
-                     marker, parent, xplore, &Glu);
-           
-           /* numeric sup-panel updates in topological order */
-           zpanel_bmod(m, panel_size, jcol, nseg1, dense,
-                       tempv, segrep, repfnz, &Glu, stat);
-           
-           /* Sparse LU within the panel, and below panel diagonal */
-           for ( jj = jcol; jj < jcol + panel_size; jj++) {
-               k = (jj - jcol) * m; /* column index for w-wide arrays */
-
-               nseg = nseg1;   /* Begin after all the panel segments */
-
-               if ((*info = zcolumn_dfs(m, jj, perm_r, &nseg, &panel_lsub[k],
-                                       segrep, &repfnz[k], xprune, marker,
-                                       parent, xplore, &Glu)) != 0) return;
-
-               /* Numeric updates */
-               if ((*info = zcolumn_bmod(jj, (nseg - nseg1), &dense[k],
-                                        tempv, &segrep[nseg1], &repfnz[k],
-                                        jcol, &Glu, stat)) != 0) return;
-               
-               /* Copy the U-segments to ucol[*] */
-               if ((*info = zcopy_to_ucol(jj, nseg, segrep, &repfnz[k],
-                                         perm_r, &dense[k], &Glu)) != 0)
-                   return;
-
-               if ( (*info = zpivotL(jj, diag_pivot_thresh, &usepr, perm_r,
-                                     iperm_r, iperm_c, &pivrow, &Glu, stat)) )
-                   if ( iinfo == 0 ) iinfo = *info;
-
-               /* Prune columns (0:jj-1) using column jj */
-               zpruneL(jj, perm_r, pivrow, nseg, segrep,
-                        &repfnz[k], xprune, &Glu);
-
-               /* Reset repfnz[] for this column */
-               resetrep_col (nseg, segrep, &repfnz[k]);
-               
-#ifdef DEBUG
-               zprint_lu_col("[2]: ", jj, pivrow, xprune, &Glu);
-#endif
-
-           }
-
-           jcol += panel_size; /* Move to the next panel */
-
-       } /* else */
-
-    } /* for */
-
-    *info = iinfo;
-    
-    if ( m > n ) {
-       k = 0;
-        for (i = 0; i < m; ++i) 
-            if ( perm_r[i] == EMPTY ) {
-               perm_r[i] = n + k;
-               ++k;
-           }
-    }
-
-    countnz(min_mn, xprune, &nnzL, &nnzU, &Glu);
-    fixupL(min_mn, perm_r, &Glu);
-
-    zLUWorkFree(iwork, zwork, &Glu); /* Free work space and compress storage */
-
-    if ( fact == SamePattern_SameRowPerm ) {
-        /* L and U structures may have changed due to possibly different
-          pivoting, even though the storage is available.
-          There could also be memory expansions, so the array locations
-           may have changed, */
-        ((SCformat *)L->Store)->nnz = nnzL;
-       ((SCformat *)L->Store)->nsuper = Glu.supno[n];
-       ((SCformat *)L->Store)->nzval = Glu.lusup;
-       ((SCformat *)L->Store)->nzval_colptr = Glu.xlusup;
-       ((SCformat *)L->Store)->rowind = Glu.lsub;
-       ((SCformat *)L->Store)->rowind_colptr = Glu.xlsub;
-       ((NCformat *)U->Store)->nnz = nnzU;
-       ((NCformat *)U->Store)->nzval = Glu.ucol;
-       ((NCformat *)U->Store)->rowind = Glu.usub;
-       ((NCformat *)U->Store)->colptr = Glu.xusub;
-    } else {
-        zCreate_SuperNode_Matrix(L, A->nrow, min_mn, nnzL, Glu.lusup, 
-                                Glu.xlusup, Glu.lsub, Glu.xlsub, Glu.supno,
-                                Glu.xsup, SLU_SC, SLU_Z, SLU_TRLU);
-       zCreate_CompCol_Matrix(U, min_mn, min_mn, nnzU, Glu.ucol, 
-                              Glu.usub, Glu.xusub, SLU_NC, SLU_Z, SLU_TRU);
-    }
-    
-    ops[FACT] += ops[TRSV] + ops[GEMV];        
-    
-    if ( iperm_r_allocated ) SUPERLU_FREE (iperm_r);
-    SUPERLU_FREE (iperm_c);
-    SUPERLU_FREE (relax_end);
-
-}
diff --git a/superlu/zgstrs.c b/superlu/zgstrs.c
deleted file mode 100644
index e415e47a..00000000
--- a/superlu/zgstrs.c
+++ /dev/null
@@ -1,344 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include "slu_zdefs.h"
-extern void ztrsm_();
-extern void zgemm_();
-
-
-/* 
- * Function prototypes 
- */
-void zusolve(int, int, doublecomplex*, doublecomplex*);
-void zlsolve(int, int, doublecomplex*, doublecomplex*);
-void zmatvec(int, int, int, doublecomplex*, doublecomplex*, doublecomplex*);
-
-
-void
-zgstrs (trans_t trans, SuperMatrix *L, SuperMatrix *U,
-        int *perm_c, int *perm_r, SuperMatrix *B,
-        SuperLUStat_t *stat, int *info)
-{
-/*
- * Purpose
- * =======
- *
- * ZGSTRS solves a system of linear equations A*X=B or A'*X=B
- * with A sparse and B dense, using the LU factorization computed by
- * ZGSTRF.
- *
- * See supermatrix.h for the definition of 'SuperMatrix' structure.
- *
- * Arguments
- * =========
- *
- * trans   (input) trans_t
- *          Specifies the form of the system of equations:
- *          = NOTRANS: A * X = B  (No transpose)
- *          = TRANS:   A'* X = B  (Transpose)
- *          = CONJ:    A**H * X = B  (Conjugate transpose)
- *
- * L       (input) SuperMatrix*
- *         The factor L from the factorization Pr*A*Pc=L*U as computed by
- *         zgstrf(). Use compressed row subscripts storage for supernodes,
- *         i.e., L has types: Stype = SLU_SC, Dtype = SLU_Z, Mtype = SLU_TRLU.
- *
- * U       (input) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U as computed by
- *         zgstrf(). Use column-wise storage scheme, i.e., U has types:
- *         Stype = SLU_NC, Dtype = SLU_Z, Mtype = SLU_TRU.
- *
- * perm_c  (input) int*, dimension (L->ncol)
- *        Column permutation vector, which defines the 
- *         permutation matrix Pc; perm_c[i] = j means column i of A is 
- *         in position j in A*Pc.
- *
- * perm_r  (input) int*, dimension (L->nrow)
- *         Row permutation vector, which defines the permutation matrix Pr; 
- *         perm_r[i] = j means row i of A is in position j in Pr*A.
- *
- * B       (input/output) SuperMatrix*
- *         B has types: Stype = SLU_DN, Dtype = SLU_Z, Mtype = SLU_GE.
- *         On entry, the right hand side matrix.
- *         On exit, the solution matrix if info = 0;
- *
- * stat     (output) SuperLUStat_t*
- *          Record the statistics on runtime and floating-point operation 
count.
- *          See util.h for the definition of 'SuperLUStat_t'.
- *
- * info    (output) int*
- *        = 0: successful exit
- *        < 0: if info = -i, the i-th argument had an illegal value
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1, ftcs2, ftcs3, ftcs4;
-#endif
-    int      incx = 1, incy = 1;
-#ifdef USE_VENDOR_BLAS
-    doublecomplex   alpha = {1.0, 0.0}, beta = {1.0, 0.0};
-    doublecomplex   *work_col;
-#endif
-    doublecomplex   temp_comp;
-    DNformat *Bstore;
-    doublecomplex   *Bmat;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    doublecomplex   *Lval, *Uval;
-    int      fsupc, nrow, nsupr, nsupc, luptr, istart, irow;
-    int      i, j, k, iptr, jcol, n, ldb, nrhs;
-    doublecomplex   *work, *rhs_work, *soln;
-    flops_t  solve_ops;
-    void zprint_soln();
-
-    /* Test input parameters ... */
-    *info = 0;
-    Bstore = B->Store;
-    ldb = Bstore->lda;
-    nrhs = B->ncol;
-    if ( trans != NOTRANS && trans != TRANS && trans != CONJ ) *info = -1;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ||
-             L->Stype != SLU_SC || L->Dtype != SLU_Z || L->Mtype != SLU_TRLU )
-       *info = -2;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ||
-             U->Stype != SLU_NC || U->Dtype != SLU_Z || U->Mtype != SLU_TRU )
-       *info = -3;
-    else if ( ldb < SUPERLU_MAX(0, L->nrow) ||
-             B->Stype != SLU_DN || B->Dtype != SLU_Z || B->Mtype != SLU_GE )
-       *info = -6;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("zgstrs", &i);
-       return;
-    }
-
-    n = L->nrow;
-    work = doublecomplexCalloc(n * nrhs);
-    if ( !work ) ABORT("Malloc fails for local work[].");
-    soln = doublecomplexMalloc(n);
-    if ( !soln ) ABORT("Malloc fails for local soln[].");
-
-    Bmat = Bstore->nzval;
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-    
-    if ( trans == NOTRANS ) {
-       /* Permute right hand sides to form Pr*B */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_r[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       
-       /* Forward solve PLy=Pb. */
-       for (k = 0; k <= Lstore->nsuper; k++) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           nrow = nsupr - nsupc;
-
-           solve_ops += 4 * nsupc * (nsupc - 1) * nrhs;
-           solve_ops += 8 * nrow * nsupc * nrhs;
-           
-           if ( nsupc == 1 ) {
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   luptr = L_NZ_START(fsupc);
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); iptr++){
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       zz_mult(&temp_comp, &rhs_work[fsupc], &Lval[luptr]);
-                       z_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
-                   }
-               }
-           } else {
-               luptr = L_NZ_START(fsupc);
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("N", strlen("N"));
-               ftcs3 = _cptofcd("U", strlen("U"));
-               CTRSM( ftcs1, ftcs1, ftcs2, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               
-               CGEMM( ftcs2, ftcs2, &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-#else
-               ztrsm_("L", "L", "N", "U", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-               
-               zgemm_( "N", "N", &nrow, &nrhs, &nsupc, &alpha, 
-                       &Lval[luptr+nsupc], &nsupr, &Bmat[fsupc], &ldb, 
-                       &beta, &work[0], &n );
-#endif
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   work_col = &work[j*n];
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       z_sub(&rhs_work[irow], &rhs_work[irow], &work_col[i]);
-                       work_col[i].r = 0.0;
-                       work_col[i].i = 0.0;
-                       iptr++;
-                   }
-               }
-#else          
-               for (j = 0; j < nrhs; j++) {
-                   rhs_work = &Bmat[j*ldb];
-                   zlsolve (nsupr, nsupc, &Lval[luptr], &rhs_work[fsupc]);
-                   zmatvec (nsupr, nrow, nsupc, &Lval[luptr+nsupc],
-                           &rhs_work[fsupc], &work[0] );
-
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; i++) {
-                       irow = L_SUB(iptr);
-                       z_sub(&rhs_work[irow], &rhs_work[irow], &work[i]);
-                       work[i].r = 0.;
-                       work[i].i = 0.;
-                       iptr++;
-                   }
-               }
-#endif             
-           } /* else ... */
-       } /* for L-solve */
-
-#ifdef DEBUG
-       printf("After L-solve: y=\n");
-       zprint_soln(n, nrhs, Bmat);
-#endif
-
-       /*
-        * Back solve Ux=y.
-        */
-       for (k = Lstore->nsuper; k >= 0; k--) {
-           fsupc = L_FST_SUPC(k);
-           istart = L_SUB_START(fsupc);
-           nsupr = L_SUB_START(fsupc+1) - istart;
-           nsupc = L_FST_SUPC(k+1) - fsupc;
-           luptr = L_NZ_START(fsupc);
-
-           solve_ops += 4 * nsupc * (nsupc + 1) * nrhs;
-
-           if ( nsupc == 1 ) {
-               rhs_work = &Bmat[0];
-               for (j = 0; j < nrhs; j++) {
-                   z_div(&rhs_work[fsupc], &rhs_work[fsupc], &Lval[luptr]);
-                   rhs_work += ldb;
-               }
-           } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-               ftcs1 = _cptofcd("L", strlen("L"));
-               ftcs2 = _cptofcd("U", strlen("U"));
-               ftcs3 = _cptofcd("N", strlen("N"));
-               CTRSM( ftcs1, ftcs2, ftcs3, ftcs3, &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#else
-               ztrsm_("L", "U", "N", "N", &nsupc, &nrhs, &alpha,
-                      &Lval[luptr], &nsupr, &Bmat[fsupc], &ldb);
-#endif
-#else          
-               for (j = 0; j < nrhs; j++)
-                   zusolve ( nsupr, nsupc, &Lval[luptr], &Bmat[fsupc+j*ldb] );
-#endif         
-           }
-
-           for (j = 0; j < nrhs; ++j) {
-               rhs_work = &Bmat[j*ldb];
-               for (jcol = fsupc; jcol < fsupc + nsupc; jcol++) {
-                   solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++ ){
-                       irow = U_SUB(i);
-                       zz_mult(&temp_comp, &rhs_work[jcol], &Uval[i]);
-                       z_sub(&rhs_work[irow], &rhs_work[irow], &temp_comp);
-                   }
-               }
-           }
-           
-       } /* for U-solve */
-
-#ifdef DEBUG
-       printf("After U-solve: x=\n");
-       zprint_soln(n, nrhs, Bmat);
-#endif
-
-       /* Compute the final solution X := Pc*X. */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[k] = rhs_work[perm_c[k]];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-       
-        stat->ops[SOLVE] = solve_ops;
-
-    } else { /* Solve A'*X=B or CONJ(A)*X=B */
-       /* Permute right hand sides to form Pc'*B. */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[perm_c[k]] = rhs_work[k];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-
-       stat->ops[SOLVE] = 0;
-        if (trans == TRANS) {
-           for (k = 0; k < nrhs; ++k) {
-               /* Multiply by inv(U'). */
-               sp_ztrsv("U", "T", "N", L, U, &Bmat[k*ldb], stat, info);
-           
-               /* Multiply by inv(L'). */
-               sp_ztrsv("L", "T", "U", L, U, &Bmat[k*ldb], stat, info);
-           }
-         } else { /* trans == CONJ */
-            for (k = 0; k < nrhs; ++k) {                
-                /* Multiply by conj(inv(U')). */
-                sp_ztrsv("U", "C", "N", L, U, &Bmat[k*ldb], stat, info);
-                
-                /* Multiply by conj(inv(L')). */
-                sp_ztrsv("L", "C", "U", L, U, &Bmat[k*ldb], stat, info);
-           }
-         }
-       /* Compute the final solution X := Pr'*X (=inv(Pr)*X) */
-       for (i = 0; i < nrhs; i++) {
-           rhs_work = &Bmat[i*ldb];
-           for (k = 0; k < n; k++) soln[k] = rhs_work[perm_r[k]];
-           for (k = 0; k < n; k++) rhs_work[k] = soln[k];
-       }
-
-    }
-
-    SUPERLU_FREE(work);
-    SUPERLU_FREE(soln);
-}
-
-/*
- * Diagnostic print of the solution vector 
- */
-void
-zprint_soln(int n, int nrhs, doublecomplex *soln)
-{
-    int i;
-
-    for (i = 0; i < n; i++) 
-       printf("\t%d: %.4f\n", i, soln[i].r);
-}
diff --git a/superlu/zlacon.c b/superlu/zlacon.c
deleted file mode 100644
index 33822b3e..00000000
--- a/superlu/zlacon.c
+++ /dev/null
@@ -1,236 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-#include <math.h>
-#include "slu_Cnames.h"
-#include "slu_dcomplex.h"
-extern void zcopy_();
-
-
-int
-zlacon_(int *n, doublecomplex *v, doublecomplex *x, double *est, int *kase)
-
-{
-/*
-    Purpose   
-    =======   
-
-    ZLACON estimates the 1-norm of a square matrix A.   
-    Reverse communication is used for evaluating matrix-vector products. 
-  
-
-    Arguments   
-    =========   
-
-    N      (input) INT
-           The order of the matrix.  N >= 1.   
-
-    V      (workspace) DOUBLE COMPLEX PRECISION array, dimension (N)   
-           On the final return, V = A*W,  where  EST = norm(V)/norm(W)   
-           (W is not returned).   
-
-    X      (input/output) DOUBLE COMPLEX PRECISION array, dimension (N)   
-           On an intermediate return, X should be overwritten by   
-                 A * X,   if KASE=1,   
-                 A' * X,  if KASE=2,
-           where A' is the conjugate transpose of A,
-           and ZLACON must be re-called with all the other parameters   
-           unchanged.   
-
-
-    EST    (output) DOUBLE PRECISION   
-           An estimate (a lower bound) for norm(A).   
-
-    KASE   (input/output) INT
-           On the initial call to ZLACON, KASE should be 0.   
-           On an intermediate return, KASE will be 1 or 2, indicating   
-           whether X should be overwritten by A * X  or A' * X.   
-           On the final return from ZLACON, KASE will again be 0.   
-
-    Further Details   
-    ======= =======   
-
-    Contributed by Nick Higham, University of Manchester.   
-    Originally named CONEST, dated March 16, 1988.   
-
-    Reference: N.J. Higham, "FORTRAN codes for estimating the one-norm of 
-    a real or complex matrix, with applications to condition estimation", 
-    ACM Trans. Math. Soft., vol. 14, no. 4, pp. 381-396, December 1988.   
-    ===================================================================== 
-*/
-
-    /* Table of constant values */
-    int c__1 = 1;
-    doublecomplex      zero = {0.0, 0.0};
-    doublecomplex      one = {1.0, 0.0};
-
-    /* System generated locals */
-    double d__1;
-    
-    /* Local variables */
-    static int iter;
-    static int jump, jlast;
-    static double altsgn, estold;
-    static int i, j;
-    double temp;
-    double safmin;
-    extern double dlamch_(char *);
-    extern int izmax1_(int *, doublecomplex *, int *);
-    extern double dzsum1_(int *, doublecomplex *, int *);
-
-    safmin = dlamch_("Safe minimum");
-    if ( *kase == 0 ) {
-       for (i = 0; i < *n; ++i) {
-           x[i].r = 1. / (double) (*n);
-           x[i].i = 0.;
-       }
-       *kase = 1;
-       jump = 1;
-       return 0;
-    }
-
-    switch (jump) {
-       case 1:  goto L20;
-       case 2:  goto L40;
-       case 3:  goto L70;
-       case 4:  goto L110;
-       case 5:  goto L140;
-    }
-
-    /*     ................ ENTRY   (JUMP = 1)   
-          FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY A*X. */
-  L20:
-    if (*n == 1) {
-       v[0] = x[0];
-       *est = z_abs(&v[0]);
-       /*        ... QUIT */
-       goto L150;
-    }
-    *est = dzsum1_(n, x, &c__1);
-
-    for (i = 0; i < *n; ++i) {
-       d__1 = z_abs(&x[i]);
-       if (d__1 > safmin) {
-           d__1 = 1 / d__1;
-           x[i].r *= d__1;
-           x[i].i *= d__1;
-       } else {
-           x[i] = one;
-       }
-    }
-    *kase = 2;
-    jump = 2;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 2)   
-          FIRST ITERATION.  X HAS BEEN OVERWRITTEN BY TRANSPOSE(A)*X. */
-L40:
-    j = izmax1_(n, &x[0], &c__1);
-    --j;
-    iter = 2;
-
-    /*     MAIN LOOP - ITERATIONS 2,3,...,ITMAX. */
-L50:
-    for (i = 0; i < *n; ++i) x[i] = zero;
-    x[j] = one;
-    *kase = 1;
-    jump = 3;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 3)   
-          X HAS BEEN OVERWRITTEN BY A*X. */
-L70:
-#ifdef _CRAY
-    CCOPY(n, x, &c__1, v, &c__1);
-#else
-    zcopy_(n, x, &c__1, v, &c__1);
-#endif
-    estold = *est;
-    *est = dzsum1_(n, v, &c__1);
-
-
-L90:
-    /*     TEST FOR CYCLING. */
-    if (*est <= estold) goto L120;
-
-    for (i = 0; i < *n; ++i) {
-       d__1 = z_abs(&x[i]);
-       if (d__1 > safmin) {
-           d__1 = 1 / d__1;
-           x[i].r *= d__1;
-           x[i].i *= d__1;
-       } else {
-           x[i] = one;
-       }
-    }
-    *kase = 2;
-    jump = 4;
-    return 0;
-
-    /*     ................ ENTRY   (JUMP = 4)   
-          X HAS BEEN OVERWRITTEN BY TRANDPOSE(A)*X. */
-L110:
-    jlast = j;
-    j = izmax1_(n, &x[0], &c__1);
-    --j;
-    if (x[jlast].r != (d__1 = x[j].r, fabs(d__1)) && iter < 5) {
-       ++iter;
-       goto L50;
-    }
-
-    /*     ITERATION COMPLETE.  FINAL STAGE. */
-L120:
-    altsgn = 1.;
-    for (i = 1; i <= *n; ++i) {
-       x[i-1].r = altsgn * ((double)(i - 1) / (double)(*n - 1) + 1.);
-       x[i-1].i = 0.;
-       altsgn = -altsgn;
-    }
-    *kase = 1;
-    jump = 5;
-    return 0;
-    
-    /*     ................ ENTRY   (JUMP = 5)   
-          X HAS BEEN OVERWRITTEN BY A*X. */
-L140:
-    temp = dzsum1_(n, x, &c__1) / (double)(*n * 3) * 2.;
-    if (temp > *est) {
-#ifdef _CRAY
-       CCOPY(n, &x[0], &c__1, &v[0], &c__1);
-#else
-       zcopy_(n, &x[0], &c__1, &v[0], &c__1);
-#endif
-       *est = temp;
-    }
-
-L150:
-    *kase = 0;
-    return 0;
-
-} /* zlacon_ */
diff --git a/superlu/zlangs.c b/superlu/zlangs.c
deleted file mode 100644
index bb3f95a2..00000000
--- a/superlu/zlangs.c
+++ /dev/null
@@ -1,131 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-/*
- * File name:  zlangs.c
- * History:     Modified from lapack routine ZLANGE
- */
-#include <math.h>
-#include "slu_zdefs.h"
-
-double zlangs(char *norm, SuperMatrix *A)
-{
-/* 
-    Purpose   
-    =======   
-
-    ZLANGS returns the value of the one norm, or the Frobenius norm, or 
-    the infinity norm, or the element of largest absolute value of a 
-    real matrix A.   
-
-    Description   
-    ===========   
-
-    ZLANGE returns the value   
-
-       ZLANGE = ( max(abs(A(i,j))), NORM = 'M' or 'm'   
-                (   
-                ( norm1(A),         NORM = '1', 'O' or 'o'   
-                (   
-                ( normI(A),         NORM = 'I' or 'i'   
-                (   
-                ( normF(A),         NORM = 'F', 'f', 'E' or 'e'   
-
-    where  norm1  denotes the  one norm of a matrix (maximum column sum), 
-    normI  denotes the  infinity norm  of a matrix  (maximum row sum) and 
-    normF  denotes the  Frobenius norm of a matrix (square root of sum of 
-    squares).  Note that  max(abs(A(i,j)))  is not a  matrix norm.   
-
-    Arguments   
-    =========   
-
-    NORM    (input) CHARACTER*1   
-            Specifies the value to be returned in ZLANGE as described above.   
-    A       (input) SuperMatrix*
-            The M by N sparse matrix A. 
-
-   ===================================================================== 
-*/
-    
-    /* Local variables */
-    NCformat *Astore;
-    doublecomplex   *Aval;
-    int      i, j, irow;
-    double   value, sum;
-    double   *rwork;
-
-    Astore = A->Store;
-    Aval   = Astore->nzval;
-    
-    if ( SUPERLU_MIN(A->nrow, A->ncol) == 0) {
-       value = 0.;
-       
-    } else if (lsame_(norm, "M")) {
-       /* Find max(abs(A(i,j))). */
-       value = 0.;
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++)
-               value = SUPERLU_MAX( value, z_abs( &Aval[i]) );
-       
-    } else if (lsame_(norm, "O") || *(unsigned char *)norm == '1') {
-       /* Find norm1(A). */
-       value = 0.;
-       for (j = 0; j < A->ncol; ++j) {
-           sum = 0.;
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) 
-               sum += z_abs( &Aval[i] );
-           value = SUPERLU_MAX(value,sum);
-       }
-       
-    } else if (lsame_(norm, "I")) {
-       /* Find normI(A). */
-       if ( !(rwork = (double *) SUPERLU_MALLOC(A->nrow * sizeof(double))) )
-           ABORT("SUPERLU_MALLOC fails for rwork.");
-       for (i = 0; i < A->nrow; ++i) rwork[i] = 0.;
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; i++) {
-               irow = Astore->rowind[i];
-               rwork[irow] += z_abs( &Aval[i] );
-           }
-       value = 0.;
-       for (i = 0; i < A->nrow; ++i)
-           value = SUPERLU_MAX(value, rwork[i]);
-       
-       SUPERLU_FREE (rwork);
-       
-    } else if (lsame_(norm, "F") || lsame_(norm, "E")) {
-       /* Find normF(A). */
-       ABORT("Not implemented.");
-    } else
-       ABORT("Illegal norm specified.");
-
-    return (value);
-
-} /* zlangs */
-
diff --git a/superlu/zlaqgs.c b/superlu/zlaqgs.c
deleted file mode 100644
index 1753737d..00000000
--- a/superlu/zlaqgs.c
+++ /dev/null
@@ -1,159 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-/*
- * File name:  zlaqgs.c
- * History:     Modified from LAPACK routine ZLAQGE
- */
-#include <math.h>
-#include "slu_zdefs.h"
-
-void
-zlaqgs(SuperMatrix *A, double *r, double *c, 
-       double rowcnd, double colcnd, double amax, char *equed)
-{
-/*
-    Purpose   
-    =======   
-
-    ZLAQGS equilibrates a general sparse M by N matrix A using the row and   
-    scaling factors in the vectors R and C.   
-
-    See supermatrix.h for the definition of 'SuperMatrix' structure.
-
-    Arguments   
-    =========   
-
-    A       (input/output) SuperMatrix*
-            On exit, the equilibrated matrix.  See EQUED for the form of 
-            the equilibrated matrix. The type of A can be:
-           Stype = NC; Dtype = SLU_Z; Mtype = GE.
-           
-    R       (input) double*, dimension (A->nrow)
-            The row scale factors for A.
-           
-    C       (input) double*, dimension (A->ncol)
-            The column scale factors for A.
-           
-    ROWCND  (input) double
-            Ratio of the smallest R(i) to the largest R(i).
-           
-    COLCND  (input) double
-            Ratio of the smallest C(i) to the largest C(i).
-           
-    AMAX    (input) double
-            Absolute value of largest matrix entry.
-           
-    EQUED   (output) char*
-            Specifies the form of equilibration that was done.   
-            = 'N':  No equilibration   
-            = 'R':  Row equilibration, i.e., A has been premultiplied by  
-                    diag(R).   
-            = 'C':  Column equilibration, i.e., A has been postmultiplied  
-                    by diag(C).   
-            = 'B':  Both row and column equilibration, i.e., A has been
-                    replaced by diag(R) * A * diag(C).   
-
-    Internal Parameters   
-    ===================   
-
-    THRESH is a threshold value used to decide if row or column scaling   
-    should be done based on the ratio of the row or column scaling   
-    factors.  If ROWCND < THRESH, row scaling is done, and if   
-    COLCND < THRESH, column scaling is done.   
-
-    LARGE and SMALL are threshold values used to decide if row scaling   
-    should be done based on the absolute size of the largest matrix   
-    element.  If AMAX > LARGE or AMAX < SMALL, row scaling is done.   
-
-    ===================================================================== 
-*/
-
-#define THRESH    (0.1)
-    
-    /* Local variables */
-    NCformat *Astore;
-    doublecomplex   *Aval;
-    int i, j, irow;
-    double large, small, cj;
-    extern double dlamch_(char *);
-    double temp;
-
-
-    /* Quick return if possible */
-    if (A->nrow <= 0 || A->ncol <= 0) {
-       *(unsigned char *)equed = 'N';
-       return;
-    }
-
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    
-    /* Initialize LARGE and SMALL. */
-    small = dlamch_("Safe minimum") / dlamch_("Precision");
-    large = 1. / small;
-
-    if (rowcnd >= THRESH && amax >= small && amax <= large) {
-       if (colcnd >= THRESH)
-           *(unsigned char *)equed = 'N';
-       else {
-           /* Column scaling */
-           for (j = 0; j < A->ncol; ++j) {
-               cj = c[j];
-               for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                   zd_mult(&Aval[i], &Aval[i], cj);
-                }
-           }
-           *(unsigned char *)equed = 'C';
-       }
-    } else if (colcnd >= THRESH) {
-       /* Row scaling, no column scaling */
-       for (j = 0; j < A->ncol; ++j)
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-               irow = Astore->rowind[i];
-               zd_mult(&Aval[i], &Aval[i], r[irow]);
-           }
-       *(unsigned char *)equed = 'R';
-    } else {
-       /* Row and column scaling */
-       for (j = 0; j < A->ncol; ++j) {
-           cj = c[j];
-           for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-               irow = Astore->rowind[i];
-               temp = cj * r[irow];
-               zd_mult(&Aval[i], &Aval[i], temp);
-           }
-       }
-       *(unsigned char *)equed = 'B';
-    }
-
-    return;
-
-} /* zlaqgs */
-
diff --git a/superlu/zmemory.c b/superlu/zmemory.c
deleted file mode 100644
index 36968d65..00000000
--- a/superlu/zmemory.c
+++ /dev/null
@@ -1,689 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-#include "slu_zdefs.h"
-
-/* Constants */
-#define NO_MEMTYPE  4      /* 0: lusup;
-                             1: ucol;
-                             2: lsub;
-                             3: usub */
-#define GluIntArray(n)   (5 * (n) + 5)
-
-/* Internal prototypes */
-void  *zexpand (int *, MemType,int, int, GlobalLU_t *);
-int   zLUWorkInit (int, int, int, int **, doublecomplex **, LU_space_t);
-void  copy_mem_doublecomplex (int, void *, void *);
-void  zStackCompress (GlobalLU_t *);
-void  zSetupSpace (void *, int, LU_space_t *);
-void  *zuser_malloc (int, int);
-void  zuser_free (int, int);
-
-/* External prototypes (in memory.c - prec-indep) */
-extern void    copy_mem_int    (int, void *, void *);
-extern void    user_bcopy      (char *, char *, int);
-
-/* Headers for 4 types of dynamatically managed memory */
-typedef struct e_node {
-    int size;      /* length of the memory that has been used */
-    void *mem;     /* pointer to the new malloc'd store */
-} ExpHeader;
-
-typedef struct {
-    int  size;
-    int  used;
-    int  top1;  /* grow upward, relative to &array[0] */
-    int  top2;  /* grow downward */
-    void *array;
-} LU_stack_t;
-
-/* Variables local to this file */
-static ExpHeader *expanders = 0; /* Array of pointers to 4 types of memory */
-static LU_stack_t stack;
-static int no_expand;
-
-/* Macros to manipulate stack */
-#define StackFull(x)         ( x + stack.used >= stack.size )
-#define NotDoubleAlign(addr) ( (long int)addr & 7 )
-#define DoubleAlign(addr)    ( ((long int)addr + 7) & ~7L )
-#define TempSpace(m, w)      ( (2*w + 4 + NO_MARKER) * m * sizeof(int) + \
-                             (w + 1) * m * sizeof(doublecomplex) )
-#define Reduce(alpha)        ((alpha + 1) / 2)  /* i.e. (alpha-1)/2 + 1 */
-
-
-
-
-/*
- * Setup the memory model to be used for factorization.
- *    lwork = 0: use system malloc;
- *    lwork > 0: use user-supplied work[] space.
- */
-void zSetupSpace(void *work, int lwork, LU_space_t *MemModel)
-{
-    if ( lwork == 0 ) {
-       *MemModel = SYSTEM; /* malloc/free */
-    } else if ( lwork > 0 ) {
-       *MemModel = USER;   /* user provided space */
-       stack.used = 0;
-       stack.top1 = 0;
-       stack.top2 = (lwork/4)*4; /* must be word addressable */
-       stack.size = stack.top2;
-       stack.array = (void *) work;
-    }
-}
-
-
-
-void *zuser_malloc(int bytes, int which_end)
-{
-    void *buf;
-    
-    if ( StackFull(bytes) ) return (NULL);
-
-    if ( which_end == HEAD ) {
-       buf = (char*) stack.array + stack.top1;
-       stack.top1 += bytes;
-    } else {
-       stack.top2 -= bytes;
-       buf = (char*) stack.array + stack.top2;
-    }
-    
-    stack.used += bytes;
-    return buf;
-}
-
-
-void zuser_free(int bytes, int which_end)
-{
-    if ( which_end == HEAD ) {
-       stack.top1 -= bytes;
-    } else {
-       stack.top2 += bytes;
-    }
-    stack.used -= bytes;
-}
-
-
-
-/*
- * mem_usage consists of the following fields:
- *    - for_lu (float)
- *      The amount of space used in bytes for the L\U data structures.
- *    - total_needed (float)
- *      The amount of space needed in bytes to perform factorization.
- *    - expansions (int)
- *      Number of memory expansions during the LU factorization.
- */
-int zQuerySpace(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage)
-{
-    SCformat *Lstore;
-    NCformat *Ustore;
-    register int n, iword, dword, panel_size = sp_ienv(1);
-
-    Lstore = L->Store;
-    Ustore = U->Store;
-    n = L->ncol;
-    iword = sizeof(int);
-    dword = sizeof(doublecomplex);
-
-    /* For LU factors */
-    mem_usage->for_lu = (float)( (4*n + 3) * iword + Lstore->nzval_colptr[n] *
-                                dword + Lstore->rowind_colptr[n] * iword );
-    mem_usage->for_lu += (float)( (n + 1) * iword +
-                                Ustore->colptr[n] * (dword + iword) );
-
-    /* Working storage to support factorization */
-    mem_usage->total_needed = mem_usage->for_lu +
-       (float)( (2 * panel_size + 4 + NO_MARKER) * n * iword +
-               (panel_size + 1) * n * dword );
-
-    mem_usage->expansions = --no_expand;
-
-    return 0;
-} /* zQuerySpace */
-
-/*
- * Allocate storage for the data structures common to all factor routines.
- * For those unpredictable size, make a guess as FILL * nnz(A).
- * Return value:
- *     If lwork = -1, return the estimated amount of space required, plus n;
- *     otherwise, return the amount of space actually allocated when
- *     memory allocation failure occurred.
- */
-int
-zLUMemInit(fact_t fact, void *work, int lwork, int m, int n, int annz,
-         int panel_size, SuperMatrix *L, SuperMatrix *U, GlobalLU_t *Glu,
-         int **iwork, doublecomplex **dwork)
-{
-    int      info, iword, dword;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    int      *xsup, *supno;
-    int      *lsub, *xlsub;
-    doublecomplex   *lusup;
-    int      *xlusup;
-    doublecomplex   *ucol;
-    int      *usub, *xusub;
-    int      nzlmax, nzumax, nzlumax;
-    int      FILL = sp_ienv(6);
-    
-    Glu->n    = n;
-    no_expand = 0;
-    iword     = sizeof(int);
-    dword     = sizeof(doublecomplex);
-
-    if ( !expanders )  
-        expanders = (ExpHeader*)SUPERLU_MALLOC(NO_MEMTYPE * sizeof(ExpHeader));
-    if ( !expanders ) ABORT("SUPERLU_MALLOC fails for expanders");
-    
-    if ( fact != SamePattern_SameRowPerm ) {
-       /* Guess for L\U factors */
-       nzumax = nzlumax = FILL * annz;
-       nzlmax = SUPERLU_MAX(1, FILL/4.) * annz;
-
-       if ( lwork == -1 ) {
-           return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
-                   + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
-        } else {
-           zSetupSpace(work, lwork, &Glu->MemModel);
-       }
-       
-#if ( PRNTlevel >= 1 )
-       printf("zLUMemInit() called: FILL %ld, nzlmax %ld, nzumax %ld\n", 
-              FILL, nzlmax, nzumax);
-       fflush(stdout);
-#endif 
-       
-       /* Integer pointers for L\U factors */
-       if ( Glu->MemModel == SYSTEM ) {
-           xsup   = intMalloc(n+1);
-           supno  = intMalloc(n+1);
-           xlsub  = intMalloc(n+1);
-           xlusup = intMalloc(n+1);
-           xusub  = intMalloc(n+1);
-       } else {
-           xsup   = (int *)zuser_malloc((n+1) * iword, HEAD);
-           supno  = (int *)zuser_malloc((n+1) * iword, HEAD);
-           xlsub  = (int *)zuser_malloc((n+1) * iword, HEAD);
-           xlusup = (int *)zuser_malloc((n+1) * iword, HEAD);
-           xusub  = (int *)zuser_malloc((n+1) * iword, HEAD);
-       }
-
-       lusup = (doublecomplex *) zexpand( &nzlumax, LUSUP, 0, 0, Glu );
-       ucol  = (doublecomplex *) zexpand( &nzumax, UCOL, 0, 0, Glu );
-       lsub  = (int *)    zexpand( &nzlmax, LSUB, 0, 0, Glu );
-       usub  = (int *)    zexpand( &nzumax, USUB, 0, 1, Glu );
-
-       while ( !lusup || !ucol || !lsub || !usub ) {
-           if ( Glu->MemModel == SYSTEM ) {
-               SUPERLU_FREE(lusup); 
-               SUPERLU_FREE(ucol); 
-               SUPERLU_FREE(lsub); 
-               SUPERLU_FREE(usub);
-           } else {
-               zuser_free((nzlumax+nzumax)*dword+(nzlmax+nzumax)*iword, HEAD);
-           }
-           nzlumax /= 2;
-           nzumax /= 2;
-           nzlmax /= 2;
-           if ( nzlumax < annz ) {
-               printf("Not enough memory to perform factorization.\n");
-               return (zmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
-           }
-#if ( PRNTlevel >= 1)
-           printf("zLUMemInit() reduce size: nzlmax %ld, nzumax %ld\n", 
-                  nzlmax, nzumax);
-           fflush(stdout);
-#endif
-           lusup = (doublecomplex *) zexpand( &nzlumax, LUSUP, 0, 0, Glu );
-           ucol  = (doublecomplex *) zexpand( &nzumax, UCOL, 0, 0, Glu );
-           lsub  = (int *)    zexpand( &nzlmax, LSUB, 0, 0, Glu );
-           usub  = (int *)    zexpand( &nzumax, USUB, 0, 1, Glu );
-       }
-       
-    } else {
-       /* fact == SamePattern_SameRowPerm */
-       Lstore   = L->Store;
-       Ustore   = U->Store;
-       xsup     = Lstore->sup_to_col;
-       supno    = Lstore->col_to_sup;
-       xlsub    = Lstore->rowind_colptr;
-       xlusup   = Lstore->nzval_colptr;
-       xusub    = Ustore->colptr;
-       nzlmax   = Glu->nzlmax;    /* max from previous factorization */
-       nzumax   = Glu->nzumax;
-       nzlumax  = Glu->nzlumax;
-       
-       if ( lwork == -1 ) {
-           return ( GluIntArray(n) * iword + TempSpace(m, panel_size)
-                   + (nzlmax+nzumax)*iword + (nzlumax+nzumax)*dword + n );
-        } else if ( lwork == 0 ) {
-           Glu->MemModel = SYSTEM;
-       } else {
-           Glu->MemModel = USER;
-           stack.top2 = (lwork/4)*4; /* must be word-addressable */
-           stack.size = stack.top2;
-       }
-       
-       lsub  = expanders[LSUB].mem  = Lstore->rowind;
-       lusup = expanders[LUSUP].mem = Lstore->nzval;
-       usub  = expanders[USUB].mem  = Ustore->rowind;
-       ucol  = expanders[UCOL].mem  = Ustore->nzval;;
-       expanders[LSUB].size         = nzlmax;
-       expanders[LUSUP].size        = nzlumax;
-       expanders[USUB].size         = nzumax;
-       expanders[UCOL].size         = nzumax;  
-    }
-
-    Glu->xsup    = xsup;
-    Glu->supno   = supno;
-    Glu->lsub    = lsub;
-    Glu->xlsub   = xlsub;
-    Glu->lusup   = lusup;
-    Glu->xlusup  = xlusup;
-    Glu->ucol    = ucol;
-    Glu->usub    = usub;
-    Glu->xusub   = xusub;
-    Glu->nzlmax  = nzlmax;
-    Glu->nzumax  = nzumax;
-    Glu->nzlumax = nzlumax;
-    
-    info = zLUWorkInit(m, n, panel_size, iwork, dwork, Glu->MemModel);
-    if ( info )
-       return ( info + zmemory_usage(nzlmax, nzumax, nzlumax, n) + n);
-    
-    ++no_expand;
-    return 0;
-    
-} /* zLUMemInit */
-
-/* Allocate known working storage. Returns 0 if success, otherwise
-   returns the number of bytes allocated so far when failure occurred. */
-int
-zLUWorkInit(int m, int n, int panel_size, int **iworkptr, 
-            doublecomplex **dworkptr, LU_space_t MemModel)
-{
-    int    isize, dsize, extra;
-    doublecomplex *old_ptr;
-    int    maxsuper = sp_ienv(3),
-           rowblk   = sp_ienv(4);
-
-    isize = ( (2 * panel_size + 3 + NO_MARKER ) * m + n ) * sizeof(int);
-    dsize = (m * panel_size +
-            NUM_TEMPV(m,panel_size,maxsuper,rowblk)) * sizeof(doublecomplex);
-    
-    if ( MemModel == SYSTEM ) 
-       *iworkptr = (int *) intCalloc(isize/sizeof(int));
-    else
-       *iworkptr = (int *) zuser_malloc(isize, TAIL);
-    if ( ! *iworkptr ) {
-       fprintf(stderr, "zLUWorkInit: malloc fails for local iworkptr[]\n");
-       return (isize + n);
-    }
-
-    if ( MemModel == SYSTEM )
-       *dworkptr = (doublecomplex *) SUPERLU_MALLOC(dsize);
-    else {
-       *dworkptr = (doublecomplex *) zuser_malloc(dsize, TAIL);
-       if ( NotDoubleAlign(*dworkptr) ) {
-           old_ptr = *dworkptr;
-           *dworkptr = (doublecomplex*) DoubleAlign(*dworkptr);
-           *dworkptr = (doublecomplex*) ((double*)*dworkptr - 1);
-           extra = (char*)old_ptr - (char*)*dworkptr;
-#ifdef DEBUG       
-           printf("zLUWorkInit: not aligned, extra %d\n", extra);
-#endif     
-           stack.top2 -= extra;
-           stack.used += extra;
-       }
-    }
-    if ( ! *dworkptr ) {
-       fprintf(stderr, "malloc fails for local dworkptr[].");
-       return (isize + dsize + n);
-    }
-       
-    return 0;
-}
-
-
-/*
- * Set up pointers for real working arrays.
- */
-void
-zSetRWork(int m, int panel_size, doublecomplex *dworkptr,
-        doublecomplex **dense, doublecomplex **tempv)
-{
-    doublecomplex zero = {0.0, 0.0};
-
-    int maxsuper = sp_ienv(3),
-        rowblk   = sp_ienv(4);
-    *dense = dworkptr;
-    *tempv = *dense + panel_size*m;
-    zfill (*dense, m * panel_size, zero);
-    zfill (*tempv, NUM_TEMPV(m,panel_size,maxsuper,rowblk), zero);     
-}
-       
-/*
- * Free the working storage used by factor routines.
- */
-void zLUWorkFree(int *iwork, doublecomplex *dwork, GlobalLU_t *Glu)
-{
-    if ( Glu->MemModel == SYSTEM ) {
-       SUPERLU_FREE (iwork);
-       SUPERLU_FREE (dwork);
-    } else {
-       stack.used -= (stack.size - stack.top2);
-       stack.top2 = stack.size;
-/*     zStackCompress(Glu);  */
-    }
-    
-    SUPERLU_FREE (expanders);  
-    expanders = 0;
-}
-
-/* Expand the data structures for L and U during the factorization.
- * Return value:   0 - successful return
- *               > 0 - number of bytes allocated when run out of space
- */
-int
-zLUMemXpand(int jcol,
-          int next,          /* number of elements currently in the factors */
-          MemType mem_type,  /* which type of memory to expand  */
-          int *maxlen,       /* modified - maximum length of a data structure 
*/
-          GlobalLU_t *Glu    /* modified - global LU data structures */
-          )
-{
-    void   *new_mem;
-    
-#ifdef DEBUG    
-    printf("zLUMemXpand(): jcol %d, next %d, maxlen %d, MemType %d\n",
-          jcol, next, *maxlen, mem_type);
-#endif    
-
-    if (mem_type == USUB) 
-       new_mem = zexpand(maxlen, mem_type, next, 1, Glu);
-    else
-       new_mem = zexpand(maxlen, mem_type, next, 0, Glu);
-    
-    if ( !new_mem ) {
-       int    nzlmax  = Glu->nzlmax;
-       int    nzumax  = Glu->nzumax;
-       int    nzlumax = Glu->nzlumax;
-       fprintf(stderr, "Can't expand MemType %d: jcol %d\n", mem_type, jcol);
-       return (zmemory_usage(nzlmax, nzumax, nzlumax, Glu->n) + Glu->n);
-    }
-
-    switch ( mem_type ) {
-      case LUSUP:
-       Glu->lusup   = (doublecomplex *) new_mem;
-       Glu->nzlumax = *maxlen;
-       break;
-      case UCOL:
-       Glu->ucol   = (doublecomplex *) new_mem;
-       Glu->nzumax = *maxlen;
-       break;
-      case LSUB:
-       Glu->lsub   = (int *) new_mem;
-       Glu->nzlmax = *maxlen;
-       break;
-      case USUB:
-       Glu->usub   = (int *) new_mem;
-       Glu->nzumax = *maxlen;
-       break;
-    }
-    
-    return 0;
-    
-}
-
-
-
-void
-copy_mem_doublecomplex(int howmany, void *old, void *new)
-{
-    register int i;
-    doublecomplex *dold = old;
-    doublecomplex *dnew = new;
-    for (i = 0; i < howmany; i++) dnew[i] = dold[i];
-}
-
-/*
- * Expand the existing storage to accommodate more fill-ins.
- */
-void
-*zexpand (
-        int *prev_len,   /* length used from previous call */
-        MemType type,    /* which part of the memory to expand */
-        int len_to_copy, /* size of the memory to be copied to new store */
-        int keep_prev,   /* = 1: use prev_len;
-                            = 0: compute new_len to expand */
-        GlobalLU_t *Glu  /* modified - global LU data structures */
-       )
-{
-    float    EXPAND = 1.5;
-    float    alpha;
-    void     *new_mem, *old_mem;
-    int      new_len, tries, lword, extra, bytes_to_copy;
-
-    alpha = EXPAND;
-
-    if ( no_expand == 0 || keep_prev ) /* First time allocate requested */
-        new_len = *prev_len;
-    else {
-       new_len = alpha * *prev_len;
-    }
-    
-    if ( type == LSUB || type == USUB ) lword = sizeof(int);
-    else lword = sizeof(doublecomplex);
-
-    if ( Glu->MemModel == SYSTEM ) {
-       new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
-       if ( no_expand != 0 ) {
-           tries = 0;
-           if ( keep_prev ) {
-               if ( !new_mem ) return (NULL);
-           } else {
-               while ( !new_mem ) {
-                   if ( ++tries > 10 ) return (NULL);
-                   alpha = Reduce(alpha);
-                   new_len = alpha * *prev_len;
-                   new_mem = (void *) SUPERLU_MALLOC((size_t)new_len * lword);
-               }
-           }
-           if ( type == LSUB || type == USUB ) {
-               copy_mem_int(len_to_copy, expanders[type].mem, new_mem);
-           } else {
-               copy_mem_doublecomplex(len_to_copy, expanders[type].mem, 
new_mem);
-           }
-           SUPERLU_FREE (expanders[type].mem);
-       }
-       expanders[type].mem = (void *) new_mem;
-       
-    } else { /* MemModel == USER */
-       if ( no_expand == 0 ) {
-           new_mem = zuser_malloc(new_len * lword, HEAD);
-           if ( NotDoubleAlign(new_mem) &&
-               (type == LUSUP || type == UCOL) ) {
-               old_mem = new_mem;
-               new_mem = (void *)DoubleAlign(new_mem);
-               extra = (char*)new_mem - (char*)old_mem;
-#ifdef DEBUG           
-               printf("expand(): not aligned, extra %d\n", extra);
-#endif         
-               stack.top1 += extra;
-               stack.used += extra;
-           }
-           expanders[type].mem = (void *) new_mem;
-       }
-       else {
-           tries = 0;
-           extra = (new_len - *prev_len) * lword;
-           if ( keep_prev ) {
-               if ( StackFull(extra) ) return (NULL);
-           } else {
-               while ( StackFull(extra) ) {
-                   if ( ++tries > 10 ) return (NULL);
-                   alpha = Reduce(alpha);
-                   new_len = alpha * *prev_len;
-                   extra = (new_len - *prev_len) * lword;          
-               }
-           }
-
-           if ( type != USUB ) {
-               new_mem = (void*)((char*)expanders[type + 1].mem + extra);
-               bytes_to_copy = (char*)stack.array + stack.top1
-                   - (char*)expanders[type + 1].mem;
-               user_bcopy(expanders[type+1].mem, new_mem, bytes_to_copy);
-
-               if ( type < USUB ) {
-                   Glu->usub = expanders[USUB].mem =
-                       (void*)((char*)expanders[USUB].mem + extra);
-               }
-               if ( type < LSUB ) {
-                   Glu->lsub = expanders[LSUB].mem =
-                       (void*)((char*)expanders[LSUB].mem + extra);
-               }
-               if ( type < UCOL ) {
-                   Glu->ucol = expanders[UCOL].mem =
-                       (void*)((char*)expanders[UCOL].mem + extra);
-               }
-               stack.top1 += extra;
-               stack.used += extra;
-               if ( type == UCOL ) {
-                   stack.top1 += extra;   /* Add same amount for USUB */
-                   stack.used += extra;
-               }
-               
-           } /* if ... */
-
-       } /* else ... */
-    }
-
-    expanders[type].size = new_len;
-    *prev_len = new_len;
-    if ( no_expand ) ++no_expand;
-    
-    return (void *) expanders[type].mem;
-    
-} /* zexpand */
-
-
-/*
- * Compress the work[] array to remove fragmentation.
- */
-void
-zStackCompress(GlobalLU_t *Glu)
-{
-    register int iword, dword, ndim;
-    char    *last, *fragment;
-    int      *ifrom, *ito;
-    doublecomplex   *dfrom, *dto;
-    int      *xlsub, *lsub, *xusub, *usub, *xlusup;
-    doublecomplex   *ucol, *lusup;
-    
-    iword = sizeof(int);
-    dword = sizeof(doublecomplex);
-    ndim = Glu->n;
-
-    xlsub  = Glu->xlsub;
-    lsub   = Glu->lsub;
-    xusub  = Glu->xusub;
-    usub   = Glu->usub;
-    xlusup = Glu->xlusup;
-    ucol   = Glu->ucol;
-    lusup  = Glu->lusup;
-    
-    dfrom = ucol;
-    dto = (doublecomplex *)((char*)lusup + xlusup[ndim] * dword);
-    copy_mem_doublecomplex(xusub[ndim], dfrom, dto);
-    ucol = dto;
-
-    ifrom = lsub;
-    ito = (int *) ((char*)ucol + xusub[ndim] * iword);
-    copy_mem_int(xlsub[ndim], ifrom, ito);
-    lsub = ito;
-    
-    ifrom = usub;
-    ito = (int *) ((char*)lsub + xlsub[ndim] * iword);
-    copy_mem_int(xusub[ndim], ifrom, ito);
-    usub = ito;
-    
-    last = (char*)usub + xusub[ndim] * iword;
-    fragment = (char*) (((char*)stack.array + stack.top1) - last);
-    stack.used -= (long int) fragment;
-    stack.top1 -= (long int) fragment;
-
-    Glu->ucol = ucol;
-    Glu->lsub = lsub;
-    Glu->usub = usub;
-    
-#ifdef DEBUG
-    printf("zStackCompress: fragment %d\n", fragment);
-    /* for (last = 0; last < ndim; ++last)
-       print_lu_col("After compress:", last, 0);*/
-#endif    
-    
-}
-
-/*
- * Allocate storage for original matrix A
- */
-void
-zallocateA(int n, int nnz, doublecomplex **a, int **asub, int **xa)
-{
-    *a    = (doublecomplex *) doublecomplexMalloc(nnz);
-    *asub = (int *) intMalloc(nnz);
-    *xa   = (int *) intMalloc(n+1);
-}
-
-
-doublecomplex *doublecomplexMalloc(int n)
-{
-    doublecomplex *buf;
-    buf = (doublecomplex *) SUPERLU_MALLOC((size_t)n * sizeof(doublecomplex)); 
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC failed for buf in doublecomplexMalloc()\n");
-    }
-    return (buf);
-}
-
-doublecomplex *doublecomplexCalloc(int n)
-{
-    doublecomplex *buf;
-    register int i;
-    doublecomplex zero = {0.0, 0.0};
-    buf = (doublecomplex *) SUPERLU_MALLOC((size_t)n * sizeof(doublecomplex));
-    if ( !buf ) {
-       ABORT("SUPERLU_MALLOC failed for buf in doublecomplexCalloc()\n");
-    }
-    for (i = 0; i < n; ++i) buf[i] = zero;
-    return (buf);
-}
-
-
-int zmemory_usage(const int nzlmax, const int nzumax, 
-                 const int nzlumax, const int n)
-{
-    register int iword, dword;
-
-    iword   = sizeof(int);
-    dword   = sizeof(doublecomplex);
-    
-    return (10 * n * iword +
-           nzlmax * iword + nzumax * (iword + dword) + nzlumax * dword);
-
-}
diff --git a/superlu/zmyblas2.c b/superlu/zmyblas2.c
deleted file mode 100644
index 5f2f3241..00000000
--- a/superlu/zmyblas2.c
+++ /dev/null
@@ -1,203 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-/*
- * File name:          zmyblas2.c
- * Purpose:
- *     Level 2 BLAS operations: solves and matvec, written in C.
- * Note:
- *     This is only used when the system lacks an efficient BLAS library.
- */
-#include "slu_dcomplex.h"
-
-/*
- * Solves a dense UNIT lower triangular system. The unit lower 
- * triangular matrix is stored in a 2D array M(1:nrow,1:ncol). 
- * The solution will be returned in the rhs vector.
- */
-void zlsolve ( int ldm, int ncol, doublecomplex *M, doublecomplex *rhs )
-{
-    int k;
-    doublecomplex x0, x1, x2, x3, temp;
-    doublecomplex *M0;
-    doublecomplex *Mki0, *Mki1, *Mki2, *Mki3;
-    register int firstcol = 0;
-
-    M0 = &M[0];
-
-
-    while ( firstcol < ncol - 3 ) { /* Do 4 columns */
-       Mki0 = M0 + 1;
-       Mki1 = Mki0 + ldm + 1;
-       Mki2 = Mki1 + ldm + 1;
-       Mki3 = Mki2 + ldm + 1;
-
-       x0 = rhs[firstcol];
-       zz_mult(&temp, &x0, Mki0); Mki0++;
-       z_sub(&x1, &rhs[firstcol+1], &temp);
-       zz_mult(&temp, &x0, Mki0); Mki0++;
-       z_sub(&x2, &rhs[firstcol+2], &temp);
-       zz_mult(&temp, &x1, Mki1); Mki1++;
-       z_sub(&x2, &x2, &temp);
-       zz_mult(&temp, &x0, Mki0); Mki0++;
-       z_sub(&x3, &rhs[firstcol+3], &temp);
-       zz_mult(&temp, &x1, Mki1); Mki1++;
-       z_sub(&x3, &x3, &temp);
-       zz_mult(&temp, &x2, Mki2); Mki2++;
-       z_sub(&x3, &x3, &temp);
-
-       rhs[++firstcol] = x1;
-       rhs[++firstcol] = x2;
-       rhs[++firstcol] = x3;
-       ++firstcol;
-    
-       for (k = firstcol; k < ncol; k++) {
-           zz_mult(&temp, &x0, Mki0); Mki0++;
-           z_sub(&rhs[k], &rhs[k], &temp);
-           zz_mult(&temp, &x1, Mki1); Mki1++;
-           z_sub(&rhs[k], &rhs[k], &temp);
-           zz_mult(&temp, &x2, Mki2); Mki2++;
-           z_sub(&rhs[k], &rhs[k], &temp);
-           zz_mult(&temp, &x3, Mki3); Mki3++;
-           z_sub(&rhs[k], &rhs[k], &temp);
-       }
-
-        M0 += 4 * ldm + 4;
-    }
-
-    if ( firstcol < ncol - 1 ) { /* Do 2 columns */
-        Mki0 = M0 + 1;
-        Mki1 = Mki0 + ldm + 1;
-
-        x0 = rhs[firstcol];
-       zz_mult(&temp, &x0, Mki0); Mki0++;
-       z_sub(&x1, &rhs[firstcol+1], &temp);
-
-       rhs[++firstcol] = x1;
-       ++firstcol;
-    
-       for (k = firstcol; k < ncol; k++) {
-           zz_mult(&temp, &x0, Mki0); Mki0++;
-           z_sub(&rhs[k], &rhs[k], &temp);
-           zz_mult(&temp, &x1, Mki1); Mki1++;
-           z_sub(&rhs[k], &rhs[k], &temp);
-       } 
-    }
-    
-}
-
-/*
- * Solves a dense upper triangular system. The upper triangular matrix is
- * stored in a 2-dim array M(1:ldm,1:ncol). The solution will be returned
- * in the rhs vector.
- */
-void
-zusolve ( ldm, ncol, M, rhs )
-int ldm;       /* in */
-int ncol;      /* in */
-doublecomplex *M;      /* in */
-doublecomplex *rhs;    /* modified */
-{
-    doublecomplex xj, temp;
-    int jcol, j, irow;
-
-    jcol = ncol - 1;
-
-    for (j = 0; j < ncol; j++) {
-
-       z_div(&xj, &rhs[jcol], &M[jcol + jcol*ldm]); /* M(jcol, jcol) */
-       rhs[jcol] = xj;
-       
-       for (irow = 0; irow < jcol; irow++) {
-           zz_mult(&temp, &xj, &M[irow+jcol*ldm]); /* M(irow, jcol) */
-           z_sub(&rhs[irow], &rhs[irow], &temp);
-       }
-
-       jcol--;
-
-    }
-}
-
-
-/*
- * Performs a dense matrix-vector multiply: Mxvec = Mxvec + M * vec.
- * The input matrix is M(1:nrow,1:ncol); The product is returned in Mxvec[].
- */
-void zmatvec ( ldm, nrow, ncol, M, vec, Mxvec )
-int ldm;       /* in -- leading dimension of M */
-int nrow;      /* in */ 
-int ncol;      /* in */
-doublecomplex *M;      /* in */
-doublecomplex *vec;    /* in */
-doublecomplex *Mxvec;  /* in/out */
-{
-    doublecomplex vi0, vi1, vi2, vi3;
-    doublecomplex *M0, temp;
-    doublecomplex *Mki0, *Mki1, *Mki2, *Mki3;
-    register int firstcol = 0;
-    int k;
-
-    M0 = &M[0];
-
-    while ( firstcol < ncol - 3 ) {    /* Do 4 columns */
-       Mki0 = M0;
-       Mki1 = Mki0 + ldm;
-       Mki2 = Mki1 + ldm;
-       Mki3 = Mki2 + ldm;
-
-       vi0 = vec[firstcol++];
-       vi1 = vec[firstcol++];
-       vi2 = vec[firstcol++];
-       vi3 = vec[firstcol++];  
-       for (k = 0; k < nrow; k++) {
-           zz_mult(&temp, &vi0, Mki0); Mki0++;
-           z_add(&Mxvec[k], &Mxvec[k], &temp);
-           zz_mult(&temp, &vi1, Mki1); Mki1++;
-           z_add(&Mxvec[k], &Mxvec[k], &temp);
-           zz_mult(&temp, &vi2, Mki2); Mki2++;
-           z_add(&Mxvec[k], &Mxvec[k], &temp);
-           zz_mult(&temp, &vi3, Mki3); Mki3++;
-           z_add(&Mxvec[k], &Mxvec[k], &temp);
-       }
-
-       M0 += 4 * ldm;
-    }
-
-    while ( firstcol < ncol ) {                /* Do 1 column */
-       Mki0 = M0;
-       vi0 = vec[firstcol++];
-       for (k = 0; k < nrow; k++) {
-           zz_mult(&temp, &vi0, Mki0); Mki0++;
-           z_add(&Mxvec[k], &Mxvec[k], &temp);
-       }
-       M0 += ldm;
-    }
-       
-}
-
diff --git a/superlu/zpanel_bmod.c b/superlu/zpanel_bmod.c
deleted file mode 100644
index ba9dc0e8..00000000
--- a/superlu/zpanel_bmod.c
+++ /dev/null
@@ -1,477 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_zdefs.h"
-extern void ztrsv_();
-extern void zgemv_();
-
-/* 
- * Function prototypes 
- */
-void zlsolve(int, int, doublecomplex *, doublecomplex *);
-void zmatvec(int, int, int, doublecomplex *, doublecomplex *, doublecomplex *);
-extern void zcheck_tempv();
-
-void
-zpanel_bmod (
-           const int  m,          /* in - number of rows in the matrix */
-           const int  w,          /* in */
-           const int  jcol,       /* in */
-           const int  nseg,       /* in */
-           doublecomplex     *dense,     /* out, of size n by w */
-           doublecomplex     *tempv,     /* working array */
-           int        *segrep,    /* in */
-           int        *repfnz,    /* in, of size n by w */
-           GlobalLU_t *Glu,       /* modified */
-           SuperLUStat_t *stat    /* output */
-           )
-{
-/* 
- * Purpose
- * =======
- *
- *    Performs numeric block updates (sup-panel) in topological order.
- *    It features: col-col, 2cols-col, 3cols-col, and sup-col updates.
- *    Special processing on the supernodal portion of L\U[*,j]
- *
- *    Before entering this routine, the original nonzeros in the panel 
- *    were already copied into the spa[m,w].
- *
- *    Updated/Output parameters-
- *     dense[0:m-1,w]: L[*,j:j+w-1] and U[*,j:j+w-1] are returned 
- *      collectively in the m-by-w vector dense[*]. 
- *
- */
-
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-         ftcs2 = _cptofcd("N", strlen("N")),
-         ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    int          incx = 1, incy = 1;
-    doublecomplex       alpha, beta;
-#endif
-
-    register int k, ksub;
-    int          fsupc, nsupc, nsupr, nrow;
-    int          krep, krep_ind;
-    doublecomplex       ukj, ukj1, ukj2;
-    int          luptr, luptr1, luptr2;
-    int          segsze;
-    int          block_nrow;  /* no of rows in a block row */
-    register int lptr;       /* Points to the row subscripts of a supernode */
-    int          kfnz, irow, no_zeros; 
-    register int isub, isub1, i;
-    register int jj;         /* Index through each column in the panel */
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    doublecomplex       *lusup;
-    int          *xlusup;
-    int          *repfnz_col; /* repfnz[] for a column in the panel */
-    doublecomplex       *dense_col;  /* dense[] for a column in the panel */
-    doublecomplex       *tempv1;             /* Used in 1-D update */
-    doublecomplex       *TriTmp, *MatvecTmp; /* used in 2-D update */
-    doublecomplex      zero = {0.0, 0.0};
-    doublecomplex      one = {1.0, 0.0};
-    doublecomplex      comp_temp, comp_temp1;
-    register int ldaTmp;
-    register int r_ind, r_hi;
-    static   int first = 1, maxsuper, rowblk, colblk;
-    flops_t  *ops = stat->ops;
-    
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    
-    if ( first ) {
-       maxsuper = sp_ienv(3);
-       rowblk   = sp_ienv(4);
-       colblk   = sp_ienv(5);
-       first = 0;
-    }
-    ldaTmp = maxsuper + rowblk;
-
-    /* 
-     * For each nonz supernode segment of U[*,j] in topological order 
-     */
-    k = nseg - 1;
-    for (ksub = 0; ksub < nseg; ksub++) { /* for each updating supernode */
-
-       /* krep = representative of current k-th supernode
-        * fsupc = first supernodal column
-        * nsupc = no of columns in a supernode
-        * nsupr = no of rows in a supernode
-        */
-        krep = segrep[k--];
-       fsupc = xsup[supno[krep]];
-       nsupc = krep - fsupc + 1;
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];
-       nrow = nsupr - nsupc;
-       lptr = xlsub[fsupc];
-       krep_ind = lptr + nsupc - 1;
-
-       repfnz_col = repfnz;
-       dense_col = dense;
-       
-       if ( nsupc >= colblk && nrow > rowblk ) { /* 2-D block update */
-
-           TriTmp = tempv;
-       
-           /* Sequence through each column in panel -- triangular solves */
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m, TriTmp += ldaTmp ) {
-
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
-           
-               segsze = krep - kfnz + 1;
-               luptr = xlusup[fsupc];
-
-               ops[TRSV] += 4 * segsze * (segsze - 1);
-               ops[GEMV] += 8 * nrow * segsze;
-       
-               /* Case 1: Update U-segment of size 1 -- col-col update */
-               if ( segsze == 1 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc;
-
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
-                       irow = lsub[i];
-                       zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                       z_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
-                       ++luptr;
-                   }
-
-               } else if ( segsze <= 3 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   ukj1 = dense_col[lsub[krep_ind - 1]];
-                   luptr += nsupr*(nsupc-1) + nsupc-1;
-                   luptr1 = luptr - nsupr;
-
-                   if ( segsze == 2 ) {
-                       zz_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                       z_sub(&ukj, &ukj, &comp_temp);
-                       dense_col[lsub[krep_ind]] = ukj;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           luptr++; luptr1++;
-                           zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                           zz_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                           z_add(&comp_temp, &comp_temp, &comp_temp1);
-                           z_sub(&dense_col[irow], &dense_col[irow], 
&comp_temp);
-                       }
-                   } else {
-                       ukj2 = dense_col[lsub[krep_ind - 2]];
-                       luptr2 = luptr1 - nsupr;
-                       zz_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
-                       z_sub(&ukj1, &ukj1, &comp_temp);
-
-                       zz_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                       zz_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                       z_add(&comp_temp, &comp_temp, &comp_temp1);
-                       z_sub(&ukj, &ukj, &comp_temp);
-                       dense_col[lsub[krep_ind]] = ukj;
-                       dense_col[lsub[krep_ind-1]] = ukj1;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           luptr++; luptr1++; luptr2++;
-                           zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                           zz_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                           z_add(&comp_temp, &comp_temp, &comp_temp1);
-                           zz_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                           z_add(&comp_temp, &comp_temp, &comp_temp1);
-                           z_sub(&dense_col[irow], &dense_col[irow], 
&comp_temp);
-                       }
-                   }
-
-               } else  {       /* segsze >= 4 */
-                   
-                   /* Copy U[*,j] segment from dense[*] to TriTmp[*], which
-                      holds the result of triangular solves.    */
-                   no_zeros = kfnz - fsupc;
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; ++i) {
-                       irow = lsub[isub];
-                       TriTmp[i] = dense_col[irow]; /* Gather */
-                       ++isub;
-                   }
-                   
-                   /* start effective triangle */
-                   luptr += nsupr * no_zeros + no_zeros;
-
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-                   CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                          &nsupr, TriTmp, &incx );
-#else
-                   ztrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                          &nsupr, TriTmp, &incx );
-#endif
-#else          
-                   zlsolve ( nsupr, segsze, &lusup[luptr], TriTmp );
-#endif
-                   
-
-               } /* else ... */
-           
-           }  /* for jj ... end tri-solves */
-
-           /* Block row updates; push all the way into dense[*] block */
-           for ( r_ind = 0; r_ind < nrow; r_ind += rowblk ) {
-               
-               r_hi = SUPERLU_MIN(nrow, r_ind + rowblk);
-               block_nrow = SUPERLU_MIN(rowblk, r_hi - r_ind);
-               luptr = xlusup[fsupc] + nsupc + r_ind;
-               isub1 = lptr + nsupc + r_ind;
-               
-               repfnz_col = repfnz;
-               TriTmp = tempv;
-               dense_col = dense;
-               
-               /* Sequence through each column in panel -- matrix-vector */
-               for (jj = jcol; jj < jcol + w; jj++,
-                    repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
-                   
-                   kfnz = repfnz_col[krep];
-                   if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
-                   
-                   segsze = krep - kfnz + 1;
-                   if ( segsze <= 3 ) continue;   /* skip unrolled cases */
-                   
-                   /* Perform a block update, and scatter the result of
-                      matrix-vector to dense[].                 */
-                   no_zeros = kfnz - fsupc;
-                   luptr1 = luptr + nsupr * no_zeros;
-                   MatvecTmp = &TriTmp[maxsuper];
-                   
-#ifdef USE_VENDOR_BLAS
-                   alpha = one; 
-                    beta = zero;
-#ifdef _CRAY
-                   CGEMV(ftcs2, &block_nrow, &segsze, &alpha, &lusup[luptr1], 
-                          &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
-#else
-                   zgemv_("N", &block_nrow, &segsze, &alpha, &lusup[luptr1], 
-                          &nsupr, TriTmp, &incx, &beta, MatvecTmp, &incy);
-#endif
-#else
-                   zmatvec(nsupr, block_nrow, segsze, &lusup[luptr1],
-                          TriTmp, MatvecTmp);
-#endif
-                   
-                   /* Scatter MatvecTmp[*] into SPA dense[*] temporarily
-                    * such that MatvecTmp[*] can be re-used for the
-                    * the next blok row update. dense[] will be copied into 
-                    * global store after the whole panel has been finished.
-                    */
-                   isub = isub1;
-                   for (i = 0; i < block_nrow; i++) {
-                       irow = lsub[isub];
-                       z_sub(&dense_col[irow], &dense_col[irow], 
-                              &MatvecTmp[i]);
-                       MatvecTmp[i] = zero;
-                       ++isub;
-                   }
-                   
-               } /* for jj ... */
-               
-           } /* for each block row ... */
-           
-           /* Scatter the triangular solves into SPA dense[*] */
-           repfnz_col = repfnz;
-           TriTmp = tempv;
-           dense_col = dense;
-           
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m, TriTmp += ldaTmp) {
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue; /* Skip any zero segment */
-               
-               segsze = krep - kfnz + 1;
-               if ( segsze <= 3 ) continue; /* skip unrolled cases */
-               
-               no_zeros = kfnz - fsupc;                
-               isub = lptr + no_zeros;
-               for (i = 0; i < segsze; i++) {
-                   irow = lsub[isub];
-                   dense_col[irow] = TriTmp[i];
-                   TriTmp[i] = zero;
-                   ++isub;
-               }
-               
-           } /* for jj ... */
-           
-       } else { /* 1-D block modification */
-           
-           
-           /* Sequence through each column in the panel */
-           for (jj = jcol; jj < jcol + w; jj++,
-                repfnz_col += m, dense_col += m) {
-               
-               kfnz = repfnz_col[krep];
-               if ( kfnz == EMPTY ) continue;  /* Skip any zero segment */
-               
-               segsze = krep - kfnz + 1;
-               luptr = xlusup[fsupc];
-
-               ops[TRSV] += 4 * segsze * (segsze - 1);
-               ops[GEMV] += 8 * nrow * segsze;
-               
-               /* Case 1: Update U-segment of size 1 -- col-col update */
-               if ( segsze == 1 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc;
-
-                   for (i = lptr + nsupc; i < xlsub[fsupc+1]; i++) {
-                       irow = lsub[i];
-                       zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                       z_sub(&dense_col[irow], &dense_col[irow], &comp_temp);
-                       ++luptr;
-                   }
-
-               } else if ( segsze <= 3 ) {
-                   ukj = dense_col[lsub[krep_ind]];
-                   luptr += nsupr*(nsupc-1) + nsupc-1;
-                   ukj1 = dense_col[lsub[krep_ind - 1]];
-                   luptr1 = luptr - nsupr;
-
-                   if ( segsze == 2 ) {
-                       zz_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                       z_sub(&ukj, &ukj, &comp_temp);
-                       dense_col[lsub[krep_ind]] = ukj;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           ++luptr;  ++luptr1;
-                           zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                           zz_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                           z_add(&comp_temp, &comp_temp, &comp_temp1);
-                           z_sub(&dense_col[irow], &dense_col[irow], 
&comp_temp);
-                       }
-                   } else {
-                       ukj2 = dense_col[lsub[krep_ind - 2]];
-                       luptr2 = luptr1 - nsupr;
-                       zz_mult(&comp_temp, &ukj2, &lusup[luptr2-1]);
-                       z_sub(&ukj1, &ukj1, &comp_temp);
-
-                       zz_mult(&comp_temp, &ukj1, &lusup[luptr1]);
-                       zz_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                       z_add(&comp_temp, &comp_temp, &comp_temp1);
-                       z_sub(&ukj, &ukj, &comp_temp);
-                       dense_col[lsub[krep_ind]] = ukj;
-                       dense_col[lsub[krep_ind-1]] = ukj1;
-                       for (i = lptr + nsupc; i < xlsub[fsupc+1]; ++i) {
-                           irow = lsub[i];
-                           ++luptr; ++luptr1; ++luptr2;
-                           zz_mult(&comp_temp, &ukj, &lusup[luptr]);
-                           zz_mult(&comp_temp1, &ukj1, &lusup[luptr1]);
-                           z_add(&comp_temp, &comp_temp, &comp_temp1);
-                           zz_mult(&comp_temp1, &ukj2, &lusup[luptr2]);
-                           z_add(&comp_temp, &comp_temp, &comp_temp1);
-                           z_sub(&dense_col[irow], &dense_col[irow], 
&comp_temp);
-                       }
-                   }
-
-               } else  { /* segsze >= 4 */
-                   /* 
-                    * Perform a triangular solve and block update,
-                    * then scatter the result of sup-col update to dense[].
-                    */
-                   no_zeros = kfnz - fsupc;
-                   
-                   /* Copy U[*,j] segment from dense[*] to tempv[*]: 
-                    *    The result of triangular solve is in tempv[*];
-                    *    The result of matrix vector update is in dense_col[*]
-                    */
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; ++i) {
-                       irow = lsub[isub];
-                       tempv[i] = dense_col[irow]; /* Gather */
-                       ++isub;
-                   }
-                   
-                   /* start effective triangle */
-                   luptr += nsupr * no_zeros + no_zeros;
-                   
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-                   CTRSV( ftcs1, ftcs2, ftcs3, &segsze, &lusup[luptr], 
-                          &nsupr, tempv, &incx );
-#else
-                   ztrsv_( "L", "N", "U", &segsze, &lusup[luptr], 
-                          &nsupr, tempv, &incx );
-#endif
-                   
-                   luptr += segsze;    /* Dense matrix-vector */
-                   tempv1 = &tempv[segsze];
-                    alpha = one;
-                    beta = zero;
-#ifdef _CRAY
-                   CGEMV( ftcs2, &nrow, &segsze, &alpha, &lusup[luptr], 
-                          &nsupr, tempv, &incx, &beta, tempv1, &incy );
-#else
-                   zgemv_( "N", &nrow, &segsze, &alpha, &lusup[luptr], 
-                          &nsupr, tempv, &incx, &beta, tempv1, &incy );
-#endif
-#else
-                   zlsolve ( nsupr, segsze, &lusup[luptr], tempv );
-                   
-                   luptr += segsze;        /* Dense matrix-vector */
-                   tempv1 = &tempv[segsze];
-                   zmatvec (nsupr, nrow, segsze, &lusup[luptr], tempv, tempv1);
-#endif
-                   
-                   /* Scatter tempv[*] into SPA dense[*] temporarily, such
-                    * that tempv[*] can be used for the triangular solve of
-                    * the next column of the panel. They will be copied into 
-                    * ucol[*] after the whole panel has been finished.
-                    */
-                   isub = lptr + no_zeros;
-                   for (i = 0; i < segsze; i++) {
-                       irow = lsub[isub];
-                       dense_col[irow] = tempv[i];
-                       tempv[i] = zero;
-                       isub++;
-                   }
-                   
-                   /* Scatter the update from tempv1[*] into SPA dense[*] */
-                   /* Start dense rectangular L */
-                   for (i = 0; i < nrow; i++) {
-                       irow = lsub[isub];
-                       z_sub(&dense_col[irow], &dense_col[irow], &tempv1[i]);
-                       tempv1[i] = zero;
-                       ++isub; 
-                   }
-                   
-               } /* else segsze>=4 ... */
-               
-           } /* for each column in the panel... */
-           
-       } /* else 1-D update ... */
-
-    } /* for each updating supernode ... */
-
-}
-
-
-
diff --git a/superlu/zpanel_dfs.c b/superlu/zpanel_dfs.c
deleted file mode 100644
index 4fbc963e..00000000
--- a/superlu/zpanel_dfs.c
+++ /dev/null
@@ -1,256 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_zdefs.h"
-
-void
-zpanel_dfs (
-          const int  m,           /* in - number of rows in the matrix */
-          const int  w,           /* in */
-          const int  jcol,        /* in */
-          SuperMatrix *A,       /* in - original matrix */
-          int        *perm_r,     /* in */
-          int        *nseg,       /* out */
-          doublecomplex     *dense,      /* out */
-          int        *panel_lsub, /* out */
-          int        *segrep,     /* out */
-          int        *repfnz,     /* out */
-          int        *xprune,     /* out */
-          int        *marker,     /* out */     
-          int        *parent,     /* working array */
-          int        *xplore,     /* working array */
-          GlobalLU_t *Glu         /* modified */
-          )
-{
-/*
- * Purpose
- * =======
- *
- *   Performs a symbolic factorization on a panel of columns [jcol, jcol+w).
- *
- *   A supernode representative is the last column of a supernode.
- *   The nonzeros in U[*,j] are segments that end at supernodal
- *   representatives.
- *
- *   The routine returns one list of the supernodal representatives
- *   in topological order of the dfs that generates them. This list is
- *   a superset of the topological order of each individual column within
- *   the panel. 
- *   The location of the first nonzero in each supernodal segment
- *   (supernodal entry location) is also returned. Each column has a 
- *   separate list for this purpose.
- *
- *   Two marker arrays are used for dfs:
- *     marker[i] == jj, if i was visited during dfs of current column jj;
- *     marker1[i] >= jcol, if i was visited by earlier columns in this panel;
- *
- *   marker: A-row --> A-row/col (0/1)
- *   repfnz: SuperA-col --> PA-row
- *   parent: SuperA-col --> SuperA-col
- *   xplore: SuperA-col --> index to L-structure
- *
- */
-    NCPformat *Astore;
-    doublecomplex    *a;
-    int       *asub;
-    int       *xa_begin, *xa_end;
-    int              krep, chperm, chmark, chrep, oldrep, kchild, myfnz;
-    int       k, krow, kmark, kperm;
-    int       xdfs, maxdfs, kpar;
-    int       jj;         /* index through each column in the panel */
-    int       *marker1;           /* marker1[jj] >= jcol if vertex jj was 
visited 
-                             by a previous column within this panel.   */
-    int       *repfnz_col; /* start of each column in the panel */
-    doublecomplex    *dense_col;  /* start of each column in the panel */
-    int       nextl_col;   /* next available position in panel_lsub[*,jj] */
-    int       *xsup, *supno;
-    int       *lsub, *xlsub;
-
-    /* Initialize pointers */
-    Astore     = A->Store;
-    a          = Astore->nzval;
-    asub       = Astore->rowind;
-    xa_begin   = Astore->colbeg;
-    xa_end     = Astore->colend;
-    marker1    = marker + m;
-    repfnz_col = repfnz;
-    dense_col  = dense;
-    *nseg      = 0;
-    xsup       = Glu->xsup;
-    supno      = Glu->supno;
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-
-    /* For each column in the panel */
-    for (jj = jcol; jj < jcol + w; jj++) {
-       nextl_col = (jj - jcol) * m;
-
-#ifdef CHK_DFS
-       printf("\npanel col %d: ", jj);
-#endif
-
-       /* For each nonz in A[*,jj] do dfs */
-       for (k = xa_begin[jj]; k < xa_end[jj]; k++) {
-           krow = asub[k];
-            dense_col[krow] = a[k];
-           kmark = marker[krow];       
-           if ( kmark == jj ) 
-               continue;     /* krow visited before, go to the next nonzero */
-
-           /* For each unmarked nbr krow of jj
-            * krow is in L: place it in structure of L[*,jj]
-            */
-           marker[krow] = jj;
-           kperm = perm_r[krow];
-           
-           if ( kperm == EMPTY ) {
-               panel_lsub[nextl_col++] = krow; /* krow is indexed into A */
-           }
-           /* 
-            * krow is in U: if its supernode-rep krep
-            * has been explored, update repfnz[*]
-            */
-           else {
-               
-               krep = xsup[supno[kperm]+1] - 1;
-               myfnz = repfnz_col[krep];
-               
-#ifdef CHK_DFS
-               printf("krep %d, myfnz %d, perm_r[%d] %d\n", krep, myfnz, krow, 
kperm);
-#endif
-               if ( myfnz != EMPTY ) { /* Representative visited before */
-                   if ( myfnz > kperm ) repfnz_col[krep] = kperm;
-                   /* continue; */
-               }
-               else {
-                   /* Otherwise, perform dfs starting at krep */
-                   oldrep = EMPTY;
-                   parent[krep] = oldrep;
-                   repfnz_col[krep] = kperm;
-                   xdfs = xlsub[krep];
-                   maxdfs = xprune[krep];
-                   
-#ifdef CHK_DFS 
-                   printf("  xdfs %d, maxdfs %d: ", xdfs, maxdfs);
-                   for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                   printf("\n");
-#endif
-                   do {
-                       /* 
-                        * For each unmarked kchild of krep 
-                        */
-                       while ( xdfs < maxdfs ) {
-                           
-                           kchild = lsub[xdfs];
-                           xdfs++;
-                           chmark = marker[kchild];
-                           
-                           if ( chmark != jj ) { /* Not reached yet */
-                               marker[kchild] = jj;
-                               chperm = perm_r[kchild];
-                             
-                               /* Case kchild is in L: place it in L[*,j] */
-                               if ( chperm == EMPTY ) {
-                                   panel_lsub[nextl_col++] = kchild;
-                               } 
-                               /* Case kchild is in U: 
-                                *   chrep = its supernode-rep. If its rep has 
-                                *   been explored, update its repfnz[*]
-                                */
-                               else {
-                                   
-                                   chrep = xsup[supno[chperm]+1] - 1;
-                                   myfnz = repfnz_col[chrep];
-#ifdef CHK_DFS
-                                   printf("chrep %d,myfnz %d,perm_r[%d] 
%d\n",chrep,myfnz,kchild,chperm);
-#endif
-                                   if ( myfnz != EMPTY ) { /* Visited before */
-                                       if ( myfnz > chperm )
-                                           repfnz_col[chrep] = chperm;
-                                   }
-                                   else {
-                                       /* Cont. dfs at snode-rep of kchild */
-                                       xplore[krep] = xdfs;    
-                                       oldrep = krep;
-                                       krep = chrep; /* Go deeper down G(L) */
-                                       parent[krep] = oldrep;
-                                       repfnz_col[krep] = chperm;
-                                       xdfs = xlsub[krep];     
-                                       maxdfs = xprune[krep];
-#ifdef CHK_DFS 
-                                       printf("  xdfs %d, maxdfs %d: ", xdfs, 
maxdfs);
-                                       for (i = xdfs; i < maxdfs; i++) 
printf(" %d", lsub[i]); 
-                                       printf("\n");
-#endif
-                                   } /* else */
-                                 
-                               } /* else */
-                             
-                           } /* if... */
-                           
-                       } /* while xdfs < maxdfs */
-                       
-                       /* krow has no more unexplored nbrs:
-                        *    Place snode-rep krep in postorder DFS, if this 
-                        *    segment is seen for the first time. (Note that
-                        *    "repfnz[krep]" may change later.)
-                        *    Backtrack dfs to its parent.
-                        */
-                       if ( marker1[krep] < jcol ) {
-                           segrep[*nseg] = krep;
-                           ++(*nseg);
-                           marker1[krep] = jj;
-                       }
-                       
-                       kpar = parent[krep]; /* Pop stack, mimic recursion */
-                       if ( kpar == EMPTY ) break; /* dfs done */
-                       krep = kpar;
-                       xdfs = xplore[krep];
-                       maxdfs = xprune[krep];
-                       
-#ifdef CHK_DFS 
-                       printf("  pop stack: krep %d,xdfs %d,maxdfs %d: ", 
krep,xdfs,maxdfs);
-                       for (i = xdfs; i < maxdfs; i++) printf(" %d", lsub[i]);
-                       printf("\n");
-#endif
-                   } while ( kpar != EMPTY ); /* do-while - until empty stack 
*/
-                   
-               } /* else */
-               
-           } /* else */
-           
-       } /* for each nonz in A[*,jj] */
-       
-       repfnz_col += m;    /* Move to next column */
-        dense_col += m;
-       
-    } /* for jj ... */
-    
-}
diff --git a/superlu/zpivotL.c b/superlu/zpivotL.c
deleted file mode 100644
index 484ebfa2..00000000
--- a/superlu/zpivotL.c
+++ /dev/null
@@ -1,171 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include <math.h>
-#include <stdlib.h>
-#include "slu_zdefs.h"
-
-#undef DEBUG
-
-int
-zpivotL(
-        const int  jcol,     /* in */
-        const double u,      /* in - diagonal pivoting threshold */
-        int        *usepr,   /* re-use the pivot sequence given by 
perm_r/iperm_r */
-        int        *perm_r,  /* may be modified */
-        int        *iperm_r, /* in - inverse of perm_r */
-        int        *iperm_c, /* in - used to find diagonal of Pc*A*Pc' */
-        int        *pivrow,  /* out */
-        GlobalLU_t *Glu,     /* modified - global LU data structures */
-       SuperLUStat_t *stat  /* output */
-       )
-{
-/*
- * Purpose
- * =======
- *   Performs the numerical pivoting on the current column of L,
- *   and the CDIV operation.
- *
- *   Pivot policy:
- *   (1) Compute thresh = u * max_(i>=j) abs(A_ij);
- *   (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN
- *           pivot row = k;
- *       ELSE IF abs(A_jj) >= thresh THEN
- *           pivot row = j;
- *       ELSE
- *           pivot row = m;
- * 
- *   Note: If you absolutely want to use a given pivot order, then set u=0.0.
- *
- *   Return value: 0      success;
- *                 i > 0  U(i,i) is exactly zero.
- *
- */
-    doublecomplex one = {1.0, 0.0};
-    int          fsupc;            /* first column in the supernode */
-    int          nsupc;            /* no of columns in the supernode */
-    int          nsupr;     /* no of rows in the supernode */
-    int          lptr;     /* points to the starting subscript of the 
supernode */
-    int          pivptr, old_pivptr, diag, diagind;
-    double       pivmax, rtemp, thresh;
-    doublecomplex       temp;
-    doublecomplex       *lu_sup_ptr; 
-    doublecomplex       *lu_col_ptr;
-    int          *lsub_ptr;
-    int          isub, icol, k, itemp;
-    int          *lsub, *xlsub;
-    doublecomplex       *lusup;
-    int          *xlusup;
-    flops_t      *ops = stat->ops;
-
-    /* Initialize pointers */
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    lusup      = Glu->lusup;
-    xlusup     = Glu->xlusup;
-    fsupc      = (Glu->xsup)[(Glu->supno)[jcol]];
-    nsupc      = jcol - fsupc;         /* excluding jcol; nsupc >= 0 */
-    lptr       = xlsub[fsupc];
-    nsupr      = xlsub[fsupc+1] - lptr;
-    lu_sup_ptr = &lusup[xlusup[fsupc]];        /* start of the current 
supernode */
-    lu_col_ptr = &lusup[xlusup[jcol]]; /* start of jcol in the supernode */
-    lsub_ptr   = &lsub[lptr];  /* start of row indices of the supernode */
-
-#ifdef DEBUG
-if ( jcol == MIN_COL ) {
-    printf("Before cdiv: col %d\n", jcol);
-    for (k = nsupc; k < nsupr; k++) 
-       printf("  lu[%d] %f\n", lsub_ptr[k], lu_col_ptr[k]);
-}
-#endif
-    
-    /* Determine the largest abs numerical value for partial pivoting;
-       Also search for user-specified pivot, and diagonal element. */
-    if ( *usepr ) *pivrow = iperm_r[jcol];
-    diagind = iperm_c[jcol];
-    pivmax = 0.0;
-    pivptr = nsupc;
-    diag = EMPTY;
-    old_pivptr = nsupc;
-    for (isub = nsupc; isub < nsupr; ++isub) {
-        rtemp = z_abs1 (&lu_col_ptr[isub]);
-       if ( rtemp > pivmax ) {
-           pivmax = rtemp;
-           pivptr = isub;
-       }
-       if ( *usepr && lsub_ptr[isub] == *pivrow ) old_pivptr = isub;
-       if ( lsub_ptr[isub] == diagind ) diag = isub;
-    }
-
-    /* Test for singularity */
-    if ( pivmax == 0.0 ) {
-       *pivrow = lsub_ptr[pivptr];
-       perm_r[*pivrow] = jcol;
-       *usepr = 0;
-       return (jcol+1);
-    }
-
-    thresh = u * pivmax;
-    
-    /* Choose appropriate pivotal element by our policy. */
-    if ( *usepr ) {
-        rtemp = z_abs1 (&lu_col_ptr[old_pivptr]);
-       if ( rtemp != 0.0 && rtemp >= thresh )
-           pivptr = old_pivptr;
-       else
-           *usepr = 0;
-    }
-    if ( *usepr == 0 ) {
-       /* Use diagonal pivot? */
-       if ( diag >= 0 ) { /* diagonal exists */
-            rtemp = z_abs1 (&lu_col_ptr[diag]);
-           if ( rtemp != 0.0 && rtemp >= thresh ) pivptr = diag;
-        }
-       *pivrow = lsub_ptr[pivptr];
-    }
-    
-    /* Record pivot row */
-    perm_r[*pivrow] = jcol;
-    
-    /* Interchange row subscripts */
-    if ( pivptr != nsupc ) {
-       itemp = lsub_ptr[pivptr];
-       lsub_ptr[pivptr] = lsub_ptr[nsupc];
-       lsub_ptr[nsupc] = itemp;
-
-       /* Interchange numerical values as well, for the whole snode, such 
-        * that L is indexed the same way as A.
-        */
-       for (icol = 0; icol <= nsupc; icol++) {
-           itemp = pivptr + icol * nsupr;
-           temp = lu_sup_ptr[itemp];
-           lu_sup_ptr[itemp] = lu_sup_ptr[nsupc + icol*nsupr];
-           lu_sup_ptr[nsupc + icol*nsupr] = temp;
-       }
-    } /* if */
-
-    /* cdiv operation */
-    ops[FACT] += 10 * (nsupr - nsupc);
-
-    z_div(&temp, &one, &lu_col_ptr[nsupc]);
-    for (k = nsupc+1; k < nsupr; k++) 
-       zz_mult(&lu_col_ptr[k], &lu_col_ptr[k], &temp);
-
-    return 0;
-}
-
diff --git a/superlu/zpivotgrowth.c b/superlu/zpivotgrowth.c
deleted file mode 100644
index 1d598cf0..00000000
--- a/superlu/zpivotgrowth.c
+++ /dev/null
@@ -1,129 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-#include <math.h>
-#include "slu_zdefs.h"
-
-double
-zPivotGrowth(int ncols, SuperMatrix *A, int *perm_c, 
-             SuperMatrix *L, SuperMatrix *U)
-{
-/*
- * Purpose
- * =======
- *
- * Compute the reciprocal pivot growth factor of the leading ncols columns
- * of the matrix, using the formula:
- *     min_j ( max_i(abs(A_ij)) / max_i(abs(U_ij)) )
- *
- * Arguments
- * =========
- *
- * ncols    (input) int
- *          The number of columns of matrices A, L and U.
- *
- * A        (input) SuperMatrix*
- *         Original matrix A, permuted by columns, of dimension
- *          (A->nrow, A->ncol). The type of A can be:
- *          Stype = NC; Dtype = SLU_Z; Mtype = GE.
- *
- * L        (output) SuperMatrix*
- *          The factor L from the factorization Pr*A=L*U; use compressed row 
- *          subscripts storage for supernodes, i.e., L has type: 
- *          Stype = SC; Dtype = SLU_Z; Mtype = TRLU.
- *
- * U        (output) SuperMatrix*
- *         The factor U from the factorization Pr*A*Pc=L*U. Use column-wise
- *          storage scheme, i.e., U has types: Stype = NC;
- *          Dtype = SLU_Z; Mtype = TRU.
- *
- */
-    NCformat *Astore;
-    SCformat *Lstore;
-    NCformat *Ustore;
-    doublecomplex  *Aval, *Lval, *Uval;
-    int      fsupc, nsupr, luptr, nz_in_U;
-    int      i, j, k, oldcol;
-    int      *inv_perm_c;
-    double   rpg, maxaj, maxuj;
-    extern   double dlamch_(char *);
-    double   smlnum;
-    doublecomplex   *luval;
-    doublecomplex   temp_comp;
-   
-    /* Get machine constants. */
-    smlnum = dlamch_("S");
-    rpg = 1. / smlnum;
-
-    Astore = A->Store;
-    Lstore = L->Store;
-    Ustore = U->Store;
-    Aval = Astore->nzval;
-    Lval = Lstore->nzval;
-    Uval = Ustore->nzval;
-    
-    inv_perm_c = (int *) SUPERLU_MALLOC(A->ncol*sizeof(int));
-    for (j = 0; j < A->ncol; ++j) inv_perm_c[perm_c[j]] = j;
-
-    for (k = 0; k <= Lstore->nsuper; ++k) {
-       fsupc = L_FST_SUPC(k);
-       nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-       luptr = L_NZ_START(fsupc);
-       luval = &Lval[luptr];
-       nz_in_U = 1;
-       
-       for (j = fsupc; j < L_FST_SUPC(k+1) && j < ncols; ++j) {
-           maxaj = 0.;
-            oldcol = inv_perm_c[j];
-           for (i = Astore->colptr[oldcol]; i < Astore->colptr[oldcol+1]; ++i)
-               maxaj = SUPERLU_MAX( maxaj, z_abs1( &Aval[i]) );
-       
-           maxuj = 0.;
-           for (i = Ustore->colptr[j]; i < Ustore->colptr[j+1]; i++)
-               maxuj = SUPERLU_MAX( maxuj, z_abs1( &Uval[i]) );
-           
-           /* Supernode */
-           for (i = 0; i < nz_in_U; ++i)
-               maxuj = SUPERLU_MAX( maxuj, z_abs1( &luval[i]) );
-
-           ++nz_in_U;
-           luval += nsupr;
-
-           if ( maxuj == 0. )
-               rpg = SUPERLU_MIN( rpg, 1.);
-           else
-               rpg = SUPERLU_MIN( rpg, maxaj / maxuj );
-       }
-       
-       if ( j >= ncols ) break;
-    }
-
-    SUPERLU_FREE(inv_perm_c);
-    return (rpg);
-}
diff --git a/superlu/zpruneL.c b/superlu/zpruneL.c
deleted file mode 100644
index 854038d4..00000000
--- a/superlu/zpruneL.c
+++ /dev/null
@@ -1,156 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_zdefs.h"
-
-void
-zpruneL(
-       const int  jcol,             /* in */
-       const int  *perm_r,   /* in */
-       const int  pivrow,    /* in */
-       const int  nseg,             /* in */
-       const int  *segrep,   /* in */
-       const int  *repfnz,   /* in */
-       int        *xprune,   /* out */
-       GlobalLU_t *Glu       /* modified - global LU data structures */
-       )
-{
-/*
- * Purpose
- * =======
- *   Prunes the L-structure of supernodes whose L-structure
- *   contains the current pivot row "pivrow"
- *
- */
-    doublecomplex     utemp;
-    int        jsupno, irep, irep1, kmin, kmax, krow, movnum;
-    int        i, ktemp, minloc, maxloc;
-    int        do_prune; /* logical variable */
-    int        *xsup, *supno;
-    int        *lsub, *xlsub;
-    doublecomplex     *lusup;
-    int        *xlusup;
-
-    xsup       = Glu->xsup;
-    supno      = Glu->supno;
-    lsub       = Glu->lsub;
-    xlsub      = Glu->xlsub;
-    lusup      = Glu->lusup;
-    xlusup     = Glu->xlusup;
-    
-    /*
-     * For each supernode-rep irep in U[*,j]
-     */
-    jsupno = supno[jcol];
-    for (i = 0; i < nseg; i++) {
-
-       irep = segrep[i];
-       irep1 = irep + 1;
-       do_prune = FALSE;
-
-       /* Don't prune with a zero U-segment */
-       if ( repfnz[irep] == EMPTY )
-               continue;
-
-       /* If a snode overlaps with the next panel, then the U-segment 
-        * is fragmented into two parts -- irep and irep1. We should let
-        * pruning occur at the rep-column in irep1's snode. 
-        */
-       if ( supno[irep] == supno[irep1] )      /* Don't prune */
-               continue;
-
-       /*
-        * If it has not been pruned & it has a nonz in row L[pivrow,i]
-        */
-       if ( supno[irep] != jsupno ) {
-           if ( xprune[irep] >= xlsub[irep1] ) {
-               kmin = xlsub[irep];
-               kmax = xlsub[irep1] - 1;
-               for (krow = kmin; krow <= kmax; krow++) 
-                   if ( lsub[krow] == pivrow ) {
-                       do_prune = TRUE;
-                       break;
-                   }
-           }
-           
-           if ( do_prune ) {
-
-               /* Do a quicksort-type partition
-                * movnum=TRUE means that the num values have to be exchanged.
-                */
-               movnum = FALSE;
-               if ( irep == xsup[supno[irep]] ) /* Snode of size 1 */
-                       movnum = TRUE;
-
-               while ( kmin <= kmax ) {
-
-                   if ( perm_r[lsub[kmax]] == EMPTY ) 
-                       kmax--;
-                   else if ( perm_r[lsub[kmin]] != EMPTY )
-                       kmin++;
-                   else { /* kmin below pivrow, and kmax above pivrow: 
-                           *   interchange the two subscripts
-                           */
-                       ktemp = lsub[kmin];
-                       lsub[kmin] = lsub[kmax];
-                       lsub[kmax] = ktemp;
-
-                       /* If the supernode has only one column, then we
-                        * only keep one set of subscripts. For any subscript 
-                        * interchange performed, similar interchange must be 
-                        * done on the numerical values.
-                        */
-                       if ( movnum ) {
-                           minloc = xlusup[irep] + (kmin - xlsub[irep]);
-                           maxloc = xlusup[irep] + (kmax - xlsub[irep]);
-                           utemp = lusup[minloc];
-                           lusup[minloc] = lusup[maxloc];
-                           lusup[maxloc] = utemp;
-                       }
-
-                       kmin++;
-                       kmax--;
-
-                   }
-
-               } /* while */
-
-               xprune[irep] = kmin;    /* Pruning */
-
-#ifdef CHK_PRUNE
-       printf("    After zpruneL(),using col %d:  xprune[%d] = %d\n", 
-                       jcol, irep, kmin);
-#endif
-           } /* if do_prune */
-
-       } /* if */
-
-    } /* for each U-segment... */
-}
diff --git a/superlu/zreadhb.c b/superlu/zreadhb.c
deleted file mode 100644
index 0522a67a..00000000
--- a/superlu/zreadhb.c
+++ /dev/null
@@ -1,286 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-#include <stdio.h>
-#include <stdlib.h>
-#include "slu_zdefs.h"
-
-
-/* Eat up the rest of the current line */
-int zDumpLine(FILE *fp)
-{
-    register int c;
-    while ((c = fgetc(fp)) != '\n') ;
-    return 0;
-}
-
-int zParseIntFormat(char *buf, int *num, int *size)
-{
-    char *tmp;
-
-    tmp = buf;
-    while (*tmp++ != '(') ;
-    sscanf(tmp, "%d", num);
-    while (*tmp != 'I' && *tmp != 'i') ++tmp;
-    ++tmp;
-    sscanf(tmp, "%d", size);
-    return 0;
-}
-
-int zParseFloatFormat(char *buf, int *num, int *size)
-{
-    char *tmp, *period;
-    
-    tmp = buf;
-    while (*tmp++ != '(') ;
-    *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
-    while (*tmp != 'E' && *tmp != 'e' && *tmp != 'D' && *tmp != 'd'
-          && *tmp != 'F' && *tmp != 'f') {
-        /* May find kP before nE/nD/nF, like (1P6F13.6). In this case the
-           num picked up refers to P, which should be skipped. */
-        if (*tmp=='p' || *tmp=='P') {
-           ++tmp;
-           *num = atoi(tmp); /*sscanf(tmp, "%d", num);*/
-        } else {
-           ++tmp;
-        }
-    }
-    ++tmp;
-    period = tmp;
-    while (*period != '.' && *period != ')') ++period ;
-    *period = '\0';
-    *size = atoi(tmp); /*sscanf(tmp, "%2d", size);*/
-
-    return 0;
-}
-
-int zReadVector(FILE *fp, int n, int *where, int perline, int persize)
-{
-    register int i, j, item;
-    char tmp, buf[100], *dummy;
-    
-    i = 0;
-    while (i < n) {
-       dummy = fgets(buf, 100, fp);    /* read a line at a time */
-       for (j=0; j<perline && i<n; j++) {
-           tmp = buf[(j+1)*persize];     /* save the char at that place */
-           buf[(j+1)*persize] = 0;       /* null terminate */
-           item = atoi(&buf[j*persize]); 
-           buf[(j+1)*persize] = tmp;     /* recover the char at that place */
-           where[i++] = item - 1;
-       }
-    }
-
-    return 0;
-}
-
-/* Read complex numbers as pairs of (real, imaginary) */
-int zReadValues(FILE *fp, int n, doublecomplex *destination, int perline, int 
persize)
-{
-    register int i, j, k, s, pair;
-    register double realpart;
-    char tmp, buf[100], *dummy;
-    
-    i = pair = 0;
-    while (i < n) {
-       dummy = fgets(buf, 100, fp);    /* read a line at a time */
-       for (j=0; j<perline && i<n; j++) {
-           tmp = buf[(j+1)*persize];     /* save the char at that place */
-           buf[(j+1)*persize] = 0;       /* null terminate */
-           s = j*persize;
-           for (k = 0; k < persize; ++k) /* No D_ format in C */
-               if ( buf[s+k] == 'D' || buf[s+k] == 'd' ) buf[s+k] = 'E';
-           if ( pair == 0 ) {
-               /* The value is real part */
-               realpart = atof(&buf[s]);
-               pair = 1;
-           } else {
-               /* The value is imaginary part */
-               destination[i].r = realpart;
-               destination[i++].i = atof(&buf[s]);
-               pair = 0;
-           }
-           buf[(j+1)*persize] = tmp;     /* recover the char at that place */
-       }
-    }
-
-    return 0;
-}
-
-
-void
-zreadhb(int *nrow, int *ncol, int *nonz,
-       doublecomplex **nzval, int **rowind, int **colptr)
-{
-/* 
- * Purpose
- * =======
- * 
- * Read a DOUBLE COMPLEX PRECISION matrix stored in Harwell-Boeing format 
- * as described below.
- * 
- * Line 1 (A72,A8) 
- *     Col. 1 - 72   Title (TITLE) 
- *     Col. 73 - 80  Key (KEY) 
- * 
- * Line 2 (5I14) 
- *     Col. 1 - 14   Total number of lines excluding header (TOTCRD) 
- *     Col. 15 - 28  Number of lines for pointers (PTRCRD) 
- *     Col. 29 - 42  Number of lines for row (or variable) indices (INDCRD) 
- *     Col. 43 - 56  Number of lines for numerical values (VALCRD) 
- *     Col. 57 - 70  Number of lines for right-hand sides (RHSCRD) 
- *                    (including starting guesses and solution vectors 
- *                    if present) 
- *                   (zero indicates no right-hand side data is present) 
- *
- * Line 3 (A3, 11X, 4I14) 
- *     Col. 1 - 3    Matrix type (see below) (MXTYPE) 
- *     Col. 15 - 28  Number of rows (or variables) (NROW) 
- *     Col. 29 - 42  Number of columns (or elements) (NCOL) 
- *     Col. 43 - 56  Number of row (or variable) indices (NNZERO) 
- *                   (equal to number of entries for assembled matrices) 
- *     Col. 57 - 70  Number of elemental matrix entries (NELTVL) 
- *                   (zero in the case of assembled matrices) 
- * Line 4 (2A16, 2A20) 
- *     Col. 1 - 16   Format for pointers (PTRFMT) 
- *     Col. 17 - 32  Format for row (or variable) indices (INDFMT) 
- *     Col. 33 - 52  Format for numerical values of coefficient matrix 
(VALFMT) 
- *     Col. 53 - 72 Format for numerical values of right-hand sides (RHSFMT) 
- *
- * Line 5 (A3, 11X, 2I14) Only present if there are right-hand sides present 
- *     Col. 1        Right-hand side type: 
- *                       F for full storage or M for same format as matrix 
- *     Col. 2        G if a starting vector(s) (Guess) is supplied. (RHSTYP) 
- *     Col. 3        X if an exact solution vector(s) is supplied. 
- *     Col. 15 - 28  Number of right-hand sides (NRHS) 
- *     Col. 29 - 42  Number of row indices (NRHSIX) 
- *                   (ignored in case of unassembled matrices) 
- *
- * The three character type field on line 3 describes the matrix type. 
- * The following table lists the permitted values for each of the three 
- * characters. As an example of the type field, RSA denotes that the matrix 
- * is real, symmetric, and assembled. 
- *
- * First Character: 
- *     R Real matrix 
- *     C Complex matrix 
- *     P Pattern only (no numerical values supplied) 
- *
- * Second Character: 
- *     S Symmetric 
- *     U Unsymmetric 
- *     H Hermitian 
- *     Z Skew symmetric 
- *     R Rectangular 
- *
- * Third Character: 
- *     A Assembled 
- *     E Elemental matrices (unassembled) 
- *
- */
-
-  register int i, numer_lines = 0, rhscrd = 0, dummy;
-    int tmp, colnum, colsize, rownum, rowsize, valnum, valsize;
-    char buf[100], type[4], key[10], *dummyc;
-    FILE *fp;
-
-    fp = stdin;
-
-    /* Line 1 */
-    dummyc = fgets(buf, 100, fp);
-    fputs(buf, stdout);
-#if 0
-    dummy = fscanf(fp, "%72c", buf); buf[72] = 0;
-    printf("Title: %s", buf);
-    dummy += fscanf(fp, "%8c", key);  key[8] = 0;
-    printf("Key: %s\n", key);
-    zDumpLine(fp);
-#endif
-
-    /* Line 2 */
-    for (i=0; i<5; i++) {
-       dummy += fscanf(fp, "%14c", buf); buf[14] = 0;
-       sscanf(buf, "%d", &tmp);
-       if (i == 3) numer_lines = tmp;
-       if (i == 4 && tmp) rhscrd = tmp;
-    }
-    zDumpLine(fp);
-
-    /* Line 3 */
-    dummy += fscanf(fp, "%3c", type);
-    dummy += fscanf(fp, "%11c", buf); /* pad */
-    type[3] = 0;
-#ifdef DEBUG
-    printf("Matrix type %s\n", type);
-#endif
-    
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", nrow);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", ncol);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", nonz);
-    dummy += fscanf(fp, "%14c", buf); sscanf(buf, "%d", &tmp);
-    
-    if (tmp != 0)
-         printf("This is not an assembled matrix!\n");
-    if (*nrow != *ncol)
-       printf("Matrix is not square.\n");
-    zDumpLine(fp);
-
-    /* Allocate storage for the three arrays ( nzval, rowind, colptr ) */
-    zallocateA(*ncol, *nonz, nzval, rowind, colptr);
-
-    /* Line 4: format statement */
-    dummy += fscanf(fp, "%16c", buf);
-    zParseIntFormat(buf, &colnum, &colsize);
-    dummy += fscanf(fp, "%16c", buf);
-    zParseIntFormat(buf, &rownum, &rowsize);
-    dummy += fscanf(fp, "%20c", buf);
-    zParseFloatFormat(buf, &valnum, &valsize);
-    dummy += fscanf(fp, "%20c", buf);
-    zDumpLine(fp);
-
-    /* Line 5: right-hand side */    
-    if ( rhscrd ) zDumpLine(fp); /* skip RHSFMT */
-    
-#ifdef DEBUG
-    printf("%d rows, %d nonzeros\n", *nrow, *nonz);
-    printf("colnum %d, colsize %d\n", colnum, colsize);
-    printf("rownum %d, rowsize %d\n", rownum, rowsize);
-    printf("valnum %d, valsize %d\n", valnum, valsize);
-#endif
-    
-    zReadVector(fp, *ncol+1, *colptr, colnum, colsize);
-    zReadVector(fp, *nonz, *rowind, rownum, rowsize);
-    if ( numer_lines ) {
-        zReadValues(fp, *nonz, *nzval, valnum, valsize);
-    }
-    
-    fclose(fp);
-
-}
-
diff --git a/superlu/zsnode_bmod.c b/superlu/zsnode_bmod.c
deleted file mode 100644
index e10da825..00000000
--- a/superlu/zsnode_bmod.c
+++ /dev/null
@@ -1,129 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_zdefs.h"
-extern void ztrsv_();
-extern void zgemv_();
-
-
-/*
- * Performs numeric block updates within the relaxed snode. 
- */
-int
-zsnode_bmod (
-           const int  jcol,      /* in */
-           const int  jsupno,    /* in */
-           const int  fsupc,     /* in */
-           doublecomplex     *dense,    /* in */
-           doublecomplex     *tempv,    /* working array */
-           GlobalLU_t *Glu,      /* modified */
-           SuperLUStat_t *stat   /* output */
-           )
-{
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-        ftcs2 = _cptofcd("N", strlen("N")),
-        ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    int            incx = 1, incy = 1;
-    doublecomplex         alpha = {-1.0, 0.0},  beta = {1.0, 0.0};
-#endif
-
-    doublecomplex   comp_zero = {0.0, 0.0};
-    int            luptr, nsupc, nsupr, nrow;
-    int            isub, irow, i, iptr; 
-    register int   ufirst, nextlu;
-    int            *lsub, *xlsub;
-    doublecomplex         *lusup;
-    int            *xlusup;
-    flops_t *ops = stat->ops;
-
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-
-    nextlu = xlusup[jcol];
-    
-    /*
-     * Process the supernodal portion of L\U[*,j]
-     */
-    for (isub = xlsub[fsupc]; isub < xlsub[fsupc+1]; isub++) {
-       irow = lsub[isub];
-       lusup[nextlu] = dense[irow];
-        dense[irow] = comp_zero;
-       ++nextlu;
-    }
-
-    xlusup[jcol + 1] = nextlu; /* Initialize xlusup for next column */
-    
-    if ( fsupc < jcol ) {
-
-       luptr = xlusup[fsupc];
-       nsupr = xlsub[fsupc+1] - xlsub[fsupc];
-       nsupc = jcol - fsupc;   /* Excluding jcol */
-       ufirst = xlusup[jcol];  /* Points to the beginning of column
-                                  jcol in supernode L\U(jsupno). */
-       nrow = nsupr - nsupc;
-
-       ops[TRSV] += 4 * nsupc * (nsupc - 1);
-       ops[GEMV] += 8 * nrow * nsupc;
-
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-       CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &lusup[luptr], &nsupr, 
-             &lusup[ufirst], &incx );
-       CGEMV( ftcs2, &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
-               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-#else
-       ztrsv_( "L", "N", "U", &nsupc, &lusup[luptr], &nsupr, 
-             &lusup[ufirst], &incx );
-       zgemv_( "N", &nrow, &nsupc, &alpha, &lusup[luptr+nsupc], &nsupr, 
-               &lusup[ufirst], &incx, &beta, &lusup[ufirst+nsupc], &incy );
-#endif
-#else
-       zlsolve ( nsupr, nsupc, &lusup[luptr], &lusup[ufirst] );
-       zmatvec ( nsupr, nrow, nsupc, &lusup[luptr+nsupc], 
-                       &lusup[ufirst], &tempv[0] );
-
-        /* Scatter tempv[*] into lusup[*] */
-       iptr = ufirst + nsupc;
-       for (i = 0; i < nrow; i++) {
-           z_sub(&lusup[iptr], &lusup[iptr], &tempv[i]);
-            ++iptr;
-           tempv[i] = comp_zero;
-       }
-#endif
-
-    }
-
-    return 0;
-}
diff --git a/superlu/zsnode_dfs.c b/superlu/zsnode_dfs.c
deleted file mode 100644
index c860a6fb..00000000
--- a/superlu/zsnode_dfs.c
+++ /dev/null
@@ -1,113 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-
-#include "slu_zdefs.h"
-
-int
-zsnode_dfs (
-          const int  jcol,         /* in - start of the supernode */
-          const int  kcol,         /* in - end of the supernode */
-          const int  *asub,        /* in */
-          const int  *xa_begin,    /* in */
-          const int  *xa_end,      /* in */
-          int        *xprune,      /* out */
-          int        *marker,      /* modified */
-          GlobalLU_t *Glu          /* modified */
-          )
-{
-/* Purpose
- * =======
- *    zsnode_dfs() - Determine the union of the row structures of those 
- *    columns within the relaxed snode.
- *    Note: The relaxed snodes are leaves of the supernodal etree, therefore, 
- *    the portion outside the rectangular supernode must be zero.
- *
- * Return value
- * ============
- *     0   success;
- *    >0   number of bytes allocated when run out of memory.
- *
- */
-    register int i, k, ifrom, ito, nextl, new_next;
-    int          nsuper, krow, kmark, mem_error;
-    int          *xsup, *supno;
-    int          *lsub, *xlsub;
-    int          nzlmax;
-    
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    nzlmax  = Glu->nzlmax;
-
-    nsuper = ++supno[jcol];    /* Next available supernode number */
-    nextl = xlsub[jcol];
-
-    for (i = jcol; i <= kcol; i++) {
-       /* For each nonzero in A[*,i] */
-       for (k = xa_begin[i]; k < xa_end[i]; k++) {     
-           krow = asub[k];
-           kmark = marker[krow];
-           if ( kmark != kcol ) { /* First time visit krow */
-               marker[krow] = kcol;
-               lsub[nextl++] = krow;
-               if ( nextl >= nzlmax ) {
-                   if ( mem_error = zLUMemXpand(jcol, nextl, LSUB, &nzlmax, 
Glu) )
-                       return (mem_error);
-                   lsub = Glu->lsub;
-               }
-           }
-       }
-       supno[i] = nsuper;
-    }
-
-    /* Supernode > 1, then make a copy of the subscripts for pruning */
-    if ( jcol < kcol ) {
-       new_next = nextl + (nextl - xlsub[jcol]);
-       while ( new_next > nzlmax ) {
-           if ( mem_error = zLUMemXpand(jcol, nextl, LSUB, &nzlmax, Glu) )
-               return (mem_error);
-           lsub = Glu->lsub;
-       }
-       ito = nextl;
-       for (ifrom = xlsub[jcol]; ifrom < nextl; )
-           lsub[ito++] = lsub[ifrom++];        
-        for (i = jcol+1; i <= kcol; i++) xlsub[i] = nextl;
-       nextl = ito;
-    }
-
-    xsup[nsuper+1] = kcol + 1;
-    supno[kcol+1]  = nsuper;
-    xprune[kcol]   = nextl;
-    xlsub[kcol+1]  = nextl;
-
-    return 0;
-}
-
diff --git a/superlu/zsp_blas2.c b/superlu/zsp_blas2.c
deleted file mode 100644
index 525f753d..00000000
--- a/superlu/zsp_blas2.c
+++ /dev/null
@@ -1,576 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
- * File name:          zsp_blas2.c
- * Purpose:            Sparse BLAS 2, using some dense BLAS 2 operations.
- */
-
-#include "slu_zdefs.h"
-extern void ztrsv_();
-extern void zgemv_();
-
-/* 
- * Function prototypes 
- */
-void zusolve(int, int, doublecomplex*, doublecomplex*);
-void zlsolve(int, int, doublecomplex*, doublecomplex*);
-void zmatvec(int, int, int, doublecomplex*, doublecomplex*, doublecomplex*);
-
-
-int
-sp_ztrsv(char *uplo, char *trans, char *diag, SuperMatrix *L, 
-         SuperMatrix *U, doublecomplex *x, SuperLUStat_t *stat, int *info)
-{
-/*
- *   Purpose
- *   =======
- *
- *   sp_ztrsv() solves one of the systems of equations   
- *       A*x = b,   or   A'*x = b,
- *   where b and x are n element vectors and A is a sparse unit , or   
- *   non-unit, upper or lower triangular matrix.   
- *   No test for singularity or near-singularity is included in this   
- *   routine. Such tests must be performed before calling this routine.   
- *
- *   Parameters   
- *   ==========   
- *
- *   uplo   - (input) char*
- *            On entry, uplo specifies whether the matrix is an upper or   
- *             lower triangular matrix as follows:   
- *                uplo = 'U' or 'u'   A is an upper triangular matrix.   
- *                uplo = 'L' or 'l'   A is a lower triangular matrix.   
- *
- *   trans  - (input) char*
- *             On entry, trans specifies the equations to be solved as   
- *             follows:   
- *                trans = 'N' or 'n'   A*x = b.   
- *                trans = 'T' or 't'   A'*x = b.
- *                trans = 'C' or 'c'   A^H*x = b.   
- *
- *   diag   - (input) char*
- *             On entry, diag specifies whether or not A is unit   
- *             triangular as follows:   
- *                diag = 'U' or 'u'   A is assumed to be unit triangular.   
- *                diag = 'N' or 'n'   A is not assumed to be unit   
- *                                    triangular.   
- *          
- *   L       - (input) SuperMatrix*
- *            The factor L from the factorization Pr*A*Pc=L*U. Use
- *             compressed row subscripts storage for supernodes,
- *             i.e., L has types: Stype = SC, Dtype = SLU_Z, Mtype = TRLU.
- *
- *   U       - (input) SuperMatrix*
- *             The factor U from the factorization Pr*A*Pc=L*U.
- *             U has types: Stype = NC, Dtype = SLU_Z, Mtype = TRU.
- *    
- *   x       - (input/output) doublecomplex*
- *             Before entry, the incremented array X must contain the n   
- *             element right-hand side vector b. On exit, X is overwritten 
- *             with the solution vector x.
- *
- *   info    - (output) int*
- *             If *info = -i, the i-th argument had an illegal value.
- *
- */
-#ifdef _CRAY
-    _fcd ftcs1 = _cptofcd("L", strlen("L")),
-        ftcs2 = _cptofcd("N", strlen("N")),
-        ftcs3 = _cptofcd("U", strlen("U"));
-#endif
-    SCformat *Lstore;
-    NCformat *Ustore;
-    doublecomplex   *Lval, *Uval;
-    int incx = 1, incy = 1;
-    doublecomplex temp;
-    doublecomplex alpha = {1.0, 0.0}, beta = {1.0, 0.0};
-    doublecomplex comp_zero = {0.0, 0.0};
-    int nrow;
-    int fsupc, nsupr, nsupc, luptr, istart, irow;
-    int i, k, iptr, jcol;
-    doublecomplex *work;
-    flops_t solve_ops;
-
-    /* Test the input parameters */
-    *info = 0;
-    if ( !lsame_(uplo,"L") && !lsame_(uplo, "U") ) *info = -1;
-    else if ( !lsame_(trans, "N") && !lsame_(trans, "T") && 
-              !lsame_(trans, "C")) *info = -2;
-    else if ( !lsame_(diag, "U") && !lsame_(diag, "N") ) *info = -3;
-    else if ( L->nrow != L->ncol || L->nrow < 0 ) *info = -4;
-    else if ( U->nrow != U->ncol || U->nrow < 0 ) *info = -5;
-    if ( *info ) {
-       i = -(*info);
-       xerbla_("sp_ztrsv", &i);
-       return 0;
-    }
-
-    Lstore = L->Store;
-    Lval = Lstore->nzval;
-    Ustore = U->Store;
-    Uval = Ustore->nzval;
-    solve_ops = 0;
-
-    if ( !(work = doublecomplexCalloc(L->nrow)) )
-       ABORT("Malloc fails for work in sp_ztrsv().");
-    
-    if ( lsame_(trans, "N") ) {        /* Form x := inv(A)*x. */
-       
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := inv(L)*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               nrow = nsupr - nsupc;
-
-                /* 1 z_div costs 10 flops */
-               solve_ops += 4 * nsupc * (nsupc - 1) + 10 * nsupc;
-               solve_ops += 8 * nrow * nsupc;
-
-               if ( nsupc == 1 ) {
-                   for (iptr=istart+1; iptr < L_SUB_START(fsupc+1); ++iptr) {
-                       irow = L_SUB(iptr);
-                       ++luptr;
-                       zz_mult(&comp_zero, &x[fsupc], &Lval[luptr]);
-                       z_sub(&x[irow], &x[irow], &comp_zero);
-                   }
-               } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-                   CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-               
-                   CGEMV(ftcs2, &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-                       &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#else
-                   ztrsv_("L", "N", "U", &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-               
-                   zgemv_("N", &nrow, &nsupc, &alpha, &Lval[luptr+nsupc], 
-                       &nsupr, &x[fsupc], &incx, &beta, &work[0], &incy);
-#endif
-#else
-                   zlsolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc]);
-               
-                   zmatvec ( nsupr, nsupr-nsupc, nsupc, &Lval[luptr+nsupc],
-                             &x[fsupc], &work[0] );
-#endif         
-               
-                   iptr = istart + nsupc;
-                   for (i = 0; i < nrow; ++i, ++iptr) {
-                       irow = L_SUB(iptr);
-                       z_sub(&x[irow], &x[irow], &work[i]); /* Scatter */
-                       work[i] = comp_zero;
-
-                   }
-               }
-           } /* for k ... */
-           
-       } else {
-           /* Form x := inv(U)*x */
-           
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = Lstore->nsuper; k >= 0; k--) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-               
-                /* 1 z_div costs 10 flops */
-               solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
-
-               if ( nsupc == 1 ) {
-                   z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
-                   for (i = U_NZ_START(fsupc); i < U_NZ_START(fsupc+1); ++i) {
-                       irow = U_SUB(i);
-                       zz_mult(&comp_zero, &x[fsupc], &Uval[i]);
-                       z_sub(&x[irow], &x[irow], &comp_zero);
-                   }
-               } else {
-#ifdef USE_VENDOR_BLAS
-#ifdef _CRAY
-                   CTRSV(ftcs3, ftcs2, ftcs2, &nsupc, &Lval[luptr], &nsupr,
-                      &x[fsupc], &incx);
-#else
-                   ztrsv_("U", "N", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#endif
-#else          
-                   zusolve ( nsupr, nsupc, &Lval[luptr], &x[fsupc] );
-#endif         
-
-                   for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                       solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                       for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); 
-                               i++) {
-                           irow = U_SUB(i);
-                       zz_mult(&comp_zero, &x[jcol], &Uval[i]);
-                       z_sub(&x[irow], &x[irow], &comp_zero);
-                       }
-                    }
-               }
-           } /* for k ... */
-           
-       }
-    } else if ( lsame_(trans, "T") ) { /* Form x := inv(A')*x */
-       
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := inv(L')*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = Lstore->nsuper; k >= 0; --k) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-
-               solve_ops += 8 * (nsupr - nsupc) * nsupc;
-
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   iptr = istart + nsupc;
-                   for (i = L_NZ_START(jcol) + nsupc; 
-                               i < L_NZ_START(jcol+1); i++) {
-                       irow = L_SUB(iptr);
-                       zz_mult(&comp_zero, &x[irow], &Lval[i]);
-                       z_sub(&x[jcol], &x[jcol], &comp_zero);
-                       iptr++;
-                   }
-               }
-               
-               if ( nsupc > 1 ) {
-                   solve_ops += 4 * nsupc * (nsupc - 1);
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("L", strlen("L"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("U", strlen("U"));
-                   CTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-#else
-                   ztrsv_("L", "T", "U", &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-#endif
-               }
-           }
-       } else {
-           /* Form x := inv(U')*x */
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
-                       irow = U_SUB(i);
-                       zz_mult(&comp_zero, &x[irow], &Uval[i]);
-                       z_sub(&x[jcol], &x[jcol], &comp_zero);
-                   }
-               }
-
-                /* 1 z_div costs 10 flops */
-               solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
-
-               if ( nsupc == 1 ) {
-                   z_div(&x[fsupc], &x[fsupc], &Lval[luptr]);
-               } else {
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("U", strlen("U"));
-                    ftcs2 = _cptofcd("T", strlen("T"));
-                    ftcs3 = _cptofcd("N", strlen("N"));
-                   CTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#else
-                   ztrsv_("U", "T", "N", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#endif
-               }
-           } /* for k ... */
-       }
-    } else { /* Form x := conj(inv(A'))*x */
-       
-       if ( lsame_(uplo, "L") ) {
-           /* Form x := conj(inv(L'))*x */
-           if ( L->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = Lstore->nsuper; k >= 0; --k) {
-               fsupc = L_FST_SUPC(k);
-               istart = L_SUB_START(fsupc);
-               nsupr = L_SUB_START(fsupc+1) - istart;
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-
-               solve_ops += 8 * (nsupr - nsupc) * nsupc;
-
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   iptr = istart + nsupc;
-                   for (i = L_NZ_START(jcol) + nsupc; 
-                               i < L_NZ_START(jcol+1); i++) {
-                       irow = L_SUB(iptr);
-                        zz_conj(&temp, &Lval[i]);
-                       zz_mult(&comp_zero, &x[irow], &temp);
-                       z_sub(&x[jcol], &x[jcol], &comp_zero);
-                       iptr++;
-                   }
-               }
-               
-               if ( nsupc > 1 ) {
-                   solve_ops += 4 * nsupc * (nsupc - 1);
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("L", strlen("L"));
-                    ftcs2 = _cptofcd(trans, strlen("T"));
-                    ftcs3 = _cptofcd("U", strlen("U"));
-                   ZTRSV(ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                       &x[fsupc], &incx);
-#else
-                    ztrsv_("L", trans, "U", &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#endif
-               }
-           }
-       } else {
-           /* Form x := conj(inv(U'))*x */
-           if ( U->nrow == 0 ) return 0; /* Quick return */
-           
-           for (k = 0; k <= Lstore->nsuper; k++) {
-               fsupc = L_FST_SUPC(k);
-               nsupr = L_SUB_START(fsupc+1) - L_SUB_START(fsupc);
-               nsupc = L_FST_SUPC(k+1) - fsupc;
-               luptr = L_NZ_START(fsupc);
-
-               for (jcol = fsupc; jcol < L_FST_SUPC(k+1); jcol++) {
-                   solve_ops += 8*(U_NZ_START(jcol+1) - U_NZ_START(jcol));
-                   for (i = U_NZ_START(jcol); i < U_NZ_START(jcol+1); i++) {
-                       irow = U_SUB(i);
-                        zz_conj(&temp, &Uval[i]);
-                       zz_mult(&comp_zero, &x[irow], &temp);
-                       z_sub(&x[jcol], &x[jcol], &comp_zero);
-                   }
-               }
-
-                /* 1 z_div costs 10 flops */
-               solve_ops += 4 * nsupc * (nsupc + 1) + 10 * nsupc;
- 
-               if ( nsupc == 1 ) {
-                    zz_conj(&temp, &Lval[luptr]);
-                   z_div(&x[fsupc], &x[fsupc], &temp);
-               } else {
-#ifdef _CRAY
-                    ftcs1 = _cptofcd("U", strlen("U"));
-                    ftcs2 = _cptofcd(trans, strlen("T"));
-                    ftcs3 = _cptofcd("N", strlen("N"));
-                   ZTRSV( ftcs1, ftcs2, ftcs3, &nsupc, &Lval[luptr], &nsupr,
-                           &x[fsupc], &incx);
-#else
-                    ztrsv_("U", trans, "N", &nsupc, &Lval[luptr], &nsupr,
-                               &x[fsupc], &incx);
-#endif
-               }
-           } /* for k ... */
-       }
-    }
-
-    stat->ops[SOLVE] += solve_ops;
-    SUPERLU_FREE(work);
-    return 0;
-}
-
-
-
-int
-sp_zgemv(char *trans, doublecomplex alpha, SuperMatrix *A, doublecomplex *x, 
-        int incx, doublecomplex beta, doublecomplex *y, int incy)
-{
-/*  Purpose   
-    =======   
-
-    sp_zgemv()  performs one of the matrix-vector operations   
-       y := alpha*A*x + beta*y,   or   y := alpha*A'*x + beta*y,   
-    where alpha and beta are scalars, x and y are vectors and A is a
-    sparse A->nrow by A->ncol matrix.   
-
-    Parameters   
-    ==========   
-
-    TRANS  - (input) char*
-             On entry, TRANS specifies the operation to be performed as   
-             follows:   
-                TRANS = 'N' or 'n'   y := alpha*A*x + beta*y.   
-                TRANS = 'T' or 't'   y := alpha*A'*x + beta*y.   
-                TRANS = 'C' or 'c'   y := alpha*A'*x + beta*y.   
-
-    ALPHA  - (input) doublecomplex
-             On entry, ALPHA specifies the scalar alpha.   
-
-    A      - (input) SuperMatrix*
-             Before entry, the leading m by n part of the array A must   
-             contain the matrix of coefficients.   
-
-    X      - (input) doublecomplex*, array of DIMENSION at least   
-             ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.   
-             Before entry, the incremented array X must contain the   
-             vector x.   
-
-    INCX   - (input) int
-             On entry, INCX specifies the increment for the elements of   
-             X. INCX must not be zero.   
-
-    BETA   - (input) doublecomplex
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then Y need not be set on input.   
-
-    Y      - (output) doublecomplex*,  array of DIMENSION at least   
-             ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'   
-             and at least   
-             ( 1 + ( n - 1 )*abs( INCY ) ) otherwise.   
-             Before entry with BETA non-zero, the incremented array Y   
-             must contain the vector y. On exit, Y is overwritten by the 
-             updated vector y.
-            
-    INCY   - (input) int
-             On entry, INCY specifies the increment for the elements of   
-             Y. INCY must not be zero.   
-
-    ==== Sparse Level 2 Blas routine.   
-*/
-
-    /* Local variables */
-    NCformat *Astore;
-    doublecomplex   *Aval;
-    int info;
-    doublecomplex temp, temp1;
-    int lenx, leny, i, j, irow;
-    int iy, jx, jy, kx, ky;
-    int notran;
-    doublecomplex comp_zero = {0.0, 0.0};
-    doublecomplex comp_one = {1.0, 0.0};
-
-    notran = lsame_(trans, "N");
-    Astore = A->Store;
-    Aval = Astore->nzval;
-    
-    /* Test the input parameters */
-    info = 0;
-    if ( !notran && !lsame_(trans, "T") && !lsame_(trans, "C")) info = 1;
-    else if ( A->nrow < 0 || A->ncol < 0 ) info = 3;
-    else if (incx == 0) info = 5;
-    else if (incy == 0)        info = 8;
-    if (info != 0) {
-       xerbla_("sp_zgemv ", &info);
-       return 0;
-    }
-
-    /* Quick return if possible. */
-    if (A->nrow == 0 || A->ncol == 0 || 
-       z_eq(&alpha, &comp_zero) && 
-       z_eq(&beta, &comp_one))
-       return 0;
-
-
-    /* Set  LENX  and  LENY, the lengths of the vectors x and y, and set 
-       up the start points in  X  and  Y. */
-    if (lsame_(trans, "N")) {
-       lenx = A->ncol;
-       leny = A->nrow;
-    } else {
-       lenx = A->nrow;
-       leny = A->ncol;
-    }
-    if (incx > 0) kx = 0;
-    else kx =  - (lenx - 1) * incx;
-    if (incy > 0) ky = 0;
-    else ky =  - (leny - 1) * incy;
-
-    /* Start the operations. In this version the elements of A are   
-       accessed sequentially with one pass through A. */
-    /* First form  y := beta*y. */
-    if ( !z_eq(&beta, &comp_one) ) {
-       if (incy == 1) {
-           if ( z_eq(&beta, &comp_zero) )
-               for (i = 0; i < leny; ++i) y[i] = comp_zero;
-           else
-               for (i = 0; i < leny; ++i) 
-                 zz_mult(&y[i], &beta, &y[i]);
-       } else {
-           iy = ky;
-           if ( z_eq(&beta, &comp_zero) )
-               for (i = 0; i < leny; ++i) {
-                   y[iy] = comp_zero;
-                   iy += incy;
-               }
-           else
-               for (i = 0; i < leny; ++i) {
-                   zz_mult(&y[iy], &beta, &y[iy]);
-                   iy += incy;
-               }
-       }
-    }
-    
-    if ( z_eq(&alpha, &comp_zero) ) return 0;
-
-    if ( notran ) {
-       /* Form  y := alpha*A*x + y. */
-       jx = kx;
-       if (incy == 1) {
-           for (j = 0; j < A->ncol; ++j) {
-               if ( !z_eq(&x[jx], &comp_zero) ) {
-                   zz_mult(&temp, &alpha, &x[jx]);
-                   for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                       irow = Astore->rowind[i];
-                       zz_mult(&temp1, &temp,  &Aval[i]);
-                       z_add(&y[irow], &y[irow], &temp1);
-                   }
-               }
-               jx += incx;
-           }
-       } else {
-           ABORT("Not implemented.");
-       }
-    } else {
-       /* Form  y := alpha*A'*x + y. */
-       jy = ky;
-       if (incx == 1) {
-           for (j = 0; j < A->ncol; ++j) {
-               temp = comp_zero;
-               for (i = Astore->colptr[j]; i < Astore->colptr[j+1]; ++i) {
-                   irow = Astore->rowind[i];
-                   zz_mult(&temp1, &Aval[i], &x[irow]);
-                   z_add(&temp, &temp, &temp1);
-               }
-               zz_mult(&temp1, &alpha, &temp);
-               z_add(&y[jy], &y[jy], &temp1);
-               jy += incy;
-           }
-       } else {
-           ABORT("Not implemented.");
-       }
-    }
-    return 0;    
-} /* sp_zgemv */
-
diff --git a/superlu/zsp_blas3.c b/superlu/zsp_blas3.c
deleted file mode 100644
index 7a815e2f..00000000
--- a/superlu/zsp_blas3.c
+++ /dev/null
@@ -1,140 +0,0 @@
-
-/*
- * -- SuperLU routine (version 2.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * November 15, 1997
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-/*
-  Copyright (c) 1997 by Xerox Corporation.  All rights reserved.
- 
-  THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY
-  EXPRESSED OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
- 
-  Permission is hereby granted to use or copy this program for any
-  purpose, provided the above notices are retained on all copies.
-  Permission to modify the code and to distribute modified code is
-  granted, provided the above notices are retained, and a notice that
-  the code was modified is included with the above copyright notice.
-*/
-/*
- * File name:          sp_blas3.c
- * Purpose:            Sparse BLAS3, using some dense BLAS3 operations.
- */
-
-#include "slu_zdefs.h"
-
-int
-sp_zgemm(char *transa, char *transb, int m, int n, int k, 
-         doublecomplex alpha, SuperMatrix *A, doublecomplex *b, int ldb, 
-         doublecomplex beta, doublecomplex *c, int ldc)
-{
-/*  Purpose   
-    =======   
-
-    sp_z performs one of the matrix-matrix operations   
-
-       C := alpha*op( A )*op( B ) + beta*C,   
-
-    where  op( X ) is one of 
-
-       op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ),
-
-    alpha and beta are scalars, and A, B and C are matrices, with op( A ) 
-    an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix. 
-  
-
-    Parameters   
-    ==========   
-
-    TRANSA - (input) char*
-             On entry, TRANSA specifies the form of op( A ) to be used in 
-             the matrix multiplication as follows:   
-                TRANSA = 'N' or 'n',  op( A ) = A.   
-                TRANSA = 'T' or 't',  op( A ) = A'.   
-                TRANSA = 'C' or 'c',  op( A ) = conjg( A' ).   
-             Unchanged on exit.   
-
-    TRANSB - (input) char*
-             On entry, TRANSB specifies the form of op( B ) to be used in 
-             the matrix multiplication as follows:   
-                TRANSB = 'N' or 'n',  op( B ) = B.   
-                TRANSB = 'T' or 't',  op( B ) = B'.   
-                TRANSB = 'C' or 'c',  op( B ) = conjg( B' ).   
-             Unchanged on exit.   
-
-    M      - (input) int   
-             On entry,  M  specifies  the number of rows of the matrix 
-            op( A ) and of the matrix C.  M must be at least zero. 
-            Unchanged on exit.   
-
-    N      - (input) int
-             On entry,  N specifies the number of columns of the matrix 
-            op( B ) and the number of columns of the matrix C. N must be 
-            at least zero.
-            Unchanged on exit.   
-
-    K      - (input) int
-             On entry, K specifies the number of columns of the matrix 
-            op( A ) and the number of rows of the matrix op( B ). K must 
-            be at least  zero.   
-             Unchanged on exit.
-            
-    ALPHA  - (input) doublecomplex
-             On entry, ALPHA specifies the scalar alpha.   
-
-    A      - (input) SuperMatrix*
-             Matrix A with a sparse format, of dimension (A->nrow, A->ncol).
-             Currently, the type of A can be:
-                 Stype = NC or NCP; Dtype = SLU_Z; Mtype = GE. 
-             In the future, more general A can be handled.
-
-    B      - DOUBLE COMPLEX PRECISION array of DIMENSION ( LDB, kb ), where kb 
is 
-             n when TRANSB = 'N' or 'n',  and is  k otherwise.   
-             Before entry with  TRANSB = 'N' or 'n',  the leading k by n 
-             part of the array B must contain the matrix B, otherwise 
-             the leading n by k part of the array B must contain the 
-             matrix B.   
-             Unchanged on exit.   
-
-    LDB    - (input) int
-             On entry, LDB specifies the first dimension of B as declared 
-             in the calling (sub) program. LDB must be at least max( 1, n ).  
-             Unchanged on exit.   
-
-    BETA   - (input) doublecomplex
-             On entry, BETA specifies the scalar beta. When BETA is   
-             supplied as zero then C need not be set on input.   
-
-    C      - DOUBLE COMPLEX PRECISION array of DIMENSION ( LDC, n ).   
-             Before entry, the leading m by n part of the array C must 
-             contain the matrix C,  except when beta is zero, in which 
-             case C need not be set on entry.   
-             On exit, the array C is overwritten by the m by n matrix 
-            ( alpha*op( A )*B + beta*C ).   
-
-    LDC    - (input) int
-             On entry, LDC specifies the first dimension of C as declared 
-             in the calling (sub)program. LDC must be at least max(1,m).   
-             Unchanged on exit.   
-
-    ==== Sparse Level 3 Blas routine.   
-*/
-    int    incx = 1, incy = 1;
-    int    j;
-
-    for (j = 0; j < n; ++j) {
-       sp_zgemv(transa, alpha, A, &b[ldb*j], incx, beta, &c[ldc*j], incy);
-    }
-    return 0;    
-}
diff --git a/superlu/zutil.c b/superlu/zutil.c
deleted file mode 100644
index 30bcca70..00000000
--- a/superlu/zutil.c
+++ /dev/null
@@ -1,482 +0,0 @@
-
-/*
- * -- SuperLU routine (version 3.0) --
- * Univ. of California Berkeley, Xerox Palo Alto Research Center,
- * and Lawrence Berkeley National Lab.
- * October 15, 2003
- *
- */
-/*
-Copyright (c) 2003, The Regents of the University of California, through
-Lawrence Berkeley National Laboratory (subject to receipt of any required 
-approvals from U.S. Dept. of Energy) 
-
-All rights reserved. 
-
-The source code is distributed under BSD license, see the file License.txt
-*/
-
-#include <math.h>
-#include "slu_zdefs.h"
-
-void
-zCreate_CompCol_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-                      doublecomplex *nzval, int *rowind, int *colptr,
-                      Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-{
-    NCformat *Astore;
-
-    A->Stype = stype;
-    A->Dtype = dtype;
-    A->Mtype = mtype;
-    A->nrow = m;
-    A->ncol = n;
-    A->Store = (void *) SUPERLU_MALLOC( sizeof(NCformat) );
-    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
-    Astore = A->Store;
-    Astore->nnz = nnz;
-    Astore->nzval = nzval;
-    Astore->rowind = rowind;
-    Astore->colptr = colptr;
-}
-
-void
-zCreate_CompRow_Matrix(SuperMatrix *A, int m, int n, int nnz, 
-                      doublecomplex *nzval, int *colind, int *rowptr,
-                      Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-{
-    NRformat *Astore;
-
-    A->Stype = stype;
-    A->Dtype = dtype;
-    A->Mtype = mtype;
-    A->nrow = m;
-    A->ncol = n;
-    A->Store = (void *) SUPERLU_MALLOC( sizeof(NRformat) );
-    if ( !(A->Store) ) ABORT("SUPERLU_MALLOC fails for A->Store");
-    Astore = A->Store;
-    Astore->nnz = nnz;
-    Astore->nzval = nzval;
-    Astore->colind = colind;
-    Astore->rowptr = rowptr;
-}
-
-/* Copy matrix A into matrix B. */
-void
-zCopy_CompCol_Matrix(SuperMatrix *A, SuperMatrix *B)
-{
-    NCformat *Astore, *Bstore;
-    int      ncol, nnz, i;
-
-    B->Stype = A->Stype;
-    B->Dtype = A->Dtype;
-    B->Mtype = A->Mtype;
-    B->nrow  = A->nrow;;
-    B->ncol  = ncol = A->ncol;
-    Astore   = (NCformat *) A->Store;
-    Bstore   = (NCformat *) B->Store;
-    Bstore->nnz = nnz = Astore->nnz;
-    for (i = 0; i < nnz; ++i)
-       ((doublecomplex *)Bstore->nzval)[i] = ((doublecomplex 
*)Astore->nzval)[i];
-    for (i = 0; i < nnz; ++i) Bstore->rowind[i] = Astore->rowind[i];
-    for (i = 0; i <= ncol; ++i) Bstore->colptr[i] = Astore->colptr[i];
-}
-
-
-void
-zCreate_Dense_Matrix(SuperMatrix *X, int m, int n, doublecomplex *x, int ldx,
-                   Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-{
-    DNformat    *Xstore;
-    
-    X->Stype = stype;
-    X->Dtype = dtype;
-    X->Mtype = mtype;
-    X->nrow = m;
-    X->ncol = n;
-    X->Store = (void *) SUPERLU_MALLOC( sizeof(DNformat) );
-    if ( !(X->Store) ) ABORT("SUPERLU_MALLOC fails for X->Store");
-    Xstore = (DNformat *) X->Store;
-    Xstore->lda = ldx;
-    Xstore->nzval = (doublecomplex *) x;
-}
-
-void
-zCopy_Dense_Matrix(int M, int N, doublecomplex *X, int ldx,
-                       doublecomplex *Y, int ldy)
-{
-/*
- *
- *  Purpose
- *  =======
- *
- *  Copies a two-dimensional matrix X to another matrix Y.
- */
-    int    i, j;
-    
-    for (j = 0; j < N; ++j)
-        for (i = 0; i < M; ++i)
-            Y[i + j*ldy] = X[i + j*ldx];
-}
-
-void
-zCreate_SuperNode_Matrix(SuperMatrix *L, int m, int n, int nnz, 
-                       doublecomplex *nzval, int *nzval_colptr, int *rowind,
-                       int *rowind_colptr, int *col_to_sup, int *sup_to_col,
-                       Stype_t stype, Dtype_t dtype, Mtype_t mtype)
-{
-    SCformat *Lstore;
-
-    L->Stype = stype;
-    L->Dtype = dtype;
-    L->Mtype = mtype;
-    L->nrow = m;
-    L->ncol = n;
-    L->Store = (void *) SUPERLU_MALLOC( sizeof(SCformat) );
-    if ( !(L->Store) ) ABORT("SUPERLU_MALLOC fails for L->Store");
-    Lstore = L->Store;
-    Lstore->nnz = nnz;
-    Lstore->nsuper = col_to_sup[n];
-    Lstore->nzval = nzval;
-    Lstore->nzval_colptr = nzval_colptr;
-    Lstore->rowind = rowind;
-    Lstore->rowind_colptr = rowind_colptr;
-    Lstore->col_to_sup = col_to_sup;
-    Lstore->sup_to_col = sup_to_col;
-
-}
-
-
-/*
- * Convert a row compressed storage into a column compressed storage.
- */
-void
-zCompRow_to_CompCol(int m, int n, int nnz, 
-                   doublecomplex *a, int *colind, int *rowptr,
-                   doublecomplex **at, int **rowind, int **colptr)
-{
-    register int i, j, col, relpos;
-    int *marker;
-
-    /* Allocate storage for another copy of the matrix. */
-    *at = (doublecomplex *) doublecomplexMalloc(nnz);
-    *rowind = (int *) intMalloc(nnz);
-    *colptr = (int *) intMalloc(n+1);
-    marker = (int *) intCalloc(n);
-    
-    /* Get counts of each column of A, and set up column pointers */
-    for (i = 0; i < m; ++i)
-       for (j = rowptr[i]; j < rowptr[i+1]; ++j) ++marker[colind[j]];
-    (*colptr)[0] = 0;
-    for (j = 0; j < n; ++j) {
-       (*colptr)[j+1] = (*colptr)[j] + marker[j];
-       marker[j] = (*colptr)[j];
-    }
-
-    /* Transfer the matrix into the compressed column storage. */
-    for (i = 0; i < m; ++i) {
-       for (j = rowptr[i]; j < rowptr[i+1]; ++j) {
-           col = colind[j];
-           relpos = marker[col];
-           (*rowind)[relpos] = i;
-           (*at)[relpos] = a[j];
-           ++marker[col];
-       }
-    }
-
-    SUPERLU_FREE(marker);
-}
-
-
-void
-zPrint_CompCol_Matrix(char *what, SuperMatrix *A)
-{
-    NCformat     *Astore;
-    register int i,n;
-    double       *dp;
-    
-    printf("\nCompCol matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    n = A->ncol;
-    Astore = (NCformat *) A->Store;
-    dp = (double *) Astore->nzval;
-    printf("nrow %d, ncol %d, nnz %d\n", A->nrow,A->ncol,Astore->nnz);
-    printf("nzval: ");
-    for (i = 0; i < 2*Astore->colptr[n]; ++i) printf("%f  ", dp[i]);
-    printf("\nrowind: ");
-    for (i = 0; i < Astore->colptr[n]; ++i) printf("%d  ", Astore->rowind[i]);
-    printf("\ncolptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->colptr[i]);
-    printf("\n");
-    fflush(stdout);
-}
-
-void
-zPrint_SuperNode_Matrix(char *what, SuperMatrix *A)
-{
-    SCformat     *Astore;
-    register int i, j, k, c, d, n, nsup;
-    double       *dp;
-    int *col_to_sup, *sup_to_col, *rowind, *rowind_colptr;
-    
-    printf("\nSuperNode matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    n = A->ncol;
-    Astore = (SCformat *) A->Store;
-    dp = (double *) Astore->nzval;
-    col_to_sup = Astore->col_to_sup;
-    sup_to_col = Astore->sup_to_col;
-    rowind_colptr = Astore->rowind_colptr;
-    rowind = Astore->rowind;
-    printf("nrow %d, ncol %d, nnz %d, nsuper %d\n", 
-          A->nrow,A->ncol,Astore->nnz,Astore->nsuper);
-    printf("nzval:\n");
-    for (k = 0; k <= Astore->nsuper; ++k) {
-      c = sup_to_col[k];
-      nsup = sup_to_col[k+1] - c;
-      for (j = c; j < c + nsup; ++j) {
-       d = Astore->nzval_colptr[j];
-       for (i = rowind_colptr[c]; i < rowind_colptr[c+1]; ++i) {
-         printf("%d\t%d\t%e\t%e\n", rowind[i], j, dp[d], dp[d+1]);
-          d += 2;      
-       }
-      }
-    }
-#if 0
-    for (i = 0; i < 2*Astore->nzval_colptr[n]; ++i) printf("%f  ", dp[i]);
-#endif
-    printf("\nnzval_colptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->nzval_colptr[i]);
-    printf("\nrowind: ");
-    for (i = 0; i < Astore->rowind_colptr[n]; ++i) 
-        printf("%d  ", Astore->rowind[i]);
-    printf("\nrowind_colptr: ");
-    for (i = 0; i <= n; ++i) printf("%d  ", Astore->rowind_colptr[i]);
-    printf("\ncol_to_sup: ");
-    for (i = 0; i < n; ++i) printf("%d  ", col_to_sup[i]);
-    printf("\nsup_to_col: ");
-    for (i = 0; i <= Astore->nsuper+1; ++i) 
-        printf("%d  ", sup_to_col[i]);
-    printf("\n");
-    fflush(stdout);
-}
-
-void
-zPrint_Dense_Matrix(char *what, SuperMatrix *A)
-{
-    DNformat     *Astore;
-    register int i, j, lda = Astore->lda;
-    double       *dp;
-    
-    printf("\nDense matrix %s:\n", what);
-    printf("Stype %d, Dtype %d, Mtype %d\n", A->Stype,A->Dtype,A->Mtype);
-    Astore = (DNformat *) A->Store;
-    dp = (double *) Astore->nzval;
-    printf("nrow %d, ncol %d, lda %d\n", A->nrow,A->ncol,lda);
-    printf("\nnzval: ");
-    for (j = 0; j < A->ncol; ++j) {
-        for (i = 0; i < 2*A->nrow; ++i) printf("%f  ", dp[i + j*2*lda]);
-        printf("\n");
-    }
-    printf("\n");
-    fflush(stdout);
-}
-
-/*
- * Diagnostic print of column "jcol" in the U/L factor.
- */
-void
-zprint_lu_col(char *msg, int jcol, int pivrow, int *xprune, GlobalLU_t *Glu)
-{
-    int     i, k, fsupc;
-    int     *xsup, *supno;
-    int     *xlsub, *lsub;
-    doublecomplex  *lusup;
-    int     *xlusup;
-    doublecomplex  *ucol;
-    int     *usub, *xusub;
-
-    xsup    = Glu->xsup;
-    supno   = Glu->supno;
-    lsub    = Glu->lsub;
-    xlsub   = Glu->xlsub;
-    lusup   = Glu->lusup;
-    xlusup  = Glu->xlusup;
-    ucol    = Glu->ucol;
-    usub    = Glu->usub;
-    xusub   = Glu->xusub;
-    
-    printf("%s", msg);
-    printf("col %d: pivrow %d, supno %d, xprune %d\n", 
-          jcol, pivrow, supno[jcol], xprune[jcol]);
-    
-    printf("\tU-col:\n");
-    for (i = xusub[jcol]; i < xusub[jcol+1]; i++)
-       printf("\t%d%10.4f, %10.4f\n", usub[i], ucol[i].r, ucol[i].i);
-    printf("\tL-col in rectangular snode:\n");
-    fsupc = xsup[supno[jcol]]; /* first col of the snode */
-    i = xlsub[fsupc];
-    k = xlusup[jcol];
-    while ( i < xlsub[fsupc+1] && k < xlusup[jcol+1] ) {
-       printf("\t%d\t%10.4f, %10.4f\n", lsub[i], lusup[k].r, lusup[k].i);
-       i++; k++;
-    }
-    fflush(stdout);
-}
-
-
-/*
- * Check whether tempv[] == 0. This should be true before and after 
- * calling any numeric routines, i.e., "panel_bmod" and "column_bmod". 
- */
-void zcheck_tempv(int n, doublecomplex *tempv)
-{
-    int i;
-       
-    for (i = 0; i < n; i++) {
-       if ((tempv[i].r != 0.0) || (tempv[i].i != 0.0))
-       {
-           fprintf(stderr,"tempv[%d] = {%f, %f}\n", i, tempv[i].r, tempv[i].i);
-           ABORT("zcheck_tempv");
-       }
-    }
-}
-
-
-void
-zGenXtrue(int n, int nrhs, doublecomplex *x, int ldx)
-{
-    int  i, j;
-    for (j = 0; j < nrhs; ++j)
-       for (i = 0; i < n; ++i) {
-           x[i + j*ldx].r = 1.0;
-           x[i + j*ldx].i = 0.0;
-       }
-}
-
-/*
- * Let rhs[i] = sum of i-th row of A, so the solution vector is all 1's
- */
-void
-zFillRHS(trans_t trans, int nrhs, doublecomplex *x, int ldx,
-         SuperMatrix *A, SuperMatrix *B)
-{
-    NCformat *Astore;
-    doublecomplex   *Aval;
-    DNformat *Bstore;
-    doublecomplex   *rhs;
-    doublecomplex one = {1.0, 0.0};
-    doublecomplex zero = {0.0, 0.0};
-    int      ldc;
-    char transc[1];
-
-    Astore = A->Store;
-    Aval   = (doublecomplex *) Astore->nzval;
-    Bstore = B->Store;
-    rhs    = Bstore->nzval;
-    ldc    = Bstore->lda;
-    
-    if ( trans == NOTRANS ) *(unsigned char *)transc = 'N';
-    else *(unsigned char *)transc = 'T';
-
-    sp_zgemm(transc, "N", A->nrow, nrhs, A->ncol, one, A,
-            x, ldx, zero, rhs, ldc);
-
-}
-
-/* 
- * Fills a doublecomplex precision array with a given value.
- */
-void 
-zfill(doublecomplex *a, int alen, doublecomplex dval)
-{
-    register int i;
-    for (i = 0; i < alen; i++) a[i] = dval;
-}
-
-
-
-/* 
- * Check the inf-norm of the error vector 
- */
-void zinf_norm_error(int nrhs, SuperMatrix *X, doublecomplex *xtrue)
-{
-    DNformat *Xstore;
-    double err, xnorm;
-    doublecomplex *Xmat, *soln_work;
-    doublecomplex temp;
-    int i, j;
-
-    Xstore = X->Store;
-    Xmat = Xstore->nzval;
-
-    for (j = 0; j < nrhs; j++) {
-      soln_work = &Xmat[j*Xstore->lda];
-      err = xnorm = 0.0;
-      for (i = 0; i < X->nrow; i++) {
-        z_sub(&temp, &soln_work[i], &xtrue[i]);
-       err = SUPERLU_MAX(err, z_abs(&temp));
-       xnorm = SUPERLU_MAX(xnorm, z_abs(&soln_work[i]));
-      }
-      err = err / xnorm;
-      printf("||X - Xtrue||/||X|| = %e\n", err);
-    }
-}
-
-
-
-/* Print performance of the code. */
-void
-zPrintPerf(SuperMatrix *L, SuperMatrix *U, mem_usage_t *mem_usage,
-           double rpg, double rcond, double *ferr,
-           double *berr, char *equed, SuperLUStat_t *stat)
-{
-    SCformat *Lstore;
-    NCformat *Ustore;
-    double   *utime;
-    flops_t  *ops;
-    
-    utime = stat->utime;
-    ops   = stat->ops;
-    
-    if ( utime[FACT] != 0. )
-       printf("Factor flops = %e\tMflops = %8.2f\n", ops[FACT],
-              ops[FACT]*1e-6/utime[FACT]);
-    printf("Identify relaxed snodes    = %8.2f\n", utime[RELAX]);
-    if ( utime[SOLVE] != 0. )
-       printf("Solve flops = %.0f, Mflops = %8.2f\n", ops[SOLVE],
-              ops[SOLVE]*1e-6/utime[SOLVE]);
-    
-    Lstore = (SCformat *) L->Store;
-    Ustore = (NCformat *) U->Store;
-    printf("\tNo of nonzeros in factor L = %d\n", Lstore->nnz);
-    printf("\tNo of nonzeros in factor U = %d\n", Ustore->nnz);
-    printf("\tNo of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz);
-       
-    printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
-          mem_usage->for_lu/1e6, mem_usage->total_needed/1e6,
-          mem_usage->expansions);
-       
-    printf("\tFactor\tMflops\tSolve\tMflops\tEtree\tEquil\tRcond\tRefine\n");
-    printf("PERF:%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f%8.2f\n",
-          utime[FACT], ops[FACT]*1e-6/utime[FACT],
-          utime[SOLVE], ops[SOLVE]*1e-6/utime[SOLVE],
-          utime[ETREE], utime[EQUIL], utime[RCOND], utime[REFINE]);
-    
-    printf("\tRpg\t\tRcond\t\tFerr\t\tBerr\t\tEquil?\n");
-    printf("NUM:\t%e\t%e\t%e\t%e\t%s\n",
-          rpg, rcond, ferr[0], berr[0], equed);
-    
-}
-
-
-
-
-int print_doublecomplex_vec(char *what, int n, doublecomplex *vec)
-{
-    int i;
-    printf("%s: n %d\n", what, n);
-    for (i = 0; i < n; ++i) printf("%d\t%f%f\n", i, vec[i].r, vec[i].i);
-    return 0;
-}
-
diff --git a/tests/laplacian.cc b/tests/laplacian.cc
index a8333043..a4450410 100644
--- a/tests/laplacian.cc
+++ b/tests/laplacian.cc
@@ -35,7 +35,6 @@
 #include "getfem/getfem_export.h"
 #include "getfem/getfem_regular_meshes.h"
 #include "getfem/getfem_derivatives.h"
-#include "getfem/getfem_superlu.h"
 #include "gmm/gmm.h"
 using std::endl; using std::cout; using std::cerr;
 using std::ends; using std::cin;
@@ -284,7 +283,9 @@ bool laplacian_problem::solve(void) {
     gmm::gmres(SM, U, B, P, 50, iter);
   } else {
     double rcond;
+#if defined(GMM_USES_SUPERLU)
     gmm::SuperLU_solve(SM, U, B, rcond);
+#endif
     cout << "cond = " << 1/rcond << "\n";
   }
 
diff --git a/tests/make_gmm_test.pl b/tests/make_gmm_test.pl
index 1afaf42a..f469c5da 100755
--- a/tests/make_gmm_test.pl
+++ b/tests/make_gmm_test.pl
@@ -23,7 +23,6 @@ sub numerique { $a <=> $b; }
 $nb_iter = 1;                # number of iterations on each test
 $islocal = 0;
 $with_qd = 0;                # test also with dd_real and qd_real
-$with_lapack = 0;            # link with lapack
 $srcdir = $ENV{srcdir};      # source directory
 $tests_to_be_done = "";
 $fix_base_type = -1;
@@ -37,9 +36,6 @@ while(@ARGV) {               # read optional parameters
   elsif ($param eq "with-qd") {
     $with_qd = 1;
   }
-  elsif ($param eq "with-lapack") {
-    $with_lapack = 1;
-  }
   elsif ($param eq "float") {
     $fix_base_type = 0;
   }
@@ -75,7 +71,6 @@ while(@ARGV) {               # read optional parameters
     print "valid parameters are:\n";
     print ". the number of iterations on each test\n";
     print ". with-qd : test also with dd_real and qd_real\n";
-    print ". with-lapack : link with lapack\n";
     print ". double, float, complex_double or complex_float";
     print " to fix the base type\n";
     print ". source name of a test procedure\n";
@@ -94,11 +89,6 @@ if ($tests_to_be_done eq "") {
   $tests_to_be_done = `ls $srcdir/gmm_torture*.cc`;  # list of tests
 }
 
-if ($with_qd && $with_lapack) {
-  print "Options with_qd and with_lapack are not compatible\n";
-  exit(1);
-}
-
 $nb_test = 0;                # number of test procedures
 $tests_list = $tests_to_be_done;
 while ($tests_list)
@@ -116,7 +106,6 @@ for ($iter = 1; $iter <= $nb_iter; ++$iter) {
 
     if ($nb_iter == 1) { print "Testing  $org_name"; }
     else { print "Test $iter for $org_name"; }
-    if ($with_lapack) { print " linked with lapack"; }
     if ($with_qd) { print " with qd types"; }
     print "\n";
 
@@ -131,14 +120,12 @@ for ($iter = 1; $iter <= $nb_iter; ++$iter) {
 
     print TMPF "\n\n";
 
-    if ($with_lapack) {
-      print TMPF "#include<gmm_lapack_interface.h>\n\n";
-    }
-
     if ($with_qd) {
       print TMPF "#include <qd/dd.h>\n";
       print TMPF "#include <qd/qd.h>\n";
       print TMPF "#include <qd/fpu.h>\n\n";
+    } else {
+      print TMPF "#include <gmm/gmm_lapack_interface.h>\n\n";
     }
 
     $reading_param = 1;
@@ -165,10 +152,6 @@ for ($iter = 1; $iter <= $nb_iter; ++$iter) {
     # $TYPES[5] = "std::complex<long double> ";
     $NB_TYPES = 4.0;
 
-    if ($with_lapack) {
-      $NB_TYPES = 4.0;
-    }
-
     if ($with_qd) {
       $TYPES[0] = "dd_real";
       $TYPES[1] = "qd_real";
@@ -214,14 +197,14 @@ for ($iter = 1; $iter <= $nb_iter; ++$iter) {
     print TMPF "    try {\n\n";
     for ($j = 0; $j < $nb_param; ++$j) {
       $a = rand(); $b = rand();
-      if ($with_lapack) { $a = $b = 1.0; }
+      #$a = $b = 1.0;
       $sizepp = $sizep + int(50.0*rand());
       $step = $sizep; if ($step == 0) { ++$step; }
       $step = int(1.0*int($sizepp/$step - 1)*rand()) + 1;
 
       if (($param[$j] == 1) || ($param[$j] == 2)) { # vectors
        $lt = $VECTOR_TYPES[0];
-       if ($param[$j] == 2 && $with_lapack==0) {
+       if ($param[$j] == 2) {
          $lt = $VECTOR_TYPES[int($NB_VECTOR_TYPES * rand())];
        }
        if ($a < 0.1) {
@@ -265,9 +248,7 @@ for ($iter = 1; $iter <= $nb_iter; ++$iter) {
        $sn = $s; if ($b < 0.3) { $sn = $s + int(50.0*rand()); }
        $param_name[$j] = "param$j";
        $lt = $MATRIX_TYPES[0];
-       if ($with_lapack==0) {
-         $lt = $MATRIX_TYPES[int($NB_MATRIX_TYPES * rand())];
-       }
+        $lt = $MATRIX_TYPES[int($NB_MATRIX_TYPES * rand())];
        $li = "      $lt param$j($sm, $sn);";
        
        if ($a < 0.3 || $b < 0.3) {
@@ -356,12 +337,7 @@ for ($iter = 1; $iter <= $nb_iter; ++$iter) {
     $compile_options=`sh ../gmm-config --build-flags`;
     chomp($compile_options);
     $compile_options="$compile_options -I$srcdir/../src -I$srcdir/../include 
-I../src -I../include";
-    $compile_libs="-lm";
-
-    if ($with_lapack) {
-      $compile_libs="-llapack -lblas -lg2c $compile_libs";
-      $compile_options="$compile_options -DGMM_USES_LAPACK"
-    }
+    $compile_libs=`sh ../gmm-config --libs`;
     if ($with_qd) { $compile_libs="-lqd $compile_libs"; }
 #   print "$compilo $compile_options $dest_name -o $root_name $compile_libs\n";
     print `$compilo $compile_options $dest_name -o $root_name $compile_libs`;
diff --git a/tests/schwarz_additive.cc b/tests/schwarz_additive.cc
index 47f0d481..9f0d8ec8 100644
--- a/tests/schwarz_additive.cc
+++ b/tests/schwarz_additive.cc
@@ -26,15 +26,14 @@
 /*                                                                        */
 /**************************************************************************/
 
-#define GETFEM_USES_SUPERLU
-
 #include "getfem/getfem_assembling.h"
 #include "getfem/getfem_regular_meshes.h"
 #include "getfem/getfem_export.h"
 #include "gmm/gmm.h"
-#ifdef GMM_USES_MPI
+#if defined(GMM_USES_MPI)
 #include <mpi.h>
 #endif
+
 using std::endl; using std::cout; using std::cerr;
 using std::ends; using std::cin;
 
@@ -77,7 +76,7 @@ struct pb_data {
 
   int solve_cg();
   int solve_cg2();
-#if defined(GETFEM_USES_SUPERLU)
+#if defined(GMM_USES_SUPERLU)
   int solve_superlu();
 #endif
   int solve_schwarz(int);
@@ -87,7 +86,7 @@ struct pb_data {
     switch (solver) {
     case 0 : return solve_cg();
     case 1 : return solve_cg2();
-#if defined(GETFEM_USES_SUPERLU)
+#if defined(GMM_USES_SUPERLU)
     case 2 : return solve_superlu();
 #endif
     default : return solve_schwarz(solver);
@@ -105,7 +104,7 @@ void pb_data::init(bgeot::md_param &params) {
   /***********************************************************************/
   /*  READING PARAMETER FILE.                                            */
   /***********************************************************************/
-  
+
   /* parametres physiques */
   N = int(params.int_value("N", "Dimension"));
   mu = params.real_value("MU", "Stiffness parameter mu");
@@ -114,7 +113,7 @@ void pb_data::init(bgeot::md_param &params) {
   lambda = params.real_value("LAMBDA", "lambda");
   D.resize(N); gmm::clear(D);
   D[N-1] = params.real_value("D", "Dirichlet condition");
-  
+
   /* parametres numeriques */
   LX = params.real_value("LX", "Size in X");
   LY = params.real_value("LY", "Size in Y");
@@ -126,9 +125,9 @@ void pb_data::init(bgeot::md_param &params) {
   overlap = params.real_value("OVERLAP", "overlap");
   K = int(params.int_value("K", "Degree"));
   solver = int(params.int_value("SOLVER", "solver"));
-  subdomsize = params.real_value("SUBDOMSIZE", "sub-domains size");  
+  subdomsize = params.real_value("SUBDOMSIZE", "sub-domains size");
   std::string meshname(params.string_value("MESHNAME",
-                            "mesh file name"));
+                       "mesh file name"));
   std::cout << "\n\n";
 
   /***********************************************************************/
@@ -144,38 +143,38 @@ void pb_data::init(bgeot::md_param &params) {
     base_node org(N); gmm::clear(org);
     std::vector<bgeot::base_small_vector> vtab(N);
     std::vector<size_type> ref(N); std::fill(ref.begin(), ref.end(), NX);
-    for (int i = 0; i < N; i++) { 
+    for (int i = 0; i < N; i++) {
       vtab[i] = bgeot::base_small_vector(N); gmm::clear(vtab[i]);
       (vtab[i])[i] = ((i == 0) ? LX : ((i == 1) ? LY : LZ)) / scalar_type(NX);
     }
     getfem::parallelepiped_regular_simplex_mesh(mesh, dim_type(N), org,
-                                               &(vtab[0]), &(ref[0]));
+                                                &(vtab[0]), &(ref[0]));
   }
 
   if (USECOARSE) { // coarse mesh
     base_node org(N); gmm::clear(org);
     std::vector<bgeot::base_small_vector> vtab(N);
     std::vector<size_type> ref(N); std::fill(ref.begin(), ref.end(), NXCOARSE);
-    for (int i = 0; i < N; i++) { 
+    for (int i = 0; i < N; i++) {
       vtab[i] = bgeot::base_small_vector(N); gmm::clear(vtab[i]);
-      (vtab[i])[i] = 
-       ((i == 0) ? LX : ((i == 1) ? LY : LZ)) / scalar_type(NXCOARSE);
+      (vtab[i])[i] = ((i == 0) ? LX
+                               : ((i == 1) ? LY : LZ)) / scalar_type(NXCOARSE);
     }
     getfem::parallelepiped_regular_simplex_mesh(mesh_coarse, dim_type(N), org,
-                                               &(vtab[0]), &(ref[0]));
+                                                &(vtab[0]), &(ref[0]));
   }
-  
+
   mesh.trans_of_convex(0);
   mesh.optimize_structure();
 
   dal::bit_vector nn = mesh.convex_index(dim_type(N));
   char method[500];
-  
+
   snprintf(method, 499, "FEM_PK(%d, %d)", N, K);
   mim.set_integration_method(nn, bgeot::dim_type(2*K));
   mef.set_finite_element(nn, getfem::fem_descriptor(method));
   mef_coarse.set_finite_element(mesh_coarse.convex_index(dim_type(N)),
-                               getfem::fem_descriptor(method));
+                                getfem::fem_descriptor(method));
   mef_data.set_finite_element(nn, getfem::fem_descriptor(method));
   mef.set_qdim(dim_type(N));
   mef_coarse.set_qdim(dim_type(N));
@@ -186,9 +185,10 @@ void pb_data::init(bgeot::md_param &params) {
     int k = mesh.structure_of_convex(j)->nb_faces();
     for (short_type i = 0; i < k; i++) {
       if (mesh.is_convex_having_neighbor(j, i)) {
-       gmm::copy(mesh.normal_of_face_of_convex(j, i, 0), un);
-       gmm::scale(un, 1/gmm::vect_norm2(un));
-       if (gmm::abs(un[N-1] - 1.0) < 1.0E-3) mesh.region(0).add(j, i);
+        gmm::copy(mesh.normal_of_face_of_convex(j, i, 0), un);
+        gmm::scale(un, 1/gmm::vect_norm2(un));
+        if (gmm::abs(un[N-1] - 1.0) < 1.0E-3)
+          mesh.region(0).add(j, i);
       }
     }
   }
@@ -198,7 +198,7 @@ void pb_data::assemble() {
   size_type nb_dof = mef.nb_dof();
   std::cout << "number of dof : "<< nb_dof << endl;
   size_type nb_dof_data = mef_data.nb_dof();
- 
+
   F.resize(nb_dof); gmm::clear(F);
   U.resize(nb_dof); gmm::clear(U);
   gmm::resize(RM, nb_dof, nb_dof);
@@ -214,7 +214,7 @@ void pb_data::assemble() {
   linalg_vector STF(N * nb_dof_data);
   interpolation_function(mef_data, STF, vol_force);
   getfem::asm_source_term(F, mim, mef, mef_data, STF);
-  
+
   linalg_vector UD(nb_dof);
   for (size_type j = 0; j < nb_dof/N; j++)
     for (size_type k = 0; k < size_type(N); k++) UD[j*N + k] = D[k];
@@ -228,10 +228,10 @@ int pb_data::solve_cg() {
   return int(iter.get_iteration());
 }
 
-#if defined(GETFEM_USES_SUPERLU)
+#if defined(GMM_USES_SUPERLU)
 int pb_data::solve_superlu() {
   double rcond;
-  SuperLU_solve(RM, U, F, rcond);
+  gmm::SuperLU_solve(RM, U, F, rcond);
   return 1;
 }
 #endif
@@ -255,7 +255,7 @@ int pb_data::solve_schwarz(int version) {
   size_type nsd = vB.size();
 
   cout << "Nomber of sub-domains = " << nsd + (USECOARSE != 0) << endl;
-  
+
   if (USECOARSE) {
     vB.resize(nsd+1);
     cout << "interpolation coarse mesh\n";
@@ -264,21 +264,21 @@ int pb_data::solve_schwarz(int version) {
     getfem::interpolation(mef_coarse, mef, vB[nsd], true);
     ++nsd;
   }
-  
+
   gmm::iteration iter(residual, 1, 1000000);
   switch (version) {
   case 3 : gmm::additive_schwarz(RM, U, F,
-             gmm::ildlt_precond<general_sparse_matrix>(), vB, iter,
-             gmm::using_cg(), gmm::using_cg());
+           gmm::ildlt_precond<general_sparse_matrix>(), vB, iter,
+           gmm::using_cg(), gmm::using_cg());
     break;
   case 4 : gmm::additive_schwarz(RM, U, F,
-             gmm::ilu_precond<general_sparse_matrix>(), vB, iter,
-             gmm::using_gmres(), gmm::using_gmres());
+           gmm::ilu_precond<general_sparse_matrix>(), vB, iter,
+           gmm::using_gmres(), gmm::using_gmres());
     break;
-#if defined(GETFEM_USES_SUPERLU)
+#if defined(GMM_USES_SUPERLU)
   case 5 : gmm::additive_schwarz(RM, U, F,
-             gmm::ilu_precond<general_sparse_matrix>(), vB, iter,
-             gmm::using_superlu(), gmm::using_cg());
+           gmm::ilu_precond<general_sparse_matrix>(), vB, iter,
+           gmm::using_superlu(), gmm::using_cg());
     break;
 #endif
   }
@@ -287,19 +287,19 @@ int pb_data::solve_schwarz(int version) {
 
 
 int main(int argc, char *argv[]) {
-#ifdef GMM_USES_MPI
+#if defined(GMM_USES_MPI)
     MPI_Init(&argc,&argv);
 #endif
- 
+
   try {
     bgeot::md_param params;
     pb_data p;
-    
+
     std::cout << "initialization ...\n";
     params.read_command_line(argc, argv);
     p.init(params);
     p.mesh.stat();
-    
+
     p.assemble();
 
     double rutime = gmm::uclock_sec();
@@ -311,7 +311,7 @@ int main(int argc, char *argv[]) {
     cout << "final residual : " << gmm::vect_norm2(p.F) << endl;
   }
   GMM_STANDARD_CATCH_ERROR;
-#ifdef GMM_USES_MPI
+#if defined(GMM_USES_MPI)
    MPI_Finalize();
 #endif
   return 0;
diff --git a/tests/test_condensation.cc b/tests/test_condensation.cc
index 2b5ade1f..d62c3ed2 100644
--- a/tests/test_condensation.cc
+++ b/tests/test_condensation.cc
@@ -34,7 +34,11 @@ int main(int argc, char *argv[]) {
   int ret=0;
 
 #if defined(GMM_USES_MUMPS)
-  
+  std::string lsolver("mumps");
+#else
+  std::string lsolver("superlu");
+#endif
+
   gmm::set_traces_level(1);
 
   bgeot::md_param PARAM;
@@ -140,16 +144,16 @@ int main(int argc, char *argv[]) {
   std::cout<<"SOLVING MODEL 1 (without internal variables)"<<std::endl;
   gmm::iteration iter(1E-9, 1, 30);
   if (DIFFICULTY % 10000 > 999)
-    getfem::standard_solve(md1, iter, getfem::rselect_linear_solver(md1, 
"mumps"), ls1B);
+    getfem::standard_solve(md1, iter, getfem::rselect_linear_solver(md1, 
lsolver), ls1B);
   else
-    getfem::standard_solve(md1, iter, getfem::rselect_linear_solver(md1, 
"mumps"), ls1A);
+    getfem::standard_solve(md1, iter, getfem::rselect_linear_solver(md1, 
lsolver), ls1A);
 
   std::cout<<"SOLVING MODEL 2 (with internal variables)"<<std::endl;
   iter.init();
   if (DIFFICULTY % 10000 > 999)
-    getfem::standard_solve(md2, iter, getfem::rselect_linear_solver(md2, 
"mumps"), ls2B);
+    getfem::standard_solve(md2, iter, getfem::rselect_linear_solver(md2, 
lsolver), ls2B);
   else
-    getfem::standard_solve(md2, iter, getfem::rselect_linear_solver(md2, 
"mumps"), ls2A);
+    getfem::standard_solve(md2, iter, getfem::rselect_linear_solver(md2, 
lsolver), ls2A);
 
   if (debug) {
     std::cout<<std::endl<<"u1:"<<std::endl;
@@ -184,8 +188,6 @@ int main(int argc, char *argv[]) {
 
   std::cout<<"Test with difficulty "<<DIFFICULTY<<" returned "<<ret<<std::endl;
 
-#endif
-  
   GETFEM_MPI_FINALIZE;
   return ret;
 }



reply via email to

[Prev in Thread] Current Thread [Next in Thread]