[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Getfem-commits] r5457 - in /trunk/getfem: contrib/opt_assembly/ src/ sr
From: |
Yves . Renard |
Subject: |
[Getfem-commits] r5457 - in /trunk/getfem: contrib/opt_assembly/ src/ src/getfem/ |
Date: |
Wed, 02 Nov 2016 11:59:11 -0000 |
Author: renard
Date: Wed Nov 2 12:59:10 2016
New Revision: 5457
URL: http://svn.gna.org/viewcvs/getfem?rev=5457&view=rev
Log:
optimization of the gradient computation
Modified:
trunk/getfem/contrib/opt_assembly/opt_assembly.cc
trunk/getfem/src/bgeot_geometric_trans.cc
trunk/getfem/src/getfem/bgeot_geometric_trans.h
trunk/getfem/src/getfem/bgeot_tensor.h
trunk/getfem/src/getfem_fem.cc
Modified: trunk/getfem/contrib/opt_assembly/opt_assembly.cc
URL:
http://svn.gna.org/viewcvs/getfem/trunk/getfem/contrib/opt_assembly/opt_assembly.cc?rev=5457&r1=5456&r2=5457&view=diff
==============================================================================
--- trunk/getfem/contrib/opt_assembly/opt_assembly.cc (original)
+++ trunk/getfem/contrib/opt_assembly/opt_assembly.cc Wed Nov 2 12:59:10 2016
@@ -364,9 +364,10 @@
Iu, Iu,
getfem::asm_stiffness_matrix_for_homogeneous_linear_elasticity
(K, mim2, mf_u, lambda, mu));
- if (all)
+ if (all) {
MAT_TEST_2(ndofu, ndofu, "lambda*Div_Test_u*Div_Test2_u "
"+ mu*(Grad_Test_u'+Grad_Test_u):Grad_Test2_u", mim2, Iu, Iu);
+ }
// MAT_TEST_2(ndofu, ndofu,
// "lambda*((address@hidden))"
@@ -425,7 +426,7 @@
MAT_TEST_1("Test for linear non homogeneous elasticity stiffness matrix",
ndofu, ndofu, "(Div_Test_u*(lambda2*Id(meshdim)) "
- "+ mu2*Sym(Grad_Test_u)):Grad_Test2_u",
+ "+ (2*mu2)*Sym(Grad_Test_u)):Grad_Test2_u",
mim2, Iu, Iu,
getfem::asm_stiffness_matrix_for_linear_elasticity
(K, mim2, mf_u, mf_p, lambda2, mu2));
@@ -455,47 +456,47 @@
if (all || only_one == 1) // ndofu = 321602 ndofp = 160801 ndofchi = 1201
test_new_assembly(2, 400, 1);
// Vector source term : 0.20 | 0.66 |
- // Nonlinear residual : 0.31 | |
+ // Nonlinear residual : 0.27 | |
// Mass (scalar) : 0.18 | 0.56 | 0.04 | 0.06 | 0.06 | 0.06 |
// Mass (vector) : 0.30 | 0.80 | 0.09 | 0.15 | 0.06 | 0.09 |
- // Laplacian : 0.16 | 0.80 | 0.04 | 0.05 | 0.06 | 0.05 |
+ // Laplacian : 0.15 | 0.80 | 0.04 | 0.05 | 0.06 | 0.04 |
// Homogeneous elas : 0.30 | 1.88 | 0.08 | 0.13 | 0.06 | 0.09 |
- // Non-homogeneous elast: 0.35 | 2.26 | 0.09 | 0.16 | 0.06 | 0.13 |
+ // Non-homogeneous elast: 0.34 | 2.26 | 0.09 | 0.16 | 0.06 | 0.12 |
if (all || only_one == 2) // ndofu = 151959 ndofp = 50653 ndofchi = 6553
test_new_assembly(3, 36, 1);
// Vector source term : 0.26 | 0.79 |
- // Nonlinear residual : 0.54 | |
+ // Nonlinear residual : 0.49 | |
// Mass (scalar) : 0.21 | 0.58 | 0.05 | 0.08 | 0.08 | 0.06 |
// Mass (vector) : 0.68 | 1.37 | 0.17 | 0.27 | 0.08 | 0.33 |
// Laplacian : 0.17 | 1.15 | 0.03 | 0.06 | 0.08 | 0.03 |
- // Homogeneous elas : 0.79 | 4.25 | 0.26 | 0.33 | 0.08 | 0.38 |
+ // Homogeneous elas : 0.78 | 4.25 | 0.26 | 0.33 | 0.08 | 0.37 |
// Non-homogeneous elast: 0.83 | 6.29 | 0.26 | 0.33 | 0.08 | 0.42 |
if (all || only_one == 3) // ndofu = 321602 ndofp = 160801 ndofchi = 1201
test_new_assembly(2, 200, 2);
// Vector source term : 0.09 | 0.23 |
- // Nonlinear residual : 0.15 | |
- // Mass (scalar) : 0.09 | 0.25 | 0.02 | 0.03 | 0.03 | 0.03 |
+ // Nonlinear residual : 0.12 | |
+ // Mass (scalar) : 0.08 | 0.25 | 0.02 | 0.03 | 0.03 | 0.02 |
// Mass (vector) : 0.26 | 0.44 | 0.05 | 0.09 | 0.03 | 0.14 |
- // Laplacian : 0.09 | 0.37 | 0.02 | 0.03 | 0.03 | 0.03 |
- // Homogeneous elas : 0.26 | 1.28 | 0.06 | 0.09 | 0.03 | 0.14 |
- // Non-homogeneous elast: 0.28 | 2.38 | 0.06 | 0.10 | 0.03 | 0.16 |
+ // Laplacian : 0.08 | 0.37 | 0.02 | 0.03 | 0.03 | 0.02 |
+ // Homogeneous elas : 0.24 | 1.28 | 0.06 | 0.09 | 0.03 | 0.12 |
+ // Non-homogeneous elast: 0.26 | 2.38 | 0.06 | 0.10 | 0.03 | 0.13 |
if (all || only_one == 4) // ndofu = 151959 ndofp = 50653 ndofchi = 6553
test_new_assembly(3, 18, 2);
// Vector source term : 0.13 | 0.23 |
- // Nonlinear residual : 0.37 | |
+ // Nonlinear residual : 0.31 | |
// Mass (scalar) : 0.11 | 0.25 | 0.05 | 0.05 | 0.03 | 0.03 |
// Mass (vector) : 1.13 | 0.89 | 0.21 | 0.35 | 0.03 | 0.75 |
- // Laplacian : 0.10 | 0.53 | 0.03 | 0.04 | 0.03 | 0.03 |
- // Homogeneous elas : 1.66 | 3.35 | 0.59 | 0.73 | 0.03 | 0.90 |
- // Non-homogeneous elast: 1.70 | 9.08 | 0.59 | 0.73 | 0.03 | 0.94 |
+ // Laplacian : 0.08 | 0.53 | 0.03 | 0.03 | 0.03 | 0.02 |
+ // Homogeneous elas : 1.65 | 3.35 | 0.59 | 0.73 | 0.03 | 0.89 |
+ // Non-homogeneous elast: 1.68 | 9.08 | 0.59 | 0.73 | 0.03 | 0.92 |
if (all || only_one == 5) // ndofu = 151959 ndofp = 50653 ndofchi = 6553
test_new_assembly(3, 9, 4);
// Vector source term : 0.11 | 0.19 |
- // Nonlinear residual : 0.36 | |
+ // Nonlinear residual : 0.29 | |
// Mass (scalar) : 0.51 | 0.34 | 0.09 | 0.16 | 0.01 | 0.33 |
// Mass (vector) : 4.37 | 1.31 | 0.41 | 1.27 | 0.01 | 3.10 |
- // Laplacian : 0.40 | 0.76 | 0.09 | 0.14 | 0.01 | 0.25 |
- // Homogeneous elas : 8.93 | 5.23 | 0.82 | 1.41 | 0.01 | 7.49 |
+ // Laplacian : 0.36 | 0.76 | 0.09 | 0.14 | 0.01 | 0.21 |
+ // Homogeneous elas : 8.85 | 5.23 | 0.82 | 1.41 | 0.01 | 7.41 |
// Non-homogeneous elast: 8.94 | 47.4 | 0.82 | 1.41 | 0.01 | 7.50 |
// Conclusions :
Modified: trunk/getfem/src/bgeot_geometric_trans.cc
URL:
http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/bgeot_geometric_trans.cc?rev=5457&r1=5456&r2=5457&view=diff
==============================================================================
--- trunk/getfem/src/bgeot_geometric_trans.cc (original)
+++ trunk/getfem/src/bgeot_geometric_trans.cc Wed Nov 2 12:59:10 2016
@@ -28,26 +28,86 @@
namespace bgeot {
-std::vector<scalar_type>& __aux1(){
- DEFINE_STATIC_THREAD_LOCAL(std::vector<scalar_type>, v);
- return v;
-}
-
-std::vector<scalar_type>& __aux2(){
- DEFINE_STATIC_THREAD_LOCAL(std::vector<scalar_type>, v);
- return v;
-}
-
-std::vector<scalar_type>& __aux3(){
- DEFINE_STATIC_THREAD_LOCAL(std::vector<scalar_type>, v);
- return v;
-}
-
-std::vector<int>& __ipvt_aux(){
- DEFINE_STATIC_THREAD_LOCAL(std::vector<int>, vi);
- return vi;
-}
+ std::vector<scalar_type>& __aux1(){
+ DEFINE_STATIC_THREAD_LOCAL(std::vector<scalar_type>, v);
+ return v;
+ }
+ std::vector<scalar_type>& __aux2(){
+ DEFINE_STATIC_THREAD_LOCAL(std::vector<scalar_type>, v);
+ return v;
+ }
+
+ std::vector<scalar_type>& __aux3(){
+ DEFINE_STATIC_THREAD_LOCAL(std::vector<scalar_type>, v);
+ return v;
+ }
+
+ std::vector<int>& __ipvt_aux(){
+ DEFINE_STATIC_THREAD_LOCAL(std::vector<int>, vi);
+ return vi;
+ }
+
+ // Optimized matrix mult for small matrices. To be verified.
+ // Multiply the matrix A of size MxN by B of size NxP in C of size MxP
+ void mat_mult(const scalar_type *A, const scalar_type *B, scalar_type *C,
+ size_type M, size_type N, size_type P) {
+ auto itC = C; auto itB = B;
+ for (size_type j = 0; j < P; ++j, itB += N)
+ for (size_type i = 0; i < M; ++i, ++itC) {
+ auto itA = A+i, itB1 = itB;
+ *itC = (*itA) * (*itB1);
+ for (size_type k = 1; k < N; ++k)
+ { itA += M; ++itB1; *itC += (*itA) * (*itB1); }
+ }
+ }
+
+ // Optimized matrix mult for small matrices.
+ // Multiply the matrix A of size MxN by the transpose of B of size PxN
+ // in C of size MxP
+ void mat_tmult(const scalar_type *A, const scalar_type *B, scalar_type *C,
+ size_type M, size_type N, size_type P) {
+ auto itC = C;
+ switch (N) {
+ case 0 : std::fill(C, C+M*P, scalar_type(0)); break;
+ case 1 :
+ for (size_type j = 0; j < P; ++j)
+ for (size_type i = 0; i < M; ++i, ++itC) {
+ auto itA = A+i, itB = B+j;
+ *itC = (*itA) * (*itB);
+ }
+ break;
+ case 2 :
+ for (size_type j = 0; j < P; ++j)
+ for (size_type i = 0; i < M; ++i, ++itC) {
+ auto itA = A+i, itB = B+j;
+ *itC = (*itA) * (*itB);
+ itA += M; itB += P; *itC += (*itA) * (*itB);
+ }
+ break;
+ case 3 :
+ for (size_type j = 0; j < P; ++j)
+ for (size_type i = 0; i < M; ++i, ++itC) {
+ auto itA = A+i, itB = B+j;
+ *itC = (*itA) * (*itB);
+ itA += M; itB += P; *itC += (*itA) * (*itB);
+ itA += M; itB += P; *itC += (*itA) * (*itB);
+ }
+ break;
+ default :
+ for (size_type j = 0; j < P; ++j)
+ for (size_type i = 0; i < M; ++i, ++itC) {
+ auto itA = A+i, itB = B+j;
+ *itC = (*itA) * (*itB);
+ for (size_type k = 1; k < N; ++k)
+ { itA += M; itB += P; *itC += (*itA) * (*itB); }
+ }
+ }
+ }
+
+
+
+
// Optimized lu_factor for small square matrices
size_type lu_factor(scalar_type *A, std::vector<int> &ipvt,
size_type N) {
Modified: trunk/getfem/src/getfem/bgeot_geometric_trans.h
URL:
http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/getfem/bgeot_geometric_trans.h?rev=5457&r1=5456&r2=5457&view=diff
==============================================================================
--- trunk/getfem/src/getfem/bgeot_geometric_trans.h (original)
+++ trunk/getfem/src/getfem/bgeot_geometric_trans.h Wed Nov 2 12:59:10 2016
@@ -515,6 +515,15 @@
{ return lu_det(&(*(A.begin())), A.nrows()); }
inline scalar_type lu_inverse(base_matrix &A, bool doassert = true)
{ return lu_inverse(&(*(A.begin())), A.nrows(), doassert); }
+ // Optimized matrix mult for small matrices.
+ // Multiply the matrix A of size MxN by B of size NxP in C of size MxP
+ void mat_mult(const scalar_type *A, const scalar_type *B, scalar_type *C,
+ size_type M, size_type N, size_type P);
+ // Optimized matrix mult for small matrices.
+ // Multiply the matrix A of size MxN by the transpose of B of size PxN
+ // in C of size MxP
+ void mat_tmult(const scalar_type *A, const scalar_type *B, scalar_type *C,
+ size_type M, size_type N, size_type P);
} /* end of namespace bgeot. */
Modified: trunk/getfem/src/getfem/bgeot_tensor.h
URL:
http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/getfem/bgeot_tensor.h?rev=5457&r1=5456&r2=5457&view=diff
==============================================================================
--- trunk/getfem/src/getfem/bgeot_tensor.h (original)
+++ trunk/getfem/src/getfem/bgeot_tensor.h Wed Nov 2 12:59:10 2016
@@ -200,34 +200,34 @@
inline size_type order(void) const { return sizes_.size(); }
void init(const multi_index &c) {
- multi_index::const_iterator it = c.begin();
+ auto it = c.begin();
size_type d = 1;
sizes_ = c; coeff.resize(c.size());
- multi_index::iterator p = coeff.begin(), pe = coeff.end();
+ auto p = coeff.begin(), pe = coeff.end();
for ( ; p != pe; ++p, ++it) { *p = d; d *= *it; }
this->resize(d);
}
- void init() { sizes_.resize(0); coeff.resize(0); this->resize(1); }
-
- void init(size_type i) {
+ inline void init() { sizes_.resize(0); coeff.resize(0); this->resize(1); }
+
+ inline void init(size_type i) {
sizes_.resize(1); sizes_[0] = i; coeff.resize(1); coeff[0] = 1;
this->resize(i);
}
- void init(size_type i, size_type j) {
+ inline void init(size_type i, size_type j) {
sizes_.resize(2); sizes_[0] = i; sizes_[1] = j;
coeff.resize(2); coeff[0] = 1; coeff[1] = i;
this->resize(i*j);
}
- void init(size_type i, size_type j, size_type k) {
+ inline void init(size_type i, size_type j, size_type k) {
sizes_.resize(3); sizes_[0] = i; sizes_[1] = j; sizes_[2] = k;
coeff.resize(3); coeff[0] = 1; coeff[1] = i; coeff[2] = i*j;
this->resize(i*j*k);
}
- void init(size_type i, size_type j, size_type k, size_type l) {
+ inline void init(size_type i, size_type j, size_type k, size_type l) {
sizes_.resize(4);
sizes_[0] = i; sizes_[1] = j; sizes_[2] = k; sizes_[3] = k;
coeff.resize(4);
@@ -235,29 +235,29 @@
this->resize(i*j*k*l);
}
- void adjust_sizes(const multi_index &mi) {
- if (!mi.size() || (mi.size() != sizes().size())
- || !(std::equal(mi.begin(), mi.end(), sizes().begin())))
- init(mi);
- }
-
- void adjust_sizes(void) { if (sizes_.size() || this->size() != 1) init(); }
-
- void adjust_sizes(size_type i)
- { if (sizes_.size() != 1 || sizes_[0] != i) init(i); }
-
- void adjust_sizes(size_type i, size_type j)
- { if (sizes_.size() != 2 || sizes_[0] != i || sizes_[1] != j) init(i, j); }
-
- void adjust_sizes(size_type i, size_type j, size_type k) {
- if (sizes_.size() != 3 || sizes_[0] != i || sizes_[1] != j
- || sizes_[2] != k)
- init(i, j, k);
- }
- void adjust_sizes(size_type i, size_type j, size_type k, size_type l) {
- if (sizes_.size() != 3 || sizes_[0] != i || sizes_[1] != j
- || sizes_[2] != k || sizes_[3] != l)
- init(i, j, k, l);
+ inline void adjust_sizes(const multi_index &mi) { init(mi); }
+ inline void adjust_sizes(void) { init(); }
+ inline void adjust_sizes(size_type i) { init(i); }
+ inline void adjust_sizes(size_type i, size_type j) { init(i, j); }
+ inline void adjust_sizes(size_type i, size_type j, size_type k)
+ { init(i, j, k); }
+ inline void adjust_sizes(size_type i, size_type j, size_type k, size_type
l)
+ { init(i, j, k, l); }
+
+ inline size_type adjust_sizes_changing_last(const tensor &t, size_type P) {
+ const multi_index &mi = t.sizes_; size_type d = mi.size();
+ sizes_.resize(d); coeff.resize(d);
+ if (d) {
+ std::copy(mi.begin(), mi.end(), sizes_.begin());
+ std::copy(t.coeff.begin(), t.coeff.end(), coeff.begin());
+ size_type e = coeff.back();
+ sizes_.back() = P;
+ this->resize(e*P);
+ return e;
+ } else {
+ this->resize(1);
+ return 1;
+ }
}
/** reduction of tensor t with respect to index ni with matrix m:
Modified: trunk/getfem/src/getfem_fem.cc
URL:
http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/getfem_fem.cc?rev=5457&r1=5456&r2=5457&view=diff
==============================================================================
--- trunk/getfem/src/getfem_fem.cc (original)
+++ trunk/getfem/src/getfem_fem.cc Wed Nov 2 12:59:10 2016
@@ -77,6 +77,14 @@
// In that case, the storage available in ctx.pfp()->c, ctx.pfp()->pc
// and ctx.pfp()->hpc is not used.
+
+ // Specific multiplication for fem_interpolation_context use.
+ static inline void spec_mat_tmult_(const base_tensor &g, const base_matrix
&B,
+ base_tensor &t) {
+ size_type P = B.nrows(), N = B.ncols();
+ size_type M = t.adjust_sizes_changing_last(g, P);
+ bgeot::mat_tmult(&(*(g.begin())), &(*(B.begin())), &(*(t.begin())),M,N,P);
+ }
void fem_interpolation_context::pfp_base_value(base_tensor& t,
const pfem_precomp &pfp__) {
@@ -146,13 +154,14 @@
}
}
- void fem_interpolation_context::pfp_grad_base_value(base_tensor& t,
- const pfem_precomp
&pfp__) {
+ void fem_interpolation_context::pfp_grad_base_value
+ (base_tensor& t, const pfem_precomp &pfp__) {
const pfem &pf__ = pfp__->get_pfem();
GMM_ASSERT1(ii_ != size_type(-1), "Internal error");
if (pf__->is_standard()) {
- t.mat_transp_reduction(pfp__->grad(ii()), B(), 2);
+ // t.mat_transp_reduction(pfp__->grad(ii()), B(), 2);
+ spec_mat_tmult_(pfp__->grad(ii()), B(), t);
} else {
if (pf__->is_on_real_element())
pf__->real_grad_base_value(*this, t);
@@ -161,18 +170,22 @@
case virtual_fem::VECTORIAL_PRIMAL_TYPE:
{
base_tensor u;
- u.mat_transp_reduction(pfp__->grad(ii()), B(), 2);
+ // u.mat_transp_reduction(pfp__->grad(ii()), B(), 2);
+ spec_mat_tmult_(pfp__->grad(ii()), B(), u);
t.mat_transp_reduction(u, K(), 1);
}
break;
case virtual_fem::VECTORIAL_DUAL_TYPE:
{
base_tensor u;
- u.mat_transp_reduction(pfp__->grad(ii()), B(), 2);
+ // u.mat_transp_reduction(pfp__->grad(ii()), B(), 2);
+ spec_mat_tmult_(pfp__->grad(ii()), B(), u);
t.mat_transp_reduction(u, B(), 1);
}
break;
- default: t.mat_transp_reduction(pfp__->grad(ii()), B(), 2);
+ default:
+ // t.mat_transp_reduction(pfp__->grad(ii()), B(), 2);
+ spec_mat_tmult_(pfp__->grad(ii()), B(), t);
}
if (!(pf__->is_equivalent())) {
set_pfp(pfp__);
@@ -186,7 +199,8 @@
void fem_interpolation_context::grad_base_value(base_tensor& t,
bool withM) const {
if (pfp_ && ii_ != size_type(-1) && pf_->is_standard()) {
- t.mat_transp_reduction(pfp_->grad(ii()), B(), 2);
+ // t.mat_transp_reduction(pfp_->grad(ii()), B(), 2);
+ spec_mat_tmult_(pfp_->grad(ii()), B(), t);
} else {
if (pf()->is_on_real_element())
pf()->real_grad_base_value(*this, t);
@@ -196,25 +210,30 @@
case virtual_fem::VECTORIAL_PRIMAL_TYPE:
{
base_tensor u;
- u.mat_transp_reduction(pfp_->grad(ii()), B(), 2);
+ // u.mat_transp_reduction(pfp_->grad(ii()), B(), 2);
+ spec_mat_tmult_(pfp_->grad(ii()), B(), u);
t.mat_transp_reduction(u, K(), 1);
}
break;
case virtual_fem::VECTORIAL_DUAL_TYPE:
{
base_tensor u;
- u.mat_transp_reduction(pfp_->grad(ii()), B(), 2);
+ // u.mat_transp_reduction(pfp_->grad(ii()), B(), 2);
+ spec_mat_tmult_(pfp_->grad(ii()), B(), u);
t.mat_transp_reduction(u, B(), 1);
}
break;
- default: t.mat_transp_reduction(pfp_->grad(ii()), B(), 2);
+ default:
+ // t.mat_transp_reduction(pfp_->grad(ii()), B(), 2);
+ spec_mat_tmult_(pfp_->grad(ii()), B(), t);
}
} else {
base_tensor u;
pf()->grad_base_value(xref(), u);
if (u.size()) { /* only if the FEM can provide grad_base_value */
- t.mat_transp_reduction(u, B(), 2);
+ // t.mat_transp_reduction(u, B(), 2);
+ spec_mat_tmult_(u, B(), t);
switch(pf()->vectorial_type()) {
case virtual_fem::VECTORIAL_PRIMAL_TYPE:
u = t; t.mat_transp_reduction(u, K(), 1); break;
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Getfem-commits] r5457 - in /trunk/getfem: contrib/opt_assembly/ src/ src/getfem/,
Yves . Renard <=