[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Help-gsl] OpenMP and GSL ?
From: |
Maxime Boissonneault |
Subject: |
[Help-gsl] OpenMP and GSL ? |
Date: |
Tue, 21 Apr 2009 16:31:09 -0400 |
User-agent: |
Mozilla/5.0 (Macintosh; U; Intel Mac OS X; fr; rv:1.8.1.21) Gecko/20090302 Thunderbird/2.0.0.21 Mnenhy/0.7.6.0 |
Hello,
I would like to know if there is any plan to add OpenMP support to GSL?
OpenMP is the easiest way of parallelizing any code and requires almost
no work. I have myself modified the Runge-Kutta-Fehlberg (4,5) algorithm
to take advantage of my Core 2 Duo CPU. In the attached file, I simply
added a couple of #pragma directives that tell the compiler to split the
loop across many threads. If these pragmas are not understood, they do not
break the compilation. If they are, you can get a speedup that is close to
the number of CPUs you have in many cases.
I don't know if you guys use unit testing to test GSL, but if you do,
adding OpenMP support could be very easy (since I believe that whatever
change you make to the code must be tested against unit tests).
--------
Put a ladder if there's a wall, don't be afraid to slip and fall, speak for
yourself or they'll speak for you.
--------
Si l'on savait ce que l'on fait, on n'appellerait pas ça de la recherche.
(Albert Einstein)
--------
Maxime Boissonneault
Étudiant chercheur au doctorat en physique
Président de l'AGLEBUS
--------------------------------------------
address@hidden
address@hidden
(819) 821-8000 #63043 (jour)
(819) 823-1913 (soirs et fins de semaine)
/* ode-initval/rkf45.c
*
* Copyright (C) 2001, 2004, 2007 Brian Gough
* Copyright (C) 2008 Maxime Boissonneault
* This file was modified by Maxime Boissonneault to enable the support of
* OpenMP while computing rkf45_apply(...)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
USA.
*/
/* Runge-Kutta-Fehlberg 4(5)*/
/* Reference: e.g. Hairer, E., Norsett, S.P., Wanner, G., Solving Ordinary
Differential Equations I: Nonstiff Problems, 2nd revised edition,
Springer, 2000.
*/
//#include <config.h>
#include <stdlib.h>
#include <string.h>
#include "gsl_errno.h"
#include "gsl_odeiv.h"
#include "odeiv_util.h"
/* Runge-Kutta-Fehlberg coefficients. Zero elements left out */
/* ah[j] is the fraction of the step h at which stage k(j+2) is evaluated:
   rkf45_apply calls the system function at t + ah[j] * h.  ah[0] is also
   used as the weight of k1 when the input of the k2 stage is formed. */
static const double ah[] = { 1.0/4.0, 3.0/8.0, 12.0/13.0, 1.0, 1.0/2.0 };
/* b3..b6: weights of the earlier stages (k1, k2, ... in index order) used
   to build the temporary state from which k3..k6 are evaluated. */
static const double b3[] = { 3.0/32.0, 9.0/32.0 };
static const double b4[] = { 1932.0/2197.0, -7200.0/2197.0, 7296.0/2197.0};
static const double b5[] = { 8341.0/4104.0, -32832.0/4104.0, 29440.0/4104.0,
-845.0/4104.0};
static const double b6[] = { -6080.0/20520.0, 41040.0/20520.0,
-28352.0/20520.0, 9295.0/20520.0, -5643.0/20520.0};
/* c1, c3..c6: weights of k1, k3, k4, k5 and k6 in the final update of y
   (there is no c2: k2 does not enter the final sum). */
static const double c1 = 902880.0/7618050.0;
static const double c3 = 3953664.0/7618050.0;
static const double c4 = 3855735.0/7618050.0;
static const double c5 = -1371249.0/7618050.0;
static const double c6 = 277020.0/7618050.0;
/* These are the differences of fifth and fourth order coefficients
for error estimation */
/* ec[j] weights stage kj in the error estimate yerr.  rkf45_apply reads
   only indices 1, 3, 4, 5 and 6; entries 0 and 2 are zero placeholders. */
static const double ec[] = { 0.0,
1.0 / 360.0,
0.0,
-128.0 / 4275.0,
-2197.0 / 75240.0,
1.0 / 50.0,
2.0 / 55.0
};
/* Workspace shared by the rkf45 routines in this file.  Every member is a
   separately allocated buffer; rkf45_alloc sizes each one for `dim`
   doubles and rkf45_free releases them. */
typedef struct {
  double *k1;    /* derivative at the start of the step */
  double *k2;    /* derivative evaluated for the 2nd stage */
  double *k3;    /* derivative evaluated for the 3rd stage */
  double *k4;    /* derivative evaluated for the 4th stage */
  double *k5;    /* derivative evaluated for the 5th stage */
  double *k6;    /* derivative evaluated for the 6th stage */
  double *y0;    /* copy of y taken on entry to rkf45_apply */
  double *ytmp;  /* intermediate state handed to each stage evaluation */
} rkf45_state_t;
/* Allocate the rkf45 workspace for a system with `dim` components.
 *
 * On success returns a pointer to a new rkf45_state_t whose eight buffers
 * (k1..k6, y0, ytmp) each have room for `dim` doubles; the buffer contents
 * are uninitialised.  The caller releases it with rkf45_free.
 *
 * On failure every buffer obtained so far and the state itself are freed
 * (most recent first) and the error is raised through GSL_ERROR_NULL with
 * GSL_ENOMEM.  NOTE(review): this relies on GSL_ERROR_NULL returning from
 * the function, exactly as the previous implementation did.
 */
static void *
rkf45_alloc (size_t dim)
{
  enum { RKF45_NBUF = 8 };

  /* One message per buffer, in allocation order. */
  static const char *const reasons[RKF45_NBUF] = {
    "failed to allocate space for k1",
    "failed to allocate space for k2",
    "failed to allocate space for k3",
    "failed to allocate space for k4",
    "failed to allocate space for k5",
    "failed to allocate space for k6",
    "failed to allocate space for y0",
    "failed to allocate space for ytmp"
  };

  double **slots[RKF45_NBUF];
  rkf45_state_t *state;
  size_t j;

  /* dim * sizeof (double) is computed in size_t and would silently wrap
     for a huge dim, yielding buffers that are too small.  Refuse such a
     request up front instead of handing out undersized memory. */
  if (dim > (size_t) -1 / sizeof (double))
    {
      GSL_ERROR_NULL ("dimension too large for rkf45_state", GSL_ENOMEM);
    }

  state = (rkf45_state_t *) malloc (sizeof (rkf45_state_t));

  if (state == NULL)
    {
      GSL_ERROR_NULL ("failed to allocate space for rkf45_state", GSL_ENOMEM);
    }

  /* Address of each member, in the same order as `reasons`. */
  slots[0] = &state->k1;
  slots[1] = &state->k2;
  slots[2] = &state->k3;
  slots[3] = &state->k4;
  slots[4] = &state->k5;
  slots[5] = &state->k6;
  slots[6] = &state->y0;
  slots[7] = &state->ytmp;

  for (j = 0; j < RKF45_NBUF; j++)
    {
      *slots[j] = (double *) malloc (dim * sizeof (double));

      if (*slots[j] == NULL)
        {
          size_t m;

          /* Undo the allocations that succeeded, newest first. */
          for (m = j; m > 0; m--)
            {
              free (*slots[m - 1]);
            }

          free (state);
          GSL_ERROR_NULL (reasons[j], GSL_ENOMEM);
        }
    }

  return state;
}
/* Advance the system by one Runge-Kutta-Fehlberg 4(5) step of size h.
 *
 * vstate    workspace created by rkf45_alloc for the same `dim`
 * dim       number of components in y, yerr, dydt_in and dydt_out
 * t, h      start of the step and step size
 * y         state on entry; overwritten with the state at t + h on success
 * yerr      receives the per-component error estimate on success
 * dydt_in   derivatives at (t, y), or NULL to have them evaluated here
 * dydt_out  if not NULL, receives the derivatives at (t + h, new y)
 * sys       system whose derivatives are obtained via GSL_ODEIV_FN_EVAL
 *
 * Returns GSL_SUCCESS, or the first status other than GSL_SUCCESS reported
 * by GSL_ODEIV_FN_EVAL.  If a stage evaluation fails, y has not been
 * modified yet (stages only write the workspace); if the dydt_out
 * evaluation fails, y is restored from the copy saved on entry.  yerr is
 * not written on failure.
 *
 * The element-wise loops carry OpenMP pragmas; without OpenMP support the
 * pragmas are ignored and the loops run serially.
 */
static int
rkf45_apply (void *vstate,
             size_t dim,
             double t,
             double h,
             double y[],
             double yerr[],
             const double dydt_in[],
             double dydt_out[], const gsl_odeiv_system * sys)
{
  rkf45_state_t *state = (rkf45_state_t *) vstate;

  /* The loop counter stays signed because OpenMP 2.x loop constructs
     require a signed iteration variable, but it is wide enough that
     comparing against the dimension no longer mixes int with size_t
     (which overflowed the counter for dim > INT_MAX). */
  long long i;
  long long n = (long long) dim;

  double *const k1 = state->k1;
  double *const k2 = state->k2;
  double *const k3 = state->k3;
  double *const k4 = state->k4;
  double *const k5 = state->k5;
  double *const k6 = state->k6;
  double *const ytmp = state->ytmp;
  double *const y0 = state->y0;

  /* Keep the incoming state so it can be restored if the step fails late. */
  DBL_MEMCPY (y0, y, dim);

  /* k1 step */
  if (dydt_in != NULL)
    {
      DBL_MEMCPY (k1, dydt_in, dim);
    }
  else
    {
      int s = GSL_ODEIV_FN_EVAL (sys, t, y, k1);

      if (s != GSL_SUCCESS)
        {
          return s;
        }
    }

#pragma omp parallel for private(i) shared(n,h)
  for (i = 0; i < n; i++)
    ytmp[i] = y[i] + ah[0] * h * k1[i];

  /* k2 step */
  {
    int s = GSL_ODEIV_FN_EVAL (sys, t + ah[0] * h, ytmp, k2);

    if (s != GSL_SUCCESS)
      {
        return s;
      }
  }

#pragma omp parallel for private(i) shared(n,h)
  for (i = 0; i < n; i++)
    ytmp[i] = y[i] + h * (b3[0] * k1[i] + b3[1] * k2[i]);

  /* k3 step */
  {
    int s = GSL_ODEIV_FN_EVAL (sys, t + ah[1] * h, ytmp, k3);

    if (s != GSL_SUCCESS)
      {
        return s;
      }
  }

#pragma omp parallel for private(i) shared(n,h)
  for (i = 0; i < n; i++)
    ytmp[i] = y[i] + h * (b4[0] * k1[i] + b4[1] * k2[i] + b4[2] * k3[i]);

  /* k4 step */
  {
    int s = GSL_ODEIV_FN_EVAL (sys, t + ah[2] * h, ytmp, k4);

    if (s != GSL_SUCCESS)
      {
        return s;
      }
  }

#pragma omp parallel for private(i) shared(n,h)
  for (i = 0; i < n; i++)
    ytmp[i] =
      y[i] + h * (b5[0] * k1[i] + b5[1] * k2[i] + b5[2] * k3[i] +
                  b5[3] * k4[i]);

  /* k5 step */
  {
    int s = GSL_ODEIV_FN_EVAL (sys, t + ah[3] * h, ytmp, k5);

    if (s != GSL_SUCCESS)
      {
        return s;
      }
  }

#pragma omp parallel for private(i) shared(n,h)
  for (i = 0; i < n; i++)
    ytmp[i] =
      y[i] + h * (b6[0] * k1[i] + b6[1] * k2[i] + b6[2] * k3[i] +
                  b6[3] * k4[i] + b6[4] * k5[i]);

  /* k6 step and final sum */
  {
    int s = GSL_ODEIV_FN_EVAL (sys, t + ah[4] * h, ytmp, k6);

    if (s != GSL_SUCCESS)
      {
        return s;
      }
  }

  /* From here on y itself is modified. */
#pragma omp parallel for private(i) shared(n,h)
  for (i = 0; i < n; i++)
    {
      const double d_i = c1 * k1[i] + c3 * k3[i] + c4 * k4[i] + c5 * k5[i] + c6
        * k6[i];
      y[i] += h * d_i;
    }

  /* Derivatives at output */
  if (dydt_out != NULL)
    {
      int s = GSL_ODEIV_FN_EVAL (sys, t + h, y, dydt_out);

      if (s != GSL_SUCCESS)
        {
          /* Restore initial values */
          DBL_MEMCPY (y, y0, dim);
          return s;
        }
    }

  /* difference between 4th and 5th order */
#pragma omp parallel for private(i) shared(n,h)
  for (i = 0; i < n; i++)
    {
      yerr[i] = h * (ec[1] * k1[i] + ec[3] * k3[i] + ec[4] * k4[i]
                     + ec[5] * k5[i] + ec[6] * k6[i]);
    }

  return GSL_SUCCESS;
}
/* Clear all eight work buffers of the stepper state by applying
   DBL_ZERO_MEMSET with length `dim` to each of them.
   Always returns GSL_SUCCESS. */
static int
rkf45_reset (void *vstate, size_t dim)
{
  rkf45_state_t *const ws = (rkf45_state_t *) vstate;
  double *const buffers[] = {
    ws->k1, ws->k2, ws->k3, ws->k4, ws->k5, ws->k6, ws->ytmp, ws->y0
  };
  size_t j;

  for (j = 0; j < sizeof buffers / sizeof buffers[0]; j++)
    {
      DBL_ZERO_MEMSET (buffers[j], dim);
    }

  return GSL_SUCCESS;
}
/* Report the order of the method.  The state is not consulted; the
   function always returns 5. */
static unsigned int
rkf45_order (void *vstate)
{
  /* Explicitly discard the parameter: this silences unused-parameter
     warnings without the dead store that itself draws a
     "set but not used" warning. */
  (void) vstate;

  return 5;
}
/* Release a workspace obtained from rkf45_alloc: the eight buffers first,
   then the state structure itself.  Passing NULL is a no-op, mirroring
   free (NULL). */
static void
rkf45_free (void *vstate)
{
  rkf45_state_t *state = (rkf45_state_t *) vstate;

  /* Nothing to release; avoids dereferencing a null state. */
  if (state == NULL)
    {
      return;
    }

  free (state->ytmp);
  free (state->y0);
  free (state->k6);
  free (state->k5);
  free (state->k4);
  free (state->k3);
  free (state->k2);
  free (state->k1);
  free (state);
}
/* Descriptor that bundles the rkf45 routines defined in this file. */
static const gsl_odeiv_step_type rkf45_type = {
  "rkf45",       /* name */
  1,             /* can use dydt_in */
  0,             /* gives exact dydt_out */
  rkf45_alloc,
  rkf45_apply,
  rkf45_reset,
  rkf45_order,
  rkf45_free
};

/* Externally visible handle to the rkf45 descriptor. */
const gsl_odeiv_step_type *gsl_odeiv_step_rkf45 = &rkf45_type;