[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: Covariance Matrix
From: |
Jason Stover |
Subject: |
Re: Covariance Matrix |
Date: |
Tue, 6 Oct 2009 11:37:40 -0400 |
User-agent: |
Mutt/1.5.18 (2008-05-17) |
On Sun, Oct 04, 2009 at 03:21:51PM +0000, John Darrington wrote:
> It's currently implemented as a single pass algorithm, but it will
> be straightforward to change that. There's no categorical variables,
> or interactions, at present.
>
...
> I'd be interested in any comments and suggestions on how to proceed
> with generalising the implementation to accept categorical variables.
I have a patch below that makes a start on this. Right now, it only allocates
space for the extra dimensions that categorical variables need. It does not yet
properly retrieve entries involving categorical variables, but it is a start:
diff --git a/src/language/stats/correlations.c b/src/language/stats/correlations.c
index e397dae..65679f3 100644
--- a/src/language/stats/correlations.c
+++ b/src/language/stats/correlations.c
@@ -324,7 +324,7 @@ run_corr (struct casereader *r, const struct corr_opts *opts, const struct corr
const gsl_matrix *var_matrix, *samples_matrix, *mean_matrix;
const gsl_matrix *cov_matrix;
gsl_matrix *corr_matrix;
- struct covariance *cov = covariance_create (corr->n_vars_total, corr->vars,
+ struct covariance *cov = covariance_create_pass1 (corr->n_vars_total, corr->vars,
opts->wv, opts->exclude);
for ( ; (c = casereader_read (r) ); case_unref (c))
diff --git a/src/math/covariance.c b/src/math/covariance.c
index ba0de0b..350cab4 100644
--- a/src/math/covariance.c
+++ b/src/math/covariance.c
@@ -31,6 +31,7 @@ struct covariance
{
/* The variables for which the covariance matrix is to be calculated */
size_t n_vars;
+ size_t dim; /* This value equals n_vars if all variables are continuous. */
const struct variable **vars;
/* The weight variable (or NULL if none) */
@@ -64,11 +65,29 @@ covariance_moments (const struct covariance *cov, int m)
}
+static void
+covariance_create_part2 (struct covariance *cov, enum mv_class exclude)
+{
+ size_t i;
+
+ cov->moments = xmalloc (sizeof *cov->moments * n_MOMENTS);
+
+ for (i = 0; i < n_MOMENTS; ++i)
+ cov->moments[i] = gsl_matrix_calloc (cov->dim, cov->dim);
-/* Create a covariance struct */
+ cov->exclude = exclude;
+
+ cov->n_cm = (cov->dim * (cov->dim - 1) ) / 2;
+
+ cov->cm = xcalloc (sizeof *cov->cm, cov->n_cm);
+}
+
+/* Create a covariance struct to be computed in one data pass.
+ No categorical variables are allowed.
+*/
struct covariance *
-covariance_create (size_t n_vars, const struct variable **vars,
- const struct variable *weight, enum mv_class exclude)
+covariance_create_pass1 (size_t n_vars, const struct variable **vars,
+ const struct variable *weight, enum mv_class exclude)
{
size_t i;
struct covariance *cov = xmalloc (sizeof *cov);
@@ -76,20 +95,61 @@ covariance_create (size_t n_vars, const struct variable **vars,
cov->wv = weight;
cov->n_vars = n_vars;
+ cov->dim = n_vars; /* Only numeric variables are allowed in a single data pass,
+ so these values are equal.
+ */
for (i = 0; i < n_vars; ++i)
- cov->vars[i] = vars[i];
+ {
+ assert (var_is_numeric (vars[i]));
+ cov->vars[i] = vars[i];
+ }
- cov->moments = xmalloc (sizeof *cov->moments * n_MOMENTS);
-
- for (i = 0; i < n_MOMENTS; ++i)
- cov->moments[i] = gsl_matrix_calloc (n_vars, n_vars);
+ covariance_create_part2 (cov, exclude);
- cov->exclude = exclude;
+ return cov;
+}
- cov->n_cm = (n_vars * (n_vars - 1) ) / 2;
+static size_t
+get_dim (size_t n_vars, struct variable **vars)
+{
+ size_t i;
+ size_t dim = 0;
- cov->cm = xcalloc (sizeof *cov->cm, cov->n_cm);
+ for (i = 0; i < n_vars; i++)
+ {
+ if (var_is_numeric (vars[i]))
+ {
+ dim++;
+ }
+ else
+ {
+ dim += cat_get_n_categories (vars[i]);
+ }
+ }
+ return dim;
+}
+/* Create a covariance struct with categorical variables.
+ Call this function after the first data pass.
+*/
+struct covariance *
+covariance_create_pass2 (size_t n_vars, const struct variable **vars,
+ const struct variable *weight, enum mv_class exclude)
+{
+ size_t i;
+ struct covariance *cov = xmalloc (sizeof *cov);
+ cov->vars = xmalloc (sizeof *cov->vars * n_vars);
+
+ cov->wv = weight;
+ cov->n_vars = n_vars;
+
+ for (i = 0; i < n_vars; ++i)
+ cov->vars[i] = vars[i];
+
+ cov->dim = get_dim (n_vars, vars);
+
+
+ covariance_create_part2 (cov, exclude);
return cov;
}
diff --git a/src/math/covariance.h b/src/math/covariance.h
index 8b8de88..7a13cd2 100644
--- a/src/math/covariance.h
+++ b/src/math/covariance.h
@@ -27,8 +27,14 @@ struct covariance;
struct variable;
struct ccase ;
-struct covariance * covariance_create (size_t n_vars, const struct variable **vars,
- const struct variable *wv, enum mv_class excl);
+struct covariance * covariance_create_pass1 (size_t n_vars,
+ const struct variable **vars,
+ const struct variable *wv,
+ enum mv_class excl);
+struct covariance * covariance_create_pass2 (size_t n_vars,
+ const struct variable **vars,
+ const struct variable *wv,
+ enum mv_class excl);
void covariance_accumulate (struct covariance *, const struct ccase *);
- Re: Covariance Matrix, John Darrington, 2009/10/04
- Re: Covariance Matrix,
Jason Stover <=
- Re: Covariance Matrix, John Darrington, 2009/10/06
- Re: Covariance Matrix, Jason Stover, 2009/10/07
- Re: Covariance Matrix, John Darrington, 2009/10/07
- Re: Covariance Matrix, Jason Stover, 2009/10/07
- Re: Covariance Matrix, Jason Stover, 2009/10/07
- Re: Covariance Matrix, John Darrington, 2009/10/08
- Re: Covariance Matrix, Jason Stover, 2009/10/08