--- pspp-0.8.4-original/src/language/stats/quick-cluster.c 2014-08-24 00:51:41.000000000 -0500 +++ pspp-0.8.4/src/language/stats/quick-cluster.c 2015-05-30 16:58:46.049760839 -0500 @@ -60,6 +60,10 @@ int ngroups; /* Number of group. (Given by the user) */ int maxiter; /* Maximum iterations (Given by the user) */ + int print_cluster_membership; /* true => print the cluster of each case (set by PRINT CLUSTER) */ + int print_initial_clusters; /* true => also print the initial cluster centers (set by PRINT INITIAL) */ + int save_cluster_membership; /* true => save cluster membership as QCL_1 in dataset (set by SAVE CLUSTER). NOTE(review): only initialised and parsed in this patch; nothing visible here reads it -- confirm the save is implemented elsewhere */ + int save_cluster_distance; /* true => save cluster distance as QCL_2 in dataset (set by SAVE DISTANCE). NOTE(review): only initialised and parsed in this patch; nothing visible here reads it -- confirm the save is implemented elsewhere */ const struct variable *wv; /* Weighting variable. */ @@ -89,7 +93,7 @@ static struct Kmeans *kmeans_create (const struct qc *qc); -static void kmeans_randomize_centers (struct Kmeans *kmeans, const struct qc *qc); +static void kmeans_randomize_centers (struct Kmeans *kmeans, const struct casereader *reader, const struct qc *qc); static int kmeans_get_nearest_group (struct Kmeans *kmeans, struct ccase *c, const struct qc *); @@ -104,9 +108,11 @@ static void quick_cluster_show_centers (struct Kmeans *kmeans, bool initial, const struct qc *); +static void quick_cluster_show_membership (struct Kmeans *kmeans, const struct casereader *reader, const struct qc *); + static void quick_cluster_show_number_cases (struct Kmeans *kmeans, const struct qc *); -static void quick_cluster_show_results (struct Kmeans *kmeans, const struct qc *); +static void quick_cluster_show_results (struct Kmeans *kmeans, const struct casereader *reader, const struct qc *); int cmd_quick_cluster (struct lexer *lexer, struct dataset *ds); @@ -152,7 +158,7 @@ /* Creates random centers using randomly selected cases from the data. 
*/ static void -kmeans_randomize_centers (struct Kmeans *kmeans, const struct qc *qc) +kmeans_randomize_centers (struct Kmeans *kmeans, const struct casereader *reader, const struct qc *qc) { int i, j; for (i = 0; i < qc->ngroups; i++) @@ -346,11 +352,12 @@ bool redo; int diffs; bool show_warning1; + int redo_count = 0; /* number of times clustering has jumped back to the cluster: label */ show_warning1 = true; cluster: redo = false; - kmeans_randomize_centers (kmeans, qc); + kmeans_randomize_centers (kmeans, reader, qc); for (kmeans->lastiter = 0; kmeans->lastiter < qc->maxiter; kmeans->lastiter++) { @@ -377,8 +384,13 @@ break; } } + if (redo) - goto cluster; + { + redo_count++; + assert (redo_count < 10); /* NOTE(review): this is the only cap on restarts; it aborts when assertions are enabled and is compiled out when NDEBUG is defined, leaving the restart loop unbounded -- consider a runtime check */ + goto cluster; + } } @@ -446,6 +458,43 @@ tab_submit (t); } +/* Reports cluster membership for each case: reads every case from a clone of READER and submits a two-column table holding the 1-based case number and the 1-based group returned by kmeans_get_nearest_group. The table is created with kmeans->n data rows, and READER is asserted to yield exactly kmeans->n cases. */ +static void +quick_cluster_show_membership (struct Kmeans *kmeans, const struct casereader *reader, const struct qc *qc) +{ + struct tab_table *t; + int nc, nr; + int i, clust; + struct ccase *c; + struct casereader *cs = casereader_clone (reader); + nc = 2; + nr = kmeans->n + 1; + t = tab_create (nc, nr); + tab_headers (t, 0, nc - 1, 0, 0); + tab_title (t, _("Cluster Membership")); + tab_text (t, 0, 0, TAB_CENTER, _("Case Number")); + tab_text (t, 1, 0, TAB_CENTER, _("Cluster")); + tab_box (t, TAL_2, TAL_2, TAL_0, TAL_1, 0, 0, nc - 1, nr - 1); + //tab_text (t, 0, 1, TAB_CENTER, _("1")); + //tab_text (t, 1, 1, TAB_CENTER, _("2")); + //tab_text (t, 0, 2, TAB_CENTER, _("2")); + //tab_text (t, 1, 2, TAB_CENTER, _("3")); + + //for (i = 0; i < kmeans->n; i++) + for (i = 0; (c = casereader_read (cs)) != NULL; i++, case_unref (c)) + { + assert (i < kmeans->n); + clust = kmeans_get_nearest_group (kmeans, c, qc); + //int clust = 1; // kmeans_get_nearest_group (kmeans, c, qc); + tab_text_format (t, 0, i+1, TAB_CENTER, "%d", (i + 1)); + tab_text_format (t, 1, i+1, TAB_CENTER, "%d", (clust + 1)); + } + assert (i == kmeans->n); + tab_submit (t); + casereader_destroy (cs); +} + + /* Reports number of cases of each single cluster. 
*/ static void quick_cluster_show_number_cases (struct Kmeans *kmeans, const struct qc *qc) @@ -479,13 +528,15 @@ /* Reports. */ static void -quick_cluster_show_results (struct Kmeans *kmeans, const struct qc *qc) +quick_cluster_show_results (struct Kmeans *kmeans, const struct casereader *reader, const struct qc *qc) { kmeans_order_groups (kmeans, qc); - /* Uncomment the line below for reporting initial centers. */ - /* quick_cluster_show_centers (kmeans, true); */ + if( qc->print_initial_clusters ) /* PRINT INITIAL: report the initial centers ahead of the final ones */ + quick_cluster_show_centers (kmeans, true, qc); quick_cluster_show_centers (kmeans, false, qc); quick_cluster_show_number_cases (kmeans, qc); + if( qc->print_cluster_membership ) /* PRINT CLUSTER: report the cluster of every case, after the other tables */ + quick_cluster_show_membership(kmeans, reader, qc); } int @@ -499,6 +550,10 @@ qc.maxiter = 2; qc.missing_type = MISS_LISTWISE; qc.exclude = MV_ANY; + qc.print_cluster_membership = false; /* default = do not output case cluster membership */ + qc.print_initial_clusters = false; /* default = do not print initial clusters */ + qc.save_cluster_membership = false; /* default = do not save case cluster membership */ + qc.save_cluster_distance = false; /* default = do not save case cluster distance */ if (!parse_variables_const (lexer, dict, &qc.vars, &qc.n_vars, PV_NO_DUPLICATE | PV_NUMERIC)) @@ -536,6 +591,34 @@ goto error; } } + else if (lex_match_id (lexer, "SAVE")) /* SAVE subcommand: each CLUSTER or DISTANCE keyword sets its flag, until a slash or end of command; any other token jumps to error */ + { + lex_match (lexer, T_EQUALS); + while (lex_token (lexer) != T_ENDCMD + && lex_token (lexer) != T_SLASH) + { + if (lex_match_id (lexer, "CLUSTER")) + qc.save_cluster_membership = true; + else if (lex_match_id (lexer, "DISTANCE")) + qc.save_cluster_distance = true; + else + goto error; + } + } + else if (lex_match_id (lexer, "PRINT")) /* PRINT subcommand: each CLUSTER or INITIAL keyword sets its flag, until a slash or end of command; any other token jumps to error */ + { + lex_match (lexer, T_EQUALS); + while (lex_token (lexer) != T_ENDCMD + && lex_token (lexer) != T_SLASH) + { + if (lex_match_id (lexer, "CLUSTER")) + qc.print_cluster_membership = true; + else if (lex_match_id (lexer, "INITIAL")) + qc.print_initial_clusters = true; + else + goto error; + } + } 
else if (lex_match_id (lexer, "CRITERIA")) { lex_match (lexer, T_EQUALS); @@ -595,7 +678,7 @@ kmeans = kmeans_create (&qc); kmeans_cluster (kmeans, group, &qc); - quick_cluster_show_results (kmeans, &qc); + quick_cluster_show_results (kmeans, group, &qc); kmeans_destroy (kmeans); casereader_destroy (group); }