pspp-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: casereader numbering


From: John Darrington
Subject: Re: casereader numbering
Date: Thu, 24 Jul 2008 20:25:39 +0800
User-agent: Mutt/1.5.18 (2008-05-17)

On Thu, Jul 24, 2008 at 07:06:05AM +0800, John Darrington wrote:
     On Wed, Jul 23, 2008 at 07:24:21AM -0700, Ben Pfaff wrote:
          > On initial tests, it appears to work fine, except that I would have
          > expected casereader_get_value_cnt on the new casereader to return 1 more
          > than that of the old one.  But that's not what I am experiencing.
          
          Er, I would expect that too.  On inspection, the code looks
          correct; I don't see how casereader_get_value_cnt() could return
          a value different from that.  Huh.
     
     Maybe I made a mistake then.  I'll have a closer look later.
     

On closer inspection it turns out that the culprit is the function
sort_execute :

struct casereader *
 sort_execute (struct casereader *input, struct case_ordering *ordering);

Instead of returning a casereader of the same width as INPUT, it returns
one with the width associated with ORDERING.

I don't see any valid reason for a case_ordering to be aware of the
value_cnt, so I'm proposing this patch, which seems to fix the
problem.

diff --git a/src/data/case-ordering.c b/src/data/case-ordering.c
index 7b3948c..c4a716e 100644
--- a/src/data/case-ordering.c
+++ b/src/data/case-ordering.c
@@ -37,8 +37,6 @@ struct sort_key
 /* A set of criteria for ordering cases. */
 struct case_ordering
   {
-    size_t value_cnt;           /* Number of `union value's per case. */
-
     /* Sort keys. */
     struct sort_key *keys;
     size_t key_cnt;
@@ -49,10 +47,9 @@ struct case_ordering
    contains no variables, so that all cases will compare as
    equal. */
 struct case_ordering *
-case_ordering_create (const struct dictionary *dict)
+case_ordering_create (void)
 {
   struct case_ordering *co = xmalloc (sizeof *co);
-  co->value_cnt = dict_get_next_value_idx (dict);
   co->keys = NULL;
   co->key_cnt = 0;
   return co;
@@ -63,7 +60,6 @@ struct case_ordering *
 case_ordering_clone (const struct case_ordering *orig)
 {
   struct case_ordering *co = xmalloc (sizeof *co);
-  co->value_cnt = orig->value_cnt;
   co->keys = xmemdup (orig->keys, orig->key_cnt * sizeof *orig->keys);
   co->key_cnt = orig->key_cnt;
   return co;
@@ -80,15 +76,6 @@ case_ordering_destroy (struct case_ordering *co)
     }
 }
 
-/* Returns the number of `union value's in the cases that case
-   ordering CO compares (taken from the dictionary used to
-   construct it). */
-size_t
-case_ordering_get_value_cnt (const struct case_ordering *co)
-{
-  return co->value_cnt;
-}
-
 /* Compares cases A and B given case ordering CO and returns a
    strcmp()-type result. */
 int
diff --git a/src/data/case-ordering.h b/src/data/case-ordering.h
index 026cd89..f49f265 100644
--- a/src/data/case-ordering.h
+++ b/src/data/case-ordering.h
@@ -32,7 +32,7 @@ enum sort_direction
   };
 
 /* Creation and destruction. */
-struct case_ordering *case_ordering_create (const struct dictionary *);
+struct case_ordering *case_ordering_create (void);
 struct case_ordering *case_ordering_clone (const struct case_ordering *);
 void case_ordering_destroy (struct case_ordering *);
 
diff --git a/src/language/stats/rank.q b/src/language/stats/rank.q
index 5bc88c4..cb63949 100644
--- a/src/language/stats/rank.q
+++ b/src/language/stats/rank.q
@@ -261,7 +261,7 @@ rank_cmd (struct dataset *ds, const struct case_ordering *sc,
 
           /* Sort this split group by the BY variables as primary
              keys and the rank variable as secondary key. */
-          ordering = case_ordering_create (d);
+          ordering = case_ordering_create ();
           for (j = 0; j < n_group_vars; j++)
             case_ordering_add_var (ordering, group_vars[j], SRT_ASCEND);
           case_ordering_add_var (ordering,
@@ -778,7 +778,7 @@ cmd_rank (struct lexer *lexer, struct dataset *ds)
   /* Put the active file back in its original order.  Delete
      our sort key, which we don't need anymore.  */
   {
-    struct case_ordering *ordering = case_ordering_create (dataset_dict (ds));
+    struct case_ordering *ordering = case_ordering_create ();
     struct casereader *sorted;
     case_ordering_add_var (ordering, order, SRT_ASCEND);
     /* FIXME: loses error conditions. */
diff --git a/src/language/stats/sort-criteria.c b/src/language/stats/sort-criteria.c
index c84f71d..fd8c7c5 100644
--- a/src/language/stats/sort-criteria.c
+++ b/src/language/stats/sort-criteria.c
@@ -39,7 +39,7 @@ struct case_ordering *
 parse_case_ordering (struct lexer *lexer, const struct dictionary *dict,
                      bool *saw_direction)
 {
-  struct case_ordering *ordering = case_ordering_create (dict);
+  struct case_ordering *ordering = case_ordering_create ();
   const struct variable **vars = NULL;
   size_t var_cnt = 0;
 
diff --git a/src/math/merge.c b/src/math/merge.c
index d56a78c..4fc7c8d 100644
--- a/src/math/merge.c
+++ b/src/math/merge.c
@@ -44,16 +44,18 @@ struct merge
     struct case_ordering *ordering;
     struct merge_input inputs[MAX_MERGE_ORDER];
     size_t input_cnt;
+    size_t value_cnt;
   };
 
 static void do_merge (struct merge *m);
 
 struct merge *
-merge_create (const struct case_ordering *ordering)
+merge_create (const struct case_ordering *ordering, size_t value_cnt)
 {
   struct merge *m = xmalloc (sizeof *m);
   m->ordering = case_ordering_clone (ordering);
   m->input_cnt = 0;
+  m->value_cnt = value_cnt;
   return m;
 }
 
@@ -95,8 +97,7 @@ merge_make_reader (struct merge *m)
     }
   else if (m->input_cnt == 0)
     {
-      size_t value_cnt = case_ordering_get_value_cnt (m->ordering);
-      struct casewriter *writer = mem_writer_create (value_cnt);
+      struct casewriter *writer = mem_writer_create (m->value_cnt);
       r = casewriter_make_reader (writer);
     }
   else
@@ -129,7 +130,7 @@ do_merge (struct merge *m)
 
   assert (m->input_cnt > 1);
 
-  w = tmpfile_writer_create (case_ordering_get_value_cnt (m->ordering));
+  w = tmpfile_writer_create (m->value_cnt);
   for (i = 0; i < m->input_cnt; i++)
     taint_propagate (casereader_get_taint (m->inputs[i].reader),
                      casewriter_get_taint (w));
diff --git a/src/math/merge.h b/src/math/merge.h
index c9c9c48..18322e8 100644
--- a/src/math/merge.h
+++ b/src/math/merge.h
@@ -18,11 +18,12 @@
 #define MATH_MERGE_H 1
 
 #include <stdbool.h>
+#include <stddef.h>
 
 struct case_ordering;
 struct casereader;
 
-struct merge *merge_create (const struct case_ordering *);
+struct merge *merge_create (const struct case_ordering *, size_t);
 void merge_destroy (struct merge *);
 void merge_append (struct merge *, struct casereader *);
 struct casereader *merge_make_reader (struct merge *);
diff --git a/src/math/sort.c b/src/math/sort.c
index e03ef57..10b8a12 100644
--- a/src/math/sort.c
+++ b/src/math/sort.c
@@ -41,6 +41,7 @@ int max_buffers = INT_MAX;
 
 struct sort_writer
   {
+    size_t value_cnt;
     struct case_ordering *ordering;
     struct merge *merge;
     struct pqueue *pqueue;
@@ -52,7 +53,7 @@ struct sort_writer
 
 static struct casewriter_class sort_casewriter_class;
 
-static struct pqueue *pqueue_create (const struct case_ordering *);
+static struct pqueue *pqueue_create (const struct case_ordering *, size_t);
 static void pqueue_destroy (struct pqueue *);
 static bool pqueue_is_full (const struct pqueue *);
 static bool pqueue_is_empty (const struct pqueue *);
@@ -62,15 +63,15 @@ static void pqueue_pop (struct pqueue *, struct ccase *, casenumber *);
 static void output_record (struct sort_writer *);
 
 struct casewriter *
-sort_create_writer (struct case_ordering *ordering)
+sort_create_writer (struct case_ordering *ordering, size_t value_cnt)
 {
-  size_t value_cnt = case_ordering_get_value_cnt (ordering);
   struct sort_writer *sort;
 
   sort = xmalloc (sizeof *sort);
+  sort->value_cnt = value_cnt;
   sort->ordering = case_ordering_clone (ordering);
-  sort->merge = merge_create (ordering);
-  sort->pqueue = pqueue_create (ordering);
+  sort->merge = merge_create (ordering, value_cnt);
+  sort->pqueue = pqueue_create (ordering, value_cnt);
   sort->run = NULL;
   sort->run_id = 0;
   case_nullify (&sort->run_end);
@@ -118,8 +119,7 @@ sort_casewriter_convert_to_reader (struct casewriter *writer, void *sort_)
   if (sort->run == NULL && sort->run_id == 0)
     {
       /* In-core sort. */
-      sort->run = mem_writer_create (case_ordering_get_value_cnt (
-                                       sort->ordering));
+      sort->run = mem_writer_create (casewriter_get_value_cnt (writer));
       sort->run_id = 1;
     }
   while (!pqueue_is_empty (sort->pqueue))
@@ -151,8 +151,7 @@ output_record (struct sort_writer *sort)
     }
   if (sort->run == NULL)
     {
-      sort->run = tmpfile_writer_create (case_ordering_get_value_cnt (
-                                           sort->ordering));
+      sort->run = tmpfile_writer_create (sort->value_cnt);
       sort->run_id = min_run_id;
     }
 
@@ -176,7 +175,8 @@ static struct casewriter_class sort_casewriter_class =
 struct casereader *
 sort_execute (struct casereader *input, struct case_ordering *ordering)
 {
-  struct casewriter *output = sort_create_writer (ordering);
+  struct casewriter *output =
+    sort_create_writer (ordering, casereader_get_value_cnt (input));
   casereader_transfer (input, output);
   return casewriter_make_reader (output);
 }
@@ -201,14 +201,14 @@ static int compare_pqueue_records_minheap (const void *a, const void *b,
                                            const void *pq_);
 
 static struct pqueue *
-pqueue_create (const struct case_ordering *ordering)
+pqueue_create (const struct case_ordering *ordering, size_t value_cnt)
 {
   struct pqueue *pq;
 
   pq = xmalloc (sizeof *pq);
   pq->ordering = case_ordering_clone (ordering);
   pq->record_cap
-    = settings_get_workspace_cases (case_ordering_get_value_cnt (ordering));
+    = settings_get_workspace_cases (value_cnt);
   if (pq->record_cap > max_buffers)
     pq->record_cap = max_buffers;
   else if (pq->record_cap < min_buffers)
diff --git a/src/math/sort.h b/src/math/sort.h
index 7f7b2f8..ea2c16b 100644
--- a/src/math/sort.h
+++ b/src/math/sort.h
@@ -25,7 +25,7 @@ struct case_ordering;
 extern int min_buffers ;
 extern int max_buffers ;
 
-struct casewriter *sort_create_writer (struct case_ordering *);
+struct casewriter *sort_create_writer (struct case_ordering *, size_t value_cnt);
 struct casereader *sort_execute (struct casereader *, struct case_ordering *);
 
 #endif /* math/sort.h */




-- 
PGP Public key ID: 1024D/2DE827B3 
fingerprint = 8797 A26D 0854 2EAB 0285  A290 8A67 719C 2DE8 27B3
See http://pgp.mit.edu or any PGP keyserver for public key.


Attachment: signature.asc
Description: Digital signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]