diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~' -x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps -x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h -x config.log -x config.status -x coreutils.info -x autom4te.cache -x 'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x '*.orig' -x '*.rej' -x version.texi coreutils-cvs-1-shred-isaac-split/ChangeLog coreutils-cvs-2-sort-with-isaac/ChangeLog
--- coreutils-cvs-1-shred-isaac-split/ChangeLog 2005-11-24 20:10:35.000000000 +0000
+++ coreutils-cvs-2-sort-with-isaac/ChangeLog 2005-11-26 04:34:39.000000000 +0000
@@ -1,3 +1,19 @@
+2005-11-25 Frederik Eaton
+
+ * src/shred.c:
+ * src/isaac.h:
+ * src/isaac.c: transfer ISAAC code (for cryptographic random
+ number generation) from shred.c to new files isaac.c, isaac.h
+
+ * src/Makefile.am (shred_SOURCES): isaac.c is now a source of shred
+
+ * src/Makefile.am (sort_SOURCES, sort_LDADD): changes to reflect
+ that sort uses ISAAC
+
+ * src/sort.c (short_options, long_options, rand_state, HASH_SIZE,
+ HASH_WORDS, get_hash, keycompare, main): add options --random-sort
+ and --seed to implement a random shuffle
+
2005-11-24 Jim Meyering
* Version 6.0-cvs.
diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~' -x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps -x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h -x config.log -x config.status -x coreutils.info -x autom4te.cache -x 'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x '*.orig' -x '*.rej' -x version.texi coreutils-cvs-1-shred-isaac-split/doc/coreutils.texi coreutils-cvs-2-sort-with-isaac/doc/coreutils.texi
--- coreutils-cvs-1-shred-isaac-split/doc/coreutils.texi 2005-11-24 20:10:37.000000000 +0000
+++ coreutils-cvs-2-sort-with-isaac/doc/coreutils.texi 2005-11-26 04:17:08.000000000 +0000
@@ -3396,6 +3396,15 @@
Reverse the result of comparison, so that lines with greater key values
appear earlier in the output instead of later.
address@hidden -R
address@hidden --random-sort
address@hidden -R
address@hidden --random-sort
address@hidden random sort
+
+Sort by random hash, i.e. perform a shuffle. This is done by hashing
+the input keys and sorting based on the results.
+
@end table
Other options are:
@@ -3529,6 +3538,11 @@
reliably handle arbitrary file names (even those containing blanks
or other special characters).
address@hidden --seed @var{tempdir}
address@hidden --seed
address@hidden specify seed for random hash
+Specify a seed for the @option{--random-sort} option.
+
@end table
Historical (BSD and System V) implementations of @command{sort} have
diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~' -x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps -x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h -x config.log -x config.status -x coreutils.info -x autom4te.cache -x 'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x '*.orig' -x '*.rej' -x version.texi coreutils-cvs-1-shred-isaac-split/src/isaac.h coreutils-cvs-2-sort-with-isaac/src/isaac.h
--- coreutils-cvs-1-shred-isaac-split/src/isaac.h 2005-10-24 09:49:19.000000000 +0100
+++ coreutils-cvs-2-sort-with-isaac/src/isaac.h 2005-11-26 05:48:28.000000000 +0000
@@ -1,5 +1,7 @@
-/* Size of the state tables to use. (You may change ISAAC_LOG) */
-#define ISAAC_LOG 8
+/* Size of the state tables to use. */
+/* (You may change ISAAC_LOG but it should be >= 3, and larger values
+ will slow down "sort --random") */
+#define ISAAC_LOG 3
#define ISAAC_WORDS (1 << ISAAC_LOG)
#define ISAAC_BYTES (ISAAC_WORDS * sizeof (uint32_t))
diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~' -x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps -x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h -x config.log -x config.status -x coreutils.info -x autom4te.cache -x 'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x '*.orig' -x '*.rej' -x version.texi coreutils-cvs-1-shred-isaac-split/src/Makefile.am coreutils-cvs-2-sort-with-isaac/src/Makefile.am
--- coreutils-cvs-1-shred-isaac-split/src/Makefile.am 2005-11-13 05:01:50.000000000 +0000
+++ coreutils-cvs-2-sort-with-isaac/src/Makefile.am 2005-11-26 04:06:01.000000000 +0000
@@ -69,7 +69,7 @@
vdir_LDADD = $(LDADD) $(LIB_CLOCK_GETTIME)
## If necessary, add -lm to resolve use of pow in lib/strtod.c.
-sort_LDADD = $(LDADD) $(POW_LIB)
+sort_LDADD = $(LDADD) $(POW_LIB) $(LIB_GETHRXTIME)
# for get_date and gettime
date_LDADD = $(LDADD) $(LIB_CLOCK_GETTIME)
@@ -167,6 +167,7 @@
chown_SOURCES = chown.c chown-core.c
chgrp_SOURCES = chgrp.c chown-core.c
shred_SOURCES = shred.c isaac.c
+sort_SOURCES = sort.c isaac.c
mv_SOURCES = mv.c copy.c cp-hash.c remove.c
rm_SOURCES = rm.c remove.c
diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~' -x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps -x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h -x config.log -x config.status -x coreutils.info -x autom4te.cache -x 'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x '*.orig' -x '*.rej' -x version.texi coreutils-cvs-1-shred-isaac-split/src/sort.c coreutils-cvs-2-sort-with-isaac/src/sort.c
--- coreutils-cvs-1-shred-isaac-split/src/sort.c 2005-10-07 19:48:28.000000000 +0100
+++ coreutils-cvs-2-sort-with-isaac/src/sort.c 2005-11-26 06:02:54.000000000 +0000
@@ -38,6 +38,8 @@
#include "strnumcmp.h"
#include "xmemcoll.h"
#include "xstrtol.h"
+#include "isaac.h"
+#include "md5.h"
#if HAVE_SYS_RESOURCE_H
# include
@@ -147,6 +149,7 @@
bool numeric; /* Flag for numeric comparison. Handle
strings of digits with optional decimal
point, but no exponential notation. */
+ bool random_hash; /* Shuffle by sorting on random hash of key */
bool general_numeric; /* Flag for general, numeric comparison.
Handle numbers in exponential notation. */
bool month; /* Flag for comparison by month name. */
@@ -303,6 +306,7 @@
-i, --ignore-nonprinting consider only printable characters\n\
-M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
-n, --numeric-sort compare according to string numerical value\n\
+ -R, --random-sort sort by random hash of keys\n\
-r, --reverse reverse the result of comparisons\n\
\n\
"), stdout);
@@ -325,6 +329,7 @@
"), DEFAULT_TMPDIR);
fputs (_("\
-z, --zero-terminated end lines with 0 byte, not newline\n\
+ --seed use specified seed for random number generator\n\
"), stdout);
fputs (HELP_OPTION_DESCRIPTION, stdout);
fputs (VERSION_OPTION_DESCRIPTION, stdout);
@@ -353,7 +358,11 @@
exit (status);
}
-static char const short_options[] = "-bcdfgik:mMno:rsS:t:T:uy:z";
+static char const short_options[] = "-bcdfgik:mMno:rRsS:t:T:uy:z";
+enum
+{
+ SEED_OPTION = CHAR_MAX + 1
+};
static struct option const long_options[] =
{
@@ -367,6 +376,7 @@
{"merge", no_argument, NULL, 'm'},
{"month-sort", no_argument, NULL, 'M'},
{"numeric-sort", no_argument, NULL, 'n'},
+ {"random-sort", no_argument, NULL, 'R'},
{"output", required_argument, NULL, 'o'},
{"reverse", no_argument, NULL, 'r'},
{"stable", no_argument, NULL, 's'},
@@ -375,6 +385,7 @@
{"temporary-directory", required_argument, NULL, 'T'},
{"unique", no_argument, NULL, 'u'},
{"zero-terminated", no_argument, NULL, 'z'},
+ {"seed", required_argument, NULL, SEED_OPTION},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{NULL, 0, NULL, 0},
@@ -1152,6 +1163,29 @@
return 0;
}
+struct isaac_state *rand_state;
+
+#define HASH_WORDS 1
+#define HASH_SIZE (HASH_WORDS*sizeof(uint32_t))
+
+static void
+get_hash(char* text, int len, uint32_t resbuf[/*HASH_WORDS*/])
+{
+ struct isaac_state *s = alloca(sizeof(struct isaac_state));
+ struct irand_state *r = alloca(sizeof(struct irand_state));
+ int i;
+ memcpy (s, rand_state, sizeof(struct isaac_state));
+ isaac_seed_data (s, text, len);
+ /* we can call isaac_seed_finish multiple times, but should only
+ call isaac_seed_start once */
+ isaac_seed_finish (s);
+ /* getting an irand_state and drawing random numbers would be more
+ kosher here, but also slower. so we just peek at the ISAAC state
+ array instead */
+ for (i=0; imm[ISAAC_WORDS-1-i];
+}
+
/* Compare two lines A and B trying every key in sequence until there
are no more keys or a difference is found. */
@@ -1188,6 +1222,14 @@
(texta, textb));
*lima = savea, *limb = saveb;
}
+ else if (key->random_hash)
+ {
+ char diga[HASH_SIZE];
+ char digb[HASH_SIZE];
+ get_hash(texta, lena, (uint32_t*) diga);
+ get_hash(textb, lenb, (uint32_t*) digb);
+ diff = memcmp(diga, digb, sizeof(diga));
+ }
else if (key->month)
diff = getmonth (texta, lena) - getmonth (textb, lenb);
/* Sorting like this may become slow, so in a simple locale the user
@@ -1989,6 +2031,7 @@
{
error (SORT_FAILURE, 0, _("%s: invalid field specification %s"),
_(msgid), quote (spec));
+ /* added to satisfy compiler for NORETURN: */
abort ();
}
@@ -2081,6 +2124,9 @@
case 'n':
key->numeric = true;
break;
+ case 'R':
+ key->random_hash = true;
+ break;
case 'r':
key->reverse = true;
break;
@@ -2109,6 +2155,8 @@
int c = 0;
bool checkonly = false;
bool mergeonly = false;
+ char *randseed = 0;
+ bool need_rand = false;
size_t nfiles = 0;
bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
bool obsolete_usage = (posix2_version () < 200112);
@@ -2187,7 +2235,7 @@
gkey.sword = gkey.eword = SIZE_MAX;
gkey.ignore = NULL;
gkey.translate = NULL;
- gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false;
+ gkey.numeric = gkey.general_numeric = gkey.random_hash = gkey.month = gkey.reverse = false;
gkey.skipsblanks = gkey.skipeblanks = false;
files = xnmalloc (argc, sizeof *files);
@@ -2263,6 +2311,7 @@
case 'M':
case 'n':
case 'r':
+ case 'R':
{
char str[2];
str[0] = c;
@@ -2404,6 +2453,10 @@
eolchar = 0;
break;
+ case SEED_OPTION:
+ randseed = strdupa(optarg);
+ break;
+
case_GETOPT_HELP_CHAR;
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
@@ -2413,26 +2466,44 @@
}
}
+ need_rand |= gkey.random_hash;
/* Inheritance of global options to individual keys. */
- for (key = keylist; key; key = key->next)
- if (! (key->ignore || key->translate
- || (key->skipsblanks | key->reverse
- | key->skipeblanks | key->month | key->numeric
- | key->general_numeric)))
- {
- key->ignore = gkey.ignore;
- key->translate = gkey.translate;
- key->skipsblanks = gkey.skipsblanks;
- key->skipeblanks = gkey.skipeblanks;
- key->month = gkey.month;
- key->numeric = gkey.numeric;
- key->general_numeric = gkey.general_numeric;
- key->reverse = gkey.reverse;
- }
+ for (key = keylist; key; key = key->next)
+ {
+ need_rand |= key->random_hash;
+ if (! (key->ignore || key->translate
+ || (key->skipsblanks | key->reverse
+ | key->skipeblanks | key->month | key->numeric
+ | key->general_numeric
+ | key->random_hash)))
+ {
+ key->ignore = gkey.ignore;
+ key->translate = gkey.translate;
+ key->skipsblanks = gkey.skipsblanks;
+ key->skipeblanks = gkey.skipeblanks;
+ key->month = gkey.month;
+ key->numeric = gkey.numeric;
+ key->general_numeric = gkey.general_numeric;
+ key->random_hash = gkey.random_hash;
+ key->reverse = gkey.reverse;
+ }
+ }
+
+ if (need_rand) {
+ rand_state = xmalloc (sizeof (struct isaac_state));
+ memset (rand_state, 0, sizeof (struct isaac_state));
+ if(randseed) {
+ isaac_seed_start (rand_state);
+ isaac_seed_data (rand_state, randseed, strlen(randseed));
+ isaac_seed_finish (rand_state);
+ } else
+ isaac_seed (rand_state);
+ }
if (!keylist && (gkey.ignore || gkey.translate
|| (gkey.skipsblanks | gkey.skipeblanks | gkey.month
- | gkey.numeric | gkey.general_numeric)))
+ | gkey.numeric | gkey.general_numeric
+ | gkey.random_hash)))
insertkey (&gkey);
reverse = gkey.reverse;