diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~' -x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps -x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h -x config.log -x config.status -x coreutils.info -x autom4te.cache -x 'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x '*.orig' -x '*.rej' -x version.texi coreutils-cvs-1-shred-isaac-split/ChangeLog coreutils-cvs-2-sort-with-isaac/ChangeLog --- coreutils-cvs-1-shred-isaac-split/ChangeLog 2005-11-24 20:10:35.000000000 +0000 +++ coreutils-cvs-2-sort-with-isaac/ChangeLog 2005-11-26 04:34:39.000000000 +0000 @@ -1,3 +1,19 @@ +2005-11-25 Frederik Eaton + + * src/shred.c: + * src/isaac.h: + * src/isaac.c: transfer ISAAC code (for cryptographic random + number generation) from shred.c to new files isaac.c, isaac.h + + * src/Makefile.am (shred_SOURCES): isaac.c is now a source of shred + + * src/Makefile.am (sort_SOURCES, sort_LDADD): changes to reflect + that sort uses ISAAC + + * src/sort.c (short_options, long_options, rand_state, HASH_SIZE, + HASH_WORDS, get_hash, keycompare, main): add options --random-sort + and --seed to implement a random shuffle + 2005-11-24 Jim Meyering * Version 6.0-cvs. diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~' -x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps -x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h -x config.log -x config.status -x coreutils.info -x autom4te.cache -x 'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x '*.orig' -x '*.rej' -x version.texi coreutils-cvs-1-shred-isaac-split/doc/coreutils.texi coreutils-cvs-2-sort-with-isaac/doc/coreutils.texi --- coreutils-cvs-1-shred-isaac-split/doc/coreutils.texi 2005-11-24 20:10:37.000000000 +0000 +++ coreutils-cvs-2-sort-with-isaac/doc/coreutils.texi 2005-11-26 04:17:08.000000000 +0000 @@ -3396,6 +3396,15 @@ Reverse the result of comparison, so that lines with greater key values appear earlier in the output instead of later. address@hidden -R address@hidden --random-sort address@hidden -R address@hidden --random-sort address@hidden random sort + +Sort by random hash, i.e. perform a shuffle. This is done by hashing +the input keys and sorting based on the results. + @end table Other options are: @@ -3529,6 +3538,11 @@ reliably handle arbitrary file names (even those containing blanks or other special characters). address@hidden --seed @var{tempdir} address@hidden --seed address@hidden specify seed for random hash +Specify a seed for the @option{--random-sort} option. + @end table Historical (BSD and System V) implementations of @command{sort} have diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~' -x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps -x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h -x config.log -x config.status -x coreutils.info -x autom4te.cache -x 'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x '*.orig' -x '*.rej' -x version.texi coreutils-cvs-1-shred-isaac-split/src/isaac.h coreutils-cvs-2-sort-with-isaac/src/isaac.h --- coreutils-cvs-1-shred-isaac-split/src/isaac.h 2005-10-24 09:49:19.000000000 +0100 +++ coreutils-cvs-2-sort-with-isaac/src/isaac.h 2005-11-26 05:48:28.000000000 +0000 @@ -1,5 +1,7 @@ -/* Size of the state tables to use. (You may change ISAAC_LOG) */ -#define ISAAC_LOG 8 +/* Size of the state tables to use. */ +/* (You may change ISAAC_LOG but it should be >= 3, and larger values + will slow down "sort --random") */ +#define ISAAC_LOG 3 #define ISAAC_WORDS (1 << ISAAC_LOG) #define ISAAC_BYTES (ISAAC_WORDS * sizeof (uint32_t)) diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~' -x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps -x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h -x config.log -x config.status -x coreutils.info -x autom4te.cache -x 'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x '*.orig' -x '*.rej' -x version.texi coreutils-cvs-1-shred-isaac-split/src/Makefile.am coreutils-cvs-2-sort-with-isaac/src/Makefile.am --- coreutils-cvs-1-shred-isaac-split/src/Makefile.am 2005-11-13 05:01:50.000000000 +0000 +++ coreutils-cvs-2-sort-with-isaac/src/Makefile.am 2005-11-26 04:06:01.000000000 +0000 @@ -69,7 +69,7 @@ vdir_LDADD = $(LDADD) $(LIB_CLOCK_GETTIME) ## If necessary, add -lm to resolve use of pow in lib/strtod.c. -sort_LDADD = $(LDADD) $(POW_LIB) +sort_LDADD = $(LDADD) $(POW_LIB) $(LIB_GETHRXTIME) # for get_date and gettime date_LDADD = $(LDADD) $(LIB_CLOCK_GETTIME) @@ -167,6 +167,7 @@ chown_SOURCES = chown.c chown-core.c chgrp_SOURCES = chgrp.c chown-core.c shred_SOURCES = shred.c isaac.c +sort_SOURCES = sort.c isaac.c mv_SOURCES = mv.c copy.c cp-hash.c remove.c rm_SOURCES = rm.c remove.c diff -N -ur -x '*.in' -x Makefile -x configure -x dircolors.h -x groups -x '*~' -x wheel.h -x 'localedir.*' -x 'stamp-*' -x wheel-size.h -x alloca.h -x .deps -x charset.alias -x getdate.c -x ref-add.sed -x ref-del.sed -x po -x config.h -x config.log -x config.status -x coreutils.info -x autom4te.cache -x 'output.*' -x confdefs.h -x '.#*' -x CVS -x tests -x '*.1' -x config.hin -x '*.orig' -x '*.rej' -x version.texi coreutils-cvs-1-shred-isaac-split/src/sort.c coreutils-cvs-2-sort-with-isaac/src/sort.c --- coreutils-cvs-1-shred-isaac-split/src/sort.c 2005-10-07 19:48:28.000000000 +0100 +++ coreutils-cvs-2-sort-with-isaac/src/sort.c 2005-11-26 07:19:31.000000000 +0000 @@ -38,6 +38,8 @@ #include "strnumcmp.h" #include "xmemcoll.h" #include "xstrtol.h" +#include "isaac.h" +#include "md5.h" #if HAVE_SYS_RESOURCE_H # include @@ -147,6 +149,7 @@ bool numeric; /* Flag for numeric comparison. Handle strings of digits with optional decimal point, but no exponential notation. */ + bool random_hash; /* Shuffle by sorting on random hash of key */ bool general_numeric; /* Flag for general, numeric comparison. Handle numbers in exponential notation. */ bool month; /* Flag for comparison by month name. */ @@ -303,6 +306,7 @@ -i, --ignore-nonprinting consider only printable characters\n\ -M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\ -n, --numeric-sort compare according to string numerical value\n\ + -R, --random-sort sort by random hash of keys\n\ -r, --reverse reverse the result of comparisons\n\ \n\ "), stdout); @@ -325,6 +329,7 @@ "), DEFAULT_TMPDIR); fputs (_("\ -z, --zero-terminated end lines with 0 byte, not newline\n\ + --seed use specified seed for random number generator\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); @@ -353,7 +358,11 @@ exit (status); } -static char const short_options[] = "-bcdfgik:mMno:rsS:t:T:uy:z"; +static char const short_options[] = "-bcdfgik:mMno:rRsS:t:T:uy:z"; +enum +{ + SEED_OPTION = CHAR_MAX + 1 +}; static struct option const long_options[] = { @@ -367,6 +376,7 @@ {"merge", no_argument, NULL, 'm'}, {"month-sort", no_argument, NULL, 'M'}, {"numeric-sort", no_argument, NULL, 'n'}, + {"random-sort", no_argument, NULL, 'R'}, {"output", required_argument, NULL, 'o'}, {"reverse", no_argument, NULL, 'r'}, {"stable", no_argument, NULL, 's'}, @@ -375,6 +385,7 @@ {"temporary-directory", required_argument, NULL, 'T'}, {"unique", no_argument, NULL, 'u'}, {"zero-terminated", no_argument, NULL, 'z'}, + {"seed", required_argument, NULL, SEED_OPTION}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0}, @@ -1152,6 +1163,29 @@ return 0; } +struct isaac_state *rand_state; + +#define HASH_WORDS 1 +#define HASH_SIZE (HASH_WORDS * sizeof (uint32_t)) + +static void +get_hash (char* text, int len, uint32_t resbuf[/*HASH_WORDS*/]) +{ + struct isaac_state *s = alloca (sizeof (struct isaac_state)); + struct irand_state *r = alloca (sizeof (struct irand_state)); + int i; + memcpy (s, rand_state, sizeof (struct isaac_state)); + isaac_seed_data (s, text, len); + /* we can call isaac_seed_finish multiple times, but should only + call isaac_seed_start once */ + isaac_seed_finish (s); + /* getting an irand_state and drawing random numbers would be more + kosher here, but also slower. so we just peek at the ISAAC state + array instead */ + for (i = 0; i < HASH_WORDS; i++) + resbuf[i] = s->mm[ISAAC_WORDS - 1 - i]; +} + /* Compare two lines A and B trying every key in sequence until there are no more keys or a difference is found. */ @@ -1188,6 +1222,14 @@ (texta, textb)); *lima = savea, *limb = saveb; } + else if (key->random_hash) + { + char diga[HASH_SIZE]; + char digb[HASH_SIZE]; + get_hash (texta, lena, (uint32_t*) diga); + get_hash (textb, lenb, (uint32_t*) digb); + diff = memcmp (diga, digb, sizeof (diga)); + } else if (key->month) diff = getmonth (texta, lena) - getmonth (textb, lenb); /* Sorting like this may become slow, so in a simple locale the user @@ -1989,6 +2031,7 @@ { error (SORT_FAILURE, 0, _("%s: invalid field specification %s"), _(msgid), quote (spec)); + /* added to satisfy compiler for NORETURN: */ abort (); } @@ -2081,6 +2124,9 @@ case 'n': key->numeric = true; break; + case 'R': + key->random_hash = true; + break; case 'r': key->reverse = true; break; @@ -2109,6 +2155,8 @@ int c = 0; bool checkonly = false; bool mergeonly = false; + char *randseed = 0; + bool need_rand = false; size_t nfiles = 0; bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL); bool obsolete_usage = (posix2_version () < 200112); @@ -2187,7 +2235,7 @@ gkey.sword = gkey.eword = SIZE_MAX; gkey.ignore = NULL; gkey.translate = NULL; - gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false; + gkey.numeric = gkey.general_numeric = gkey.random_hash = gkey.month = gkey.reverse = false; gkey.skipsblanks = gkey.skipeblanks = false; files = xnmalloc (argc, sizeof *files); @@ -2263,6 +2311,7 @@ case 'M': case 'n': case 'r': + case 'R': { char str[2]; str[0] = c; @@ -2404,6 +2453,10 @@ eolchar = 0; break; + case SEED_OPTION: + randseed = strdupa (optarg); + break; + case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); @@ -2413,26 +2466,44 @@ } } + need_rand |= gkey.random_hash; /* Inheritance of global options to individual keys. */ - for (key = keylist; key; key = key->next) - if (! (key->ignore || key->translate - || (key->skipsblanks | key->reverse - | key->skipeblanks | key->month | key->numeric - | key->general_numeric))) - { - key->ignore = gkey.ignore; - key->translate = gkey.translate; - key->skipsblanks = gkey.skipsblanks; - key->skipeblanks = gkey.skipeblanks; - key->month = gkey.month; - key->numeric = gkey.numeric; - key->general_numeric = gkey.general_numeric; - key->reverse = gkey.reverse; - } + for (key = keylist; key; key = key->next) + { + need_rand |= key->random_hash; + if (! (key->ignore || key->translate + || (key->skipsblanks | key->reverse + | key->skipeblanks | key->month | key->numeric + | key->general_numeric + | key->random_hash))) + { + key->ignore = gkey.ignore; + key->translate = gkey.translate; + key->skipsblanks = gkey.skipsblanks; + key->skipeblanks = gkey.skipeblanks; + key->month = gkey.month; + key->numeric = gkey.numeric; + key->general_numeric = gkey.general_numeric; + key->random_hash = gkey.random_hash; + key->reverse = gkey.reverse; + } + } + + if (need_rand) { + rand_state = xmalloc (sizeof (struct isaac_state)); + memset (rand_state, 0, sizeof (struct isaac_state)); + if(randseed) { + isaac_seed_start (rand_state); + isaac_seed_data (rand_state, randseed, strlen (randseed)); + isaac_seed_finish (rand_state); + } else + isaac_seed (rand_state); + } if (!keylist && (gkey.ignore || gkey.translate || (gkey.skipsblanks | gkey.skipeblanks | gkey.month - | gkey.numeric | gkey.general_numeric))) + | gkey.numeric | gkey.general_numeric + | gkey.random_hash))) insertkey (&gkey); reverse = gkey.reverse;