diff -ruN --exclude '*.in' --exclude configure --exclude sort.1 --exclude '*~' coreutils-5.2.1/ChangeLog coreutils-5.2.1-patch-3/ChangeLog
--- coreutils-5.2.1/ChangeLog 2004-03-12 19:03:44.000000000 +0000
+++ coreutils-5.2.1-patch-3/ChangeLog 2005-07-14 15:27:38.000000000 +0100
@@ -1,3 +1,17 @@
+2005-07-14 Frederik Eaton
+
+ Add --random, --seed options to 'sort' to get shuffle-like
+ functionality.
+
+ * src/sort.c: add functions init_salt, get_hash; 'init_salt' is
+ used to provide seeding, and 'get_hash' calculates the hash value
+ by which a key is sorted
+ * src/checksum.h: stuff to make it possible to switch between
+ different hash algorithms at runtime
+ * src/randseed.h:
+ * src/randseed.c: read a fixed-length random seed from entropy
+ devices. adapted from libgcrypt
+
2004-03-12 Jim Meyering
* Version 5.2.1.
diff -ruN --exclude '*.in' --exclude configure --exclude sort.1 --exclude '*~' coreutils-5.2.1/src/checksum.h coreutils-5.2.1-patch-3/src/checksum.h
--- coreutils-5.2.1/src/checksum.h 2003-04-11 10:07:21.000000000 +0100
+++ coreutils-5.2.1-patch-3/src/checksum.h 2005-07-14 15:27:38.000000000 +0100
@@ -1,8 +1,14 @@
+#ifndef _CHECKSUM_H
+#define _CHECKSUM_H 1
+
#include
#include
#include
+#include "sha1.h"
+#include "md5.h"
+
/* For long options that have no equivalent short option, use a
non-character as a pseudo short option, starting with CHAR_MAX + 1. */
enum
@@ -13,3 +19,39 @@
};
extern int algorithm;
+
+struct digest_ops {
+ int ctx_size;
+ int bits;
+ void (*init_ctx) (void *ctx);
+ void (*process_bytes) (const void *buffer, size_t len, void *ctx);
+ void *(*finish_ctx) (void *ctx, void *resbuf);
+ void *(*read_ctx) (void *ctx, void *resbuf);
+};
+
+static inline
+void get_digest_ops(int alg, struct digest_ops *res) {
+ switch(alg) {
+ case ALG_MD5:
+ res->ctx_size = sizeof(struct md5_ctx);
+ res->bits = 128;
+ res->init_ctx = (void (*) (void *))md5_init_ctx;
+ res->process_bytes = (void (*) (const void *, size_t, void *))md5_process_bytes;
+ res->finish_ctx = (void *(*) (void *, void *))md5_finish_ctx;
+ res->read_ctx = (void *(*) (void *, void *))md5_read_ctx;
+ break;
+ case ALG_SHA1:
+ res->ctx_size = sizeof(struct sha_ctx);
+ res->bits = 160;
+ res->init_ctx = (void (*) (void *))sha_init_ctx;
+ res->process_bytes = (void (*) (const void *, size_t, void *))sha_process_bytes;
+ res->finish_ctx = (void *(*) (void *, void *))sha_finish_ctx;
+ res->read_ctx = (void *(*) (void *, void *))sha_read_ctx;
+ break;
+ default:
+ /* abort? */
+ break;
+ }
+}
+
+#endif
diff -ruN --exclude '*.in' --exclude configure --exclude sort.1 --exclude '*~' coreutils-5.2.1/src/Makefile.am coreutils-5.2.1-patch-3/src/Makefile.am
--- coreutils-5.2.1/src/Makefile.am 2004-02-02 08:12:57.000000000 +0000
+++ coreutils-5.2.1-patch-3/src/Makefile.am 2005-07-14 15:45:22.000000000 +0100
@@ -147,6 +147,8 @@
md5sum_SOURCES = md5sum.c md5.c
sha1sum_SOURCES = md5sum.c sha1sum.c
+sort_SOURCES = sort.c md5.c randseed.c
+
editpl = sed -e 's,@''PERL''@,$(PERL),g'
localedir = $(datadir)/locale
diff -ruN --exclude '*.in' --exclude configure --exclude sort.1 --exclude '*~' coreutils-5.2.1/src/randseed.c coreutils-5.2.1-patch-3/src/randseed.c
--- coreutils-5.2.1/src/randseed.c 1970-01-01 01:00:00.000000000 +0100
+++ coreutils-5.2.1-patch-3/src/randseed.c 2005-07-14 16:01:50.000000000 +0100
@@ -0,0 +1,393 @@
+/* randseed.c
+
+Code for portably reading a random seed from entropy devices. Mostly
+adapted from libgcrypt.
+
+June 2005 Frederik Eaton
+*/
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "checksum.h"
+
+// XXX put these in autoconf
+#define NAME_OF_DEV_RANDOM "/dev/random"
+#define NAME_OF_DEV_URANDOM "/dev/urandom"
+#define USE_RNDLINUX 1
+#define USE_RNDEGD 1
+
+#define SORT_FAILURE 2
+#define FAIL_CODE SORT_FAILURE
+
+static int gather_random(struct digest_ops *ops, void *ctx, int level);
+
+#if USE_RNDLINUX
+static int linux_gather_random (struct digest_ops *ops, void *ctx, int level);
+#endif
+
+#if USE_RNDEGD
+static int egd_gather_random (struct digest_ops *ops, void *ctx, int level);
+static int egd_connect_socket (int nofail);
+#endif
+
+int
+string_seed (struct digest_ops *ops, void *ctx, const char *str)
+{
+ int len = strlen(str);
+ ops->process_bytes(str, len, ctx);
+ return (len*2) >= ops->bits;
+}
+
+int
+default_seed (struct digest_ops *ops, void *ctx)
+{
+ gather_random(ops,ctx,1);
+}
+
+static int
+gather_random (struct digest_ops *ops, void *ctx, int level)
+{
+#if USE_RNDLINUX
+ if ( !access (NAME_OF_DEV_RANDOM, R_OK)
+ && !access (NAME_OF_DEV_URANDOM, R_OK))
+ {
+ return linux_gather_random(ops, ctx, level);
+ }
+#endif
+
+#if USE_RNDEGD
+ if ( egd_connect_socket (1) != -1 )
+ {
+ return egd_gather_random(ops, ctx, level);
+ }
+#endif
+
+ {
+ unsigned long l;
+ l = time(NULL);
+ ops->process_bytes(&l, sizeof(l), ctx);
+ l = getpid();;
+ ops->process_bytes(&l, sizeof(l), ctx);
+ l = getuid();
+ ops->process_bytes(&l, sizeof(l), ctx);
+ }
+
+ return 0;
+}
+
+//----------------------------------------------------------------
+
+int
+open_dev (char *name)
+{
+ int fd;
+ fd = open( name, O_RDONLY );
+ if( fd == -1 )
+ error (FAIL_CODE, errno, "can't open %s\n", name);
+ return fd;
+}
+
+#if USE_RNDLINUX
+static int
+linux_gather_random (struct digest_ops *ops, void *ctx, int level)
+{
+ static int fd_urandom = -1;
+ static int fd_random = -1;
+ int fd;
+ int n;
+ int length = ops->bits/8;
+ int warn=0;
+ char buffer[768];
+
+ if( level >= 2 )
+ {
+ if( fd_random == -1 )
+ fd_random = open_dev( NAME_OF_DEV_RANDOM );
+ fd = fd_random;
+ }
+ else
+ {
+ if( fd_urandom == -1 )
+ fd_urandom = open_dev( NAME_OF_DEV_URANDOM );
+ fd = fd_urandom;
+ }
+
+ while (length)
+ {
+ fd_set rfds;
+ struct timeval tv;
+ int rc;
+
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+ tv.tv_sec = 0;
+ tv.tv_usec = 20*1000;
+ if( !(rc=select(fd+1, &rfds, NULL, NULL, &tv)) )
+ {
+ if( !warn )
+ {
+ error (0,0,"need_entropy");
+ warn = 1;
+ }
+ continue;
+ }
+ else if( rc == -1 )
+ {
+ error (0, errno, "select()\n");
+ continue;
+ }
+
+ do
+ {
+ int nbytes = length < sizeof(buffer)? length : sizeof(buffer);
+ n = read(fd, buffer, nbytes );
+ if( n >= 0 && n > nbytes )
+ {
+ error(0,0,"bogus read from random device (n=%d)\n", n );
+ n = nbytes;
+ }
+ }
+ while( n == -1 && errno == EINTR );
+ if( n == -1 )
+ error(FAIL_CODE, errno, "read error on random device: %s\n");
+ ops->process_bytes (buffer, n, ctx);
+ length -= n;
+ }
+ memset(buffer, 0, sizeof(buffer));
+
+ return 0; /* success */
+}
+#endif
+
+//----------------------------------------------------------------
+
+#if USE_RNDEGD
+
+#ifndef offsetof
+#define offsetof(type, member) ((size_t) &((type *)0)->member)
+#endif
+
+static int egd_socket = -1;
+
+/* Allocate a new filename from FIRST_PART and SECOND_PART and to
+ tilde expansion for first_part. SECOND_PART might be NULL.
+ */
+static char *
+my_make_filename (const char *first_part, const char *second_part)
+{
+ size_t n;
+ char *name, *home, *p;
+
+ n = strlen(first_part)+1;
+ if (second_part)
+ n += strlen (second_part) + 1;
+
+ home = NULL;
+ if( *first_part == '~' && first_part[1] == '/'
+ && (home = (char*)getenv("HOME")) && *home )
+ n += strlen(home);
+
+ name = (char*)xmalloc(n);
+ p = (char*)(home
+ ? stpcpy (stpcpy (name, home), first_part+1 )
+ : stpcpy (name, first_part) );
+
+ if (second_part)
+ strcpy ((char*)stpcpy(p,"/"), second_part);
+
+ return name;
+}
+
+
+static int
+do_write ( int fd, void *buf, size_t nbytes )
+{
+ size_t nleft = nbytes;
+ int nwritten;
+
+ while( nleft > 0 )
+ {
+ nwritten = write( fd, buf, nleft);
+ if( nwritten < 0 )
+ {
+ if( errno == EINTR )
+ continue;
+ return -1;
+ }
+ nleft -= nwritten;
+ buf = (char*)buf + nwritten;
+ }
+ return 0;
+}
+
+static int
+do_read ( int fd, void *buf, size_t nbytes )
+{
+ int n, nread = 0;
+
+ do
+ {
+ do
+ {
+ n = read(fd, (char*)buf + nread, nbytes );
+ }
+ while( n == -1 && errno == EINTR );
+ if( n == -1)
+ return nread? nread:-1;
+ if( n == 0)
+ return -1;
+ nread += n;
+ nbytes -= n;
+ }
+ while( nread < nbytes );
+ return nread;
+}
+
+/* Connect to the EGD and return the file descriptor. Return -1 on
+ error. With NOFAIL set to true, silently fail and return the
+ error, otherwise print an error message and die. */
+static int
+egd_connect_socket (int nofail)
+{
+ int fd;
+ const char *bname = NULL;
+ char *name;
+ struct sockaddr_un addr;
+ int addr_len;
+
+ if (egd_socket != -1)
+ {
+ close (egd_socket);
+ egd_socket = -1;
+ }
+
+#ifdef EGD_SOCKET_NAME
+ bname = EGD_SOCKET_NAME;
+#endif
+ if ( !bname || !*bname )
+ name = my_make_filename ("~/.gnupg", "entropy");
+ else
+ name = my_make_filename (bname, NULL);
+
+ if (strlen(name)+1 >= sizeof addr.sun_path)
+ error (FAIL_CODE, 0, "EGD socketname is too long\n");
+
+ memset( &addr, 0, sizeof addr );
+ addr.sun_family = AF_UNIX;
+ strcpy( addr.sun_path, name );
+ addr_len = (offsetof( struct sockaddr_un, sun_path )
+ + strlen( addr.sun_path ));
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (fd == -1 && !nofail)
+ error (FAIL_CODE, errno, "can't create unix domain socket\n");
+ else if (connect (fd, (struct sockaddr*)&addr, addr_len) == -1)
+ {
+ if (!nofail)
+ error (FAIL_CODE, errno, "can't connect to EGD socket `%s'\n");
+ close (fd);
+ fd = -1;
+ }
+ free(name);
+ if (fd != -1)
+ egd_socket = fd;
+ return fd;
+}
+
+/****************
+ * Note: We always use the highest level.
+ * To boost the performance we may want to add some
+ * additional code for level 1
+ *
+ * Using a level of 0 should never block and better add nothing
+ * to the pool. So this is just a dummy for EGD.
+ */
+static int
+egd_gather_random (struct digest_ops *ops, void *ctx, int level)
+{
+ int fd = egd_socket;
+ int n;
+ char buffer[256+2];
+ int nbytes;
+ int do_restart = 0;
+ int length = ops->bits/8;
+
+ if( !length )
+ return 0;
+ if( !level )
+ return 0;
+
+ restart:
+ if (fd == -1 || do_restart)
+ fd = egd_connect_socket (0);
+
+ do_restart = 0;
+
+ nbytes = length < 255? length : 255;
+ /* First time we do it with a non blocking request */
+ buffer[0] = 1; /* non blocking */
+ buffer[1] = nbytes;
+ if( do_write( fd, buffer, 2 ) == -1 )
+ error(FAIL_CODE, errno, "can't write to the EGD: %s\n");
+ n = do_read( fd, buffer, 1 );
+ if( n == -1 )
+ {
+ error (FAIL_CODE, errno, "read error on EGD\n");
+ do_restart = 1;
+ goto restart;
+ }
+ n = buffer[0];
+ if( n )
+ {
+ n = do_read( fd, buffer, n );
+ if( n == -1 )
+ {
+ error (FAIL_CODE, errno, "read error on EGD\n");
+ do_restart = 1;
+ goto restart;
+ }
+ ops->process_bytes (buffer, n, ctx);
+ length -= n;
+ }
+
+ if( length )
+ {
+ fprintf (stderr,
+ "Please wait, entropy is being gathered. Do some work if it would\n"
+ "keep you from getting bored, because it will improve the quality\n"
+ "of the entropy.\n");
+ }
+ while( length )
+ {
+ nbytes = length < 255? length : 255;
+
+ buffer[0] = 2; /* blocking */
+ buffer[1] = nbytes;
+ if( do_write( fd, buffer, 2 ) == -1 )
+ error (FAIL_CODE, errno, "can't write to the EGD\n");
+ n = do_read( fd, buffer, nbytes );
+ if( n == -1 )
+ {
+ error (FAIL_CODE, errno, "read error on EGD\n");
+ do_restart = 1;
+ goto restart;
+ }
+ ops->process_bytes (buffer, n, ctx);
+ length -= n;
+ }
+ memset(buffer, 0, sizeof(buffer) );
+
+ return 0; /* success */
+}
+
+#endif
diff -ruN --exclude '*.in' --exclude configure --exclude sort.1 --exclude '*~' coreutils-5.2.1/src/randseed.h coreutils-5.2.1-patch-3/src/randseed.h
--- coreutils-5.2.1/src/randseed.h 1970-01-01 01:00:00.000000000 +0100
+++ coreutils-5.2.1-patch-3/src/randseed.h 2005-07-14 15:59:59.000000000 +0100
@@ -0,0 +1,4 @@
+#include "checksum.h"
+
+int string_seed(struct digest_ops *ops, void *ctx, char *str);
+int default_seed(struct digest_ops *ops, void *ctx);
diff -ruN --exclude '*.in' --exclude configure --exclude sort.1 --exclude '*~' coreutils-5.2.1/src/sort.c coreutils-5.2.1-patch-3/src/sort.c
--- coreutils-5.2.1/src/sort.c 2004-02-17 10:47:35.000000000 +0000
+++ coreutils-5.2.1-patch-3/src/sort.c 2005-07-14 16:02:03.000000000 +0100
@@ -37,6 +37,9 @@
#include "stdio-safer.h"
#include "xmemcoll.h"
#include "xstrtol.h"
+#include "checksum.h"
+#include "randseed.h"
+#include "md5.h"
#if HAVE_SYS_RESOURCE_H
# include
@@ -153,6 +156,7 @@
bool numeric; /* Flag for numeric comparison. Handle
strings of digits with optional decimal
point, but no exponential notation. */
+ bool random_hash; /* Shuffle by sorting on random hash of key */
bool general_numeric; /* Flag for general, numeric comparison.
Handle numbers in exponential notation. */
bool month; /* Flag for comparison by month name. */
@@ -296,6 +300,7 @@
-i, --ignore-nonprinting consider only printable characters\n\
-M, --month-sort compare (unknown) < `JAN' < ... < `DEC'\n\
-n, --numeric-sort compare according to string numerical value\n\
+ -R, --random sort by random hash of keys\n\
-r, --reverse reverse the result of comparisons\n\
\n\
"), stdout);
@@ -318,6 +323,7 @@
"), DEFAULT_TMPDIR);
fputs (_("\
-z, --zero-terminated end lines with 0 byte, not newline\n\
+ --seed use specified seed for random number generator\n\
"), stdout);
fputs (HELP_OPTION_DESCRIPTION, stdout);
fputs (VERSION_OPTION_DESCRIPTION, stdout);
@@ -346,7 +352,11 @@
exit (status);
}
-#define COMMON_SHORT_OPTIONS "-bcdfgik:mMno:rsS:t:T:uz"
+#define COMMON_SHORT_OPTIONS "-bcdfgik:mMnRo:rsS:t:T:uz"
+enum
+{
+ SEED_OPTION = CHAR_MAX + 1
+};
static struct option const long_options[] =
{
@@ -360,6 +370,7 @@
{"merge", no_argument, NULL, 'm'},
{"month-sort", no_argument, NULL, 'M'},
{"numeric-sort", no_argument, NULL, 'n'},
+ {"random", no_argument, NULL, 'R'},
{"output", required_argument, NULL, 'o'},
{"reverse", no_argument, NULL, 'r'},
{"stable", no_argument, NULL, 's'},
@@ -368,6 +379,7 @@
{"temporary-directory", required_argument, NULL, 'T'},
{"unique", no_argument, NULL, 'u'},
{"zero-terminated", no_argument, NULL, 'z'},
+ {"seed", required_argument, NULL, SEED_OPTION},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{0, 0, 0, 0},
@@ -1332,6 +1344,37 @@
return result;
}
+/* ---- stuff for --random ---- */
+
+/* operations specifying the hash function for random sorting
+ ("digest"), see checksum.h */
+struct digest_ops digops;
+/* a "context" which holds the state of the hash function after
+ processing the random seed */
+void *salt_ctx;
+
+/* allocate and initialize salt_ctx */
+static void
+init_salt ()
+{
+ get_digest_ops(ALG_MD5, &digops);
+ salt_ctx = xmalloc(digops.ctx_size);
+ digops.init_ctx(salt_ctx);
+}
+
+/* compute the hash value of a key. the result has a fixed size
+ specified by digops.bits */
+static void
+get_hash (char* text, int len, void *resbuf)
+{
+ void *ctx = alloca(digops.ctx_size);
+ memcpy(ctx, salt_ctx, digops.ctx_size);
+ digops.process_bytes(text, len, ctx);
+ digops.finish_ctx(ctx,resbuf);
+}
+
+/* ---------------------------------------------------------------- */
+
/* Compare two lines A and B trying every key in sequence until there
are no more keys or a difference is found. */
@@ -1374,6 +1417,15 @@
(texta, textb));
*lima = savea, *limb = saveb;
}
+ else if (key->random_hash)
+ {
+ int dig_bytes = digops.bits/8;
+ char diga[dig_bytes];
+ char digb[dig_bytes];
+ get_hash(texta, lena, diga);
+ get_hash(textb, lenb, digb);
+ diff = memcmp(diga, digb, sizeof(diga));
+ }
else if (key->month)
diff = getmonth (texta, lena) - getmonth (textb, lenb);
/* Sorting like this may become slow, so in a simple locale the user
@@ -2083,7 +2135,7 @@
{
error (SORT_FAILURE, 0, _("%s: invalid field specification `%s'"),
_(msgid), spec);
- abort ();
+ abort (); // XXX is this ever reached? need comment if it is
}
/* Parse the leading integer in STRING and store the resulting value
@@ -2189,6 +2241,9 @@
case 'n':
key->numeric = true;
break;
+ case 'R':
+ key->random_hash = true;
+ break;
case 'r':
key->reverse = true;
break;
@@ -2217,6 +2272,8 @@
int c = 0;
bool checkonly = false;
bool mergeonly = false;
+ char *randseed = 0;
+ bool need_rand = false;
int nfiles = 0;
bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL);
bool obsolete_usage = (posix2_version () < 200112);
@@ -2300,7 +2357,7 @@
gkey.sword = gkey.eword = SIZE_MAX;
gkey.ignore = NULL;
gkey.translate = NULL;
- gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false;
+ gkey.numeric = gkey.general_numeric = gkey.random_hash = gkey.month = gkey.reverse = false;
gkey.skipsblanks = gkey.skipeblanks = false;
files = xnmalloc (argc, sizeof *files);
@@ -2376,6 +2433,7 @@
case 'M':
case 'n':
case 'r':
+ case 'R':
{
char str[2];
str[0] = c;
@@ -2503,6 +2561,10 @@
eolchar = 0;
break;
+ case SEED_OPTION:
+ randseed = strdupa(optarg);
+ break;
+
case_GETOPT_HELP_CHAR;
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
@@ -2512,26 +2574,41 @@
}
}
+ need_rand |= gkey.random_hash;
/* Inheritance of global options to individual keys. */
- for (key = keylist; key; key = key->next)
- if (! (key->ignore || key->translate
- || (key->skipsblanks | key->reverse
- | key->skipeblanks | key->month | key->numeric
- | key->general_numeric)))
- {
- key->ignore = gkey.ignore;
- key->translate = gkey.translate;
- key->skipsblanks = gkey.skipsblanks;
- key->skipeblanks = gkey.skipeblanks;
- key->month = gkey.month;
- key->numeric = gkey.numeric;
- key->general_numeric = gkey.general_numeric;
- key->reverse = gkey.reverse;
- }
+ for (key = keylist; key; key = key->next)
+ {
+ need_rand |= key->random_hash;
+ if (! (key->ignore || key->translate
+ || (key->skipsblanks | key->reverse
+ | key->skipeblanks | key->month | key->numeric
+ | key->general_numeric
+ | key->random_hash)))
+ {
+ key->ignore = gkey.ignore;
+ key->translate = gkey.translate;
+ key->skipsblanks = gkey.skipsblanks;
+ key->skipeblanks = gkey.skipeblanks;
+ key->month = gkey.month;
+ key->numeric = gkey.numeric;
+ key->general_numeric = gkey.general_numeric;
+ key->random_hash = gkey.random_hash;
+ key->reverse = gkey.reverse;
+ }
+ }
+
+ if (need_rand) {
+ init_salt();
+ if(randseed)
+ string_seed(&digops, salt_ctx, randseed);
+ else
+ default_seed(&digops, salt_ctx);
+ }
if (!keylist && (gkey.ignore || gkey.translate
|| (gkey.skipsblanks | gkey.skipeblanks | gkey.month
- | gkey.numeric | gkey.general_numeric)))
+ | gkey.numeric | gkey.general_numeric
+ | gkey.random_hash)))
insertkey (&gkey);
reverse = gkey.reverse;