Add a -samecontent test to find: true for regular files whose content
is identical to that of a reference file.

Reviewer notes on this revision of the patch:
 * Line structure was rebuilt from a whitespace-collapsed copy, so the
   spacing of context lines is a best guess; apply with "patch -l".
 * The whitespace-only defs.h hunk touching "explicit_depth" was dropped
   because its content could not be recovered.
 * cmp.c: comparison buffers now leave room for the sentinel and the
   word-sized reads of block_compare; a length mismatch while reading
   counts as a difference instead of tripping an assertion; lseek
   failures are reported; system headers are listed explicitly.
 * pred.c: open failures are reported instead of being handed to cmp.

diff -Nur findutils/find/cmp.c findutils-rreale/find/cmp.c
--- findutils/find/cmp.c	1970-01-01 01:00:00.000000000 +0100
+++ findutils-rreale/find/cmp.c	2010-04-05 22:29:19.000000000 +0200
@@ -0,0 +1,244 @@
+/* Buffer primitives for comparison operations.  Adapted from the
+   following files:
+
+   - src/cmp.c in GNU diffutils 2.9
+
+     Copyright (C) 1990-1996, 1998, 2001-2002, 2004, 2006-2007, 2009-2010 Free
+     Software Foundation, Inc.
+
+   - lib/cmpbuf.c in GNU diffutils 2.9
+
+     Copyright (C) 1993, 1995, 1998, 2001-2002, 2006, 2009-2010 Free Software
+     Foundation, Inc.  */
+
+#define LARGE_BLOCK_SIZE 4096
+
+#include <config.h>
+
+#include "defs.h"
+#include <errno.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include "xalloc.h"
+#include "error.h"
+
+#include <signal.h>
+#ifndef SA_RESTART
+# ifdef SA_INTERRUPT /* e.g. SunOS 4.1.x */
+#  define SA_RESTART SA_INTERRUPT
+# else
+#  define SA_RESTART 0
+# endif
+#endif
+
+#ifndef PTRDIFF_MAX
+# define PTRDIFF_MAX TYPE_MAXIMUM (ptrdiff_t)
+#endif
+#ifndef SIZE_MAX
+# define SIZE_MAX TYPE_MAXIMUM (size_t)
+#endif
+#ifndef SSIZE_MAX
+# define SSIZE_MAX TYPE_MAXIMUM (ssize_t)
+#endif
+
+#ifndef word
+# define word uintmax_t
+#endif
+
+#undef MIN
+#define MIN(a, b) ((a) <= (b) ? (a) : (b))
+
+/* Read NBYTES bytes from descriptor FD into BUF.
+   NBYTES must not be SIZE_MAX.
+   Return the number of characters successfully read.
+   On error, return SIZE_MAX, setting errno.
+   The number returned is always NBYTES unless end-of-file or error.  */
+
+size_t
+block_read (int fd, char *buf, size_t nbytes)
+{
+  char *bp = buf;
+  char const *buflim = buf + nbytes;
+  size_t readlim = MIN (SSIZE_MAX, SIZE_MAX);
+
+  do
+    {
+      size_t bytes_remaining = buflim - bp;
+      size_t bytes_to_read = MIN (bytes_remaining, readlim);
+      ssize_t nread = read (fd, bp, bytes_to_read);
+
+      if (nread <= 0)
+        {
+          if (nread == 0)
+            break;
+
+          /* Accommodate Tru64 5.1, which can't read more than INT_MAX
+             bytes at a time.  They call that a 64-bit OS?  */
+          if (errno == EINVAL && INT_MAX < bytes_to_read)
+            {
+              readlim = INT_MAX;
+              continue;
+            }
+
+          /* This is needed for programs that have signal handlers on
+             older hosts without SA_RESTART.  It also accommodates
+             ancient AIX hosts that set errno to EINTR after uncaught
+             SIGCONT (see lib/cmpbuf.c in GNU diffutils 2.9 for the
+             original report, dated 1993-04-22).  */
+          if (! SA_RESTART && errno == EINTR)
+            continue;
+
+          return SIZE_MAX;
+        }
+      bp += nread;
+    }
+  while (bp < buflim);
+
+  return bp - buf;
+}
+
+/* Like block_read, but treat a read error as fatal: report it against
+   PATHNAME and exit.  Return the number of bytes read, which is less
+   than NBYTES only at end-of-file.  */
+
+size_t
+safe_block_read (int fd, const char *pathname, char *buf, size_t nbytes)
+{
+  size_t nread = block_read (fd, buf, nbytes);
+
+  if (nread == SIZE_MAX)
+    error (1, errno, "%s", safely_quote_err_filename (0, pathname));
+
+  return nread;
+}
+
+/* Compare two blocks of memory P0 and P1 until they differ.
+   If the blocks are not guaranteed to be different, put sentinels at the ends
+   of the blocks before calling this function.
+
+   Return the offset of the first byte that differs.  */
+
+size_t
+block_compare (word const *p0, word const *p1)
+{
+  word const *l0, *l1;
+  char const *c0, *c1;
+
+  /* Find the rough position of the first difference by reading words,
+     not bytes.  */
+
+  for (l0 = p0, l1 = p1;  *l0 == *l1;  l0++, l1++)
+    continue;
+
+  /* Find the exact differing position (endianness independent).  */
+
+  for (c0 = (char const *) l0, c1 = (char const *) l1;
+       *c0 == *c1;
+       c0++, c1++)
+    continue;
+
+  return c0 - (char const *) p0;
+}
+
+/* Make the blocks B0 and B1, which both hold NBYTES bytes of data,
+   differ at offset NBYTES so that block_compare stops there.  The
+   whole word-sized tail starting at NBYTES is initialized, so the
+   word reads done by block_compare never see indeterminate bytes.
+   Each block must have room for NBYTES + sizeof (word) bytes.  */
+
+static void
+set_sentinels (char *b0, char *b1, size_t nbytes)
+{
+  memset (b0 + nbytes, 0, sizeof (word));
+  memset (b1 + nbytes, 0, sizeof (word));
+  b1[nbytes] = 1;
+}
+
+/* Position FD at OFFSET from the start of the file.  Like read errors,
+   a failure is fatal and is reported against PATHNAME.  */
+
+static void
+safe_seek (int fd, off_t offset, const char *pathname)
+{
+  if (lseek (fd, offset, SEEK_SET) == (off_t) -1)
+    error (1, errno, "%s", safely_quote_err_filename (0, pathname));
+}
+
+/* Compare the reference file (opened on file descriptor FD0 and named
+   REF_PATHNAME) with the candidate file (opened on FD1 and named
+   PATHNAME), both from their beginning.  The first block of the
+   reference file is read once and cached in PRED_PTR so that later
+   calls only seek past it.  Read and seek errors are fatal.
+   Return true if files don't differ, otherwise false.  */
+
+boolean
+cmp (int fd0, int fd1, const char *ref_pathname, const char *pathname,
+     struct predicate *pred_ptr)
+{
+  struct samecontent_args *refer = &pred_ptr->args.samecontentargs;
+  size_t const buf_size = LARGE_BLOCK_SIZE;
+  /* Leave room past the data for the sentinel and for the word-sized
+     reads block_compare does up to and including the sentinel.  */
+  size_t const alloc_size = buf_size + sizeof (word);
+  char *buffer0 = NULL;         /* Reference blocks after the first.  */
+  char *buffer1 = xmalloc (alloc_size);
+  boolean is_first_block = true;
+  boolean differing = false;
+  size_t read0, read1;          /* Number of bytes read from each file.  */
+
+  safe_seek (fd1, 0, pathname);
+
+  do
+    {
+      char *block0;
+
+      if (is_first_block)
+        {
+          if (refer->first_block == NULL)
+            {
+              safe_seek (fd0, 0, ref_pathname);
+              refer->first_block = xmalloc (alloc_size);
+              refer->read = safe_block_read (fd0, ref_pathname,
+                                             refer->first_block, buf_size);
+            }
+          else
+            safe_seek (fd0, (off_t) refer->read, ref_pathname);
+
+          block0 = refer->first_block;
+          read0 = refer->read;
+          is_first_block = false;
+        }
+      else
+        {
+          if (buffer0 == NULL)
+            buffer0 = xmalloc (alloc_size);
+
+          block0 = buffer0;
+          read0 = safe_block_read (fd0, ref_pathname, buffer0, buf_size);
+        }
+
+      read1 = safe_block_read (fd1, pathname, buffer1, buf_size);
+
+      /* pred_samecontent matched the sizes beforehand, but either file
+         may have changed since then; unequal block lengths mean unequal
+         content.  */
+      if (read0 != read1)
+        differing = true;
+      else
+        {
+          set_sentinels (block0, buffer1, read0);
+          if (block_compare ((word const *) block0,
+                             (word const *) buffer1) < read0)
+            differing = true;
+        }
+    }
+  while (!differing && read0 == buf_size);
+
+  free (buffer0);
+  free (buffer1);
+
+  return !differing;
+}
diff -Nur findutils/find/defs.h findutils-rreale/find/defs.h
--- findutils/find/defs.h	2010-04-05 13:18:59.000000000 +0200
+++ findutils-rreale/find/defs.h	2010-04-05 19:45:50.000000000 +0200
@@ -168,6 +168,14 @@
   int fd;
 };
 
+struct samecontent_args
+{
+  struct stat st;
+  char *first_block;
+  size_t read;
+  char *ref_pathname;
+};
+
 struct size_val
 {
   enum comparison_type kind;
@@ -319,6 +327,7 @@
     struct time_val reftime; /* newer newerXY anewer cnewer mtime atime ctime mmin amin cmin */
     struct perm_val perm; /* perm */
     struct samefile_file_id samefileid; /* samefile */
+    struct samecontent_args samecontentargs; /* samecontent */
     mode_t type; /* type */
     struct format_val printf_vec; /* printf fprintf fprint ls fls print0 fprint0 print */
     security_context_t scontext; /* security context */
@@ -467,6 +476,7 @@
 PREDICATEFUNCTION pred_writable;
 PREDICATEFUNCTION pred_xtype;
 PREDICATEFUNCTION pred_context;
+PREDICATEFUNCTION pred_samecontent;
 
 
 
@@ -528,6 +538,9 @@
 
 boolean looks_like_expression PARAMS((const char *arg, boolean leading));
 
+/* cmp.c */
+boolean cmp (int fd0, int fd1, const char *ref_pathname, const char *pathname, struct predicate *pred_ptr);
+
 enum DebugOption
 {
   DebugNone = 0,
diff -Nur findutils/find/Makefile.am findutils-rreale/find/Makefile.am
--- findutils/find/Makefile.am	2010-04-05 13:18:59.000000000 +0200
+++ findutils-rreale/find/Makefile.am	2010-04-05 14:48:52.000000000 +0200
@@ -5,7 +5,7 @@
 # regexprops_SOURCES = regexprops.c
 
 noinst_LIBRARIES = libfindtools.a
-libfindtools_a_SOURCES = finddata.c fstype.c parser.c pred.c tree.c util.c sharefile.c
+libfindtools_a_SOURCES = finddata.c fstype.c parser.c pred.c tree.c util.c sharefile.c cmp.c
 
 # We always build two versions of find, one with fts, one without.
 
diff -Nur findutils/find/parser.c findutils-rreale/find/parser.c
--- findutils/find/parser.c	2010-04-05 13:18:59.000000000 +0200
+++ findutils-rreale/find/parser.c	2010-04-05 18:33:37.000000000 +0200
@@ -159,6 +159,7 @@
 static boolean parse_xtype PARAMS((const struct parser_table*, char *argv[], int *arg_ptr));
 static boolean parse_quit PARAMS((const struct parser_table*, char *argv[], int *arg_ptr));
 static boolean parse_context PARAMS((const struct parser_table*, char *argv[], int *arg_ptr));
+static boolean parse_samecontent PARAMS((const struct parser_table*, char *argv[], int *arg_ptr));
 
 boolean parse_print PARAMS((const struct parser_table*, char *argv[], int *arg_ptr));
 
@@ -326,6 +327,7 @@
   {ARG_TEST, "writable", parse_accesscheck, pred_writable}, /* GNU, 4.3.0+ */
   PARSE_OPTION ("xdev", xdev), /* POSIX */
   PARSE_TEST ("xtype", xtype), /* GNU */
+  PARSE_TEST ("samecontent", samecontent),
 #ifdef UNIMPLEMENTED_UNIX
   /* It's pretty ugly for find to know about archive formats.
      Plus what it could do with cpio archives is very limited.
@@ -2820,6 +2822,45 @@
 {
   return insert_type (argv, arg_ptr, entry, pred_xtype);
 }
+
+/* Parse "-samecontent FILE": stat FILE now (a failure is reported
+   through fatal_file_error) and remember its status and name; its
+   first block is read by cmp on the first comparison.  */
+static boolean
+parse_samecontent (const struct parser_table* entry, char **argv, int *arg_ptr)
+{
+  struct predicate *our_pred;
+  const char *pathname;
+  struct stat st;
+
+  set_stat_placeholders(&st);
+
+  if (collect_arg(argv, arg_ptr, &pathname))
+    {
+      if (0 != (options.xstat)(pathname, &st))
+        {
+          fatal_file_error(pathname);
+        }
+    }
+  else
+    {
+      return false;
+    }
+
+  our_pred = insert_primary (entry, NULL);
+
+  memcpy (&our_pred->args.samecontentargs.st, &st, sizeof (struct stat));
+  our_pred->args.samecontentargs.first_block = NULL;
+  our_pred->args.samecontentargs.read = 0;
+  our_pred->args.samecontentargs.ref_pathname = xmalloc (strlen (pathname) + 1);
+  strcpy (our_pred->args.samecontentargs.ref_pathname, pathname);
+
+  our_pred->need_type = false;
+  our_pred->need_stat = true;
+  our_pred->est_success_rate = 0.0001f;
+
+  return true;
+}
 
 static boolean
 insert_type (char **argv, int *arg_ptr,
diff -Nur findutils/find/pred.c findutils-rreale/find/pred.c
--- findutils/find/pred.c	2010-04-05 13:18:59.000000000 +0200
+++ findutils-rreale/find/pred.c	2010-04-05 19:54:25.000000000 +0200
@@ -234,6 +234,7 @@
   {pred_writable, "writable "},
   {pred_xtype, "xtype "},
   {pred_context, "context"},
+  {pred_samecontent, "samecontent"},
   {0, "none "}
 };
 #endif
@@ -1905,6 +1906,53 @@
   return (pred_type (pathname, &sbuf, pred_ptr));
 }
 
+/* Report that NAME could not be opened (errno says why), make find
+   exit with a nonzero status, and return false.  */
+static boolean
+samecontent_open_failed (const char *name)
+{
+  error (0, errno, "%s", safely_quote_err_filename (0, name));
+  state.exit_status = 1;
+  return false;
+}
+
+/* True if PATHNAME is a regular file with the same content as the
+   reference file recorded by parse_samecontent.  Files of another
+   type or size are rejected without being opened.  */
+boolean
+pred_samecontent (const char *pathname, struct stat *stat_buf, struct predicate *pred_ptr)
+{
+  struct samecontent_args *refer = &pred_ptr->args.samecontentargs;
+  int fd0, fd1;
+  boolean same;
+
+  if (!S_ISREG (stat_buf->st_mode) || !S_ISREG (refer->st.st_mode))
+    return false;
+
+  if (stat_buf->st_size != refer->st.st_size)
+    return false;
+
+  /* NOTE(review): both names are opened relative to the current
+     directory; confirm that holds when find has changed directory.  */
+  fd0 = open (refer->ref_pathname, O_RDONLY | O_BINARY, 0);
+  if (fd0 < 0)
+    return samecontent_open_failed (refer->ref_pathname);
+
+  fd1 = open (pathname, O_RDONLY | O_BINARY, 0);
+  if (fd1 < 0)
+    {
+      samecontent_open_failed (pathname);
+      close (fd0);
+      return false;
+    }
+
+  same = cmp (fd0, fd1, refer->ref_pathname, pathname, pred_ptr);
+
+  close (fd0);
+  close (fd1);
+
+  return same;
+}
 
 boolean
 pred_context (const char *pathname, struct stat *stat_buf,