[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
coreutils patch to multithread md5sum for parallel hashing (ala the HP-U
From: |
Brett L. Trotter |
Subject: |
coreutils patch to multithread md5sum for parallel hashing (ala the HP-UX days) |
Date: |
Wed, 24 Mar 2010 19:13:37 -0500 |
User-agent: |
Thunderbird 2.0.0.24 (X11/20100311) |
Hello, this is my first post to the list, so I'll say in advance here
I'm pleased to meet you all.
I've been out of C/C++ land for a while due to the economy, but found
myself hashing a bunch of 46GB blu ray images and discs for verification
lately and wanted a simple way to cut down the time involved without
starting separate terminals, running screen, etc. HP-UX's md5sum
had/has(?) a -n option for parallelizing the hashing. I did a quick
implementation today, and it's probably nothing like the sort of code
you folks write and likely can be optimized quite a bit, but I was
sincerely hoping that the feature could make it into coreutils, either
based on my code or someone else's.
It's a patch against the version in coreutils-5.97-23.el5_4.2.src.rpm on
RHEL 5.4. It's been tested lightly, shows a performance -decrease- for
small numbers of small files, but an increase for larger files or larger
numbers of files. I haven't yet gotten around to making the patch apply
to the makefile.am, so I was manually adding -lpthread to the link lines
for the *sum programs in the generated makefile.
Again, this is not anywhere near a production ready patch- and I'm aware
that output ordering will be potentially out of order when N > 1 is
used, but I'd love any thoughts, improvements, or reasons why md5sum
shouldn't be able to parallel process like the old days.
-Brett
P.S. I've also attached the fully patched md5sum.c.
*** coreutils-5.97/src/md5sum.c 2010-03-24 11:29:26.000000000 -0500
--- coreutils-5.97/src/md5sum.c.threaded 2010-03-24 14:09:25.000000000
-0500
***************
*** 40,45 ****
--- 40,46 ----
#include "error.h"
#include "quote.h"
#include "stdio--.h"
+ #include <pthread.h>
/* The official name of this program (e.g., no `g' prefix). */
#if HASH_ALGO_MD5
***************
*** 139,150 ****
--- 140,165 ----
{ "check", no_argument, NULL, 'c' },
{ "status", no_argument, NULL, STATUS_OPTION },
{ "text", no_argument, NULL, 't' },
+ { "threads", no_argument, NULL, 'n' },
{ "warn", no_argument, NULL, 'w' },
{ GETOPT_HELP_OPTION_DECL },
{ GETOPT_VERSION_OPTION_DECL },
{ NULL, 0, NULL, 0 }
};
+ struct thread_node
+ {
+ bool do_check;
+ int binary;
+ pthread_t thread;
+ char *filename;
+ struct thread_node *next;
+ };
+
+ static pthread_mutex_t m_thread_pool;
+ static struct thread_node *thread_pool = NULL;
+ static bool ok = true;
+
void
usage (int status)
{
***************
*** 181,186 ****
--- 196,204 ----
fputs (_("\
-t, --text read in text mode (default)\n\
"), stdout);
+ fputs (_("\
+ -nX, --threads=X process X (1 - 16) files in parallel\n\
+ "), stdout);
fputs (_("\
\n\
The following two options are useful only when verifying checksums:\n\
***************
*** 592,607 ****
&& n_open_or_read_failures == 0);
}
! int
! main (int argc, char **argv)
{
unsigned char bin_buffer_unaligned[DIGEST_BIN_BYTES+DIGEST_ALIGN];
/* Make sure bin_buffer is properly aligned. */
unsigned char *bin_buffer = ptr_align (bin_buffer_unaligned, DIGEST_ALIGN);
bool do_check = false;
int opt;
- bool ok = true;
int binary = -1;
/* Setting values of global variables. */
initialize_main (&argc, &argv);
--- 610,718 ----
&& n_open_or_read_failures == 0);
}
! void
! *thread_start(void *node)
{
unsigned char bin_buffer_unaligned[DIGEST_BIN_BYTES+DIGEST_ALIGN];
/* Make sure bin_buffer is properly aligned. */
unsigned char *bin_buffer = ptr_align (bin_buffer_unaligned, DIGEST_ALIGN);
+ struct thread_node *tn = (struct thread_node*) node;
+ struct thread_node *tmp = NULL;
+
+ if ((node == NULL) || (thread_pool == NULL)) return NULL;
+
+ if (tn->filename != NULL) {
+ if (tn->do_check)
+ ok &= digest_check (tn->filename);
+ else
+ {
+ int file_is_binary = tn->binary;
+
+ if (! digest_file (tn->filename, &file_is_binary, bin_buffer))
+ ok = false;
+ else
+ {
+ size_t i;
+
+ pthread_mutex_lock(&m_thread_pool);
+ /* Output a leading backslash if the file name contains
+ a newline or backslash. */
+ if (strchr (tn->filename, '\n') || strchr (tn->filename, '\\'))
+ putchar ('\\');
+
+ for (i = 0; i < (digest_hex_bytes / 2); ++i)
+ printf ("%02x", bin_buffer[i]);
+
+ putchar (' ');
+ if (file_is_binary)
+ putchar ('*');
+ else
+ putchar (' ');
+
+ /* Translate each NEWLINE byte to the string, "\\n",
+ and each backslash to "\\\\". */
+ for (i = 0; i < strlen (tn->filename); ++i)
+ {
+ switch (tn->filename[i])
+ {
+ case '\n':
+ fputs ("\\n", stdout);
+ break;
+
+ case '\\':
+ fputs ("\\\\", stdout);
+ break;
+
+ default:
+ putchar (tn->filename[i]);
+ break;
+ }
+ }
+ putchar ('\n');
+ }
+ pthread_mutex_unlock(&m_thread_pool);
+ }
+ }
+
+ /* thread cleanup */
+ pthread_mutex_lock(&m_thread_pool);
+ if (thread_pool != NULL) {
+ if (thread_pool == tn) {
+ /* we're the first thread, set root to next (if any) */
+ thread_pool = thread_pool->next;
+
+ } else {
+ tmp = thread_pool;
+ while (tmp->next != NULL)
+ {
+ if (tmp->next == tn) {
+ /* the next node is us, set next node to our next */
+ tmp->next = tn->next;
+ break;
+ }
+ tmp = tmp->next;
+ }
+ }
+
+ free(tn->filename);
+ tn->filename = NULL;
+ tn->next = NULL;
+ free(tn);
+ tn = NULL;
+ }
+ pthread_mutex_unlock(&m_thread_pool);
+ }
+
+ int
+ main (int argc, char **argv)
+ {
bool do_check = false;
int opt;
int binary = -1;
+ int threads_max = -1;
+ int threads_running = 0;
+ struct thread_node *tmpnode = NULL;
+ pthread_attr_t attr;
/* Setting values of global variables. */
initialize_main (&argc, &argv);
***************
*** 612,618 ****
atexit (close_stdout);
! while ((opt = getopt_long (argc, argv, "bctw", long_options, NULL)) != -1)
switch (opt)
{
case 'b':
--- 723,729 ----
atexit (close_stdout);
! while ((opt = getopt_long (argc, argv, "bctwn:", long_options, NULL)) != -1)
switch (opt)
{
case 'b':
***************
*** 632,637 ****
--- 743,751 ----
status_only = false;
warn = true;
break;
+ case 'n':
+ threads_max = atoi(optarg);
+ break;
case_GETOPT_HELP_CHAR;
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
default:
***************
*** 662,725 ****
usage (EXIT_FAILURE);
}
if (!O_BINARY && binary < 0)
binary = 0;
if (optind == argc)
argv[argc++] = "-";
for (; optind < argc; ++optind)
{
! char *file = argv[optind];
!
! if (do_check)
! ok &= digest_check (file);
! else
! {
! int file_is_binary = binary;
! if (! digest_file (file, &file_is_binary, bin_buffer))
! ok = false;
! else
! {
! size_t i;
! /* Output a leading backslash if the file name contains
! a newline or backslash. */
! if (strchr (file, '\n') || strchr (file, '\\'))
! putchar ('\\');
! for (i = 0; i < (digest_hex_bytes / 2); ++i)
! printf ("%02x", bin_buffer[i]);
! putchar (' ');
! if (file_is_binary)
! putchar ('*');
! else
! putchar (' ');
! /* Translate each NEWLINE byte to the string, "\\n",
! and each backslash to "\\\\". */
! for (i = 0; i < strlen (file); ++i)
! {
! switch (file[i])
! {
! case '\n':
! fputs ("\\n", stdout);
! break;
! case '\\':
! fputs ("\\\\", stdout);
! break;
! default:
! putchar (file[i]);
! break;
! }
! }
! putchar ('\n');
! }
! }
}
if (have_read_stdin && fclose (stdin) == EOF)
--- 776,868 ----
usage (EXIT_FAILURE);
}
+ if ((threads_max != -1) && do_check)
+ {
+ error (0, 0, _("the --threads option is meaningless when "
+ "verifying checksums"));
+ usage (EXIT_FAILURE);
+ }
+
+ if (threads_max == -1) threads_max = 1;
+
+ if (((threads_max < 1) || (threads_max > 16)) && !do_check)
+ {
+ error (0, 0, _("the --threads argument must be between 1 and 16"));
+ usage (EXIT_FAILURE);
+ }
+
if (!O_BINARY && binary < 0)
binary = 0;
if (optind == argc)
argv[argc++] = "-";
+ /* initialize thread apparatus */
+ thread_pool = NULL;
+ pthread_mutex_init(&m_thread_pool, NULL);
+
for (; optind < argc; ++optind)
{
! while (1)
! {
! pthread_mutex_lock(&m_thread_pool);
! threads_running = 0;
! tmpnode = thread_pool;
! while (tmpnode != NULL) {
! threads_running++;
! if (tmpnode->next != NULL) tmpnode = tmpnode->next;
! else break;
! }
! /* see if we need to start a new thread */
! if (threads_running < threads_max)
! {
! if (thread_pool == NULL)
! {
! thread_pool = (struct thread_node*)xmalloc(sizeof(struct
thread_node));
! tmpnode = thread_pool;
! } else {
! tmpnode->next = (struct thread_node*)xmalloc(sizeof(struct
thread_node));
! tmpnode = tmpnode->next;
! }
!
! if (tmpnode != NULL)
! {
! tmpnode->do_check = do_check;
! tmpnode->binary = binary;
! tmpnode->filename = strdup(argv[optind]);
! tmpnode->next = NULL;
! tmpnode->thread = NULL;
!
! pthread_attr_init(&attr);
! pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
! pthread_create( &(tmpnode->thread), &attr, thread_start, (void
*) tmpnode);
! }
!
! pthread_mutex_unlock(&m_thread_pool);
! break;
! }
! pthread_mutex_unlock(&m_thread_pool);
! }
! }
! /* wait for all threads */
! while (1)
! {
! pthread_mutex_lock(&m_thread_pool);
! threads_running = 0;
! tmpnode = thread_pool;
! while (tmpnode != NULL) {
! threads_running++;
! tmpnode = tmpnode->next;
! }
! pthread_mutex_unlock(&m_thread_pool);
! if (threads_running == 0) break;
}
if (have_read_stdin && fclose (stdin) == EOF)
/* Compute MD5, SHA1, SHA224, SHA256, SHA384 or SHA512 checksum of files or
strings
Copyright (C) 1995-2005 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
/* Written by Ulrich Drepper <address@hidden>. */
#include <config.h>
#include <getopt.h>
#include <sys/types.h>
#include "system.h"
#if HASH_ALGO_MD5
# include "md5.h"
#endif
#if HASH_ALGO_SHA1
# include "sha1.h"
#endif
#if HASH_ALGO_SHA256 || HASH_ALGO_SHA224
# include "sha256.h"
#endif
#if HASH_ALGO_SHA512 || HASH_ALGO_SHA384
# include "sha512.h"
#endif
#include "getline.h"
#include "error.h"
#include "quote.h"
#include "stdio--.h"
#include <pthread.h>
/* The official name of this program (e.g., no `g' prefix). */
#if HASH_ALGO_MD5
# define PROGRAM_NAME "md5sum"
# define DIGEST_TYPE_STRING "MD5"
# define DIGEST_STREAM md5_stream
# define DIGEST_BUFFER md5_buffer
# define DIGEST_BITS 128
# define DIGEST_REFERENCE "RFC 1321"
# define DIGEST_ALIGN 4
#elif HASH_ALGO_SHA1
# define PROGRAM_NAME "sha1sum"
# define DIGEST_TYPE_STRING "SHA1"
# define DIGEST_STREAM sha1_stream
# define DIGEST_BUFFER sha1_buffer
# define DIGEST_BITS 160
# define DIGEST_REFERENCE "FIPS-180-1"
# define DIGEST_ALIGN 4
#elif HASH_ALGO_SHA256
# define PROGRAM_NAME "sha256sum"
# define DIGEST_TYPE_STRING "SHA256"
# define DIGEST_STREAM sha256_stream
# define DIGEST_BUFFER sha256_buffer
# define DIGEST_BITS 256
# define DIGEST_REFERENCE "FIPS-180-2"
# define DIGEST_ALIGN 4
#elif HASH_ALGO_SHA224
# define PROGRAM_NAME "sha224sum"
# define DIGEST_TYPE_STRING "SHA224"
# define DIGEST_STREAM sha224_stream
# define DIGEST_BUFFER sha224_buffer
# define DIGEST_BITS 224
# define DIGEST_REFERENCE "RFC 3874"
# define DIGEST_ALIGN 4
#elif HASH_ALGO_SHA512
# define PROGRAM_NAME "sha512sum"
# define DIGEST_TYPE_STRING "SHA512"
# define DIGEST_STREAM sha512_stream
# define DIGEST_BUFFER sha512_buffer
# define DIGEST_BITS 512
# define DIGEST_REFERENCE "FIPS-180-2"
# define DIGEST_ALIGN 8
#elif HASH_ALGO_SHA384
# define PROGRAM_NAME "sha384sum"
# define DIGEST_TYPE_STRING "SHA384"
# define DIGEST_STREAM sha384_stream
# define DIGEST_BUFFER sha384_buffer
# define DIGEST_BITS 384
# define DIGEST_REFERENCE "FIPS-180-2"
# define DIGEST_ALIGN 8
#else
# error "Can't decide which hash algorithm to compile."
#endif
#define DIGEST_HEX_BYTES (DIGEST_BITS / 4)
#define DIGEST_BIN_BYTES (DIGEST_BITS / 8)
#define AUTHORS "Ulrich Drepper", "Scott Miller", "David Madore"
/* The minimum length of a valid digest line. This length does
not include any newline character at the end of a line. */
#define MIN_DIGEST_LINE_LENGTH \
(DIGEST_HEX_BYTES /* length of hexadecimal message digest */ \
+ 2 /* blank and binary indicator */ \
+ 1 /* minimum filename length */ )
/* True if any of the files read were the standard input. */
static bool have_read_stdin;
/* The minimum length of a valid checksum line for the selected algorithm. */
static size_t min_digest_line_length;
/* Set to the length of a digest hex string for the selected algorithm. */
static size_t digest_hex_bytes;
/* With --check, don't generate any output.
The exit code indicates success or failure. */
static bool status_only = false;
/* With --check, print a message to standard error warning about each
improperly formatted checksum line. */
static bool warn = false;
/* The name this program was run with. */
char *program_name;
/* For long options that have no equivalent short option, use a
non-character as a pseudo short option, starting with CHAR_MAX + 1. */
enum
{
STATUS_OPTION = CHAR_MAX + 1
};
/* Long-option table handed to getopt_long in main.  The short-option
   character in the last column must agree with main's optstring
   ("bctwn:").  `n' takes a mandatory argument there and its handler
   reads optarg, so "threads" is declared with required_argument:
   with no_argument, `--threads=4' is rejected by getopt_long and a
   bare `--threads' reaches the handler with optarg == NULL.  */
static const struct option long_options[] =
{
  { "binary", no_argument, NULL, 'b' },
  { "check", no_argument, NULL, 'c' },
  { "status", no_argument, NULL, STATUS_OPTION },
  { "text", no_argument, NULL, 't' },
  { "threads", required_argument, NULL, 'n' },
  { "warn", no_argument, NULL, 'w' },
  { GETOPT_HELP_OPTION_DECL },
  { GETOPT_VERSION_OPTION_DECL },
  { NULL, 0, NULL, 0 }
};
/* One in-flight file.  main allocates a node per FILE argument, links
   it at the tail of the thread_pool list and starts thread_start on it;
   thread_start unlinks and frees the node when it is done.  */
struct thread_node
{
/* Copy of main's do_check: when true the worker runs digest_check on
   FILENAME instead of printing a digest line.  */
bool do_check;
/* Copy of main's binary flag; used as the initial *BINARY value passed
   to digest_file.  */
int binary;
/* Thread identifier filled in by pthread_create.  */
pthread_t thread;
/* Heap copy (strdup) of the command-line file name; freed by the
   worker during cleanup.  */
char *filename;
/* Next node in the singly linked list, or NULL at the tail.  */
struct thread_node *next;
};
/* Guards the thread_pool list.  thread_start also holds it while it
   prints a digest line.  Initialized in main with pthread_mutex_init.  */
static pthread_mutex_t m_thread_pool;
/* Head of the list of nodes whose worker has not finished yet; main
   counts its length to decide whether another thread may start and
   waits for it to become empty before exiting.  */
static struct thread_node *thread_pool = NULL;
/* Overall success flag: cleared by a worker when a file fails and
   turned into the exit status at the end of main.  */
static bool ok = true;
void
usage (int status)
{
if (status != EXIT_SUCCESS)
fprintf (stderr, _("Try `%s --help' for more information.\n"),
program_name);
else
{
printf (_("\
Usage: %s [OPTION] [FILE]...\n\
Print or check %s (%d-bit) checksums.\n\
With no FILE, or when FILE is -, read standard input.\n\
\n\
"),
program_name,
DIGEST_TYPE_STRING,
DIGEST_BITS);
if (O_BINARY)
fputs (_("\
-b, --binary read in binary mode (default unless reading tty
stdin)\n\
"), stdout);
else
fputs (_("\
-b, --binary read in binary mode\n\
"), stdout);
printf (_("\
-c, --check read %s sums from the FILEs and check them\n"),
DIGEST_TYPE_STRING);
if (O_BINARY)
fputs (_("\
-t, --text read in text mode (default if reading tty stdin)\n\
"), stdout);
else
fputs (_("\
-t, --text read in text mode (default)\n\
"), stdout);
fputs (_("\
-nX, --threads=X process X (1 - 16) files in parallel\n\
"), stdout);
fputs (_("\
\n\
The following two options are useful only when verifying checksums:\n\
--status don't output anything, status code shows success\n\
-w, --warn warn about improperly formatted checksum lines\n\
\n\
"), stdout);
fputs (HELP_OPTION_DESCRIPTION, stdout);
fputs (VERSION_OPTION_DESCRIPTION, stdout);
printf (_("\
\n\
The sums are computed as described in %s. When checking, the input\n\
should be a former output of this program. The default mode is to print\n\
a line with checksum, a character indicating type (`*' for binary, ` ' for\n\
text), and name for each FILE.\n"),
DIGEST_REFERENCE);
printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
}
exit (status);
}
/* True for the two blank characters allowed between fields of a
   checksum line.  */
#define ISWHITE(c) ((c) == ' ' || (c) == '\t')

/* Split the checksum string S (of length S_LEN) from a BSD 'md5' or
   'sha1' command into two parts: a hexadecimal digest, and the file
   name.  S is modified.  Return true if successful.

   S is the text that follows the "ALGO (" prefix, i.e. it has the form
   "FILE) = DIGEST", and must be NUL-terminated at S[S_LEN].  On success
   *FILE_NAME points at the start of S (now terminated where the last
   ')' was) and *HEX_DIGEST points at the first non-blank character
   after the '='; both point into S.  An empty S, an S without ')', or
   one whose ')' is not followed by optional blanks and '=' yields
   false.  */
static bool
bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest,
             char **file_name)
{
  size_t i;

  *file_name = s;

  /* With S_LEN == 0 there is nothing to search, and S_LEN - 1 would
     wrap around to SIZE_MAX and index far outside the buffer.  */
  if (s_len == 0)
    return false;

  /* Find end of filename.  The BSD 'md5' and 'sha1' commands do not
     escape filenames, so search backwards for the last ')'.  */
  i = s_len - 1;
  while (i && s[i] != ')')
    i--;

  if (s[i] != ')')
    return false;

  /* Terminate the file name where the ')' was.  */
  s[i++] = '\0';

  while (ISWHITE (s[i]))
    i++;

  if (s[i] != '=')
    return false;

  i++;

  while (ISWHITE (s[i]))
    i++;

  *hex_digest = (unsigned char *) &s[i];
  return true;
}
/* Parse one checksum line S (S_LEN bytes, NUL-terminated) in place.

   Two layouts are recognised.  A line that starts (after blanks) with
   DIGEST_TYPE_STRING followed by " (" is treated as BSD style: *BINARY
   is cleared and the rest is delegated to bsd_split_3.  Otherwise the
   line must be "[\]DIGEST<blank>{ |*}NAME": *HEX_DIGEST is set to the
   digest (NUL-terminated in S), *BINARY to whether the type character
   is '*', and *FILE_NAME to the remainder of the line.  When the line
   begins with a backslash the name is unescaped in place ("\n" becomes
   a newline, "\\" a backslash).

   Return true if the line was split successfully.  */
static bool
split_3 (char *s, size_t s_len,
         unsigned char **hex_digest, int *binary, char **file_name)
{
  size_t pos = 0;
  size_t const tag_len = strlen (DIGEST_TYPE_STRING);
  bool unescape;
  char *out;

  while (ISWHITE (s[pos]))
    pos++;

  /* BSD-style line: "ALGO (FILE) = DIGEST".  */
  if (strncmp (s + pos, DIGEST_TYPE_STRING, tag_len) == 0
      && strncmp (s + pos + tag_len, " (", 2) == 0)
    {
      size_t const skip = pos + tag_len + 2;

      *binary = 0;
      return bsd_split_3 (s + skip, s_len - skip, hex_digest, file_name);
    }

  /* A leading backslash announces an escaped file name and makes the
     shortest acceptable line one character longer.  */
  unescape = (s[pos] == '\\');
  if (s_len - pos < min_digest_line_length + unescape)
    return false;
  if (unescape)
    pos++;

  /* The digest must be followed immediately by a blank, which is
     overwritten to terminate it.  */
  *hex_digest = (unsigned char *) &s[pos];
  pos += digest_hex_bytes;
  if (!ISWHITE (s[pos]))
    return false;
  s[pos++] = '\0';

  /* Type indicator: ' ' for text, '*' for binary.  */
  if (s[pos] != ' ' && s[pos] != '*')
    return false;
  *binary = (s[pos++] == '*');

  /* Everything up to end of line is the name, including any leading
     or trailing white space.  */
  *file_name = &s[pos];
  if (!unescape)
    return true;

  /* Undo the escaping in place; the output never overtakes the
     input.  */
  out = &s[pos];
  while (pos < s_len)
    {
      char c = s[pos++];

      if (c == '\0')
        /* The file name may not contain a NUL.  */
        return false;

      if (c == '\\')
        {
          if (pos == s_len)
            /* A valid line does not end with a backslash.  */
            return false;

          c = s[pos++];
          if (c == 'n')
            c = '\n';
          else if (c != '\\')
            /* Only `\' or `n' may follow a backslash.  */
            return false;
        }

      *out++ = c;
    }
  *out = '\0';
  return true;
}
/* Return true if every character of the NUL-terminated string S
   satisfies ISXDIGIT; an empty string therefore yields true.  */
static bool
hex_digits (unsigned char const *s)
{
  for (; *s != '\0'; s++)
    if (!ISXDIGIT (*s))
      return false;

  return true;
}
/* Compute the digest of FILENAME with DIGEST_STREAM and store it in
   *BIN_RESULT, which must be properly aligned.  FILENAME "-" means
   standard input; in that case have_read_stdin is set and the stream
   is left open for the caller.

   *BINARY selects binary mode where O_BINARY is nonzero.  A negative
   value means "decide from the stream": for standard input it is then
   replaced by whether stdin is not a terminal.

   Open, read and close failures are reported through error ().
   Return true if successful.  */
static bool
digest_file (const char *filename, int *binary, unsigned char *bin_result)
{
  bool const reading_stdin = STREQ (filename, "-");
  FILE *stream;

  if (!reading_stdin)
    {
      char const *mode = (O_BINARY && *binary ? "rb" : "r");

      stream = fopen (filename, mode);
      if (stream == NULL)
        {
          error (0, errno, "%s", filename);
          return false;
        }
    }
  else
    {
      have_read_stdin = true;
      stream = stdin;
      if (O_BINARY && *binary)
        {
          /* Undecided: treat stdin as binary unless it is a tty.  */
          if (*binary < 0)
            *binary = ! isatty (STDIN_FILENO);
          if (*binary)
            freopen (NULL, "rb", stdin);
        }
    }

  if (DIGEST_STREAM (stream, bin_result))
    {
      error (0, errno, "%s", filename);
      if (stream != stdin)
        fclose (stream);
      return false;
    }

  /* Standard input stays open; main closes it at exit.  */
  if (reading_stdin)
    return true;

  if (fclose (stream) != 0)
    {
      error (0, errno, "%s", filename);
      return false;
    }

  return true;
}
/* Verify the checksums listed in the file CHECKFILE_NAME ("-" means
   standard input).  Every line that split_3 accepts names a file and
   its expected digest; that file is hashed with digest_file and the
   result compared, ignoring the case of the hex digits.

   Unless status_only is set, one "NAME: OK", "NAME: FAILED" or
   "NAME: FAILED open or read" line is printed to stdout per entry, and
   summary warnings are issued through error () at the end.  With warn
   set, each improperly formatted line is reported as well.

   Return true only if at least one properly formatted line was found,
   no checksum mismatched and every listed file could be read.  */
static bool
digest_check (const char *checkfile_name)
{
FILE *checkfile_stream;
uintmax_t n_properly_formatted_lines = 0;
uintmax_t n_mismatched_checksums = 0;
uintmax_t n_open_or_read_failures = 0;
unsigned char bin_buffer_unaligned[DIGEST_BIN_BYTES + DIGEST_ALIGN];
/* Make sure bin_buffer is properly aligned. */
unsigned char *bin_buffer = ptr_align (bin_buffer_unaligned, DIGEST_ALIGN);
uintmax_t line_number;
char *line;
size_t line_chars_allocated;
bool is_stdin = STREQ (checkfile_name, "-");
if (is_stdin)
{
have_read_stdin = true;
/* From here on the name is only used in messages.  */
checkfile_name = _("standard input");
checkfile_stream = stdin;
}
else
{
checkfile_stream = fopen (checkfile_name, "r");
if (checkfile_stream == NULL)
{
error (0, errno, "%s", checkfile_name);
return false;
}
}
line_number = 0;
/* getline allocates and grows LINE as needed; it is freed after the
   loop.  */
line = NULL;
line_chars_allocated = 0;
do
{
char *filename;
int binary;
unsigned char *hex_digest IF_LINT (= NULL);
ssize_t line_length;
++line_number;
/* The counter wrapped around to zero: more lines than uintmax_t can
   count.  */
if (line_number == 0)
error (EXIT_FAILURE, 0, _("%s: too many checksum lines"),
checkfile_name);
line_length = getline (&line, &line_chars_allocated, checkfile_stream);
/* Stop on end of file, read error, or an empty read.  */
if (line_length <= 0)
break;
/* Ignore comment lines, which begin with a '#' character. */
if (line[0] == '#')
continue;
/* Remove any trailing newline. */
if (line[line_length - 1] == '\n')
line[--line_length] = '\0';
/* A usable line must split into digest/flag/name, must not name "-"
   while the list itself is being read from stdin, and must carry a
   digest made only of hex digits.  */
if (! (split_3 (line, line_length, &hex_digest, &binary, &filename)
&& ! (is_stdin && STREQ (filename, "-"))
&& hex_digits (hex_digest)))
{
if (warn)
{
error (0, 0,
_("%s: %" PRIuMAX
": improperly formatted %s checksum line"),
checkfile_name, line_number,
DIGEST_TYPE_STRING);
}
}
else
{
static const char bin2hex[] = { '0', '1', '2', '3',
'4', '5', '6', '7',
'8', '9', 'a', 'b',
'c', 'd', 'e', 'f' };
/* Result for this one entry only.  This local shadows the file-scope
   variable of the same name, which is not touched here.  */
bool ok;
++n_properly_formatted_lines;
ok = digest_file (filename, &binary, bin_buffer);
if (!ok)
{
++n_open_or_read_failures;
if (!status_only)
{
printf (_("%s: FAILED open or read\n"), filename);
fflush (stdout);
}
}
else
{
size_t digest_bin_bytes = digest_hex_bytes / 2;
size_t cnt;
/* Compare generated binary number with text representation
in check file. Ignore case of hex digits. */
for (cnt = 0; cnt < digest_bin_bytes; ++cnt)
{
if (TOLOWER (hex_digest[2 * cnt])
!= bin2hex[bin_buffer[cnt] >> 4]
|| (TOLOWER (hex_digest[2 * cnt + 1])
!= (bin2hex[bin_buffer[cnt] & 0xf])))
break;
}
/* The loop ran to completion only if every byte matched.  */
if (cnt != digest_bin_bytes)
++n_mismatched_checksums;
if (!status_only)
{
printf ("%s: %s\n", filename,
(cnt != digest_bin_bytes ? _("FAILED") : _("OK")));
fflush (stdout);
}
}
}
}
while (!feof (checkfile_stream) && !ferror (checkfile_stream));
free (line);
/* NOTE(review): this early return leaves checkfile_stream open when it
   is not stdin.  */
if (ferror (checkfile_stream))
{
error (0, 0, _("%s: read error"), checkfile_name);
return false;
}
/* Standard input is left open for the caller.  */
if (!is_stdin && fclose (checkfile_stream) != 0)
{
error (0, errno, "%s", checkfile_name);
return false;
}
if (n_properly_formatted_lines == 0)
{
/* Warn if no tests are found. */
error (0, 0, _("%s: no properly formatted %s checksum lines found"),
checkfile_name, DIGEST_TYPE_STRING);
}
else
{
if (!status_only)
{
if (n_open_or_read_failures != 0)
error (0, 0,
ngettext ("WARNING: %" PRIuMAX " of %" PRIuMAX
" listed file could not be read",
"WARNING: %" PRIuMAX " of %" PRIuMAX
" listed files could not be read",
n_properly_formatted_lines),
n_open_or_read_failures, n_properly_formatted_lines);
if (n_mismatched_checksums != 0)
{
/* Only files that could be read had a checksum computed.  */
uintmax_t n_computed_checksums =
(n_properly_formatted_lines - n_open_or_read_failures);
error (0, 0,
ngettext ("WARNING: %" PRIuMAX " of %" PRIuMAX
" computed checksum did NOT match",
"WARNING: %" PRIuMAX " of %" PRIuMAX
" computed checksums did NOT match",
n_computed_checksums),
n_mismatched_checksums, n_computed_checksums);
}
}
}
return (n_properly_formatted_lines != 0
&& n_mismatched_checksums == 0
&& n_open_or_read_failures == 0);
}
/* Thread entry point.  NODE is a `struct thread_node *' describing one
   file; this function takes ownership of it.

   The file is either verified with digest_check (node->do_check) or
   hashed with digest_file, in which case one "DIGEST  NAME" line is
   written to stdout.  The whole line is emitted while holding
   m_thread_pool so that lines from different threads never interleave;
   the order of lines across threads is whatever order the threads
   finish in.

   Afterwards, under the same mutex, a failure is recorded in the
   file-scope `ok' flag and the node is unlinked from thread_pool.  The
   node and its file name are then freed.  Always returns NULL.  */
void *
thread_start (void *node)
{
  unsigned char bin_buffer_unaligned[DIGEST_BIN_BYTES + DIGEST_ALIGN];
  /* Make sure bin_buffer is properly aligned.  */
  unsigned char *bin_buffer = ptr_align (bin_buffer_unaligned, DIGEST_ALIGN);
  struct thread_node *tn = node;
  struct thread_node **link;
  bool file_ok = true;

  if (tn == NULL)
    return NULL;

  if (tn->filename != NULL)
    {
      if (tn->do_check)
        file_ok = digest_check (tn->filename);
      else
        {
          int file_is_binary = tn->binary;

          file_ok = digest_file (tn->filename, &file_is_binary, bin_buffer);
          if (file_ok)
            {
              size_t const n_bytes = digest_hex_bytes / 2;
              size_t i;
              char const *p;

              /* Lock and unlock are paired inside this one branch, so
                 the mutex is never released without being held.  */
              pthread_mutex_lock (&m_thread_pool);

              /* Output a leading backslash if the file name contains
                 a newline or backslash.  */
              if (strchr (tn->filename, '\n') || strchr (tn->filename, '\\'))
                putchar ('\\');

              for (i = 0; i < n_bytes; ++i)
                printf ("%02x", bin_buffer[i]);

              putchar (' ');
              putchar (file_is_binary ? '*' : ' ');

              /* Translate each NEWLINE byte to the string, "\\n",
                 and each backslash to "\\\\".  */
              for (p = tn->filename; *p != '\0'; ++p)
                {
                  switch (*p)
                    {
                    case '\n':
                      fputs ("\\n", stdout);
                      break;

                    case '\\':
                      fputs ("\\\\", stdout);
                      break;

                    default:
                      putchar (*p);
                      break;
                    }
                }
              putchar ('\n');

              pthread_mutex_unlock (&m_thread_pool);
            }
        }
    }

  /* Thread cleanup.  The shared flag is updated under the mutex, and
     before the node leaves the list, so that whoever observes the list
     without this node (under the same mutex) also observes the
     result.  */
  pthread_mutex_lock (&m_thread_pool);
  if (!file_ok)
    ok = false;

  /* Walk the links rather than the nodes: this handles the head, a
     middle node, the tail, and a node that is not in the list.  */
  link = &thread_pool;
  while (*link != NULL && *link != tn)
    link = &(*link)->next;
  if (*link == tn)
    *link = tn->next;
  pthread_mutex_unlock (&m_thread_pool);

  /* Nobody else can reach the node once it is unlinked.  */
  free (tn->filename);
  free (tn);
  return NULL;
}
/* Largest value accepted by -n/--threads (matches the help text and
   the diagnostic below).  */
enum { THREADS_LIMIT = 16 };

/* Parse ARG as the decimal argument of -n/--threads.  Return the count
   if ARG is a well-formed integer in [1, THREADS_LIMIT].  Return 0 in
   every other case (ARG is NULL, empty, has trailing characters, or is
   out of range); 0 fails the range check in main and, unlike -1,
   cannot be mistaken for "option not given".  */
static int
parse_thread_count (char const *arg)
{
  char *end;
  long n;

  if (arg == NULL)
    return 0;

  errno = 0;
  n = strtol (arg, &end, 10);
  if (errno != 0 || end == arg || *end != '\0'
      || n < 1 || n > THREADS_LIMIT)
    return 0;

  return (int) n;
}

/* Program entry point.  Parse the options, then start one detached
   thread (thread_start) per FILE argument, never keeping more than the
   -n/--threads limit (default 1) in flight, and wait until all of them
   have finished.  With no FILE, standard input ("-") is processed.
   Exit status is EXIT_SUCCESS only if every file was handled
   successfully.

   NOTE(review): both waiting loops poll the thread list under the
   mutex instead of sleeping; a condition variable shared with the
   worker would avoid spinning.  */
int
main (int argc, char **argv)
{
  bool do_check = false;
  int opt;
  int binary = -1;
  int threads_max = -1;         /* -1: option not given.  */
  int threads_running = 0;
  struct thread_node *tmpnode = NULL;
  pthread_attr_t attr;

  /* Setting values of global variables.  */
  initialize_main (&argc, &argv);
  program_name = argv[0];
  setlocale (LC_ALL, "");
  bindtextdomain (PACKAGE, LOCALEDIR);
  textdomain (PACKAGE);

  atexit (close_stdout);

  while ((opt = getopt_long (argc, argv, "bctwn:", long_options, NULL)) != -1)
    switch (opt)
      {
      case 'b':
        binary = 1;
        break;
      case 'c':
        do_check = true;
        break;
      case STATUS_OPTION:
        status_only = true;
        warn = false;
        break;
      case 't':
        binary = 0;
        break;
      case 'w':
        status_only = false;
        warn = true;
        break;
      case 'n':
        /* optarg can be NULL here if the long-option table does not
           declare an argument for this option; the parser maps that,
           like any malformed value, to 0.  */
        threads_max = parse_thread_count (optarg);
        break;
      case_GETOPT_HELP_CHAR;
      case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
      default:
        usage (EXIT_FAILURE);
      }

  min_digest_line_length = MIN_DIGEST_LINE_LENGTH;
  digest_hex_bytes = DIGEST_HEX_BYTES;

  if (0 <= binary && do_check)
    {
      error (0, 0, _("the --binary and --text options are meaningless when "
                     "verifying checksums"));
      usage (EXIT_FAILURE);
    }

  if (status_only & !do_check)
    {
      error (0, 0,
       _("the --status option is meaningful only when verifying checksums"));
      usage (EXIT_FAILURE);
    }

  if (warn & !do_check)
    {
      error (0, 0,
       _("the --warn option is meaningful only when verifying checksums"));
      usage (EXIT_FAILURE);
    }

  if ((threads_max != -1) && do_check)
    {
      error (0, 0, _("the --threads option is meaningless when "
                     "verifying checksums"));
      usage (EXIT_FAILURE);
    }

  if (threads_max == -1)
    threads_max = 1;

  if (threads_max < 1 || threads_max > THREADS_LIMIT)
    {
      error (0, 0, _("the --threads argument must be between 1 and 16"));
      usage (EXIT_FAILURE);
    }

  if (!O_BINARY && binary < 0)
    binary = 0;

  if (optind == argc)
    argv[argc++] = "-";

  /* Initialize thread apparatus.  One attribute object serves every
     thread and is destroyed once the last one has been created.  */
  thread_pool = NULL;
  pthread_mutex_init (&m_thread_pool, NULL);
  pthread_attr_init (&attr);
  pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);

  for (; optind < argc; ++optind)
    {
      /* Build the node before taking the lock, so that an allocation
         failure cannot happen while the lock is held.  */
      struct thread_node *node = xmalloc (sizeof *node);
      bool started = false;

      node->do_check = do_check;
      node->binary = binary;
      node->next = NULL;
      node->filename = strdup (argv[optind]);
      if (node->filename == NULL)
        error (EXIT_FAILURE, errno, "%s", argv[optind]);

      /* Wait for a free slot, then start the thread.  */
      while (!started)
        {
          struct thread_node *tail = NULL;

          pthread_mutex_lock (&m_thread_pool);

          threads_running = 0;
          for (tmpnode = thread_pool; tmpnode != NULL;
               tmpnode = tmpnode->next)
            {
              threads_running++;
              tail = tmpnode;
            }

          if (threads_running < threads_max)
            {
              int rc;

              /* Link the node before creating the thread: the worker
                 expects to find its node in the list.  */
              if (tail == NULL)
                thread_pool = node;
              else
                tail->next = node;

              rc = pthread_create (&node->thread, &attr, thread_start, node);
              if (rc != 0)
                {
                  /* No thread will ever remove this node; leaving it
                     linked would make the final wait loop spin
                     forever.  */
                  if (tail == NULL)
                    thread_pool = NULL;
                  else
                    tail->next = NULL;
                  pthread_mutex_unlock (&m_thread_pool);
                  free (node->filename);
                  free (node);
                  error (EXIT_FAILURE, rc, _("cannot create thread"));
                }

              started = true;
            }

          pthread_mutex_unlock (&m_thread_pool);
        }
    }

  pthread_attr_destroy (&attr);

  /* Wait for all threads: each worker unlinks its node when done, so
     an empty list means nothing is running.  */
  for (;;)
    {
      bool idle;

      pthread_mutex_lock (&m_thread_pool);
      idle = (thread_pool == NULL);
      pthread_mutex_unlock (&m_thread_pool);

      if (idle)
        break;
    }

  if (have_read_stdin && fclose (stdin) == EOF)
    error (EXIT_FAILURE, errno, _("standard input"));

  exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
}
- coreutils patch to multithread md5sum for parallel hashing (ala the HP-UX days),
Brett L. Trotter <=