[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] md5sum, sha*sum: add --base32 option
From: |
Noah Levitt |
Subject: |
[PATCH] md5sum, sha*sum: add --base32 option |
Date: |
Fri, 13 Nov 2009 20:52:19 -0800 |
User-agent: |
Thunderbird 2.0.0.23 (X11/20090817) |
Hello,
Here is a patch to add a --base32 option to md5sum and sha*sum.
The Internet Archive's web crawler Heritrix records digests of crawled content
in base32. With *sum --base32 it will be much easier to work with those digests.
Noah
From 39d0fbc760686d6bcc9110404d33115d3f5e38f5 Mon Sep 17 00:00:00 2001
From: Noah Levitt <address@hidden(none)>
Date: Fri, 13 Nov 2009 20:12:10 -0800
Subject: [PATCH] md5sum, sha*sum: add --base32 option
* src/md5sum.c: Add --base32 option. With --base32, when printing
digests, print in base32 instead of hex; when verifying, expect them in
base32.
* doc/coreutils.texi: Document --base32 option.
---
doc/coreutils.texi | 7 ++
src/md5sum.c | 191 ++++++++++++++++++++++++++++++++++++++++++----------
2 files changed, 162 insertions(+), 36 deletions(-)
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 3721bee..f9fe5f2 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -3454,6 +3454,13 @@ the MD5 checksum is unaffected. This option is the default on systems
like MS-DOS that distinguish between binary and text files, except
for reading standard input when standard input is a terminal.
+@item --base32
+When printing checksums, print in base32 encoding, rather than
+hexadecimal. When verifying checksums, expect them in base32. See
+@uref{http://tools.ietf.org/html/rfc4648, RFC 4648} for the definition
+of base32. Note that @command{md5sum} uses the standard base32 alphabet
+(not the ``extended hex'' version).
+
@item -c
@itemx --check
Read file names and checksum information (not data) from each
diff --git a/src/md5sum.c b/src/md5sum.c
index b7db03e..0adee76 100644
--- a/src/md5sum.c
+++ b/src/md5sum.c
@@ -87,6 +87,7 @@
#endif
#define DIGEST_HEX_BYTES (DIGEST_BITS / 4)
+#define DIGEST_BASE32_BYTES ((DIGEST_BITS + 4) / 5)
#define DIGEST_BIN_BYTES (DIGEST_BITS / 8)
#define AUTHORS \
@@ -96,10 +97,14 @@
/* The minimum length of a valid digest line. This length does
not include any newline character at the end of a line. */
-#define MIN_DIGEST_LINE_LENGTH \
+#define MIN_HEX_DIGEST_LINE_LENGTH \
(DIGEST_HEX_BYTES /* length of hexadecimal message digest */ \
+ 2 /* blank and binary indicator */ \
+ 1 /* minimum filename length */ )
+#define MIN_BASE32_DIGEST_LINE_LENGTH \
+ (DIGEST_BASE32_BYTES /* length of base32 message digest */ \
+ + 2 /* blank and binary indicator */ \
+ + 1 /* minimum filename length */ )
/* True if any of the files read were the standard input. */
static bool have_read_stdin;
@@ -107,8 +112,12 @@ static bool have_read_stdin;
/* The minimum length of a valid checksum line for the selected algorithm. */
static size_t min_digest_line_length;
-/* Set to the length of a digest hex string for the selected algorithm. */
-static size_t digest_hex_bytes;
+/* Set to the length of a digest string for the selected algorithm and base
+ * (hex or base32). */
+static size_t digest_string_bytes;
+
+/* Base32 digits: the RFC 4648 base32 alphabet, written in lower case. */
+static char base32_chars[] = "abcdefghijklmnopqrstuvwxyz234567";
/* With --check, don't generate any output.
The exit code indicates success or failure. */
@@ -137,6 +146,7 @@ static struct option const long_options[] =
{ "status", no_argument, NULL, STATUS_OPTION },
{ "text", no_argument, NULL, 't' },
{ "warn", no_argument, NULL, 'w' },
+ { "base32", no_argument, NULL, '3' },
{ GETOPT_HELP_OPTION_DECL },
{ GETOPT_VERSION_OPTION_DECL },
{ NULL, 0, NULL, 0 }
@@ -178,6 +188,9 @@ With no FILE, or when FILE is -, read standard input.\n\
fputs (_("\
-t, --text read in text mode (default)\n\
"), stdout);
+ printf (_("\
+ --base32 print or check %s sums in base32 (RFC 4648)\n"),
+ DIGEST_TYPE_STRING);
fputs (_("\
\n\
The following three options are useful only when verifying checksums:\n\
@@ -244,12 +257,12 @@ bsd_split_3 (char *s, size_t s_len, unsigned char **hex_digest, char **file_name
}
/* Split the string S (of length S_LEN) into three parts:
- a hexadecimal digest, binary flag, and the file name.
+ a digest string, binary flag, and the file name.
S is modified. Return true if successful. */
static bool
split_3 (char *s, size_t s_len,
- unsigned char **hex_digest, int *binary, char **file_name)
+ unsigned char **digest, int *binary, char **file_name)
{
bool escaped_filename = false;
size_t algo_name_len;
@@ -269,7 +282,7 @@ split_3 (char *s, size_t s_len,
*binary = 0;
return bsd_split_3 (s + i + algo_name_len + 1,
s_len - (i + algo_name_len + 1),
- hex_digest, file_name);
+ digest, file_name);
}
}
@@ -285,12 +298,12 @@ split_3 (char *s, size_t s_len,
++i;
escaped_filename = true;
}
- *hex_digest = (unsigned char *) &s[i];
+ *digest = (unsigned char *) &s[i];
/* The first field has to be the n-character hexadecimal
representation of the message digest. If it is not followed
immediately by a white space it's an error. */
- i += digest_hex_bytes;
+ i += digest_string_bytes;
if (!ISWHITE (s[i]))
return false;
@@ -357,7 +370,7 @@ static bool
hex_digits (unsigned char const *s)
{
unsigned int i;
- for (i = 0; i < digest_hex_bytes; i++)
+ for (i = 0; i < DIGEST_HEX_BYTES; i++)
{
if (!isxdigit (*s))
return false;
@@ -366,6 +379,21 @@ hex_digits (unsigned char const *s)
return *s == '\0';
}
+/* Return true if S is a NUL-terminated string of exactly
+   DIGEST_BASE32_BYTES base32 digits, in either case.
+   Otherwise, return false.  */
+static bool
+base32_digits (unsigned char const *s)
+{
+  unsigned int i;
+  for (i = 0; i < DIGEST_BASE32_BYTES; i++)
+    {
+      /* strchr treats the terminating NUL of base32_chars as part of
+         the string, so a NUL inside S must be rejected explicitly;
+         otherwise it would be accepted as a digit.  */
+      if (*s == '\0' || ! strchr (base32_chars, tolower (*s)))
+        return false;
+      ++s;
+    }
+  return *s == '\0';
+}
+
/* An interface to the function, DIGEST_STREAM.
Operate on FILENAME (it may be "-").
@@ -425,7 +453,89 @@ digest_file (const char *filename, int *binary, unsigned char *bin_result)
}
static bool
-digest_check (const char *checkfile_name)
+hex_digest_matches (unsigned char const *bin_buffer,
+                    char const *purported_digest)
+{
+  /* Return true if PURPORTED_DIGEST, read as DIGEST_HEX_BYTES hex
+     digits in either case, spells out the DIGEST_HEX_BYTES / 2 bytes
+     at BIN_BUFFER.  Otherwise, return false.  */
+  static const char bin2hex[] = { '0', '1', '2', '3',
+                                  '4', '5', '6', '7',
+                                  '8', '9', 'a', 'b',
+                                  'c', 'd', 'e', 'f' };
+  size_t digest_bin_bytes = DIGEST_HEX_BYTES / 2;
+  size_t cnt;
+
+  /* Compare generated binary number with text representation
+     in check file.  Ignore case of hex digits.  The casts are needed
+     because plain char may be negative, and passing a negative value
+     other than EOF to tolower is undefined.  */
+  for (cnt = 0; cnt < digest_bin_bytes; ++cnt)
+    {
+      if ((tolower ((unsigned char) purported_digest[2 * cnt])
+           != bin2hex[bin_buffer[cnt] >> 4])
+          || (tolower ((unsigned char) purported_digest[2 * cnt + 1])
+              != bin2hex[bin_buffer[cnt] & 0xf]))
+        return false;
+    }
+
+  return true;
+}
+
+/* Encode the DIGEST_BIN_BYTES bytes at BIN_BUFFER as DIGEST_BASE32_BYTES
+   base32 digits, taking the most significant bits of each byte first.
+   Bits needed beyond the last byte are taken as zero.
+
+   If PURPORTED_DIGEST is non-NULL, each digit is compared with the
+   corresponding character of PURPORTED_DIGEST, ignoring case, and the
+   function returns false at the first mismatch.  Every digit that is
+   not a mismatch is written to OUT when OUT is non-NULL.
+
+   Return true if all digits were processed without a mismatch.  */
+static bool
+print_or_check_base32 (unsigned char const *bin_buffer,
+                       char const *purported_digest,
+                       FILE *out)
+{
+  /* Based on public domain code from bitzi by way of heritrix.  */
+  int i;
+  int index = 0;     /* Bit offset of the next digit in the current byte.  */
+  int digit;
+  size_t cnt = 0;    /* Index of the current byte in BIN_BUFFER.  */
+  unsigned char curr_byte, next_byte;
+
+  for (i = 0; i < DIGEST_BASE32_BYTES; i++)
+    {
+      curr_byte = bin_buffer[cnt];
+
+      /* Is the current digit going to span a byte boundary?  */
+      if (index > 3)
+        {
+          /* Past the last digest byte, pad with zero bits.  */
+          if (cnt + 1 < DIGEST_BIN_BYTES)
+            next_byte = bin_buffer[cnt + 1];
+          else
+            next_byte = 0;
+
+          digit = curr_byte & (0xFF >> index);
+          index = (index + 5) % 8;
+          digit <<= index;
+          digit |= next_byte >> (8 - index);
+          cnt++;
+        }
+      else
+        {
+          digit = (curr_byte >> (8 - (index + 5))) & 0x1F;
+          index = (index + 5) % 8;
+          if (index == 0)
+            cnt++;
+        }
+
+      /* Plain char may be negative; tolower requires a value
+         representable as unsigned char (or EOF).  */
+      if (purported_digest != NULL
+          && (base32_chars[digit]
+              != tolower ((unsigned char) purported_digest[i])))
+        return false;
+      else if (out)
+        fputc (base32_chars[digit], out);
+    }
+
+  return true;
+}
+
+/* Write the base32 form of the digest at BIN_BUFFER to standard
+   output.  Nothing is compared.  */
+static void
+print_base32_digest (const unsigned char *bin_buffer)
+{
+  FILE *sink = stdout;
+
+  print_or_check_base32 (bin_buffer, NULL, sink);
+}
+
+/* Report whether PURPORTED_DIGEST agrees with the base32 form of the
+   digest at BIN_BUFFER.  Nothing is printed.  */
+static bool
+base32_digest_matches (const unsigned char *bin_buffer,
+                       const char *purported_digest)
+{
+  bool same = print_or_check_base32 (bin_buffer, purported_digest, NULL);
+
+  return same;
+}
+
+static bool
+digest_check (const char *checkfile_name, bool base32)
{
FILE *checkfile_stream;
uintmax_t n_properly_formatted_lines = 0;
@@ -462,7 +572,7 @@ digest_check (const char *checkfile_name)
{
char *filename IF_LINT (= NULL);
int binary;
- unsigned char *hex_digest IF_LINT (= NULL);
+ unsigned char *purported_digest IF_LINT (= NULL);
ssize_t line_length;
++line_number;
@@ -482,9 +592,11 @@ digest_check (const char *checkfile_name)
if (line[line_length - 1] == '\n')
line[--line_length] = '\0';
- if (! (split_3 (line, line_length, &hex_digest, &binary, &filename)
+ if (! (split_3 (line, line_length, &purported_digest, &binary, &filename)
&& ! (is_stdin && STREQ (filename, "-"))
- && hex_digits (hex_digest)))
+ && (base32 && base32_digits (purported_digest)
+ || ! base32 && hex_digits (purported_digest))))
+
{
if (warn)
{
@@ -497,10 +609,6 @@ digest_check (const char *checkfile_name)
}
else
{
- static const char bin2hex[] = { '0', '1', '2', '3',
- '4', '5', '6', '7',
- '8', '9', 'a', 'b',
- 'c', 'd', 'e', 'f' };
bool ok;
++n_properly_formatted_lines;
@@ -517,24 +625,20 @@ digest_check (const char *checkfile_name)
}
else
{
- size_t digest_bin_bytes = digest_hex_bytes / 2;
- size_t cnt;
- /* Compare generated binary number with text representation
- in check file. Ignore case of hex digits. */
- for (cnt = 0; cnt < digest_bin_bytes; ++cnt)
- {
- if (tolower (hex_digest[2 * cnt])
- != bin2hex[bin_buffer[cnt] >> 4]
- || (tolower (hex_digest[2 * cnt + 1])
- != (bin2hex[bin_buffer[cnt] & 0xf])))
- break;
- }
- if (cnt != digest_bin_bytes)
+ bool digest_matches;
+ if (base32)
+ digest_matches = base32_digest_matches (bin_buffer,
+ purported_digest);
+ else
+ digest_matches = hex_digest_matches (bin_buffer,
+ purported_digest);
+
+ if (!digest_matches)
++n_mismatched_checksums;
if (!status_only)
{
- if (cnt != digest_bin_bytes)
+ if (!digest_matches)
printf ("%s: %s\n", filename, _("FAILED"));
else if (!quiet)
printf ("%s: %s\n", filename, _("OK"));
@@ -607,6 +711,7 @@ main (int argc, char **argv)
int opt;
bool ok = true;
int binary = -1;
+ bool base32 = false;
/* Setting values of global variables. */
initialize_main (&argc, &argv);
@@ -630,6 +735,9 @@ main (int argc, char **argv)
case 'c':
do_check = true;
break;
+ case '3':
+ base32 = true;
+ break;
case STATUS_OPTION:
status_only = true;
warn = false;
@@ -654,8 +762,16 @@ main (int argc, char **argv)
usage (EXIT_FAILURE);
}
- min_digest_line_length = MIN_DIGEST_LINE_LENGTH;
- digest_hex_bytes = DIGEST_HEX_BYTES;
+ if (base32)
+ {
+ digest_string_bytes = DIGEST_BASE32_BYTES;
+ min_digest_line_length = MIN_BASE32_DIGEST_LINE_LENGTH;
+ }
+ else
+ {
+ digest_string_bytes = DIGEST_HEX_BYTES;
+ min_digest_line_length = MIN_HEX_DIGEST_LINE_LENGTH;
+ }
if (0 <= binary && do_check)
{
@@ -696,7 +812,7 @@ main (int argc, char **argv)
char *file = argv[optind];
if (do_check)
- ok &= digest_check (file);
+ ok &= digest_check (file, base32);
else
{
int file_is_binary = binary;
@@ -712,8 +828,11 @@ main (int argc, char **argv)
if (strchr (file, '\n') || strchr (file, '\\'))
putchar ('\\');
- for (i = 0; i < (digest_hex_bytes / 2); ++i)
- printf ("%02x", bin_buffer[i]);
+ if (base32)
+ print_base32_digest (bin_buffer);
+ else
+ for (i = 0; i < (DIGEST_HEX_BYTES / 2); ++i)
+ printf ("%02x", bin_buffer[i]);
putchar (' ');
if (file_is_binary)
--
1.6.3.3
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [PATCH] md5sum, sha*sum: add --base32 option,
Noah Levitt <=