507755520 /home/ztion/words
real 0m1.518s
user 0m1.157s
sys 0m0.361s
I studied the generated assembly: with -O3, gcc generates some fancy SSE code, which gives some nice speedups. As far as I know, memchr is also SSE-optimized, so it's interesting that this version is so much faster (twice as fast, actually).
In case you don't like turning -O3 on for some reason (the default in coreutils is -O2, I think), the best version I could put together for -O2 was this:
Improved version 2, compiled with -O2:
507755520 /home/ztion/words
real 0m2.206s
user 0m1.827s
sys 0m0.379s
Improved version:
--- /home/ztion/coreutils/core/coreutils-8.23/src/wc.c 2014-07-11 13:00:07.000000000 +0200
+++ wc.c 2015-03-15 09:01:38.141536166 +0100
@@ -259,11 +259,14 @@
}
else if (!count_chars && !count_complicated)
{
+ uintmax_t count_lines;
+
+ count_lines = 0;
/* Use a separate loop when counting only lines or lines and bytes --
but not chars or words. */
while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
{
- char *p = buf;
+ unsigned char *p = buf, *end;
if (bytes_read == SAFE_READ_ERROR)
{
@@ -272,13 +275,18 @@
break;
}
- while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
+ end = buf + bytes_read;
+ /* this is actually faster than memchr */
+ while (p != end)
{
+ count_lines += *p == '\n';
++p;
- ++lines;
}
+
bytes += bytes_read;
}
+ lines = count_lines;
+
}
#if MB_LEN_MAX > 1
# define SUPPORT_OLD_MBRTOWC 1
Improved version 2:
--- /home/ztion/coreutils/core/coreutils-8.23/src/wc.c 2014-07-11 13:00:07.000000000 +0200
+++ wc.c 2015-03-15 09:27:55.815459623 +0100
@@ -259,11 +259,15 @@
}
else if (!count_chars && !count_complicated)
{
+ uintmax_t count_lines;
+
+ count_lines = 0;
/* Use a separate loop when counting only lines or lines and bytes --
but not chars or words. */
while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0)
{
- char *p = buf;
+ unsigned char *p = buf, *end;
+ uint32_t temp_chars;
if (bytes_read == SAFE_READ_ERROR)
{
@@ -272,13 +276,37 @@
break;
}
- while ((p = memchr (p, '\n', (buf + bytes_read) - p)))
+ end = buf + bytes_read;
+ while (p < end - 8)
+ {
+ temp_chars = (*((int32_t *)p));
+ count_lines += (temp_chars & 0xff) == '\n';
+ count_lines += ((temp_chars >> 8) & 0xff) == '\n';
+ count_lines += ((temp_chars >> 16) & 0xff) == '\n';
+ count_lines += ((temp_chars >> 24) & 0xff) == '\n';
+
+ p += 4;
+
+ temp_chars = (*((int32_t *)p));
+ count_lines += (temp_chars & 0xff) == '\n';
+ count_lines += ((temp_chars >> 8) & 0xff) == '\n';
+ count_lines += ((temp_chars >> 16) & 0xff) == '\n';
+ count_lines += ((temp_chars >> 24) & 0xff) == '\n';
+
+ p += 4;
+
+ }
+ /* do last bytes */
+ while (p != end)
{
- ++p;
- ++lines;
+ count_lines += *p == '\n';
+ p++;
}
+
bytes += bytes_read;
}
+ lines = count_lines;
+
}
#if MB_LEN_MAX > 1
# define SUPPORT_OLD_MBRTOWC 1