>From 00aedf3f3c1b60c4e734d042b91f687a19d9e1fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= Date: Mon, 23 Mar 2015 11:54:19 +0000 Subject: [PATCH] wc: use a more adaptive wc -l implementation * src/wc.c (wc): Allow any block to select the count implementation, rather than just using the first 10 lines. This also simplifies the code from 3 loops to 2. --- src/wc.c | 55 +++++++++++++++++++++++++------------------------------ 1 file changed, 25 insertions(+), 30 deletions(-) diff --git a/src/wc.c b/src/wc.c index ceb48ed..7d030ae 100644 --- a/src/wc.c +++ b/src/wc.c @@ -265,7 +265,6 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos) /* Use a separate loop when counting only lines or lines and bytes -- but not chars or words. */ bool long_lines = false; - bool check_len = true; while ((bytes_read = safe_read (fd, buf, BUFFER_SIZE)) > 0) { char *p = buf; @@ -277,41 +276,37 @@ wc (int fd, char const *file_x, struct fstatus *fstatus, off_t current_pos) break; } + bytes += bytes_read; + char *end = p + bytes_read; + uintmax_t plines = lines; - /* Avoid function call overhead for shorter lines. */ - if (check_len) - while (p != end) - { + if (! long_lines) + { + /* Avoid function call overhead for shorter lines. */ + while (p != end) lines += *p++ == '\n'; - /* If there are more than 150 chars in the first 10 lines, - then use memchr, where system specific optimizations - may outweigh function call overhead. - FIXME: This line length was determined in 2015, on both - x86_64 and ppc64, but it's worth re-evaluating in future with - newer compilers, CPUs, or memchr() implementations etc. */ - if (lines <= 10) - { - if (p - buf > 150) - { - long_lines = true; - break; - } - } - } - else if (! long_lines) - while (p != end) - lines += *p++ == '\n'; - - /* memchr is more efficient with longer lines. */ - while ((p = memchr (p, '\n', (buf + bytes_read) - p))) + } + else { - ++p; - ++lines; + /* memchr is more efficient with longer lines. */ + while ((p = memchr (p, '\n', (buf + bytes_read) - p))) + { + ++p; + ++lines; + } } - bytes += bytes_read; - check_len = false; + /* If the average line length in the block is > 15, then use + memchr for the next block, where system specific optimizations + may outweigh function call overhead. + FIXME: This line length was determined in 2015, on both + x86_64 and ppc64, but it's worth re-evaluating in future with + newer compilers, CPUs, or memchr() implementations etc. */ + if (lines == plines || (bytes_read / (lines - plines) > 15)) + long_lines = true; + else + long_lines = false; } } #if MB_LEN_MAX > 1 -- 2.1.0