/* linecut -- output slices of lines from file(s) Copyright (C) 2007 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include #include #include #include #include "system.h" #include "error.h" #include "quote.h" #include "safe-read.h" /* The official name of this program (e.g., no `g' prefix). */ #define PROGRAM_NAME "linecut" #define AUTHORS "Steven Schubiger" /* Maximum of allowed range sets. */ #define MAX_RANGE_SETS (4 * 2) /* Macro for validation of range components. */ #define VALID_RANGE(range) (ISDIGIT (range) || range == '-' || range == '+') /* If true, range parameter provided. */ static bool range_mode; /* If true, print numbered lines. */ static bool number_mode; /* If true, print filename headers. */ static bool print_headers; /* When to print the filename banners. */ enum header_mode { multiple_files, always, never }; /* Are we reading from standard input? */ static bool is_stdin; /* Have we ever read standard input? */ static bool have_read_stdin; /* The name this program was run with. */ static char *program_name; /* The iterator for the ranges set. */ static int range_max; /* The sets of ranges. */ static long ranges[MAX_RANGE_SETS]; /* The file offsets to seek for. */ static off_t offsets[MAX_RANGE_SETS]; /* How many lines the stream consists of. */ static int lines; /* Container for file-descriptor and handle. */ static struct { int fd; FILE *fh; } input; /* Buffer for line numbers. An 11 digit counter may overflow within an hour on a P2/466, an 18 digit counter needs about 1000y */ #define LINE_COUNTER_BUF_LEN 20 static char line_buf[LINE_COUNTER_BUF_LEN] = { ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '0', '\t', '\0' }; /* Position in `line_buf' where printing starts. This will not change unless the number of lines is larger than 999999. */ static char *line_num_print = line_buf + LINE_COUNTER_BUF_LEN - 8; /* Position of the first digit in `line_buf'. */ static char *line_num_start = line_buf + LINE_COUNTER_BUF_LEN - 3; /* Position of the last digit in `line_buf'. */ static char *line_num_end = line_buf + LINE_COUNTER_BUF_LEN - 3; /* Preserves the `cat' function's local `newlines' between invocations. */ static int newlines2 = 0; static struct option const long_options[] = { {"number", no_argument, NULL, 'n'}, {"range", required_argument, NULL, 'r'}, {"quiet", no_argument, NULL, 'q'}, {"silent", no_argument, NULL, 'q'}, {"verbose", no_argument, NULL, 'v'}, {GETOPT_HELP_OPTION_DECL}, {GETOPT_VERSION_OPTION_DECL}, {NULL, 0, NULL, 0} }; void usage (int status) { if (status != EXIT_SUCCESS) fprintf (stderr, _("Try `%s --help' for more information.\n"), program_name); else { printf (_("\ Usage: %s [OPTION]... [FILE]...\n\ "), program_name); fputs (_("\ Print slices of lines of each FILE to standard output.\n\ With more than one FILE, precede each with a header giving the file name.\n\ With no FILE, or when FILE is -, read standard input.\n\ \n\ "), stdout); fputs (_("\ Mandatory arguments to long options are mandatory for short options too.\n\ "), stdout); fputs (_("\ -n, --number number all output lines\n\ -r, --range=N slices of lines to be output\n\ "), stdout); fputs (_("\ -q, --quiet, --silent never print headers giving file names\n\ -v, --verbose always print headers giving file names\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); fputs (_("\ \n\ N must conform to following format:\n\ N_START:N_END, ...\n\ N_START or N_END may be an absolute line position or relative-to-EOF one.\n\ "), stdout); emit_bug_reporting_address (); } exit (status); } static void write_header (const char *filename) { static bool first_file = true; printf ("%s==> %s <==\n", (first_file ? "" : "\n"), filename); first_file = false; } static void next_line_num (void) { char *endp = line_num_end; do { if ((*endp)++ < '9') return; *endp-- = '0'; } while (endp >= line_num_start); if (line_num_start > line_buf) *--line_num_start = '1'; else *line_buf = '>'; if (line_num_start < line_num_print) line_num_print--; } static bool elide_lines_seekable (const char *filename, int fd, uintmax_t line_from, uintmax_t line_to, off_t offset_from, off_t offset_to) { char buffer[BUFSIZ]; char *buf, *next, *curr; uintmax_t line_current; if ((lseek (fd, offset_from, SEEK_SET)) < 0) { error (0, errno, _("cannot lseek %s"), quote (filename)); return false; } line_current = line_from; while (1) { size_t bytes_read = safe_read (fd, buffer, BUFSIZ); size_t bytes_to_write = 0; if (bytes_read == SAFE_READ_ERROR) { error (0, errno, _("error reading %s"), quote (filename)); return false; } if (bytes_read == 0) break; buf = buffer; for (; line_current <= line_to; line_current++) { if (bytes_to_write < bytes_read) { if (number_mode) { if (fwrite (line_num_print, 1, strlen (line_num_print), stdout) < strlen (line_num_print)) { error (0, errno, _("write error")); return false; } next_line_num (); } /* full line with trailing newline detected. */ if (strstr (buf, "\n") != NULL) { curr = (char *) rawmemchr (buf, '\n'); next = curr; next++; *curr = '\0'; if (fwrite (buf, 1, curr - buf, stdout) < curr - buf) { error (0, errno, _("write error")); return false; } if (fwrite ("\n", 1, 1, stdout) < 1) { error (0, errno, _("write error")); return false; } bytes_to_write += curr - buf; buf = next; } /* part of line with no trailing newline. */ else { if (fwrite (buf, 1, buffer + BUFSIZ - buf, stdout) < buffer + BUFSIZ - buf) { error (0, errno, _("write error")); return false; } bytes_to_write += buffer + BUFSIZ - buf; break; } } else break; } } return true; } static void relative_range_to_absolute (long *range_pos) { if (*range_pos < 0) *range_pos += lines + 1; } static bool range_lines (const char *filename, int fd) { bool ok; int i; long line_from, line_to; off_t offset_from, offset_to; if (print_headers) write_header (filename); for (i = 0; i < range_max; i += 2) { line_from = ranges[i]; line_to = ranges[i+1]; relative_range_to_absolute (&line_from); relative_range_to_absolute (&line_to); offset_from = offsets[i]; offset_to = offsets[i+1]; ok = elide_lines_seekable (filename, fd, line_from, line_to, offset_from, offset_to); } return ok; } static bool determine_seek_offsets (const char *filename, int fd) { char buffer[BUFSIZ]; int i; long line_iter, range_abs; size_t bytes_offset; off_t seek_offset; if ((lseek (fd, 0, SEEK_SET)) < 0) { error (0, errno, _("cannot lseek %s"), quote (filename)); return false; } for (i = 0, line_iter = 0, bytes_offset = 0, seek_offset = 0; 1;) { size_t bytes_read = safe_read (fd, buffer, BUFSIZ); size_t bytes_to_iter = 0; if (bytes_read == SAFE_READ_ERROR) { error (0, errno, _("error reading %s"), quote (filename)); return false; } if (bytes_read == 0) break; while (bytes_to_iter < bytes_read) { if (buffer[bytes_to_iter] == '\n') { line_iter++; break; } bytes_to_iter++; } bytes_offset += bytes_to_iter + 1; range_abs = ranges[i]; relative_range_to_absolute (&range_abs); if (line_iter == range_abs) offsets[i++] = seek_offset; if ((seek_offset = lseek (fd, bytes_offset, SEEK_SET)) < 0) { error (0, errno, _("cannot lseek %s"), quote (filename)); return false; } } return true; } static bool count_lines_in_stream (const char *filename, int fd) { char buffer[BUFSIZ]; uintmax_t total_lines; for (total_lines = 0; 1;) { size_t bytes_read = safe_read (fd, buffer, BUFSIZ); size_t bytes_to_iter = 0; if (bytes_read == SAFE_READ_ERROR) { error (0, errno, _("error reading %s"), quote (filename)); return false; } if (bytes_read == 0) break; while (bytes_to_iter < bytes_read) { if (buffer[bytes_to_iter] == '\n') total_lines++; bytes_to_iter++; } } lines = total_lines; return true; } static bool validate_ranges (const char *filename, int fd) { bool ok; int i; long line_start, line_end; long seen_start, seen_end; ok = count_lines_in_stream (filename, fd); for (i = 0, seen_start = 0, seen_end = 0; i < range_max; i += 2) { line_start = ranges[i]; line_end = ranges[i+1]; if (line_start == 0) error (EXIT_FAILURE, 0, _("starting line must not be 0")); if (line_end == 0) error (EXIT_FAILURE, 0, _("ending line must not be 0")); relative_range_to_absolute (&line_start); relative_range_to_absolute (&line_end); if (line_start <= seen_start) error (EXIT_FAILURE, 0, _("starting line overlaps with previous ending one")); if (line_start > line_end) error (EXIT_FAILURE, 0, _("starting line must preceed the ending line")); if (line_start > lines) error (EXIT_FAILURE, 0, _("starting line exceeds total lines of %s"), quote (filename)); if (line_end > lines) error (EXIT_FAILURE, 0, _("ending line exceeds total lines of %s"), quote (filename)); seen_start = line_start; seen_start = line_end; } return ok; } static bool buffer_stdin_to_tmpfile (const char *filename, int fd) { char buffer[BUFSIZ]; FILE *tempfile; if ((tempfile = tmpfile ()) == NULL) { error (0, errno, _("cannot create temporary file")); return false; } while (1) { size_t bytes_read = safe_read (fd, buffer, BUFSIZ); size_t bytes_to_write = 0; if (bytes_read == SAFE_READ_ERROR) { error (0, errno, _("error reading %s"), quote (filename)); return false; } if (bytes_read == 0) break; if (fwrite (buffer, 1, strlen (buffer), tempfile) < strlen (buffer)) { error (0, errno, _("write error")); return false; } } input.fd = fileno (tempfile); input.fh = tempfile; rewind (tempfile); return true; } static bool range_file (const char *filename) { int fd; bool ok; is_stdin = STREQ (filename, "-"); if (is_stdin) { have_read_stdin = true; fd = STDIN_FILENO; filename = _("standard input"); if (O_BINARY && ! isatty (STDIN_FILENO)) freopen (NULL, "rb", stdin); } else { fd = open (filename, O_RDONLY | O_BINARY); if (fd < 0) { error (0, errno, _("cannot open %s for reading"), quote (filename)); return false; } } if (is_stdin) ok = buffer_stdin_to_tmpfile (filename, fd); else { input.fd = fd; ok = true; } ok &= validate_ranges (filename, input.fd); ok &= determine_seek_offsets (filename, input.fd); ok &= range_lines (filename, input.fd); if (is_stdin && fclose (input.fh) != 0) { error (0, errno, _("cannot close temporary file")); return false; } if (!is_stdin && close (fd) != 0) { error (0, errno, _("closing %s"), quote (filename)); return false; } return ok; } int main(int argc, char **argv) { char *buf, *buf_orig, *ranges_arg, *range_digits; enum header_mode header_mode = multiple_files; bool ok = true, seen_range_sep; int c, i, digits_count, ranges_seen; /* Initializer for file_list if no file-arguments were specified on the command line. */ static char const *const default_file_list[] = {"-", NULL}; char const *const *file_list; initialize_main (&argc, &argv); program_name = argv[0]; setlocale (LC_ALL, ""); bindtextdomain (PACKAGE, LOCALEDIR); textdomain (PACKAGE); atexit (close_stdout); have_read_stdin = false; ranges_arg = NULL; while ((c = getopt_long (argc, argv, "nqr:v", long_options, NULL)) != -1) { switch(c) { case 'n': number_mode = true; next_line_num(); break; case 'q': header_mode = never; break; case 'r': range_mode = true; ranges_arg = optarg; break; case 'v': header_mode = always; break; case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); default: usage (EXIT_FAILURE); } } if (!range_mode) usage (EXIT_FAILURE); else { ranges_seen = 0; while (*ranges_arg) { if (VALID_RANGE (*ranges_arg)) { digits_count = 0; range_digits = ranges_arg; while (VALID_RANGE (*range_digits)) { digits_count++; range_digits++; } digits_count++; buf = xcalloc (digits_count, sizeof (char)); buf_orig = buf; while (VALID_RANGE (*ranges_arg)) { *buf = *ranges_arg++; buf++; } *buf = '\0'; buf = buf_orig; ranges[range_max++] = atol (buf); free (buf); if (range_max > MAX_RANGE_SETS) error (EXIT_FAILURE, 0, _("only %d sets permitted"), MAX_RANGE_SETS / 2); ranges_seen++; } else { if (ranges_seen == 2) { if (*ranges_arg != ',') error (EXIT_FAILURE, 0, _("comma as set separator required")); ranges_seen = 0; seen_range_sep = false; } else if (ranges_seen == 1 && !seen_range_sep) { if (*ranges_arg != ':') error (EXIT_FAILURE, 0, _("colon as range separator required")); else seen_range_sep = true; } else { if (!ISDIGIT (*ranges_arg) || *ranges_arg != '-') error (EXIT_FAILURE, 0, _("range must be number")); } ++ranges_arg; } } } file_list = (optind < argc ? (char const *const *) &argv[optind] : default_file_list); if (header_mode == always || (header_mode == multiple_files && optind < argc - 1)) print_headers = true; if (O_BINARY && ! isatty (STDOUT_FILENO)) freopen (NULL, "wb", stdout); for (i = 0; file_list[i]; i++) ok &= range_file (file_list[i]); if (have_read_stdin && close (STDIN_FILENO) < 0) error (EXIT_FAILURE, errno, "-"); exit (ok ? EXIT_SUCCESS : EXIT_FAILURE); }