/* linecut -- output slices of lines from file(s)
Copyright (C) 2007 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include
#include
#include
#include
#include "system.h"
#include "error.h"
#include "quote.h"
#include "safe-read.h"
/* The official name of this program (e.g., no `g' prefix). */
#define PROGRAM_NAME "linecut"

#define AUTHORS "Steven Schubiger"

/* Capacity of the `ranges' and `offsets' arrays: at most four range
   sets, each consisting of two endpoints (start and end). */
#define MAX_RANGE_SETS (4 * 2)

/* Nonzero if the character RANGE may be part of a range endpoint,
   i.e. it is a decimal digit or a sign.  The argument is parenthesized
   so that any expression can be passed; it is evaluated more than once
   and therefore must not have side effects. */
#define VALID_RANGE(range) \
  (ISDIGIT (range) || (range) == '-' || (range) == '+')
/* If true, a range argument was given on the command line
(set by main when it sees the `r' option). */
static bool range_mode;
/* If true, print numbered lines (set by main for the `n' option). */
static bool number_mode;
/* If true, range_lines prints a filename header before each file's
output. Derived in main from the selected header_mode. */
static bool print_headers;
/* When to print the filename banners:
multiple_files - only when more than one file operand was given (default),
always - selected by the `v' option,
never - selected by the `q' option. */
enum header_mode
{
multiple_files, always, never
};
/* Is the file currently being processed standard input?
Set by range_file for each file operand. */
static bool is_stdin;
/* Have we ever read standard input? If so, main closes it before
exiting. */
static bool have_read_stdin;
/* The name this program was run with. */
static char *program_name;
/* Number of endpoints stored in `ranges' so far. Endpoints are stored
pairwise, so complete range sets occupy two consecutive slots. */
static int range_max;
/* The range endpoints as parsed from the command line, stored as
start, end, start, end, ... A negative value is relative to the end
of the input (see relative_range_to_absolute). */
static long ranges[MAX_RANGE_SETS];
/* The file offsets to seek to, indexed like `ranges'; filled in by
determine_seek_offsets. */
static off_t offsets[MAX_RANGE_SETS];
/* How many lines the stream consists of, as counted by
count_lines_in_stream (number of newline characters). */
static int lines;
/* Container for the input being processed: `fd' is the descriptor all
readers use; `fh' is only assigned when standard input has been copied
to a temporary file (see buffer_stdin_to_tmpfile). */
static struct
{
int fd;
FILE *fh;
} input;
/* Buffer for line numbers.
An 11 digit counter may overflow within an hour on a P2/466,
an 18 digit counter needs about 1000y */
#define LINE_COUNTER_BUF_LEN 20
/* Initial contents: 17 blanks, the digit '0', a tab and the terminating
NUL. next_line_num increments the digits in place, so the counter is
kept as ready-to-print text. */
static char line_buf[LINE_COUNTER_BUF_LEN] =
{
' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',
' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '0',
'\t', '\0'
};
/* Position in `line_buf' where printing starts. This will not change
unless the number of lines is larger than 999999. */
static char *line_num_print = line_buf + LINE_COUNTER_BUF_LEN - 8;
/* Position of the first (most significant) digit in `line_buf'.
Moves towards the start of the buffer as the counter gains digits. */
static char *line_num_start = line_buf + LINE_COUNTER_BUF_LEN - 3;
/* Position of the last (least significant) digit in `line_buf'. */
static char *line_num_end = line_buf + LINE_COUNTER_BUF_LEN - 3;
/* Preserves the `cat' function's local `newlines' between invocations.
NOTE(review): nothing in the visible part of this file reads or writes
this variable; it looks like a leftover from cat.c -- confirm before
removing. */
static int newlines2 = 0;
/* Long options accepted by getopt_long in main. Each entry maps a long
name to the short option character handled in main's switch;
"quiet" and "silent" are synonyms that both map to `q'. The table is
terminated by an all-zero entry. */
static struct option const long_options[] =
{
{"number", no_argument, NULL, 'n'},
{"range", required_argument, NULL, 'r'},
{"quiet", no_argument, NULL, 'q'},
{"silent", no_argument, NULL, 'q'},
{"verbose", no_argument, NULL, 'v'},
{GETOPT_HELP_OPTION_DECL},
{GETOPT_VERSION_OPTION_DECL},
{NULL, 0, NULL, 0}
};
/* Print usage information and terminate the program with STATUS.
   For EXIT_SUCCESS the complete help text goes to standard output;
   for any other status only a short hint is written to standard
   error.  This function does not return. */
void
usage (int status)
{
  if (status == EXIT_SUCCESS)
    {
      /* Synopsis line. */
      printf (_("\
Usage: %s [OPTION]... [FILE]...\n\
"),
              program_name);

      /* General description. */
      fputs (_("\
Print slices of lines of each FILE to standard output.\n\
With more than one FILE, precede each with a header giving the file name.\n\
With no FILE, or when FILE is -, read standard input.\n\
\n\
"), stdout);
      fputs (_("\
Mandatory arguments to long options are mandatory for short options too.\n\
"), stdout);

      /* Option summary. */
      fputs (_("\
-n, --number number all output lines\n\
-r, --range=N slices of lines to be output\n\
"), stdout);
      fputs (_("\
-q, --quiet, --silent never print headers giving file names\n\
-v, --verbose always print headers giving file names\n\
"), stdout);
      fputs (HELP_OPTION_DESCRIPTION, stdout);
      fputs (VERSION_OPTION_DESCRIPTION, stdout);

      /* Format of the range argument. */
      fputs (_("\
\n\
N must conform to following format:\n\
N_START:N_END, ...\n\
N_START or N_END may be an absolute line position or relative-to-EOF one.\n\
"), stdout);
      emit_bug_reporting_address ();
    }
  else
    fprintf (stderr, _("Try `%s --help' for more information.\n"),
             program_name);

  exit (status);
}
/* Print the "==> FILENAME <==" banner on standard output.  Every banner
   except the very first one printed by this function is preceded by an
   empty line. */
static void
write_header (const char *filename)
{
  /* True until the first banner has been printed. */
  static bool no_banner_yet = true;
  const char *separator;

  if (no_banner_yet)
    separator = "";
  else
    separator = "\n";

  printf ("%s==> %s <==\n", separator, filename);
  no_banner_yet = false;
}
/* Add one to the textual line counter kept in `line_buf'.
   The digits between `line_num_start' and `line_num_end' are incremented
   in place with carry.  When every digit wraps, the number grows by one
   leading '1'; once the buffer has no room left, its first character is
   set to '>' instead. */
static void
next_line_num (void)
{
  char *digit = line_num_end;

  /* Ripple-carry increment, starting at the least significant digit. */
  for (;;)
    {
      if (*digit < '9')
        {
          ++*digit;
          return;
        }
      *digit = '0';
      digit--;
      if (digit < line_num_start)
        break;
    }

  /* All digits wrapped around to '0': extend the number to the left. */
  if (line_num_start > line_buf)
    {
      line_num_start--;
      *line_num_start = '1';
    }
  else
    *line_buf = '>';

  /* Keep the print position at or before the first digit. */
  if (line_num_start < line_num_print)
    line_num_print--;
}
/* Copy the lines LINE_FROM through LINE_TO (inclusive) of the file open
   on descriptor FD to standard output.

   OFFSET_FROM is the byte offset at which line LINE_FROM starts; the
   descriptor is positioned there before reading.  OFFSET_TO is accepted
   for interface compatibility but not needed: copying stops as soon as
   the newline that ends line LINE_TO has been written (or at end of
   file, in which case a final unterminated line is copied as is).

   When `number_mode' is set, every output line is prefixed with the
   current contents of the line counter, which is then advanced.

   FILENAME is only used in diagnostics.  Return true on success, false
   after a seek, read or write error has been reported. */
static bool
elide_lines_seekable (const char *filename, int fd,
                      uintmax_t line_from, uintmax_t line_to,
                      off_t offset_from, off_t offset_to)
{
  char buffer[BUFSIZ];
  uintmax_t line_current = line_from;
  /* True while the next byte to be written is the first of a line.  A
     line may span several reads; its number must be printed only once. */
  bool at_line_start = true;

  (void) offset_to;

  if (lseek (fd, offset_from, SEEK_SET) < 0)
    {
      error (0, errno, _("cannot lseek %s"), quote (filename));
      return false;
    }

  while (line_current <= line_to)
    {
      size_t bytes_read = safe_read (fd, buffer, sizeof buffer);
      char const *pos;
      char const *end;

      if (bytes_read == SAFE_READ_ERROR)
        {
          error (0, errno, _("error reading %s"), quote (filename));
          return false;
        }
      if (bytes_read == 0)
        break;

      /* Only the first BYTES_READ bytes are valid; the buffer is not
         NUL-terminated, so all searching is length-bounded. */
      pos = buffer;
      end = buffer + bytes_read;

      while (pos < end && line_current <= line_to)
        {
          char const *newline = memchr (pos, '\n', end - pos);
          /* Either a complete line including its newline, or the part
             of a line that fits into this buffer. */
          size_t chunk = (newline ? newline + 1 : end) - pos;

          if (at_line_start && number_mode)
            {
              size_t prefix_len = strlen (line_num_print);

              if (fwrite (line_num_print, 1, prefix_len, stdout)
                  < prefix_len)
                {
                  error (0, errno, _("write error"));
                  return false;
                }
              next_line_num ();
            }

          if (fwrite (pos, 1, chunk, stdout) < chunk)
            {
              error (0, errno, _("write error"));
              return false;
            }

          pos += chunk;
          at_line_start = (newline != NULL);
          if (newline != NULL)
            line_current++;
        }
    }

  return true;
}
/* Turn a range endpoint that counts from the end of the input into an
   absolute line number.  A negative *RANGE_POS is replaced by
   *RANGE_POS + lines + 1, so that -1 denotes the last line; values that
   are not negative are left untouched. */
static void
relative_range_to_absolute (long *range_pos)
{
  long position = *range_pos;

  if (position >= 0)
    return;

  *range_pos = position + lines + 1;
}
static bool
range_lines (const char *filename, int fd)
{
bool ok;
int i;
long line_from, line_to;
off_t offset_from, offset_to;
if (print_headers)
write_header (filename);
for (i = 0; i < range_max; i += 2)
{
line_from = ranges[i];
line_to = ranges[i+1];
relative_range_to_absolute (&line_from);
relative_range_to_absolute (&line_to);
offset_from = offsets[i];
offset_to = offsets[i+1];
ok = elide_lines_seekable (filename, fd, line_from, line_to, offset_from, offset_to);
}
return ok;
}
static bool
determine_seek_offsets (const char *filename, int fd)
{
char buffer[BUFSIZ];
int i;
long line_iter, range_abs;
size_t bytes_offset;
off_t seek_offset;
if ((lseek (fd, 0, SEEK_SET)) < 0)
{
error (0, errno, _("cannot lseek %s"), quote (filename));
return false;
}
for (i = 0, line_iter = 0, bytes_offset = 0, seek_offset = 0; 1;)
{
size_t bytes_read = safe_read (fd, buffer, BUFSIZ);
size_t bytes_to_iter = 0;
if (bytes_read == SAFE_READ_ERROR)
{
error (0, errno, _("error reading %s"), quote (filename));
return false;
}
if (bytes_read == 0)
break;
while (bytes_to_iter < bytes_read)
{
if (buffer[bytes_to_iter] == '\n')
{
line_iter++;
break;
}
bytes_to_iter++;
}
bytes_offset += bytes_to_iter + 1;
range_abs = ranges[i];
relative_range_to_absolute (&range_abs);
if (line_iter == range_abs)
offsets[i++] = seek_offset;
if ((seek_offset = lseek (fd, bytes_offset, SEEK_SET)) < 0)
{
error (0, errno, _("cannot lseek %s"), quote (filename));
return false;
}
}
return true;
}
/* Read the file open on descriptor FD from its current position up to
   end of file and store the number of newline characters found in the
   global `lines'.  FILENAME is only used in diagnostics.

   Return true on success.  On a read error a diagnostic is issued,
   `lines' is left unchanged and false is returned. */
static bool
count_lines_in_stream (const char *filename, int fd)
{
  char buffer[BUFSIZ];
  uintmax_t newline_count = 0;
  size_t got;

  while ((got = safe_read (fd, buffer, BUFSIZ)) != 0)
    {
      const char *p;
      const char *limit;

      if (got == SAFE_READ_ERROR)
        {
          error (0, errno, _("error reading %s"), quote (filename));
          return false;
        }

      limit = buffer + got;
      for (p = buffer; p < limit; p++)
        if (*p == '\n')
          newline_count++;
    }

  lines = newline_count;
  return true;
}
/* Count the lines of the file open on descriptor FD and check every
   range set in `ranges' against that count.

   Each set must satisfy, after relative endpoints have been converted
   to absolute line numbers:
     - neither endpoint is 0,
     - both endpoints lie within 1 .. lines,
     - the start does not come after the end,
     - the start comes after the end of the previous set.

   An invalid range is a fatal usage error: a diagnostic is printed and
   the program exits with EXIT_FAILURE.  Return false (without
   validating anything) if the lines could not be counted, true if all
   ranges are valid.  FILENAME is only used in diagnostics. */
static bool
validate_ranges (const char *filename, int fd)
{
  int i;
  long prev_end = 0;    /* Last line of the previous set; 0 if none. */

  /* Without a reliable line count nothing below is meaningful. */
  if (! count_lines_in_stream (filename, fd))
    return false;

  for (i = 0; i < range_max; i += 2)
    {
      long line_start = ranges[i];
      long line_end = ranges[i + 1];

      if (line_start == 0)
        error (EXIT_FAILURE, 0, _("starting line must not be 0"));
      if (line_end == 0)
        error (EXIT_FAILURE, 0, _("ending line must not be 0"));

      relative_range_to_absolute (&line_start);
      relative_range_to_absolute (&line_end);

      /* A relative endpoint that reaches back before the first line is
         still not positive after the conversion. */
      if (line_start < 1)
        error (EXIT_FAILURE, 0, _("starting line exceeds total lines of %s"),
               quote (filename));
      if (line_end < 1)
        error (EXIT_FAILURE, 0, _("ending line exceeds total lines of %s"),
               quote (filename));

      if (line_start <= prev_end)
        error (EXIT_FAILURE, 0,
               _("starting line overlaps with previous ending one"));
      if (line_start > line_end)
        error (EXIT_FAILURE, 0,
               _("starting line must precede the ending line"));
      if (line_start > lines)
        error (EXIT_FAILURE, 0, _("starting line exceeds total lines of %s"),
               quote (filename));
      if (line_end > lines)
        error (EXIT_FAILURE, 0, _("ending line exceeds total lines of %s"),
               quote (filename));

      prev_end = line_end;
    }

  return true;
}
/* Copy everything that can be read from descriptor FD into a newly
   created temporary file, so that input which cannot be rewound (such
   as a pipe) can be processed with lseek.

   On success the temporary file is flushed, its descriptor is
   positioned at the beginning, `input.fd' and `input.fh' refer to it
   and true is returned; the caller must fclose `input.fh' when done.
   On failure a diagnostic is issued, the temporary file is closed,
   `input' is left untouched and false is returned.

   FILENAME is only used in diagnostics. */
static bool
buffer_stdin_to_tmpfile (const char *filename, int fd)
{
  char buffer[BUFSIZ];
  FILE *tempfile = tmpfile ();

  if (tempfile == NULL)
    {
      error (0, errno, _("cannot create temporary file"));
      return false;
    }

  for (;;)
    {
      size_t bytes_read = safe_read (fd, buffer, sizeof buffer);

      if (bytes_read == SAFE_READ_ERROR)
        {
          error (0, errno, _("error reading %s"), quote (filename));
          goto fail;
        }
      if (bytes_read == 0)
        break;

      /* The data is raw bytes, not a C string: write exactly what was
         read, including any NUL bytes. */
      if (fwrite (buffer, 1, bytes_read, tempfile) < bytes_read)
        {
          error (0, errno, _("write error"));
          goto fail;
        }
    }

  /* The file is subsequently read through its descriptor, so push the
     buffered data out and move the descriptor back to the start. */
  if (fflush (tempfile) != 0)
    {
      error (0, errno, _("write error"));
      goto fail;
    }
  if (lseek (fileno (tempfile), 0, SEEK_SET) < 0)
    {
      error (0, errno, _("cannot lseek %s"), quote (filename));
      goto fail;
    }

  input.fd = fileno (tempfile);
  input.fh = tempfile;
  return true;

 fail:
  fclose (tempfile);
  return false;
}
/* Process one file operand: open FILENAME ("-" means standard input),
   validate the requested ranges against it, locate the start offsets
   and write the selected lines to standard output.

   Standard input is first copied to a temporary file so that it can be
   processed with lseek.  The processing stages run in order and a
   failing stage prevents the following ones from running.  Whatever was
   opened is closed again before returning.

   Return true if everything succeeded, false after a diagnostic has
   been issued. */
static bool
range_file (const char *filename)
{
  int fd;
  bool ok;

  is_stdin = STREQ (filename, "-");
  if (is_stdin)
    {
      have_read_stdin = true;
      fd = STDIN_FILENO;
      filename = _("standard input");
      if (O_BINARY && ! isatty (STDIN_FILENO))
        freopen (NULL, "rb", stdin);

      /* Without the temporary file there is nothing to work on and
         nothing to close; `input' has not been set up. */
      if (! buffer_stdin_to_tmpfile (filename, fd))
        return false;
    }
  else
    {
      fd = open (filename, O_RDONLY | O_BINARY);
      if (fd < 0)
        {
          error (0, errno, _("cannot open %s for reading"), quote (filename));
          return false;
        }
      input.fd = fd;
    }

  /* Each stage depends on the results of the previous one. */
  ok = (validate_ranges (filename, input.fd)
        && determine_seek_offsets (filename, input.fd)
        && range_lines (filename, input.fd));

  if (is_stdin)
    {
      if (fclose (input.fh) != 0)
        {
          error (0, errno, _("cannot close temporary file"));
          return false;
        }
    }
  else if (close (fd) != 0)
    {
      error (0, errno, _("closing %s"), quote (filename));
      return false;
    }

  return ok;
}
int
main(int argc, char **argv)
{
char *buf, *buf_orig, *ranges_arg, *range_digits;
enum header_mode header_mode = multiple_files;
bool ok = true, seen_range_sep;
int c, i, digits_count, ranges_seen;
/* Initializer for file_list if no file-arguments
were specified on the command line. */
static char const *const default_file_list[] = {"-", NULL};
char const *const *file_list;
initialize_main (&argc, &argv);
program_name = argv[0];
setlocale (LC_ALL, "");
bindtextdomain (PACKAGE, LOCALEDIR);
textdomain (PACKAGE);
atexit (close_stdout);
have_read_stdin = false;
ranges_arg = NULL;
while ((c = getopt_long (argc, argv, "nqr:v", long_options, NULL))
!= -1)
{
switch(c)
{
case 'n':
number_mode = true;
next_line_num();
break;
case 'q':
header_mode = never;
break;
case 'r':
range_mode = true;
ranges_arg = optarg;
break;
case 'v':
header_mode = always;
break;
case_GETOPT_HELP_CHAR;
case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
default:
usage (EXIT_FAILURE);
}
}
if (!range_mode)
usage (EXIT_FAILURE);
else
{
ranges_seen = 0;
while (*ranges_arg)
{
if (VALID_RANGE (*ranges_arg))
{
digits_count = 0;
range_digits = ranges_arg;
while (VALID_RANGE (*range_digits))
{
digits_count++;
range_digits++;
}
digits_count++;
buf = xcalloc (digits_count, sizeof (char));
buf_orig = buf;
while (VALID_RANGE (*ranges_arg))
{
*buf = *ranges_arg++;
buf++;
}
*buf = '\0';
buf = buf_orig;
ranges[range_max++] = atol (buf);
free (buf);
if (range_max > MAX_RANGE_SETS)
error (EXIT_FAILURE, 0, _("only %d sets permitted"), MAX_RANGE_SETS / 2);
ranges_seen++;
}
else
{
if (ranges_seen == 2)
{
if (*ranges_arg != ',')
error (EXIT_FAILURE, 0, _("comma as set separator required"));
ranges_seen = 0;
seen_range_sep = false;
}
else if (ranges_seen == 1 && !seen_range_sep)
{
if (*ranges_arg != ':')
error (EXIT_FAILURE, 0, _("colon as range separator required"));
else
seen_range_sep = true;
}
else
{
if (!ISDIGIT (*ranges_arg) || *ranges_arg != '-')
error (EXIT_FAILURE, 0, _("range must be number"));
}
++ranges_arg;
}
}
}
file_list = (optind < argc
? (char const *const *) &argv[optind]
: default_file_list);
if (header_mode == always
|| (header_mode == multiple_files && optind < argc - 1))
print_headers = true;
if (O_BINARY && ! isatty (STDOUT_FILENO))
freopen (NULL, "wb", stdout);
for (i = 0; file_list[i]; i++)
ok &= range_file (file_list[i]);
if (have_read_stdin && close (STDIN_FILENO) < 0)
error (EXIT_FAILURE, errno, "-");
exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
}