From a972d01e5147a6d9165c52935bf1a34d1e6d7680 Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Fri, 9 Dec 2016 21:09:39 -0500 Subject: [PATCH] doc: simplify documentation building Do not bother autogenerating sed.texi from sed-in.texi. All it did was to add a few '@group' commands. Since these are rarely modified, add them manually. This simplifies the documentation-building rules and lets us distribute only one copy of the texinfo manual. * doc/groupify.sed, doc/sed-in.texi: Remove files. * doc/sed.texi: Remove the 'auto-generated' warning, as this file is no longer auto-generated. * doc/local.mk (doc_sed_TEXINFOS): Rename from sed_TEXINFOS. (dist_noinst_DATA): The above renaming enables us to remove doc/config.texi and doc/fdl.texi from this list. (dist_noinst_SCRIPTS): Remove. (doc/sed.texi): Remove rule. (doc/s-texi): Likewise. --- doc/groupify.sed | 59 - doc/local.mk | 21 +- doc/sed-in.texi | 4231 ------------------------------------------------------ doc/sed.texi | 1 - 4 files changed, 2 insertions(+), 4310 deletions(-) delete mode 100755 doc/groupify.sed delete mode 100644 doc/sed-in.texi diff --git a/doc/groupify.sed b/doc/groupify.sed deleted file mode 100755 index c733b82..0000000 --- a/doc/groupify.sed +++ /dev/null @@ -1,59 +0,0 @@ -#! /bin/sed -nf -# Script to add @address@hidden group tags to sed.texi.in -# so that comments are not separated from the instructions -# that they refer to. - -# Step 1: search for the conventional "@c start----" comment -1a\ address@hidden Do not edit this file!! It is automatically generated from sed-in.texi. -p -/address@hidden start-*$/! b - -# Step 2: loop until we find a @ command -:a -n -p -/^@/! ba - -# Step 3: process everything until a "@end" command - -# Step 3.1: Print the blank lines before the group. If we reach the "@end", -# we go back to step 1. 
-:b -n -/address@hidden/ { - p - b -} -/^[ ]*$/ { - p - bb -} - -# Step 3.2: Add to hold space every line until an empty one or "@end" -h -:c -n -/address@hidden example/! { - /^[ ]*$/! { - H - bc - } -} - -# Step 3.3: Working in hold space, add @address@hidden group if there are -# at least two lines. Then print the lines we processed and -# switch back to pattern space. -x -/\n/ { - s/.*/@group\ -&\ address@hidden group/ -} -p - -# Step 3.4: Switch back to pattern space, print the first blank line -# and possibly go back to step 3.1 -x -p -/address@hidden/ !bb diff --git a/doc/local.mk b/doc/local.mk index 86d97ef..b2030f4 100644 --- a/doc/local.mk +++ b/doc/local.mk @@ -14,31 +14,14 @@ # along with this program. If not, see . info_TEXINFOS = doc/sed.texi -sed_TEXINFOS = doc/config.texi doc/version.texi doc/fdl.texi +doc_sed_TEXINFOS = doc/config.texi doc/version.texi doc/fdl.texi dist_man_MANS = doc/sed.1 -dist_noinst_DATA = doc/config.texi doc/sed.x doc/sed-in.texi doc/s-texi \ - doc/fdl.texi -dist_noinst_SCRIPTS = doc/groupify.sed +dist_noinst_DATA = doc/sed.x HELP2MAN = $(top_srcdir)/build-aux/help2man SEDBIN = sed/sed AM_MAKEINFOHTMLFLAGS = --no-split -# To produce better quality output, in the example sed -# scripts we group comments with lines following them; -# since mantaining the "@address@hidden group" manually -# is a burden, we do this automatically -doc/sed.texi: $(srcdir)/doc/s-texi -doc/s-texi: doc/sed-in.texi $(srcdir)/doc/groupify.sed - sed -nf $(srcdir)/doc/groupify.sed \ - < $(srcdir)/doc/sed-in.texi > $(srcdir)/doc/sed-tmp.texi - if cmp $(srcdir)/doc/sed.texi $(srcdir)/doc/sed-tmp.texi; then \ - rm -f $(srcdir)/doc/sed-tmp.texi; \ - else \ - mv -f $(srcdir)/doc/sed-tmp.texi $(srcdir)/doc/sed.texi; \ - fi - echo stamp > $(srcdir)/doc/s-texi - doc/sed.1: sed/sed .version $(srcdir)/doc/sed.x $(AM_V_GEN)$(MKDIR_P) doc $(AM_V_at)rm -rf $@ address@hidden diff --git a/doc/sed-in.texi b/doc/sed-in.texi deleted file mode 100644 index 5aca0d0..0000000 
--- a/doc/sed-in.texi +++ /dev/null @@ -1,4231 +0,0 @@ -\input texinfo @c -*-texinfo-*- address@hidden address@hidden -- Stuff that needs adding: ---------------------------------------------- address@hidden (nothing!) address@hidden -------------------------------------------------------------------------- address@hidden Check for consistency: regexps in @code, text that they match in @samp. address@hidden address@hidden Tips: address@hidden @command for command address@hidden @samp for command fragments: @samp{cat -s} address@hidden @code for sed commands and flags address@hidden Use ``quote'' not `quote' or "quote". address@hidden address@hidden %**start of header address@hidden sed.info address@hidden sed, a stream editor address@hidden %**end of header - address@hidden @smallbook - address@hidden version.texi - address@hidden Combine indices. address@hidden ky cp address@hidden pg cp address@hidden tp cp - address@hidden op address@hidden op fn - address@hidden config.texi - address@hidden -This file documents version @value{VERSION} of address@hidden, a stream editor. - -Copyright @copyright{} 1998-2016 Free Software Foundation, Inc. - address@hidden -Permission is granted to copy, distribute and/or modify this document -under the terms of the GNU Free Documentation License, Version 1.3 -or any later version published by the Free Software Foundation; -with no Invariant Sections, no Front-Cover Texts, and no -Back-Cover Texts. A copy of the license is included in the -section entitled ``GNU Free Documentation License''. 
address@hidden quotation address@hidden copying - address@hidden off - address@hidden address@hidden @value{SSED}, a stream editor address@hidden version @value{VERSION}, @value{UPDATED} address@hidden by Ken Pizzini, Paolo Bonzini - address@hidden address@hidden 0pt plus 1filll address@hidden address@hidden titlepage - address@hidden - address@hidden address@hidden Top address@hidden @value{SSED} - address@hidden address@hidden ifnottex - address@hidden -* Introduction:: Introduction -* Invoking sed:: Invocation -* sed scripts:: @command{sed} scripts -* sed addresses:: Addresses: selecting lines -* sed regular expressions:: Regular expressions: selecting text -* advanced sed:: Advanced @command{sed}: cycles and buffers -* Examples:: Some sample scripts -* Limitations:: Limitations and (non-)limitations of @value{SSED} -* Other Resources:: Other resources for learning about @command{sed} -* Reporting Bugs:: Reporting bugs -* GNU Free Documentation License:: Copying and sharing this manual -* Concept Index:: A menu with all the topics in this manual. -* Command and Option Index:: A menu with all @command{sed} commands and - command-line options. address@hidden menu - - address@hidden Introduction address@hidden Introduction - address@hidden Stream editor address@hidden is a stream editor. -A stream editor is used to perform basic text -transformations on an input stream -(a file or input from a pipeline). -While in some ways similar to an editor which -permits scripted edits (such as @command{ed}), address@hidden works by making only one pass over the -input(s), and is consequently more efficient. -But it is @command{sed}'s ability to filter text in a pipeline -which particularly distinguishes it from other types of -editors. - - address@hidden Invoking sed address@hidden Running sed - -This chapter covers how to run @command{sed}. Details of @command{sed} -scripts and individual @command{sed} commands are discussed in the -next chapter. 
- address@hidden -* Overview:: -* Command-Line Options:: -* Exit status:: address@hidden menu - - address@hidden Overview address@hidden Overview -Normally @command{sed} is invoked like this: - address@hidden -sed SCRIPT INPUTFILE... address@hidden example - -For example, to replace all occurrences of @samp{hello} with @samp{world} -in the file @file{input.txt}: - address@hidden -sed 's/hello/world/' input.txt > output.txt address@hidden example - address@hidden stdin address@hidden standard input -If you do not specify @var{INPUTFILE}, or if @var{INPUTFILE} is @file{-}, -@command{sed} filters the contents of the standard input. The following -commands are equivalent: - address@hidden -sed 's/hello/world/' input.txt > output.txt -sed 's/hello/world/' < input.txt > output.txt -cat input.txt | sed 's/hello/world/' - > output.txt address@hidden example - address@hidden stdout address@hidden output address@hidden standard output address@hidden -i, example -@command{sed} writes output to standard output. Use @option{-i} to edit -files in-place instead of printing to standard output. -See also the @code{W} and @code{s///w} commands for writing output to -other files. The following command modifies @file{file.txt} and -does not produce any output: - address@hidden -sed -i 's/hello/world/' file.txt address@hidden example - address@hidden -n, example address@hidden p, example address@hidden suppressing output address@hidden output, suppressing -By default @command{sed} prints all processed input (except input -that has been modified/deleted by commands such as @command{d}). -Use @option{-n} to suppress output, and the @code{p} command -to print specific lines. The following command prints only line 45 -of the input file: - address@hidden -sed -n '45p' file.txt address@hidden example - - - address@hidden multiple files address@hidden -s, example -@command{sed} treats multiple input files as one long stream.
-The following example prints the first line of the first file -(@file{one.txt}) and the last line of the last file (@file{three.txt}). -Use @option{-s} to reverse this behavior. - address@hidden -sed -n '1p ; $p' one.txt two.txt three.txt address@hidden example - - address@hidden -e, example address@hidden --expression, example address@hidden -f, example address@hidden --file, example address@hidden script parameter address@hidden parameters, script -Without @option{-e} or @option{-f} options, @command{sed} uses -the first non-option parameter as the @var{script}, and the following -non-option parameters as input files. -If @option{-e} or @option{-f} options are used to specify a @var{script}, -all non-option parameters are taken as input files. -Options @option{-e} and @option{-f} can be combined, and can appear -multiple times (in which case the final effective @var{script} will be -concatenation of all the individual @var{script}s). - -The following examples are equivalent: - address@hidden -sed 's/hello/world/' input.txt > output.txt - -sed -e 's/hello/world/' input.txt > output.txt -sed --expression='s/hello/world/' input.txt > output.txt - -echo 's/hello/world/' > myscript.sed -sed -f myscript.sed input.txt > output.txt -sed --file=myscript.sed input.txt > output.txt address@hidden example - - address@hidden Command-Line Options address@hidden Command-Line Options - -The full format for invoking @command{sed} is: - address@hidden -sed OPTIONS... [SCRIPT] [INPUTFILE...] address@hidden example - address@hidden may be invoked with the following command-line options: - address@hidden @code address@hidden --version address@hidden --version address@hidden Version, printing -Print out the version of @command{sed} that is being run and a copyright notice, -then exit. - address@hidden --help address@hidden --help address@hidden Usage summary, printing -Print a usage message briefly summarizing these command-line options -and the bug-reporting address, -then exit. 
- address@hidden -n address@hidden --quiet address@hidden --silent address@hidden -n address@hidden --quiet address@hidden --silent address@hidden Disabling autoprint, from command line -By default, @command{sed} prints out the pattern space -at the end of each cycle through the script (@pxref{Execution Cycle, , -How @code{sed} works}). -These options disable this automatic printing, -and @command{sed} only produces output when explicitly told to -via the @code{p} command. - address@hidden -e @var{script} address@hidden address@hidden address@hidden -e address@hidden --expression address@hidden Script, from command line -Add the commands in @var{script} to the set of commands to be -run while processing the input. - address@hidden -f @var{script-file} address@hidden address@hidden address@hidden -f address@hidden --file address@hidden Script, from a file -Add the commands contained in the file @var{script-file} -to the set of commands to be run while processing the input. - address@hidden address@hidden address@hidden address@hidden address@hidden -i address@hidden --in-place address@hidden In-place editing, activating address@hidden @value{SSEDEXT}, in-place editing -This option specifies that files are to be edited in-place. address@hidden does this by creating a temporary file and -sending output to this file rather than to the standard address@hidden applies to commands such as @code{=}, address@hidden, @code{c}, @code{i}, @code{l}, @code{p}. You can -still write to the standard output by using the @code{w} address@hidden @value{SSEDEXT}, @file{/dev/stdout} file -or @code{W} commands together with the @file{/dev/stdout} -special file}. - -This option implies @option{-s}. - -When the end of the file is reached, the temporary file is -renamed to the output file's original name. 
The extension, -if supplied, is used to modify the name of the old file -before renaming the temporary file, thereby making a backup address@hidden that @value{SSED} creates the backup -file whether or not any output is actually changed.}). - address@hidden In-place editing, Perl-style backup file names -This rule is followed: if the extension doesn't contain a @code{*}, -then it is appended to the end of the current filename as a -suffix; if the extension does contain one or more @code{*} -characters, then @emph{each} asterisk is replaced with the -current filename. This allows you to add a prefix to the -backup file, instead of (or in addition to) a suffix, or -even to place backup copies of the original files into another -directory (provided the directory already exists). - -If no extension is supplied, the original file is -overwritten without making a backup. - address@hidden -l @var{N} address@hidden address@hidden address@hidden -l address@hidden --line-length address@hidden Line length, setting -Specify the default line-wrap length for the @code{l} command. -A length of 0 (zero) means to never wrap long lines. If -not specified, it is taken to be 70. - address@hidden --posix address@hidden --posix address@hidden @value{SSEDEXT}, disabling address@hidden includes several extensions to @acronym{POSIX} -sed. In order to simplify writing portable scripts, this -option disables all the extensions that this manual documents, -including additional commands. address@hidden @code{POSIXLY_CORRECT} behavior, enabling -Most of the extensions accept @command{sed} programs that -are outside the syntax mandated by @acronym{POSIX}, but some -of them (such as the behavior of the @command{N} command -described in @ref{Reporting Bugs}) actually violate the -standard. If you want to disable only the latter kind of -extension, you can set the @code{POSIXLY_CORRECT} variable -to a non-empty value. 
- address@hidden -b address@hidden --binary address@hidden -b address@hidden --binary -This option is available on every platform, but is only effective where the -operating system makes a distinction between text files and binary files. -When such a distinction is made---as is the case for MS-DOS, Windows, -Cygwin---text files are composed of lines separated by a carriage return -@emph{and} a line feed character, and @command{sed} does not see the -ending CR. When this option is specified, @command{sed} will open -input files in binary mode, thus not requesting this special processing -and considering lines to end at a line feed. - address@hidden --follow-symlinks address@hidden --follow-symlinks -This option is available only on platforms that support -symbolic links and has an effect only if option @option{-i} -is specified. In this case, if the file that is specified -on the command line is a symbolic link, @command{sed} will -follow the link and edit the ultimate destination of the -link. The default behavior is to break the symbolic link, -so that the link destination will not be modified. - address@hidden -E address@hidden -r address@hidden --regexp-extended address@hidden -E address@hidden -r address@hidden --regexp-extended address@hidden Extended regular expressions, choosing address@hidden @acronym{GNU} extensions, extended regular expressions -Use extended regular expressions rather than basic -regular expressions. Extended regexps are those that -@command{egrep} accepts; they can be clearer because they -usually have fewer backslashes. -Historically this was a @acronym{GNU} extension, -but the @option{-E} -extension has since been added to the POSIX standard -(http://austingroupbugs.net/view.php?id=528), -so use @option{-E} for portability. -GNU sed has accepted @option{-E} as an undocumented option for years, -and *BSD seds have accepted @option{-E} for years as well, -but scripts that use @option{-E} might not port to other older systems.
address@hidden syntax, , Extended regular expressions}. - - address@hidden -s address@hidden --separate address@hidden -s address@hidden --separate address@hidden Working on separate files -By default, @command{sed} will consider the files specified on the -command line as a single continuous long stream. This @value{SSED} -extension allows the user to consider them as separate files: -range addresses (such as @samp{/abc/,/def/}) are not allowed -to span several files, line numbers are relative to the start -of each file, @code{$} refers to the last line of each file, -and files invoked from the @code{R} commands are rewound at the -start of each file. - address@hidden --sandbox address@hidden --sandbox address@hidden Sandbox mode -In sandbox mode, @code{e/w/r} commands are rejected - programs containing -them will be aborted without being run. Sandbox mode ensures @command{sed} -operates only on the input files designated on the command line, and -cannot run external programs. - - address@hidden -u address@hidden --unbuffered address@hidden -u address@hidden --unbuffered address@hidden Unbuffered I/O, choosing -Buffer both input and output as minimally as practical. -(This is particularly useful if the input is coming from -the likes of @samp{tail -f}, and you wish to see the transformed -output as soon as possible.) - address@hidden -z address@hidden --null-data address@hidden --zero-terminated address@hidden -z address@hidden --null-data address@hidden --zero-terminated -Treat the input as a set of lines, each terminated by a zero byte -(the ASCII @samp{NUL} character) instead of a newline. This option can -be used with commands like @samp{sort -z} and @samp{find -print0} -to process arbitrary file names. address@hidden table - -If no @option{-e}, @option{-f}, @option{--expression}, or @option{--file} -options are given on the command-line, -then the first non-option argument on the command line is -taken to be the @var{script} to be executed. 
- address@hidden Files to be processed as input -If any command-line parameters remain after processing the above, -these parameters are interpreted as the names of input files to -be processed. address@hidden Standard input, processing as input -A file name of @samp{-} refers to the standard input stream. -The standard input will be processed if no file names are specified. - address@hidden Exit status address@hidden Exit status address@hidden exit status -An exit status of zero indicates success, and a nonzero value -indicates failure. @value{SSED} returns the following exit status -error values: - address@hidden @asis address@hidden 0 -Successful completion. - address@hidden 1 -Invalid command, invalid syntax, invalid regular expression or a -@value{SSED} extension command used with @option{--posix}. - address@hidden 2 -One or more of the input files specified on the command line could not be -opened (e.g. if a file is not found, or read permission is denied). -Processing continued with other files. - address@hidden 4 -An I/O error, or a serious processing error during runtime, -@value{SSED} aborted immediately. address@hidden table - address@hidden Q, example address@hidden exit status, example -Additionally, the commands @code{q} and @code{Q} can be used to terminate -@command{sed} with a custom exit code value (this is a @value{SSED} extension): - address@hidden -$ echo | sed 'Q42' ; echo $?
-42 address@hidden example - - address@hidden sed scripts address@hidden @command{sed} scripts - - address@hidden -* sed script overview:: @command{sed} script overview -* sed commands list:: @command{sed} commands summary -* The "s" Command:: @command{sed}'s Swiss Army Knife -* Common Commands:: Often used commands -* Other Commands:: Less frequently used commands -* Programming Commands:: Commands for @command{sed} gurus -* Extended Commands:: Commands specific of @value{SSED} address@hidden menu - address@hidden sed script overview address@hidden @command{sed} script overview - address@hidden @command{sed} script structure address@hidden Script structure - -A @command{sed} program consists of one or more @command{sed} commands, -passed in by one or more of the address@hidden, @option{-f}, @option{--expression}, and @option{--file} -options, or the first non-option argument if zero of these -options are used. -This document will refer to ``the'' @command{sed} script; -this is understood to mean the in-order concatenation -of all of the @var{script}s and @var{script-file}s passed in. address@hidden - - address@hidden @command{sed} commands syntax address@hidden syntax, @command{sed} commands address@hidden addresses, syntax address@hidden syntax, addresses address@hidden commands follow this syntax: - address@hidden address@hidden address@hidden example - address@hidden is a single-letter @command{sed} command. address@hidden TODO: add @pxref{commands} when there is a command-list section. address@hidden is an optional line address. If @code{[addr]} is specified, -the command @var{X} will be executed only on the matched lines. address@hidden can be a single line number, a regular expression, -or a range of lines (@pxref{sed addresses}). -Additional @code{[options]} are used for some @command{sed} commands. 
- address@hidden @command{d}, example address@hidden address range, example address@hidden example, address range -The following example deletes lines 30 to 35 in the input. address@hidden,35} is an address range. @command{d} is the delete command: - address@hidden -sed '30,35d' input.txt > output.txt address@hidden example - address@hidden @command{q}, example address@hidden regular expression, example address@hidden example, regular expression -The following example prints all input until a line -starting with the word @samp{foo} is found. If such line is found, address@hidden will terminate with exit status 42. -If such line was not found (and no other error occurred), @command{sed} -will exit with status 0. address@hidden/^foo/} is a regular-expression address. address@hidden is the quit command. @code{42} is the command option. - address@hidden -sed '/^foo/q42' input.txt > output.txt address@hidden example - - address@hidden multiple @command{sed} commands address@hidden @command{sed} commands, multiple address@hidden newline, command separator address@hidden semicolons, command separator address@hidden ;, command separator address@hidden -e, example address@hidden -f, example -Commands within a @var{script} or @var{script-file} can be -separated by semicolons (@code{;}) or newlines (ASCII 10). -Multiple scripts can be specified with @option{-e} or @option{-f} -options. - -The following examples are all equivalent. 
They perform two @command{sed} -operations: deleting any lines matching the regular expression @code{/^foo/}, -and replacing all occurrences of the string @samp{hello} with @samp{world}: - address@hidden -sed '/^foo/d ; s/hello/world/' input.txt > output.txt - -sed -e '/^foo/d' -e 's/hello/world/' input.txt > output.txt - -echo '/^foo/d' > script.sed -echo 's/hello/world/' >> script.sed -sed -f script.sed input.txt > output.txt - -echo 's/hello/world/' > script2.sed -sed -e '/^foo/d' -f script2.sed input.txt > output.txt address@hidden example - - address@hidden @command{a}, and semicolons address@hidden @command{c}, and semicolons address@hidden @command{i}, and semicolons -Commands @command{a}, @command{c}, @command{i}, due to their syntax, -cannot be followed by semicolons working as command separators and -thus should be terminated -with newlines or be placed at the end of a @var{script} or @var{script-file}. -Commands can also be preceded with optional non-significant -whitespace characters. - - - address@hidden sed commands list address@hidden @command{sed} commands summary - -The following commands are supported in @value{SSED}. -Some are standard POSIX commands, while other are @value{SSEDEXT}. -Details and examples for each command are in the following sections. -(Mnemonics) are shown in parentheses. - address@hidden @code - address@hidden a\ address@hidden @var{text} -Append @var{text} after a line. - address@hidden a @var{text} -Append @var{text} after a line (alternative syntax). - address@hidden b @var{label} -Branch unconditionally to @var{label}. -The @var{label} may be omitted, in which case the next cycle is started. - address@hidden c\ address@hidden @var{text} -Replace (change) lines with @var{text}. - address@hidden c @var{text} -Replace (change) lines with @var{text} (alternative syntax). - address@hidden d -Delete the pattern space; -immediately start next cycle. 
- address@hidden D -If pattern space contains newlines, delete text in the pattern -space up to the first newline, and restart cycle with the resultant -pattern space, without reading a new line of input. - -If pattern space contains no newline, start a normal new cycle as if -the @code{d} command was issued. address@hidden TODO: add a section about D+N and D+n commands - address@hidden e -Executes the command that is found in pattern space and -replaces the pattern space with the output; a trailing newline -is suppressed. - address@hidden e @var{command} -Executes @var{command} and sends its output to the output stream. -The command can run across multiple lines, all but the last ending with -a back-slash. - address@hidden F -(filename) Print the file name of the current input file (with a trailing -newline). - address@hidden g -Replace the contents of the pattern space with the contents of the hold space. - address@hidden G -Append a newline to the contents of the pattern space, -and then append the contents of the hold space to that of the pattern space. - address@hidden h -(hold) Replace the contents of the hold space with the contents of the -pattern space. - address@hidden H -Append a newline to the contents of the hold space, -and then append the contents of the pattern space to that of the hold space. - address@hidden i\ address@hidden @var{text} -insert @var{text} before a line. - address@hidden i @var{text} -insert @var{text} before a line (alternative syntax). - address@hidden l -Print the pattern space in an unambiguous form. - address@hidden n -(next) If auto-print is not disabled, print the pattern space, -then, regardless, replace the pattern space with the next line of input. -If there is no more input then @command{sed} exits without processing -any more commands. - address@hidden N -Add a newline to the pattern space, -then append the next line of input to the pattern space. 
-If there is no more input then @command{sed} exits without processing -any more commands. - address@hidden p -Print the pattern space. address@hidden useful with @option{-n} - address@hidden P -Print the pattern space, up to the first <newline>. - address@hidden address@hidden -(quit) Exit @command{sed} without processing any more commands or input. - address@hidden address@hidden -(quit) This command is the same as @code{q}, but will not print the -contents of pattern space. Like @code{q}, it provides the -ability to return an exit code to the caller. address@hidden useful to quit on a conditional without printing - address@hidden r filename -Reads text from a file. - address@hidden R filename -Queue a line of @var{filename} to be read and -inserted into the output stream at the end of the current cycle, -or when the next input line is read. address@hidden useful to interleave files - address@hidden address@hidden/regexp/replacement/[flags]} -(substitute) Match the regular-expression against the content of the -pattern space. If found, replace matched string with -@var{replacement}. - address@hidden t @var{label} -(test) Branch to @var{label} only if there has been a successful -@code{s}ubstitution since the last input line was read or conditional -branch was taken. The @var{label} may be omitted, in which case the -next cycle is started. - address@hidden T @var{label} -(test) Branch to @var{label} only if there have been no successful -@code{s}ubstitutions since the last input line was read or -conditional branch was taken. The @var{label} may be omitted, -in which case the next cycle is started. - address@hidden v @var{[version]} -(version) This command does nothing, but makes @command{sed} fail if -@value{SSED} extensions are not supported, or if the requested version -is not available. - address@hidden w filename -Write the pattern space to @var{filename}.
- address@hidden W filename -Write to the given filename the portion of the pattern space up to -the first newline - address@hidden x -Exchange the contents of the hold and pattern spaces. - - address@hidden y/src/dst/ -Transliterate any characters in the pattern space which match -any of the @var{source-chars} with the corresponding character -in @var{dest-chars}. - - address@hidden z -(zap) This command empties the content of pattern space. - address@hidden # -A comment, until the next newline. - - address@hidden @{ @var{cmd ; cmd ...} @} -Group several commands together. address@hidden useful for multiple commands on same address - address@hidden = -Print the current input line number (with a trailing newline). - address@hidden : @var{label} -Specify the location of @var{label} for branch commands (@code{b}, address@hidden, @code{T}). - address@hidden table - - address@hidden The "s" Command address@hidden The @code{s} Command - -The @code{s} command (as in substitute) is probably the most important -in @command{sed} and has a lot of different options. The syntax of -the @code{s} command is address@hidden/@var{regexp}/@var{replacement}/@var{flags}}. - -Its basic concept is simple: the @code{s} command attempts to match -the pattern space against the supplied regular expression @var{regexp}; -if the match is successful, then that portion of the -pattern space which was matched is replaced with @var{replacement}. - -For details about @var{regexp} syntax @pxref{Regexp Addresses,,Regular -Expression Addresses}. - address@hidden Backreferences, in regular expressions address@hidden Parenthesized substrings -The @var{replacement} can contain @address@hidden (@var{n} being -a number from 1 to 9, inclusive) references, which refer to -the portion of the match which is contained between the @var{n}th address@hidden(} and its matching @code{\)}. 
-Also, the @var{replacement} can contain unescaped @code{&} -characters which reference the whole matched portion -of the pattern space. - address@hidden TODO: xref to backreference section mention @var{\'}. - -The @code{/} -characters may be uniformly replaced by any other single -character within any given @code{s} command. The @code{/} -character (or whatever other character is used in its stead) -can appear in the @var{regexp} or @var{replacement} -only if it is preceded by a @code{\} character. - - - address@hidden @value{SSEDEXT}, case modifiers in @code{s} commands -Finally, as a @value{SSED} extension, you can include a -special sequence made of a backslash and one of the letters address@hidden, @code{l}, @code{U}, @code{u}, or @code{E}. -The meaning is as follows: - address@hidden @code address@hidden \L -Turn the replacement -to lowercase until a @code{\U} or @code{\E} is found, - address@hidden \l -Turn the -next character to lowercase, - address@hidden \U -Turn the replacement to uppercase -until a @code{\L} or @code{\E} is found, - address@hidden \u -Turn the next character -to uppercase, - address@hidden \E -Stop case conversion started by @code{\L} or @code{\U}. address@hidden table - -When the @code{g} flag is being used, case conversion does not -propagate from one occurrence of the regular expression to -another. For example, when the following command is executed -with @samp{a-b-} in pattern space: address@hidden -s/\(b\?\)-/x\u\1/g address@hidden example - address@hidden -the output is @samp{axxB}. When replacing the first @samp{-}, -the @samp{\u} sequence only affects the empty replacement of address@hidden It does not affect the @code{x} character that is -added to pattern space when replacing @code{b-} with @code{xB}. - -On the other hand, @code{\l} and @code{\u} do affect the remainder -of the replacement text if they are followed by an empty substitution. 
-With @samp{a-b-} in pattern space, the following command: address@hidden -s/\(b\?\)-/\u\1x/g address@hidden example - address@hidden -will replace @samp{-} with @samp{X} (uppercase) and @samp{b-} with address@hidden If this behavior is undesirable, you can prevent it by -adding a @samp{\E} sequence---after @samp{\1} in this case. - -To include a literal @code{\}, @code{&}, or newline in the final -replacement, be sure to precede the desired @code{\}, @code{&}, -or newline in the @var{replacement} with a @code{\}. - address@hidden s command, option flags address@hidden Substitution of text, options -The @code{s} command can be followed by zero or more of the -following @var{flags}: - address@hidden @code address@hidden g address@hidden Global substitution address@hidden Replacing all text matching regexp in a line -Apply the replacement to @emph{all} matches to the @var{regexp}, -not just the first. - address@hidden @var{number} address@hidden Replacing only @var{n}th match of regexp in a line -Only replace the @var{number}th match of the @var{regexp}. - address@hidden @acronym{GNU} extensions, @code{g} and @var{number} modifier -interaction in @code{s} command address@hidden Mixing @code{g} and @var{number} modifiers in the @code{s} command -Note: the @sc{posix} standard does not specify what should happen -when you mix the @code{g} and @var{number} modifiers, -and currently there is no widely agreed upon meaning -across @command{sed} implementations. -For @value{SSED}, the interaction is defined to be: -ignore matches before the @var{number}th, -and then match and replace all matches from -the @var{number}th on. - address@hidden p address@hidden Text, printing after substitution -If the substitution was made, then print the new pattern space. - -Note: when both the @code{p} and @code{e} options are specified, -the relative ordering of the two produces very different results. 
-In general, @code{ep} (evaluate then print) is what you want, -but operating the other way round can be useful for debugging. -For this reason, the current version of @value{SSED} interprets -specially the presence of @code{p} options both before and after address@hidden, printing the pattern space before and after evaluation, -while in general flags for the @code{s} command show their -effect just once. This behavior, although documented, might -change in future versions. - address@hidden w @var{file-name} address@hidden Text, writing to a file after substitution address@hidden @value{SSEDEXT}, @file{/dev/stdout} file address@hidden @value{SSEDEXT}, @file{/dev/stderr} file -If the substitution was made, then write out the result to the named file. -As a @value{SSED} extension, two special values of @var{file-name} are -supported: @file{/dev/stderr}, which writes the result to the standard -error, and @file{/dev/stdout}, which writes to the standard address@hidden is equivalent to @code{p} unless the @option{-i} -option is being used.} - address@hidden e address@hidden Evaluate Bourne-shell commands, after substitution address@hidden Subprocesses address@hidden @value{SSEDEXT}, evaluating Bourne-shell commands address@hidden @value{SSEDEXT}, subprocesses -This command allows one to pipe input from a shell command -into pattern space. If a substitution was made, the command -that is found in pattern space is executed and pattern space -is replaced with its output. A trailing newline is suppressed; -results are undefined if the command to be executed contains -a @sc{nul} character. This is a @value{SSED} extension. - address@hidden I address@hidden i address@hidden @acronym{GNU} extensions, @code{I} modifier address@hidden Case-insensitive matching -The @code{I} modifier to regular-expression matching is a @acronym{GNU} -extension which makes @command{sed} match @var{regexp} in a -case-insensitive manner. 
- address@hidden M address@hidden m address@hidden @value{SSEDEXT}, @code{M} modifier -The @code{M} modifier to regular-expression matching is a @value{SSED} -extension which directs @value{SSED} to match the regular expression -in @cite{multi-line} mode. The modifier causes @code{^} and @code{$} to -match respectively (in addition to the normal behavior) the empty string -after a newline, and the empty string before a newline. There are -special character sequences address@hidden PERL -(@code{\`} and @code{\'}) address@hidden ifclear -which always match the beginning or the end of the buffer. -In addition, -the period character does not match a new-line character in -multi-line mode. - - address@hidden table - address@hidden Common Commands address@hidden Often-Used Commands - -If you use @command{sed} at all, you will quite likely want to know -these commands. - address@hidden @code address@hidden # -[No addresses allowed.] - address@hidden # (comments) address@hidden Comments, in scripts -The @code{#} character begins a comment; -the comment continues until the next newline. - address@hidden Portability, comments -If you are concerned about portability, be aware that -some implementations of @command{sed} (which are not @sc{posix} -conforming) may only support a single one-line comment, -and then only when the very first character of the script is a @code{#}. - address@hidden -n, forcing from within a script address@hidden Caveat --- #n on first line -Warning: if the first two characters of the @command{sed} script -are @code{#n}, then the @option{-n} (no-autoprint) option is forced. -If you want to put a comment in the first line of your script -and that comment begins with the letter @samp{n} -and you do not want this behavior, -then be sure to either use a capital @samp{N}, -or place at least one space before the @samp{n}. 
- address@hidden q address@hidden address@hidden q (quit) command address@hidden @value{SSEDEXT}, returning an exit code address@hidden Quitting -Exit @command{sed} without processing any more commands or input. - -Example: stop after printing the second line: address@hidden -$ seq 3 | sed 2q -1 -2 address@hidden example - -This command only accepts a single address. -Note that the current pattern space is printed if auto-print is -not disabled with the @option{-n} option. The ability to return -an exit code from the @command{sed} script is a @value{SSED} extension. - -See also the @value{SSED} extension @code{Q} command which quits silently -without printing the current pattern space. - address@hidden d address@hidden d (delete) command address@hidden Text, deleting -Delete the pattern space; -immediately start next cycle. - -Example: delete the second input line: address@hidden -$ seq 3 | sed 2d -1 -3 address@hidden example - address@hidden p address@hidden p (print) command address@hidden Text, printing -Print out the pattern space (to the standard output). -This command is usually only used in conjunction with the @option{-n} -command-line option. - -Example: print only the second input line: address@hidden -$ seq 3 | sed -n 2p -2 address@hidden example - address@hidden n address@hidden n (next-line) command address@hidden Next input line, replace pattern space with address@hidden Read next input line -If auto-print is not disabled, print the pattern space, -then, regardless, replace the pattern space with the next line of input. -If there is no more input then @command{sed} exits without processing -any more commands. - -This command is useful to skip lines (e.g. process every Nth line). - -Example: perform substitution on every 3rd line (i.e.
two @code{n} commands -skip two lines): address@hidden on address@hidden on address@hidden -$ seq 6 | sed 'n;n;s/./x/' -1 -2 -x -4 -5 -x address@hidden example - address@hidden provides an extension address syntax of @address@hidden -to achieve the same result: - address@hidden -$ seq 6 | sed '0~3s/./x/' -1 -2 -x -4 -5 -x address@hidden example - address@hidden off address@hidden off - - address@hidden @{ @var{commands} @} address@hidden @address@hidden command grouping address@hidden Grouping commands address@hidden Command groups -A group of commands may be enclosed between address@hidden@{} and @address@hidden characters. -This is particularly useful when you want a group of commands -to be triggered by a single address (or address-range) match. - -Example: perform substitution then print the second input line: address@hidden on address@hidden on address@hidden -$ seq 3 | sed -n 'address@hidden/2/X/ ; address@hidden' -X address@hidden example address@hidden off address@hidden off - address@hidden table - - address@hidden Other Commands address@hidden Less Frequently-Used Commands - -Though perhaps less frequently used than those in the previous -section, some very small yet useful @command{sed} scripts can be built with -these commands. - address@hidden @code address@hidden y/@var{source-chars}/@var{dest-chars}/ address@hidden y (transliterate) command address@hidden Transliteration -Transliterate any characters in the pattern space which match -any of the @var{source-chars} with the corresponding character -in @var{dest-chars}. - -Example: transliterate @samp{a-j} into @samp{0-9}: address@hidden on address@hidden on address@hidden -$ echo hello world | sed 'y/abcdefghij/0123456789/' -74llo worl3 address@hidden example address@hidden off address@hidden off - -(The @code{/} characters may be uniformly replaced by -any other single character within any given @code{y} command.) 
- -Instances of the @code{/} (or whatever other character is used in its stead), address@hidden, or newlines can appear in the @var{source-chars} or @var{dest-chars} -lists, provided that each instance is escaped by a @code{\}. -The @var{source-chars} and @var{dest-chars} lists @emph{must} -contain the same number of characters (after de-escaping). - -See the @command{tr} command from GNU coreutils for similar functionality. - address@hidden a @var{text} -Appending @var{text} after a line. This is a @acronym{GNU} extension -to the standard @code{a} command - see below for details. - -Example: Add the word @samp{hello} after the second line: address@hidden on address@hidden on address@hidden -$ seq 3 | sed '2a hello' -1 -2 -hello -3 address@hidden example address@hidden off address@hidden off - -Leading whitespaces after the @code{a} command are ignored. -The text to add is read until the end of the line. - - address@hidden a\ address@hidden @var{text} address@hidden a (append text lines) command address@hidden Appending text after a line address@hidden Text, appending -Appending @var{text} after a line. - -Example: Add @samp{hello} after the second line -(@print{} indicates printed output lines): address@hidden on address@hidden on address@hidden -$ seq 3 | sed '2a\ -hello' address@hidden address@hidden address@hidden address@hidden address@hidden example address@hidden off address@hidden off - -The @code{a} command queues the lines of text which follow this command -(each but the last ending with a @code{\}, -which are removed from the output) -to be output at the end of the current cycle, -or when the next input line is read. - address@hidden @value{SSEDEXT}, two addresses supported by most commands -As a @acronym{GNU} extension, this command accepts two addresses. - -Escape sequences in @var{text} are processed, so you should -use @code{\\} in @var{text} to print a single backslash.
- -The commands resume after the last line without a backslash (@code{\}) - address@hidden in the following example: address@hidden on address@hidden on address@hidden -$ seq 3 | sed '2a\ -hello\ -world -3s/./X/' address@hidden address@hidden address@hidden address@hidden address@hidden address@hidden example address@hidden off address@hidden off - -As a @acronym{GNU} extension, the @code{a} command and @var{text} can be -separated into two @code{-e} parameters, enabling easier scripting: address@hidden on address@hidden on address@hidden -$ seq 3 | sed -e '2a\' -e hello -1 -2 -hello -3 - -$ sed -e '2a\' -e "$VAR" address@hidden example address@hidden off address@hidden off - address@hidden i @var{text} -insert @var{text} before a line. This is a @acronym{GNU} extension -to the standard @code{i} command - see below for details. - -Example: Insert the word @samp{hello} before the second line: address@hidden on address@hidden on address@hidden -$ seq 3 | sed '2i hello' -1 -hello -2 -3 address@hidden example address@hidden off address@hidden off - -Leading whitespaces after the @code{i} command are ignored. -The text to add is read until the end of the line. - address@hidden i\ address@hidden @var{text} address@hidden i (insert text lines) command address@hidden Inserting text before a line address@hidden Text, insertion -Immediately output the lines of text which follow this command. - -Example: Insert @samp{hello} before the second line -(@print{} indicates printed output lines): address@hidden on address@hidden on address@hidden -$ seq 3 | sed '2i\ -hello' address@hidden address@hidden address@hidden address@hidden address@hidden example address@hidden off address@hidden off - address@hidden @value{SSEDEXT}, two addresses supported by most commands -As a @acronym{GNU} extension, this command accepts two addresses. - -Escape sequences in @var{text} are processed, so you should -use @code{\\} in @var{text} to print a single backslash. 
- -The commands resume after the last line without a backslash (@code{\}) - address@hidden in the following example: address@hidden on address@hidden on address@hidden -$ seq 3 | sed '2i\ -hello\ -world -s/./X/' address@hidden address@hidden address@hidden address@hidden address@hidden address@hidden example address@hidden off address@hidden off - -As a @acronym{GNU} extension, the @code{i} command and @var{text} can be -separated into two @code{-e} parameters, enabling easier scripting: address@hidden on address@hidden on address@hidden -$ seq 3 | sed -e '2i\' -e hello -1 -hello -2 -3 - -$ sed -e '2i\' -e "$VAR" address@hidden example address@hidden off address@hidden off - address@hidden c @var{text} -Replaces the line(s) with @var{text}. This is a @acronym{GNU} extension -to the standard @code{c} command - see below for details. - -Example: Replace the 2nd to 9th lines with the word @samp{hello}: address@hidden on address@hidden on address@hidden -$ seq 10 | sed '2,9c hello' -1 -hello -10 address@hidden example address@hidden off address@hidden off - -Leading whitespaces after the @code{c} command are ignored. -The text to add is read until the end of the line. - address@hidden c\ address@hidden @var{text} address@hidden c (change to text lines) command address@hidden Replacing selected lines with other text -Delete the lines matching the address or address-range, -and output the lines of text which follow this command. - -Example: Replace 2nd to 4th lines with the words @samp{hello} and address@hidden (@print{} indicates printed output lines): address@hidden on address@hidden on address@hidden -$ seq 5 | sed '2,4c\ -hello\ -world' address@hidden address@hidden address@hidden address@hidden address@hidden example address@hidden off address@hidden off - -If no addresses are given, each line is replaced. - -A new cycle is started after this command is done, -since the pattern space will have been deleted. 
-In the following example, the @code{c} starts a -new cycle and the substitution command is not performed -on the replaced text: - address@hidden on address@hidden on address@hidden -$ seq 3 | sed '2c\ -hello -s/./X/' address@hidden address@hidden address@hidden address@hidden example address@hidden off address@hidden off - -As a @acronym{GNU} extension, the @code{c} command and @var{text} can be -separated into two @code{-e} parameters, enabling easier scripting: address@hidden on address@hidden on address@hidden -$ seq 3 | sed -e '2c\' -e hello -1 -hello -3 - -$ sed -e '2c\' -e "$VAR" address@hidden example address@hidden off address@hidden off - - address@hidden = address@hidden = (print line number) command address@hidden Printing line number address@hidden Line number, printing -Print out the current input line number (with a trailing newline). - address@hidden on address@hidden on address@hidden -$ printf '%s\n' aaa bbb ccc | sed = -1 -aaa -2 -bbb -3 -ccc address@hidden example address@hidden off address@hidden off - address@hidden @value{SSEDEXT}, two addresses supported by most commands -As a @acronym{GNU} extension, this command accepts two addresses. - - - - address@hidden l @var{n} address@hidden l (list unambiguously) command address@hidden List pattern space address@hidden Printing text unambiguously address@hidden Line length, setting address@hidden @value{SSEDEXT}, setting line length -Print the pattern space in an unambiguous form: -non-printable characters (and the @code{\} character) -are printed in C-style escaped form; long lines are split, -with a trailing @code{\} character to indicate the split; -the end of each line is marked with a @code{$}. - address@hidden specifies the desired line-wrap length; -a length of 0 (zero) means to never wrap long lines. If omitted, -the default as specified on the command line is used. The @var{n} -parameter is a @value{SSED} extension. 
- address@hidden r @var{filename} - address@hidden r (read file) command address@hidden Read text from a file -Reads text from a file. Example: - address@hidden on address@hidden on address@hidden -$ seq 3 | sed '2r/etc/hostname' -1 -2 -fencepost.gnu.org -3 address@hidden example address@hidden off address@hidden off - address@hidden @value{SSEDEXT}, @file{/dev/stdin} file -Queue the contents of @var{filename} to be read and -inserted into the output stream at the end of the current cycle, -or when the next input line is read. -Note that if @var{filename} cannot be read, it is treated as -if it were an empty file, without any error indication. - -As a @value{SSED} extension, the special value @file{/dev/stdin} -is supported for the file name, which reads the contents of the -standard input. - address@hidden @value{SSEDEXT}, two addresses supported by most commands -As a @acronym{GNU} extension, this command accepts two addresses. The -file will then be reread and inserted on each of the addressed lines. - address@hidden w @var{filename} address@hidden w (write file) command address@hidden Write to a file address@hidden @value{SSEDEXT}, @file{/dev/stdout} file address@hidden @value{SSEDEXT}, @file{/dev/stderr} file -Write the pattern space to @var{filename}. -As a @value{SSED} extension, two special values of @var{filename} are -supported: @file{/dev/stderr}, which writes the result to the standard -error, and @file{/dev/stdout}, which writes to the standard address@hidden is equivalent to @code{p} unless the @option{-i} -option is being used.} - -The file will be created (or truncated) before the first input line is -read; all @code{w} commands (including instances of the @code{w} flag -on successful @code{s} commands) which refer to the same @var{filename} -are output without closing and reopening the file.
- address@hidden D address@hidden D (delete first line) command address@hidden Delete first line from pattern space -If pattern space contains no newline, start a normal new cycle as if -the @code{d} command was issued. Otherwise, delete text in the pattern -space up to the first newline, and restart cycle with the resultant -pattern space, without reading a new line of input. - address@hidden N address@hidden N (append Next line) command address@hidden Next input line, append to pattern space address@hidden Append next input line to pattern space -Add a newline to the pattern space, -then append the next line of input to the pattern space. -If there is no more input then @command{sed} exits without processing -any more commands. - -When @option{-z} is used, a zero byte (the ascii @samp{NUL} character) is -added between the lines (instead of a new line). - -By default @command{sed} does not terminate if there is no 'next' input line. -This is a GNU extension which can be disabled with @option{--posix}. address@hidden,,N command on the last line}. - - address@hidden P address@hidden P (print first line) command address@hidden Print first line from pattern space -Print out the portion of the pattern space up to the first newline. - address@hidden h address@hidden h (hold) command address@hidden Copy pattern space into hold space address@hidden Replace hold space with copy of pattern space address@hidden Hold space, copying pattern space into -Replace the contents of the hold space with the contents of the pattern space. - address@hidden H address@hidden H (append Hold) command address@hidden Append pattern space to hold space address@hidden Hold space, appending from pattern space -Append a newline to the contents of the hold space, -and then append the contents of the pattern space to that of the hold space. 
- address@hidden g address@hidden g (get) command address@hidden Copy hold space into pattern space address@hidden Replace pattern space with copy of hold space address@hidden Hold space, copy into pattern space -Replace the contents of the pattern space with the contents of the hold space. - address@hidden G address@hidden G (appending Get) command address@hidden Append hold space to pattern space address@hidden Hold space, appending to pattern space -Append a newline to the contents of the pattern space, -and then append the contents of the hold space to that of the pattern space. - address@hidden x address@hidden x (eXchange) command address@hidden Exchange hold space with pattern space address@hidden Hold space, exchange with pattern space -Exchange the contents of the hold and pattern spaces. - address@hidden table - - address@hidden Programming Commands address@hidden Commands for @command{sed} gurus - -In most cases, use of these commands indicates that you are -probably better off programming in something like @command{awk} -or Perl. But occasionally one is committed to sticking -with @command{sed}, and these commands can enable one to write -quite convoluted scripts. - address@hidden Flow of control in scripts address@hidden @code address@hidden : @var{label} -[No addresses allowed.] - address@hidden : (label) command address@hidden Labels, in scripts -Specify the location of @var{label} for branch commands. -In all other respects, a no-op. - address@hidden b @var{label} address@hidden b (branch) command address@hidden Branch to a label, unconditionally address@hidden Goto, in scripts -Unconditionally branch to @var{label}. -The @var{label} may be omitted, in which case the next cycle is started. 
- address@hidden t @var{label} address@hidden t (test and branch if successful) command address@hidden Branch to a label, if @code{s///} succeeded address@hidden Conditional branch -Branch to @var{label} only if there has been a successful @code{s}ubstitution -since the last input line was read or conditional branch was taken. -The @var{label} may be omitted, in which case the next cycle is started. - address@hidden table - address@hidden Extended Commands address@hidden Commands Specific to @value{SSED} - -These commands are specific to @value{SSED}, so you -must use them with care and only when you are sure that -hindering portability is not evil. They allow you to check -for @value{SSED} extensions or to do tasks that are required -quite often, yet are unsupported by standard @command{sed}s. - address@hidden @code address@hidden e address@hidden address@hidden e (evaluate) command address@hidden Evaluate Bourne-shell commands address@hidden Subprocesses address@hidden @value{SSEDEXT}, evaluating Bourne-shell commands address@hidden @value{SSEDEXT}, subprocesses -This command allows one to pipe input from a shell command -into pattern space. Without parameters, the @code{e} command -executes the command that is found in pattern space and -replaces the pattern space with the output; a trailing newline -is suppressed. - -If a parameter is specified, instead, the @code{e} command -interprets it as a command and sends its output to the output stream. -The command can run across multiple lines, all but the last ending with -a back-slash. - -In both cases, the results are undefined if the command to be -executed contains a @sc{nul} character. - -Note that, unlike the @code{r} command, the output of the command will -be printed immediately; the @code{r} command instead delays the output -to the end of the current cycle. 
- address@hidden F address@hidden F (File name) command address@hidden Printing file name address@hidden File name, printing -Print out the file name of the current input file (with a trailing -newline). - address@hidden Q address@hidden -This command only accepts a single address. - address@hidden Q (silent Quit) command address@hidden @value{SSEDEXT}, quitting silently address@hidden @value{SSEDEXT}, returning an exit code address@hidden Quitting -This command is the same as @code{q}, but will not print the -contents of pattern space. Like @code{q}, it provides the -ability to return an exit code to the caller. - -This command can be useful because the only alternative ways -to accomplish this apparently trivial function are to use -the @option{-n} option (which can unnecessarily complicate -your script) or resorting to the following snippet, which -wastes time by reading the whole file without any visible effect: - address@hidden -:eat -$d @address@hidden silently on the last line}} -N @address@hidden another line, silently}} -g @address@hidden pattern space each time to save memory}} -b eat address@hidden example - address@hidden R @var{filename} address@hidden R (read line) command address@hidden Read text from a file address@hidden @value{SSEDEXT}, reading a file a line at a time address@hidden @value{SSEDEXT}, @code{R} command address@hidden @value{SSEDEXT}, @file{/dev/stdin} file -Queue a line of @var{filename} to be read and -inserted into the output stream at the end of the current cycle, -or when the next input line is read. -Note that if @var{filename} cannot be read, or if its end is -reached, no line is appended, without any error indication. - -As with the @code{r} command, the special value @file{/dev/stdin} -is supported for the file name, which reads a line from the -standard input. 
- address@hidden T @var{label} address@hidden T (test and branch if failed) command address@hidden @value{SSEDEXT}, branch if @code{s///} failed address@hidden Branch to a label, if @code{s///} failed address@hidden Conditional branch -Branch to @var{label} only if there have been no successful address@hidden since the last input line was read or -conditional branch was taken. The @var{label} may be omitted, -in which case the next cycle is started. - address@hidden v @var{version} address@hidden v (version) command address@hidden @value{SSEDEXT}, checking for their presence address@hidden Requiring @value{SSED} -This command does nothing, but makes @command{sed} fail if address@hidden extensions are not supported, simply because other -versions of @command{sed} do not implement it. In addition, you -can specify the version of @command{sed} that your script -requires, such as @code{4.0.5}. The default is @code{4.0} -because that is the first version that implemented this command. - -This command enables all @value{SSEDEXT} even if address@hidden is set in the environment. - address@hidden W @var{filename} address@hidden W (write first line) command address@hidden Write first line to a file address@hidden @value{SSEDEXT}, writing first line to a file -Write to the given filename the portion of the pattern space up to -the first newline. Everything said under the @code{w} command about -file handling holds here too. - address@hidden z address@hidden z (Zap) command address@hidden @value{SSEDEXT}, emptying pattern space address@hidden Emptying pattern space -This command empties the content of pattern space. It is -usually the same as @samp{s/.*//}, but is more efficient -and works in the presence of invalid multibyte sequences -in the input stream. 
@sc{posix} mandates that such sequences -are @emph{not} matched by @samp{.}, so that there is no portable -way to clear @command{sed}'s buffers in the middle of the -script in most multibyte locales (including UTF-8 locales). address@hidden table - - - - - address@hidden sed addresses address@hidden Addresses: selecting lines - address@hidden -* Addresses overview:: Addresses overview -* Numeric Addresses:: selecting lines by numbers -* Regexp Addresses:: selecting lines by text matching -* Range Addresses:: selecting a range of lines address@hidden menu - address@hidden Addresses overview address@hidden Addresses overview - address@hidden addresses, numeric address@hidden numeric addresses -Addresses determine on which line(s) the @command{sed} command will be -executed. The following command replaces the word @samp{hello} -with @samp{world} only on line 144: - address@hidden on address@hidden on address@hidden -sed '144s/hello/world/' input.txt > output.txt address@hidden example address@hidden off address@hidden off - - - -If no addresses are given, the command is performed on all lines. -The following command replaces the word @samp{hello} with @samp{world} -on all lines in the input file: - address@hidden on address@hidden on address@hidden -sed 's/hello/world/' input.txt > output.txt address@hidden example address@hidden off address@hidden off - - - address@hidden addresses, regular expression address@hidden regular expression addresses -Addresses can contain regular expressions to match lines based -on content instead of line numbers. 
The following command replaces -the word @samp{hello} with @samp{world} only in lines -containing the word @samp{apple}: - address@hidden on address@hidden on address@hidden -sed '/apple/s/hello/world/' input.txt > output.txt address@hidden example address@hidden off address@hidden off - - - address@hidden addresses, range address@hidden range addresses -An address range is specified with two addresses separated by a comma -(@code{,}). Addresses can be numeric, regular expressions, or a mix of -both. -The following command replaces the word @samp{hello} with @samp{world} -only in lines 4 to 17 (inclusive): - address@hidden on address@hidden on address@hidden -sed '4,17s/hello/world/' input.txt > output.txt address@hidden example address@hidden off address@hidden off - - - address@hidden Excluding lines address@hidden Selecting non-matching lines address@hidden addresses, negating address@hidden addresses, excluding -Appending the @code{!} character to the end of an address -specification (before the command letter) negates the sense of the -match. That is, if the @code{!} character follows an address or an -address range, then only lines which do @emph{not} match the addresses -will be selected. The following command replaces the word @samp{hello} -with @samp{world} only in lines @emph{not} containing the word address@hidden: - address@hidden -sed '/apple/!s/hello/world/' input.txt > output.txt address@hidden example - -The following command replaces the word @samp{hello} with address@hidden only in lines 1 to 3 and 18 till the last line of the input file -(i.e. 
excluding lines 4 to 17): - address@hidden -sed '4,17!s/hello/world/' input.txt > output.txt address@hidden example - - - - - address@hidden Numeric Addresses address@hidden Selecting lines by numbers address@hidden Addresses, in @command{sed} scripts address@hidden Line selection address@hidden Selecting lines to process - -Addresses in a @command{sed} script can be in any of the following forms: address@hidden @code address@hidden @var{number} address@hidden Address, numeric address@hidden Line, selecting by number -Specifying a line number will match only that line in the input. -(Note that @command{sed} counts lines continuously across all input files -unless @option{-i} or @option{-s} options are specified.) - address@hidden $ address@hidden Address, last line address@hidden Last line, selecting address@hidden Line, selecting last -This address matches the last line of the last file of input, or -the last line of each file when the @option{-i} or @option{-s} options -are specified. - - address@hidden @address@hidden address@hidden @acronym{GNU} extensions, @address@hidden@var{m}} addresses -This @acronym{GNU} extension matches every @var{step}th line -starting with line @var{first}. -In particular, lines will be selected when there exists -a non-negative @var{n} such that the current line-number equals address@hidden + (@var{n} * @var{step}). -Thus, one would use @code{1~2} to select the odd-numbered lines and address@hidden for even-numbered lines; -to pick every third line starting with the second, @samp{2~3} would be used; -to pick every fifth line starting with the tenth, use @samp{10~5}; -and @samp{50~0} is just an obscure way of saying @code{50}. 
- -The following commands demonstrate the step address usage: - address@hidden -$ seq 10 | sed -n '0~4p' -4 -8 - -$ seq 10 | sed -n '1~3p' -1 -4 -7 -10 address@hidden example - - address@hidden table - - - address@hidden Regexp Addresses address@hidden selecting lines by text matching - address@hidden supports the following regular expression addresses. -The default regular expression is address@hidden syntax, , Basic Regular Expression (BRE)}. -If @option{-E} or @option{-r} options are used, the regular expression should be -in @ref{ERE syntax, , Extended Regular Expression (ERE)} syntax. address@hidden vs ERE}. - address@hidden @code address@hidden /@var{regexp}/ address@hidden Address, as a regular expression address@hidden Line, selecting by regular expression match -This will select any line which matches the regular expression @var{regexp}. -If @var{regexp} itself includes any @code{/} characters, -each must be escaped by a backslash (@code{\}). - -The following command prints lines in @file{/etc/passwd} -which end with @address@hidden -There are of course many other ways to do the same, -e.g. address@hidden -grep 'bash$' /etc/passwd -awk -F: '$7 == "/bin/bash"' /etc/passwd address@hidden example -}: - address@hidden -sed -n '/bash$/p' /etc/passwd address@hidden example - address@hidden empty regular expression address@hidden @value{SSEDEXT}, modifiers and the empty regular expression -The empty regular expression @samp{//} repeats the last regular -expression match (the same holds if the empty regular expression is -passed to the @code{s} command). Note that modifiers to regular expressions -are evaluated when the regular expression is compiled, thus it is invalid to -specify them together with the empty regular expression. - address@hidden address@hidden -(The @code{%} may be replaced by any other single character.)
- address@hidden Slash character, in regular expressions -This also matches the regular expression @var{regexp}, -but allows one to use a different delimiter than @code{/}. -This is particularly useful if the @var{regexp} itself contains -a lot of slashes, since it avoids the tedious escaping of every @code{/}. -If @var{regexp} itself includes any delimiter characters, -each must be escaped by a backslash (@code{\}). - -The following three commands are equivalent. They print lines -which start with @samp{/home/alice/documents/}: - address@hidden -sed -n '/^\/home\/alice\/documents\//p' -sed -n '\%^/home/alice/documents/%p' -sed -n '\;^/home/alice/documents/;p' address@hidden example - - address@hidden /@var{regexp}/I address@hidden address@hidden address@hidden @acronym{GNU} extensions, @code{I} modifier address@hidden case insensitive, regular expression -The @code{I} modifier to regular-expression matching is a @acronym{GNU} -extension which causes the @var{regexp} to be matched in -a case-insensitive manner. - -In many other programming languages, a lower case @code{i} is used -for case-insensitive regular expression matching. However, in @command{sed} -the @code{i} is used for the insert command (TODO: add @code{pxref}). - -Observe the difference between the following examples. - -In this example, @code{/b/I} is the address: a regular expression with the @code{I} -modifier. @code{d} is the delete command: - address@hidden -$ printf "%s\n" a b c | sed '/b/Id' -a -c address@hidden example - -Here, @code{/b/} is the address: a regular expression. address@hidden is the insert command. address@hidden is the value to insert.
-A line with @samp{d} is then inserted above the matched line: - address@hidden -$ printf "%s\n" a b c | sed '/b/id' -a -d -b -c address@hidden example - address@hidden /@var{regexp}/M address@hidden address@hidden address@hidden @value{SSEDEXT}, @code{M} modifier -The @code{M} modifier to regular-expression matching is a @value{SSED} -extension which directs @value{SSED} to match the regular expression -in @cite{multi-line} mode. The modifier causes @code{^} and @code{$} to -match respectively (in addition to the normal behavior) the empty string -after a newline, and the empty string before a newline. There are -special character sequences address@hidden PERL -(@code{\`} and @code{\'}) address@hidden ifclear -which always match the beginning or the end of the buffer. -In addition, -the period character does not match a new-line character in -multi-line mode. address@hidden table - address@hidden Range Addresses address@hidden Range Addresses - address@hidden Range of lines address@hidden Several lines, selecting -An address range can be specified by specifying two addresses -separated by a comma (@code{,}). An address range matches lines -starting from where the first address matches, and continues -until the second address matches (inclusively): - address@hidden -$ seq 10 | sed -n '4,6p' -4 -5 -6 address@hidden example - -If the second address is a @var{regexp}, then checking for the -ending match will start with the line @emph{following} the -line which matched the first address: a range will always -span at least two lines (except of course if the input stream -ends). 
- address@hidden -$ seq 10 | sed -n '4,/[0-9]/p' -4 -5 address@hidden example - -If the second address is a @var{number} less than (or equal to) -the line matching the first address, then only the one line is -matched: - address@hidden -$ seq 10 | sed -n '4,1p' -4 address@hidden example - address@hidden Special addressing forms address@hidden Range with start address of zero address@hidden Zero, as range start address address@hidden @var{addr1},+N address@hidden @var{addr1},~N address@hidden @acronym{GNU} extensions, special two-address forms address@hidden @acronym{GNU} extensions, @code{0} address address@hidden @acronym{GNU} extensions, 0,@var{addr2} addressing address@hidden @acronym{GNU} extensions, @var{addr1},address@hidden addressing address@hidden @acronym{GNU} extensions, @var{addr1},address@hidden addressing address@hidden also supports some special two-address forms; all these -are @acronym{GNU} extensions: address@hidden @code address@hidden 0,/@var{regexp}/ -A line number of @code{0} can be used in an address specification like address@hidden,/@var{regexp}/} so that @command{sed} will try to match address@hidden in the first input line too. In other words, address@hidden,/@var{regexp}/} is similar to @code{1,/@var{regexp}/}, -except that if @var{addr2} matches the very first line of input the address@hidden,/@var{regexp}/} form will consider it to end the range, whereas -the @code{1,/@var{regexp}/} form will match the beginning of its range and -hence make the range span up to the @emph{second} occurrence of the -regular expression. - -Note that this is the only place where the @code{0} address makes -sense; there is no 0-th line and commands which are given the @code{0} -address in any other way will give an error. 
- -The following examples demonstrate the difference between starting -with address 1 and 0: - address@hidden -$ seq 10 | sed -n '1,/[0-9]/p' -1 -2 - -$ seq 10 | sed -n '0,/[0-9]/p' -1 address@hidden example - - address@hidden @var{addr1},address@hidden -Matches @var{addr1} and the @var{N} lines following @var{addr1}. - address@hidden -$ seq 10 | sed -n '6,+2p' -6 -7 -8 address@hidden example - address@hidden can be a line number or a regular expression. - address@hidden @var{addr1},address@hidden -Matches @var{addr1} and the lines following @var{addr1} -until the next line whose input line number is a multiple of @var{N}. -The following command prints starting at line 6, until the next line which -is a multiple of 4 (i.e. line 8): - address@hidden -$ seq 10 | sed -n '6,~4p' -6 -7 -8 address@hidden example - address@hidden can be a line number or a regular expression. - address@hidden table - - - - address@hidden sed regular expressions address@hidden Regular Expressions: selecting text - address@hidden -* Regular Expressions Overview:: Overview of Regular expression in @command{sed} -* BRE vs ERE:: Basic (BRE) and extended (ERE) regular expression - syntax -* BRE syntax:: Overview of basic regular expression syntax -* ERE syntax:: Overview of extended regular expression syntax -* Character Classes and Bracket Expressions:: -* regexp extensions:: Additional regular expression commands -* Back-references and Subexpressions:: Back-references and Subexpressions -* Escapes:: Specifying special characters -* Locale Considerations:: address@hidden menu - address@hidden Regular Expressions Overview address@hidden Overview of regular expression in @command{sed} - address@hidden NOTE: Keep examples in the 'overview' section address@hidden neutral in regards to BRE/ERE - to ease understanding. - - -To know how to use @command{sed}, people should understand regular -expressions (@dfn{regexp} for short). 
A regular expression -is a pattern that is matched against a -subject string from left to right. Most characters are address@hidden: they stand for -themselves in a pattern, and match the corresponding characters. -Regular expressions in @command{sed} are specified between two -slashes. - -The following command prints lines containing the word address@hidden: - address@hidden -sed -n '/hello/p' address@hidden example - -The above example is equivalent to this @command{grep} command: - address@hidden -grep 'hello' address@hidden example - -The power of regular expressions comes from the ability to include -alternatives and repetitions in the pattern. These are encoded in the -pattern by the use of @dfn{special characters}, which do not stand for -themselves but instead are interpreted in some special way. - -The character @code{^} (caret) in a regular expression matches the -beginning of the line. The character @code{.} (dot) matches any single -character. The following @command{sed} command matches and prints -lines which start with the letter @samp{b}, followed by any single character, -followed by the letter @samp{d}: - address@hidden -$ printf "%s\n" abode bad bed bit bid byte body | sed -n '/^b.d/p' -bad -bed -bid -body address@hidden example - -The following sections explain the meaning and usage of special -characters in regular expressions. - address@hidden BRE vs ERE address@hidden Basic (BRE) and extended (ERE) regular expression - -Basic and extended regular expressions are two variations on the -syntax of the specified pattern. Basic Regular Expression (BRE) is the -default in @command{sed} (and similarly in @command{grep}). Extended -Regular Expression syntax (ERE) is activated by using the @option{-r} -or @option{-E} options (and similarly, @command{grep -E}). 
- -In @value{SSED} the only difference between basic and extended regular -expressions is in the behavior of a few special characters: @samp{?}, address@hidden, parentheses, braces (@address@hidden@}}), and @samp{|}. - -With basic (BRE) syntax, these characters do not have special meaning -unless prefixed with a backslash (@samp{\}); while with extended (ERE) syntax -it is reversed: these characters are special unless they are prefixed -with a backslash (@samp{\}). - address@hidden @columnfractions .33 .33 .33 - address@hidden Desired pattern address@hidden Basic (BRE) Syntax address@hidden Extended (ERE) Syntax - address@hidden literal @samp{+} (plus sign) - address@hidden address@hidden -$ echo "a+b=c" | sed -n '/a+b/p' -a+b=c address@hidden example - address@hidden address@hidden -$ echo "a+b=c" | sed -E -n '/a\+b/p' -a+b=c address@hidden example - - address@hidden One or more @samp{a} characters followed by @samp{b} -(plus sign as special meta-character) - address@hidden address@hidden -$ echo "aab" | sed -n '/a\+b/p' -aab address@hidden example - address@hidden address@hidden -$ echo "aab" | sed -E -n '/a+b/p' -aab address@hidden example - address@hidden multitable - - - - address@hidden BRE syntax address@hidden Overview of basic regular expression syntax - -Here is a brief description -of regular expression syntax as used in @command{sed}. - address@hidden @code address@hidden @var{char} -A single ordinary character matches itself. - address@hidden * address@hidden @acronym{GNU} extensions, to basic regular expressions -Matches a sequence of zero or more instances of matches for the -preceding regular expression, which must be an ordinary character, a -special character preceded by @code{\}, a @code{.}, a grouped regexp -(see below), or a bracket expression. As a @acronym{GNU} extension, a -postfixed regular expression can also be followed by @code{*}; for -example, @code{a**} is equivalent to @code{a*}.
@acronym{POSIX} -1003.1-2001 says that @code{*} stands for itself when it appears at -the start of a regular expression or subexpression, but many address@hidden implementations do not support this and portable -scripts should instead use @code{\*} in these contexts. address@hidden . -Matches any character, including newline. - address@hidden ^ -Matches the null string at beginning of the pattern space, i.e. what -appears after the circumflex must appear at the beginning of the -pattern space. - -In most scripts, pattern space is initialized to the content of each -line (@pxref{Execution Cycle, , How @code{sed} works}). So, it is a -useful simplification to think of @code{^#include} as matching only -lines where @samp{#include} is the first thing on line---if there are -spaces before, for example, the match fails. This simplification is -valid as long as the original content of pattern space is not modified, -for example with an @code{s} command. - address@hidden acts as a special character only at the beginning of the -regular expression or subexpression (that is, after @code{\(} or address@hidden|}). Portable scripts should avoid @code{^} at the beginning of -a subexpression, though, as @acronym{POSIX} allows implementations that -treat @code{^} as an ordinary character in that context. - address@hidden $ -It is the same as @code{^}, but refers to end of pattern space. address@hidden also acts as a special character only at the end -of the regular expression or subexpression (that is, before @code{\)} -or @code{\|}), and its use at the end of a subexpression is not -portable. - - address@hidden address@hidden address@hidden address@hidden -Matches any single character in @var{list}: for example, address@hidden matches all vowels. A list may include -sequences like @address@hidden@var{char2}}, which -matches any character between (inclusive) @var{char1} -and @var{char2}. address@hidden Classes and Bracket Expressions}. 
- address@hidden \+ address@hidden @acronym{GNU} extensions, to basic regular expressions -As @code{*}, but matches one or more. It is a @acronym{GNU} extension. - address@hidden \? address@hidden @acronym{GNU} extensions, to basic regular expressions -As @code{*}, but only matches zero or one. It is a @acronym{GNU} extension. - address@hidden address@hidden@address@hidden -As @code{*}, but matches exactly @var{i} sequences (@var{i} is a -decimal integer; for portability, keep it between 0 and 255 -inclusive). - address@hidden address@hidden@var{i},@address@hidden -Matches between @var{i} and @var{j}, inclusive, sequences. - address@hidden address@hidden@var{i},address@hidden -Matches more than or equal to @var{i} sequences. - address@hidden \(@var{regexp}\) -Groups the inner @var{regexp} as a whole, this is used to: - address@hidden @bullet address@hidden address@hidden @acronym{GNU} extensions, to basic regular expressions -Apply postfix operators, like @code{\(abcd\)*}: -this will search for zero or more whole sequences -of @samp{abcd}, while @code{abcd*} would search -for @samp{abc} followed by zero or more occurrences -of @samp{d}. Note that support for @code{\(abcd\)*} is -required by @acronym{POSIX} 1003.1-2001, but many address@hidden -implementations do not support it and hence it is not universally -portable. - address@hidden -Use back references (see below). address@hidden itemize - - address@hidden @var{regexp1}\|@var{regexp2} address@hidden @acronym{GNU} extensions, to basic regular expressions -Matches either @var{regexp1} or @var{regexp2}. Use -parentheses to use complex alternative regular expressions. -The matching process tries each alternative in turn, from -left to right, and the first one that succeeds is used. -It is a @acronym{GNU} extension. - address@hidden @address@hidden -Matches the concatenation of @var{regexp1} and @var{regexp2}. 
-Concatenation binds more tightly than @code{\|}, @code{^}, and address@hidden, but less tightly than the other regular expression -operators. - address@hidden address@hidden -Matches the @var{digit}-th @code{\(@dots{}\)} parenthesized -subexpression in the regular expression. This is called a @dfn{back -reference}. Subexpressions are implicitly numbered by counting -occurrences of @code{\(} left-to-right. - address@hidden \n -Matches the newline character. - address@hidden address@hidden -Matches @var{char}, where @var{char} is one of @code{$}, address@hidden, @code{.}, @code{[}, @code{\}, or @code{^}. -Note that the only C-like -backslash sequences that you can portably assume to be -interpreted are @code{\n} and @code{\\}; in particular address@hidden is not portable, and matches a @samp{t} under most -implementations of @command{sed}, rather than a tab character. - address@hidden table - address@hidden Greedy regular expression matching -Note that the regular expression matcher is greedy, i.e., matches -are attempted from left to right and, if two or more matches are -possible starting at the same character, it selects the longest. - address@hidden -Examples: address@hidden @samp address@hidden abcdef -Matches @samp{abcdef}. - address@hidden a*b -Matches zero or more @samp{a}s followed by a single address@hidden For example, @samp{b} or @samp{aaaaab}. - address@hidden a\?b -Matches @samp{b} or @samp{ab}. - address@hidden a\+b\+ -Matches one or more @samp{a}s followed by one or more address@hidden: @samp{ab} is the shortest possible match, but -other examples are @samp{aaaab} or @samp{abbbbb} or address@hidden - address@hidden .* address@hidden .\+ -These two both match all the characters in a string; -however, the first matches every string (including the empty -string), while the second matches only strings containing -at least one character. 
- address@hidden ^main.*(.*) -This matches a string starting with @samp{main}, -followed by an opening and closing -parenthesis. The @samp{n}, @samp{(} and @samp{)} need not -be adjacent. - address@hidden ^# -This matches a string beginning with @samp{#}. - address@hidden \\$ -This matches a string ending with a single backslash. The -regexp contains two backslashes for escaping. - address@hidden \$ -Instead, this matches a string consisting of a single dollar sign, -because it is escaped. - address@hidden [a-zA-Z0-9] -In the C locale, this matches any @acronym{ASCII} letters or digits. - address@hidden [^ @kbd{tab}]\+ -(Here @kbd{tab} stands for a single tab character.) -This matches a string of one or more -characters, none of which is a space or a tab. -Usually this means a word. - address@hidden ^\(.*\)\n\1$ -This matches a string consisting of two equal substrings separated by -a newline. - address@hidden address@hidden@}A$ -This matches nine characters followed by an @samp{A} at the end of a line. - address@hidden address@hidden@}A -This matches the start of a string that contains 16 characters, -the last of which is an @samp{A}. - address@hidden table - - address@hidden ERE syntax address@hidden Overview of extended regular expression syntax address@hidden Extended regular expressions, syntax - -The only difference between basic and extended regular expressions is in -the behavior of a few characters: @samp{?}, @samp{+}, parentheses, -braces (@address@hidden@}}), and @samp{|}. While basic regular expressions -require these to be escaped if you want them to behave as special -characters, when using extended regular expressions you must escape -them if you want them @emph{to match a literal character}. @samp{|} -is special here because @samp{\|} is a GNU extension -- standard -basic regular expressions do not provide its functionality. - address@hidden -Examples: address@hidden @code address@hidden abc? 
-becomes @samp{abc\?} when using extended regular expressions. It matches -the literal string @samp{abc?}. - address@hidden c\+ -becomes @samp{c+} when using extended regular expressions. It matches -one or more @samp{c}s. - address@hidden address@hidden,address@hidden -becomes @address@hidden,@}} when using extended regular expressions. It matches -three or more @samp{a}s. - address@hidden \(abc\)address@hidden,address@hidden -becomes @samp{(abc)@{2,address@hidden when using extended regular expressions. It -matches either @samp{abcabc} or @samp{abcabcabc}. - address@hidden \(abc*\)\1 -becomes @samp{(abc*)\1} when using extended regular expressions. -Backreferences must still be escaped when using extended regular -expressions. - address@hidden a\|b -becomes @samp{a|b} when using extended regular expressions. It matches address@hidden or @samp{b}. address@hidden table - address@hidden Character Classes and Bracket Expressions address@hidden Character Classes and Bracket Expressions - address@hidden The 'character class' section is shamelessly copied from grep's manual. - address@hidden bracket expression address@hidden character class -A @dfn{bracket expression} is a list of characters enclosed by @samp{[} and address@hidden -It matches any single character in that list; -if the first character of the list is the caret @samp{^}, -then it matches any character @strong{not} in the list. -For example, the following command replaces the words address@hidden or @samp{grey} with @samp{blue}: - address@hidden -sed 's/gr[ae]y/blue/' address@hidden example - address@hidden TODO: fix 'ref' to look good in both HTML and PDF -Bracket expressions can be used in both address@hidden syntax,,basic} and @ref{ERE syntax,,extended} -regular expressions (that is, with or without the @option{-E}/@option{-r} -options). - address@hidden range expression -Within a bracket expression, a @dfn{range expression} consists of two -characters separated by a hyphen. 
-It matches any single character that -sorts between the two characters, inclusive. -In the default C locale, the sorting sequence is the native character -order; for example, @samp{[a-d]} is equivalent to @samp{[abcd]}. - - -Finally, certain named classes of characters are predefined within -bracket expressions, as follows. - -These named classes must be used @emph{inside} brackets -themselves. Correct usage: address@hidden -$ echo 1 | sed 's/[[:digit:]]/X/' -X address@hidden example - -Incorrect usage is rejected by newer @command{sed} versions. -Older versions accepted it but treated it as a single bracket expression -(which is equivalent to @samp{[dgit:]}, -that is, only the characters @var{d/g/i/t/:}): address@hidden -# current GNU sed versions - incorrect usage rejected -$ echo 1 | sed 's/[:digit:]/X/' -sed: character class syntax is [[:space:]], not [:space:] - -# older GNU sed versions -$ echo 1 | sed 's/[:digit:]/X/' -1 address@hidden example - - address@hidden classes of characters address@hidden character classes address@hidden named character classes address@hidden @samp - address@hidden [:alnum:] address@hidden alnum @r{character class} address@hidden alphanumeric characters -Alphanumeric characters: address@hidden:alpha:]} and @samp{[:digit:]}; in the @samp{C} locale and ASCII -character encoding, this is the same as @samp{[0-9A-Za-z]}. - address@hidden [:alpha:] address@hidden alpha @r{character class} address@hidden alphabetic characters -Alphabetic characters: address@hidden:lower:]} and @samp{[:upper:]}; in the @samp{C} locale and ASCII -character encoding, this is the same as @samp{[A-Za-z]}. - address@hidden [:blank:] address@hidden blank @r{character class} address@hidden blank characters -Blank characters: -space and tab. - address@hidden [:cntrl:] address@hidden cntrl @r{character class} address@hidden control characters -Control characters. -In ASCII, these characters have octal codes 000 -through 037, and 177 (DEL). 
-In other character sets, these are -the equivalent characters, if any. - address@hidden [:digit:] address@hidden digit @r{character class} address@hidden digit characters address@hidden numeric characters -Digits: @code{0 1 2 3 4 5 6 7 8 9}. - address@hidden [:graph:] address@hidden graph @r{character class} address@hidden graphic characters -Graphical characters: address@hidden:alnum:]} and @samp{[:punct:]}. - address@hidden [:lower:] address@hidden lower @r{character class} address@hidden lower-case letters -Lower-case letters; in the @samp{C} locale and ASCII character -encoding, this is address@hidden b c d e f g h i j k l m n o p q r s t u v w x y z}. - address@hidden [:print:] address@hidden print @r{character class} address@hidden printable characters -Printable characters: address@hidden:alnum:]}, @samp{[:punct:]}, and space. - address@hidden [:punct:] address@hidden punct @r{character class} address@hidden punctuation characters -Punctuation characters; in the @samp{C} locale and ASCII character -encoding, this is address@hidden@: " # $ % & ' ( ) * + , - .@: / : ; < = > ?@: @@ [ \ ] ^ _ ` @{ | @} ~}. - address@hidden [:space:] address@hidden space @r{character class} address@hidden space characters address@hidden whitespace characters -Space characters: in the @samp{C} locale, this is -tab, newline, vertical tab, form feed, carriage return, and space. - - address@hidden [:upper:] address@hidden upper @r{character class} address@hidden upper-case letters -Upper-case letters: in the @samp{C} locale and ASCII character -encoding, this is address@hidden B C D E F G H I J K L M N O P Q R S T U V W X Y Z}. - address@hidden [:xdigit:] address@hidden xdigit @r{character class} address@hidden xdigit class address@hidden hexadecimal digits -Hexadecimal digits: address@hidden 1 2 3 4 5 6 7 8 9 A B C D E F a b c d e f}. 
- address@hidden table -Note that the brackets in these class names are -part of the symbolic names, and must be included in addition to -the brackets delimiting the bracket expression. - -Most meta-characters lose their special meaning inside bracket expressions: - address@hidden @samp address@hidden ] -ends the bracket expression if it's not the first list item. -So, if you want to make the @samp{]} character a list item, -you must put it first. - address@hidden - -represents the range if it's not first or last in a list or the ending point -of a range. - address@hidden ^ -represents the characters not in the list. -If you want to make the @samp{^} -character a list item, place it anywhere but first. address@hidden table - -TODO: incorporate this paragraph (copied verbatim from BRE section). - address@hidden @code{POSIXLY_CORRECT} behavior, bracket expressions -The characters @code{$}, @code{*}, @code{.}, @code{[}, and @code{\} -are normally not special within @var{list}. For example, @code{[\*]} -matches either @samp{\} or @samp{*}, because the @code{\} is not -special here. However, strings like @code{[.ch.]}, @code{[=a=]}, and address@hidden:space:]} are special within @var{list} and represent collating -symbols, equivalence classes, and character classes, respectively, and address@hidden is therefore special within @var{list} when it is followed by address@hidden, @code{=}, or @code{:}. Also, when not in address@hidden mode, special escapes like @code{\n} and address@hidden are recognized within @var{list}. @xref{Escapes}. address@hidden ******** - - address@hidden TODO: improve explanation about collation classes and equivalence classes address@hidden perhaps dedicate a section to Locales ?? - address@hidden @samp address@hidden [. -represents the open collating symbol. - address@hidden .] -represents the close collating symbol. - address@hidden [= -represents the open equivalence class. - address@hidden =] -represents the close equivalence class. 
- address@hidden [: -represents the open character class symbol, and should be followed by a -valid character class name. - address@hidden :] -represents the close character class symbol. address@hidden table - - address@hidden regexp extensions address@hidden regular expression extensions - -The following sequences have special meaning inside regular expressions -(used in @ref{Regexp Addresses,,addresses} and the @code{s} command). - -These can be used in both address@hidden syntax,,basic} and @ref{ERE syntax,,extended} -regular expressions (that is, with or without the @option{-E}/@option{-r} -options). - address@hidden @code address@hidden \w -Matches any ``word'' character. A ``word'' character is any -letter or digit or the underscore character. - address@hidden -$ echo "abc %-= def." | sed 's/\w/X/g' -XXX %-= XXX. address@hidden example - - address@hidden \W -Matches any ``non-word'' character. - address@hidden -$ echo "abc %-= def." | sed 's/\W/X/g' -abcXXXXXdefX address@hidden example - - address@hidden \b -Matches a word boundary; that is it matches if the character -to the left is a ``word'' character and the character to the -right is a ``non-word'' character, or vice-versa. - address@hidden -$ echo "abc %-= def." | sed 's/\b/X/g' -XabcX %-= XdefX. address@hidden example - - address@hidden \B -Matches everywhere but on a word boundary; that is it matches -if the character to the left and the character to the right -are either both ``word'' characters or both ``non-word'' -characters. - address@hidden -$ echo "abc %-= def." | sed 's/\B/X/g' -aXbXc X%X-X=X dXeXf.X address@hidden example - - address@hidden \s -Matches whitespace characters (spaces and tabs). -Newlines embedded in the pattern/hold spaces will also match: - address@hidden -$ echo "abc %-= def." | sed 's/\s/X/g' -abcX%-=Xdef. address@hidden example - - address@hidden \S -Matches non-whitespace characters. - address@hidden -$ echo "abc %-= def."
| sed 's/\S/X/g' -XXX XXX XXXX address@hidden example - - address@hidden \< -Matches the beginning of a word. - address@hidden -$ echo "abc %-= def." | sed 's/\</X/g' -Xabc %-= Xdef. -@end example - - -@item \> -Matches the end of a word. - address@hidden -$ echo "abc %-= def." | sed 's/\>/X/g' -abcX %-= defX. address@hidden example - - address@hidden \` -Matches only at the start of pattern space. This is different -from @code{^} in multi-line mode. - -Compare the following two examples: - address@hidden -$ printf "a\nb\nc\n" | sed 'N;N;s/^/X/gm' -Xa -Xb -Xc - -$ printf "a\nb\nc\n" | sed 'N;N;s/\`/X/gm' -Xa -b -c address@hidden example - address@hidden \' -Matches only at the end of pattern space. This is different -from @code{$} in multi-line mode. - - - address@hidden table - - address@hidden Back-references and Subexpressions address@hidden Back-references and Subexpressions address@hidden subexpression address@hidden back-reference - address@hidden are regular expression commands which refer to a -previous part of the matched regular expression. Back-references are -specified with backslash and a single digit (e.g. @samp{\1}). The -part of the regular expression they refer to is called a address@hidden, and is designated with parentheses. - -Back-references and subexpressions are used in two cases: in the -regular expression search pattern, and in the @var{replacement} part -of the @command{s} command (@pxref{Regexp Addresses,,Regular -Expression Addresses} and @ref{The "s" Command}). - -In a regular expression pattern, back-references are used to match -the same content as a previously matched subexpression. In the -following example, the subexpression is @samp{.} - any single -character (being surrounded by parentheses makes it a -subexpression). The back-reference @samp{\1} asks to match the same -content (same character) as the sub-expression. - -The command below matches words starting with any character, -followed by the letter @samp{o}, followed by the same character as the -first.
- address@hidden -$ sed -E -n '/^(.)o\1$/p' /usr/share/dict/words -bob -mom -non -pop -sos -tot -wow address@hidden example - -Multiple subexpressions are automatically numbered from -left-to-right. This command searches for 6-letter -palindromes (the first three letters are 3 subexpressions, -followed by 3 back-references in reverse order): - address@hidden -$ sed -E -n '/^(.)(.)(.)\3\2\1$/p' /usr/share/dict/words -redder address@hidden example - -In the @command{s} command, back-references can be -used in the @var{replacement} part to refer back to subexpressions in -the @var{regexp} part. - -The following example uses two subexpressions in the regular -expression to match two space-separated words. The back-references in -the @var{replacement} part print the words in a different order: - address@hidden -$ echo "James Bond" | sed -E 's/(.*) (.*)/The name is \2, \1 \2./' -The name is Bond, James Bond. address@hidden example - - -When used with alternation, if the group does not participate in the -match then the back-reference makes the whole match fail. For -example, @samp{a(.)|b\1} will not match @samp{ba}. When multiple -regular expressions are given with @option{-e} or from a file -(@samp{-f @var{file}}), back-references are local to each expression. - - address@hidden Escapes address@hidden Escape Sequences - specifying special characters - address@hidden @acronym{GNU} extensions, special escapes -Until this chapter, we have only encountered escapes of the form address@hidden, which tell @command{sed} not to interpret the circumflex -as a special character, but rather to take it literally. For -example, @samp{\*} matches a single asterisk rather than zero -or more backslashes. - address@hidden @code{POSIXLY_CORRECT} behavior, escapes -This chapter introduces another kind of address@hidden -the escapes introduced here are @acronym{GNU} -extensions, with the exception of @code{\n}.
In basic regular -expression mode, setting @code{POSIXLY_CORRECT} disables them inside -bracket expressions.}---that -is, escapes that are applied to a character or sequence of characters -that ordinarily are taken literally, and that @command{sed} replaces -with a special character. This provides a way -of encoding non-printable characters in patterns in a visible manner. -There is no restriction on the appearance of non-printing characters -in a @command{sed} script but when a script is being prepared in the -shell or by text editing, it is usually easier to use one of -the following escape sequences than the binary character it -represents: - -The list of these escapes is: - address@hidden @code address@hidden \a -Produces or matches a @sc{bel} character, that is an ``alert'' (@sc{ascii} 7). - address@hidden \f -Produces or matches a form feed (@sc{ascii} 12). - address@hidden \n -Produces or matches a newline (@sc{ascii} 10). - address@hidden \r -Produces or matches a carriage return (@sc{ascii} 13). - address@hidden \t -Produces or matches a horizontal tab (@sc{ascii} 9). - address@hidden \v -Produces or matches a so called ``vertical tab'' (@sc{ascii} 11). - address@hidden address@hidden -Produces or matches @address@hidden@var{x}}, where @var{x} is -any character. The precise effect of @address@hidden is as follows: -if @var{x} is a lower case letter, it is converted to upper case. -Then bit 6 of the character (hex 40) is inverted. Thus @samp{\cz} becomes -hex 1A, but @address@hidden becomes hex 3B, while @samp{\c;} becomes hex 7B. - address@hidden address@hidden -Produces or matches a character whose decimal @sc{ascii} value is @var{xxx}. - address@hidden address@hidden -Produces or matches a character whose octal @sc{ascii} value is @var{xxx}. - address@hidden address@hidden -Produces or matches a character whose hexadecimal @sc{ascii} value is @var{xx}. 
address@hidden table - address@hidden (backspace) was omitted because of the conflict with -the existing ``word boundary'' meaning. - - address@hidden Locale Considerations address@hidden Locale Considerations - -TODO: fix following paragraphs (copied verbatim from 'bracket -expression' section). - -TODO: mention locale support is heavily dependent on the OS/libc, not on sed. - -The current locale affects the characters matched by @command{sed}'s -regular expressions. - - -In other locales, the sorting sequence is not specified, and address@hidden might be equivalent to @samp{[abcd]} or to address@hidden, or it might fail to match any character, or the set of -characters that it matches might even be erratic. -To obtain the traditional interpretation -of bracket expressions, you can use the @samp{C} locale by setting the address@hidden environment variable to the value @samp{C}. - address@hidden -# TODO: is there any real-world system/locale where 'A' -# is replaced by '-' ? -$ echo A | sed 's/[a-z]/-/' -A address@hidden example - -Their interpretation depends on the @env{LC_CTYPE} locale; -for example, @samp{[[:alnum:]]} means the character class of numbers and letters -in the current locale. - -TODO: show example of collation - address@hidden -# TODO: this works on glibc systems, not on musl-libc/freebsd/macosx. 
-$ printf 'cliché\n' | LC_ALL=fr_FR.utf8 sed 's/[[=e=]]/X/g' -clichX address@hidden example - - address@hidden advanced sed address@hidden Advanced @command{sed}: cycles and buffers - address@hidden -* Execution Cycle:: How @command{sed} works -* Hold and Pattern Buffers:: -* Branching and flow control:: address@hidden menu - address@hidden Execution Cycle address@hidden How @command{sed} Works - address@hidden Buffer spaces, pattern and hold address@hidden Spaces, pattern and hold address@hidden Pattern space, definition address@hidden Hold space, definition address@hidden maintains two data buffers: the active @emph{pattern} space, -and the auxiliary @emph{hold} space. Both are initially empty. - address@hidden operates by performing the following cycle on each -line of input: first, @command{sed} reads one line from the input -stream, removes any trailing newline, and places it in the pattern space. -Then commands are executed; each command can have an address associated -to it: addresses are a kind of condition code, and a command is only -executed if the condition is verified before the command is to be -executed. - -When the end of the script is reached, unless the @option{-n} option -is in use, the contents of pattern space are printed out to the output -stream, adding back the trailing newline if it was address@hidden, -if @command{sed} prints a line without the terminating newline, it will -nevertheless print the missing newline as soon as more text is sent to -the same output stream, which gives the ``least expected surprise'' -even though it does not make commands like @samp{sed -n p} exactly -identical to @command{cat}.} Then the next cycle starts for the next -input line. - -Unless special commands (like @samp{D}) are used, the pattern space is -deleted between two cycles. The hold space, on the other hand, keeps -its data between cycles (see commands @samp{h}, @samp{H}, @samp{x}, address@hidden, @samp{G} to move data between both buffers). 
- address@hidden Hold and Pattern Buffers address@hidden Hold and Pattern Buffers - -TODO - address@hidden Branching and flow control address@hidden Branching and Flow Control - -TODO - address@hidden Examples address@hidden Some Sample Scripts - -Here are some @command{sed} scripts to guide you in the art of mastering address@hidden - address@hidden - -Useful one-liners: -* Joining lines:: - -Some exotic examples: -* Centering lines:: -* Increment a number:: -* Rename files to lower case:: -* Print bash environment:: -* Reverse chars of lines:: - -Emulating standard utilities: -* tac:: Reverse lines of files -* cat -n:: Numbering lines -* cat -b:: Numbering non-blank lines -* wc -c:: Counting chars -* wc -w:: Counting words -* wc -l:: Counting lines -* head:: Printing the first lines -* tail:: Printing the last lines -* uniq:: Make duplicate lines unique -* uniq -d:: Print duplicated lines of input -* uniq -u:: Remove all duplicated lines -* cat -s:: Squeezing blank lines address@hidden menu - address@hidden Joining lines address@hidden Joining lines - -Join specific lines (e.g. if lines 2 and 3 need to be joined): - address@hidden on address@hidden on address@hidden -$ cat lines.txt -hello -hel -lo -hello - -$ sed 'address@hidden;s/\n//;@}' lines.txt -hello -hello -hello address@hidden example address@hidden off address@hidden off - -Join lines ending with backslashes: - address@hidden on address@hidden on address@hidden -$ cat 1.txt -this \ -is \ -a \ -long \ -line -and another \ -line - -$ sed -e ':x /\\$/ @{ N; s/\\\n//g ; bx @}' 1.txt -this is a long line -and another line - - -#TODO: The above requires gnu sed. -# non-gnu seds need newlines after ':' and 'b' address@hidden example address@hidden off address@hidden off - address@hidden Centering lines address@hidden Centering Lines - -This script centers all lines of a file on a 80 columns width. 
-To change that width, the number in @address@hidden@address@hidden must be -replaced, and the number of added spaces also must be changed. - -Note how the buffer commands are used to separate parts in -the regular expressions to be matched---this is a common -technique. - address@hidden start------------------------------------------- address@hidden -#!/usr/bin/sed -f - -# Put 80 spaces in the buffer -1 @{ - x - s/^$/ / - s/^.*$/&&&&&&&&/ - x address@hidden - -# del leading and trailing spaces -y/@kbd{tab}/ / -s/^ *// -s/ *$// - -# add a newline and 80 spaces to end of line -G - -# keep first 81 chars (80 + a newline) -s/^\(address@hidden@}\).*$/\1/ - -# \2 matches half of the spaces, which are moved to the beginning -s/^\(.*\)\n\(.*\)\2/\2\1/ address@hidden example address@hidden end--------------------------------------------- - address@hidden Increment a number address@hidden Increment a Number - -This script is one of a few that demonstrate how to do arithmetic -in @command{sed}. This is indeed possible,@address@hidden guru Greg -Ubben wrote an implementation of the @command{dc} @sc{rpn} calculator! -It is distributed together with sed.} but must be done manually. - -To increment one number you just add 1 to last digit, replacing -it by the following digit. There is one exception: when the digit -is a nine the previous digits must be also incremented until you -don't have a nine. - -This solution by Bruno Haible is very clever and smart because -it uses a single buffer; if you don't have this limitation, the -algorithm used in @ref{cat -n, Numbering lines}, is faster. -It works by replacing trailing nines with an underscore, then -using multiple @code{s} commands to increment the last digit, -and then again substituting underscores with zeros. 
- address@hidden start------------------------------------------- address@hidden -#!/usr/bin/sed -f - -/[^0-9]/ d - -# replace all trailing 9s by _ (any other character except digits, could -# be used) -:d -s/9\(_*\)$/_\1/ -td - -# incr last digit only. The first line adds a most-significant -# digit of 1 if we have to add a digit. - -s/^\(_*\)$/1\1/; tn -s/8\(_*\)$/9\1/; tn -s/7\(_*\)$/8\1/; tn -s/6\(_*\)$/7\1/; tn -s/5\(_*\)$/6\1/; tn -s/4\(_*\)$/5\1/; tn -s/3\(_*\)$/4\1/; tn -s/2\(_*\)$/3\1/; tn -s/1\(_*\)$/2\1/; tn -s/0\(_*\)$/1\1/; tn - -:n -y/_/0/ address@hidden example address@hidden end--------------------------------------------- - address@hidden Rename files to lower case address@hidden Rename Files to Lower Case - -This is a pretty strange use of @command{sed}. We transform text, and -transform it to be shell commands, then just feed them to shell. -Don't worry, even worse hacks are done when using @command{sed}; I have -seen a script converting the output of @command{date} into a @command{bc} -program! - -The main body of this is the @command{sed} script, which remaps the name -from lower to upper (or vice-versa) and even checks out -if the remapped name is the same as the original name. -Note how the script is parameterized using shell -variables and proper quoting. - address@hidden start------------------------------------------- address@hidden -#! /bin/sh -# rename files to lower/upper case... -# -# usage: -# move-to-lower * -# move-to-upper * -# or -# move-to-lower -R . -# move-to-upper -R . -# - -help() address@hidden - cat << eof -Usage: $0 [-n] [-r] [-h] files... - --n do nothing, only see what would be done --R recursive (use find) --h this message -files files to remap to lower case - -Examples: - $0 -n * (see if everything is ok, then...) - $0 * - - $0 -R . 
- -eof address@hidden - -apply_cmd='sh' -finder='echo "$@@" | tr " " "\n"' -files_only= - -while : -do - case "$1" in - -n) apply_cmd='cat' ;; - -R) finder='find "$@@" -type f';; - -h) help ; exit 1 ;; - *) break ;; - esac - shift -done - -if [ -z "$1" ]; then - echo Usage: $0 [-h] [-n] [-r] files... - exit 1 -fi - -LOWER='abcdefghijklmnopqrstuvwxyz' -UPPER='ABCDEFGHIJKLMNOPQRSTUVWXYZ' - -case `basename $0` in - *upper*) TO=$UPPER; FROM=$LOWER ;; - *) FROM=$UPPER; TO=$LOWER ;; -esac - -eval $finder | sed -n ' - -# remove all trailing slashes -s/\/*$// - -# add ./ if there is no path, only a filename -/\//! s/^/.\// - -# save path+filename -h - -# remove path -s/.*\/// - -# do conversion only on filename -y/'$FROM'/'$TO'/ - -# now line contains original path+file, while -# hold space contains the new filename -x - -# add converted file name to line, which now contains -# path/file-name\nconverted-file-name -G - -# check if converted file name is equal to original file name, -# if it is, do not print anything -/^.*\/\(.*\)\n\1/b - -# escape special characters for the shell -s/["$`\\]/\\&/g - -# now, transform path/fromfile\n, into -# mv path/fromfile path/tofile and print it -s/^\(.*\/\)\(.*\)\n\(.*\)$/mv "\1\2" "\1\3"/p - -' | $apply_cmd address@hidden example address@hidden end--------------------------------------------- - address@hidden Print bash environment address@hidden Print @command{bash} Environment - -This script strips the definition of the shell functions -from the output of the @command{set} Bourne-shell command. - address@hidden start------------------------------------------- address@hidden -#!/bin/sh - -set | sed -n ' -:x - address@hidden -# if no occurrence of "=()" print and load next line address@hidden ifinfo address@hidden -# if no occurrence of @samp{=()} print and load next line address@hidden ifnotinfo -/=()/! @{ p; b; @} -/ () $/! 
@{ p; b; @} - -# possible start of functions section -# save the line in case this is a var like FOO="() " -h - -# if the next line has a brace, we quit because -# nothing comes after functions -n -/address@hidden/ q - -# print the old line -x; p - -# work on the new line now -x; bx -' address@hidden example address@hidden end--------------------------------------------- - address@hidden Reverse chars of lines address@hidden Reverse Characters of Lines - -This script can be used to reverse the position of characters -in lines. The technique moves two characters at a time, hence -it is faster than more intuitive implementations. - -Note the @code{tx} command before the definition of the label. -This is often needed to reset the flag that is tested by -the @code{t} command. - -Imaginative readers will find uses for this script. An example -is reversing the output of @address@hidden requires -another script to pad the output of banner; for example - address@hidden -#! /bin/sh - -banner -w $1 $2 $3 $4 | - sed -e :a -e '/address@hidden,'$1'address@hidden/ @{ s/$/ /; ba; @}' | - ~/sedscripts/reverseline.sed address@hidden example -} - address@hidden start------------------------------------------- address@hidden -#!/usr/bin/sed -f - -/../! b - -# Reverse a line. Begin embedding the line between two newlines -s/^.*$/\ -&\ -/ - -# Move first character at the end. The regexp matches until -# there are zero or one characters between the markers -tx -:x -s/\(\n.\)\(.*\)\(.\n\)/\3\2\1/ -tx - -# Remove the newline markers -s/\n//g address@hidden example address@hidden end--------------------------------------------- - address@hidden tac address@hidden Reverse Lines of Files - -This one begins a series of totally useless (yet interesting) -scripts emulating various Unix commands. This, in particular, -is a @command{tac} workalike. - -Note that on implementations other than @acronym{GNU} @command{sed} -this script might easily overflow internal buffers. 
- address@hidden start------------------------------------------- address@hidden -#!/usr/bin/sed -nf - -# reverse all lines of input, i.e. first line became last, ... - -# from the second line, the buffer (which contains all previous lines) -# is *appended* to current line, so, the order will be reversed -1! G - -# on the last line we're done -- print everything -$ p - -# store everything on the buffer again -h address@hidden example address@hidden end--------------------------------------------- - address@hidden cat -n address@hidden Numbering Lines - -This script replaces @samp{cat -n}; in fact it formats its output -exactly like @acronym{GNU} @command{cat} does. - -Of course this is completely useless and for two reasons: first, -because somebody else did it in C, second, because the following -Bourne-shell script could be used for the same purpose and would -be much faster: - address@hidden start------------------------------------------- address@hidden -#! /bin/sh -sed -e "=" $@@ | sed -e ' - s/^/ / - N - s/^ *\(......\)\n/\1 / -' address@hidden example address@hidden end--------------------------------------------- - -It uses @command{sed} to print the line number, then groups lines two -by two using @code{N}. Of course, this script does not teach as much as -the one presented below. - -The algorithm used for incrementing uses both buffers, so the line -is printed as soon as possible and then discarded. The number -is split so that changing digits go in a buffer and unchanged ones go -in the other; the changed digits are modified in a single step -(using a @code{y} command). The line number for the next line -is then composed and stored in the hold space, to be used in the -next iteration. 
- address@hidden start------------------------------------------- address@hidden -#!/usr/bin/sed -nf - -# Prime the pump on the first line -x -/^$/ s/^.*$/1/ - -# Add the correct line number before the pattern -G -h - -# Format it and print it -s/^/ / -s/^ *\(......\)\n/\1 /p - -# Get the line number from hold space; add a zero -# if we're going to add a digit on the next line -g -s/\n.*$// -/^9*$/ s/^/0/ - -# separate changing/unchanged digits with an x -s/.9*$/x&/ - -# keep changing digits in hold space -h -s/^.*x// -y/0123456789/1234567890/ -x - -# keep unchanged digits in pattern space -s/x.*$// - -# compose the new number, remove the newline implicitly added by G -G -s/\n// -h address@hidden example address@hidden end--------------------------------------------- - address@hidden cat -b address@hidden Numbering Non-blank Lines - -Emulating @samp{cat -b} is almost the same as @samp{cat -n}---we only -have to select which lines are to be numbered and which are not. - -The part that is common to this script and the previous one is -not commented to show how important it is to comment @command{sed} -scripts properly... - address@hidden start------------------------------------------- address@hidden -#!/usr/bin/sed -nf - -/^$/ @{ - p - b address@hidden - -# Same as cat -n from now -x -/^$/ s/^.*$/1/ -G -h -s/^/ / -s/^ *\(......\)\n/\1 /p -x -s/\n.*$// -/^9*$/ s/^/0/ -s/.9*$/x&/ -h -s/^.*x// -y/0123456789/1234567890/ -x -s/x.*$// -G -s/\n// -h address@hidden example address@hidden end--------------------------------------------- - address@hidden wc -c address@hidden Counting Characters - -This script shows another way to do arithmetic with @command{sed}. -In this case we have to add possibly large numbers, so implementing -this by successive increments would not be feasible (and possibly -even more complicated to contrive than this script). - -The approach is to map numbers to letters, kind of an abacus -implemented with @command{sed}. 
@samp{a}s are units, @samp{b}s are -tens and so on: we simply add the number of characters -on the current line as units, and then propagate the carry -to tens, hundreds, and so on. - -As usual, running totals are kept in hold space. - -On the last line, we convert the abacus form back to decimal. -For the sake of variety, this is done with a loop rather than -with some 80 @code{s} address@hidden implementations -have a limit of 199 commands per script}: first we -convert units, removing @samp{a}s from the number; then we -rotate letters so that tens become @samp{a}s, and so on -until no more letters remain. - address@hidden start------------------------------------------- address@hidden -#!/usr/bin/sed -nf - -# Add n+1 a's to hold space (+1 is for the newline) -s/./a/g -H -x -s/\n/a/ - -# Do the carry. The t's and b's are not necessary, -# but they do speed up the thing -t a -: a; s/aaaaaaaaaa/b/g; t b; b done -: b; s/bbbbbbbbbb/c/g; t c; b done -: c; s/cccccccccc/d/g; t d; b done -: d; s/dddddddddd/e/g; t e; b done -: e; s/eeeeeeeeee/f/g; t f; b done -: f; s/ffffffffff/g/g; t g; b done -: g; s/gggggggggg/h/g; t h; b done -: h; s/hhhhhhhhhh//g - -: done -$! @{ - h - b address@hidden - -# On the last line, convert back to decimal - -: loop -/a/! s/[b-h]*/&0/ -s/aaaaaaaaa/9/ -s/aaaaaaaa/8/ -s/aaaaaaa/7/ -s/aaaaaa/6/ -s/aaaaa/5/ -s/aaaa/4/ -s/aaa/3/ -s/aa/2/ -s/a/1/ - -: next -y/bcdefgh/abcdefg/ -/[a-h]/ b loop -p address@hidden example address@hidden end--------------------------------------------- - address@hidden wc -w address@hidden Counting Words - -This script is almost the same as the previous one, once each -of the words on the line is converted to a single @samp{a} -(in the previous script each letter was changed to an @samp{a}). - -It is interesting that real @command{wc} programs have optimized -loops for @samp{wc -c}, so they are much slower at counting -words rather than characters. 
This script's bottleneck, -instead, is arithmetic, and hence the word-counting one -is faster (it has to manage smaller numbers). - -Again, the common parts are not commented to show the importance -of commenting @command{sed} scripts. - address@hidden start------------------------------------------- address@hidden -#!/usr/bin/sed -nf - -# Convert words to a's -s/[ @kbd{tab}][ @kbd{tab}]*/ /g -s/^/ / -s/ [^ ][^ ]*/a /g -s/ //g - -# Append them to hold space -H -x -s/\n// - -# From here on it is the same as in wc -c. -/aaaaaaaaaa/! bx; s/aaaaaaaaaa/b/g -/bbbbbbbbbb/! bx; s/bbbbbbbbbb/c/g -/cccccccccc/! bx; s/cccccccccc/d/g -/dddddddddd/! bx; s/dddddddddd/e/g -/eeeeeeeeee/! bx; s/eeeeeeeeee/f/g -/ffffffffff/! bx; s/ffffffffff/g/g -/gggggggggg/! bx; s/gggggggggg/h/g -s/hhhhhhhhhh//g -:x -$! @{ h; b; @} -:y -/a/! s/[b-h]*/&0/ -s/aaaaaaaaa/9/ -s/aaaaaaaa/8/ -s/aaaaaaa/7/ -s/aaaaaa/6/ -s/aaaaa/5/ -s/aaaa/4/ -s/aaa/3/ -s/aa/2/ -s/a/1/ -y/bcdefgh/abcdefg/ -/[a-h]/ by -p address@hidden example address@hidden end--------------------------------------------- - address@hidden wc -l address@hidden Counting Lines - -No strange things are done now, because @command{sed} gives us address@hidden -l} functionality for free!!! Look: - address@hidden start------------------------------------------- address@hidden -#!/usr/bin/sed -nf -$= address@hidden example address@hidden end--------------------------------------------- - address@hidden head address@hidden Printing the First Lines - -This script is probably the simplest useful @command{sed} script. -It displays the first 10 lines of input; the number of displayed -lines is right before the @code{q} command. 
- address@hidden start------------------------------------------- address@hidden -#!/usr/bin/sed -f -10q address@hidden example address@hidden end--------------------------------------------- - address@hidden tail address@hidden Printing the Last Lines - -Printing the last @var{n} lines rather than the first is more complex -but indeed possible. @var{n} is encoded in the second line, before -the bang character. - -This script is similar to the @command{tac} script in that it keeps the -final output in the hold space and prints it at the end: - address@hidden start------------------------------------------- address@hidden -#!/usr/bin/sed -nf - -1! @{; H; g; @} -1,10 !s/[^\n]*\n// -$p -h address@hidden example address@hidden end--------------------------------------------- - -Mainly, the script keeps a window of 10 lines and slides it -by adding a line and deleting the oldest (the substitution command -on the second line works like a @code{D} command but does not -restart the loop). - -The ``sliding window'' technique is a very powerful way to write -efficient and complex @command{sed} scripts, because commands like address@hidden would require a lot of work if implemented manually.
- -To introduce the technique, which is fully demonstrated in the -rest of this chapter and is based on the @code{N}, @code{P} -and @code{D} commands, here is an implementation of @command{tail} -using a simple ``sliding window.'' - -This looks complicated but in fact the working is the same as -the last script: after we have kicked in the appropriate number -of lines, however, we stop using the hold space to keep inter-line -state, and instead use @code{N} and @code{D} to slide pattern -space by one line: - address@hidden start------------------------------------------- address@hidden -#!/usr/bin/sed -f - -1h -2,10 @{; H; g; @} -$q -1,9d -N -D address@hidden example address@hidden end--------------------------------------------- - -Note how the first, second and fourth line are inactive after -the first ten lines of input. After that, all the script does -is: exiting on the last line of input, appending the next input -line to pattern space, and removing the first line. - address@hidden uniq address@hidden Make Duplicate Lines Unique - -This is an example of the art of using the @code{N}, @code{P} -and @code{D} commands, probably the most difficult to master. - address@hidden start------------------------------------------- address@hidden -#!/usr/bin/sed -f -h - -:b -# On the last line, print and exit -$b -N -/^\(.*\)\n\1$/ @{ - # The two lines are identical. Undo the effect of - # the N command. - g - bb address@hidden - -# If the @code{N} command had added the last line, print and exit -$b - -# The lines are different; print the first and go -# back working on the second. -P -D address@hidden example address@hidden end--------------------------------------------- - -As you can see, we maintain a 2-line window using @code{P} and @code{D}. -This technique is often used in advanced @command{sed} scripts. - address@hidden uniq -d address@hidden Print Duplicated Lines of Input - -This script prints only duplicated lines, like @samp{uniq -d}.
- address@hidden start------------------------------------------- address@hidden -#!/usr/bin/sed -nf - -$b -N -/^\(.*\)\n\1$/ @{ - # Print the first of the duplicated lines - s/.*\n// - p - - # Loop until we get a different line - :b - $b - N - /^\(.*\)\n\1$/ @{ - s/.*\n// - bb - @} address@hidden - -# The last line cannot be followed by duplicates -$b - -# Found a different one. Leave it alone in the pattern space -# and go back to the top, hunting its duplicates -D address@hidden example address@hidden end--------------------------------------------- - address@hidden uniq -u address@hidden Remove All Duplicated Lines - -This script prints only unique lines, like @samp{uniq -u}. - address@hidden start------------------------------------------- address@hidden -#!/usr/bin/sed -f - -# Search for a duplicate line --- until that, print what you find. -$b -N -/^\(.*\)\n\1$/ ! @{ - P - D address@hidden - -:c -# Got two equal lines in pattern space. At the -# end of the file we simply exit -$d - -# Else, we keep reading lines with @code{N} until we -# find a different one -s/.*\n// -N -/^\(.*\)\n\1$/ @{ - bc address@hidden - -# Remove the last instance of the duplicate line -# and go back to the top -D address@hidden example address@hidden end--------------------------------------------- - address@hidden cat -s address@hidden Squeezing Blank Lines - -As a final example, here are three scripts, of increasing complexity -and speed, that implement the same function as @samp{cat -s}, that is -squeezing blank lines. - -The first leaves a blank line at the beginning and end if there are -some already. 
- -@c start------------------------------------------- -@example -#!/usr/bin/sed -f - -# on empty lines, join with next -# Note there is a star in the regexp -:x -/^\n*$/ @{ -N -bx -@} - -# now, squeeze all '\n', this can be also done by: -# s/^\(\n\)*/\1/ -s/\n*/\ -/ -@end example -@c end--------------------------------------------- - -This one is a bit more complex and removes all empty lines -at the beginning. It does leave a single blank line at end -if one was there. - -@c start------------------------------------------- -@example -#!/usr/bin/sed -f - -# delete all leading empty lines -1,/^./@{ -/./!d -@} - -# on an empty line we remove it and all the following -# empty lines, but one -:x -/./!@{ -N -s/^\n$// -tx -@} -@end example -@c end--------------------------------------------- - -This removes leading and trailing blank lines. It is also the -fastest. Note that loops are completely done with @code{n} and -@code{b}, without relying on @command{sed} to restart the -script automatically at the end of a line. - -@c start------------------------------------------- -@example -#!/usr/bin/sed -nf - -# delete all (leading) blanks -/./!d - -# get here: so there is a non empty -:x -# print it -p -# get next -n -# got chars? print it again, etc... -/./bx - -# no, don't have chars: got an empty line -:z -# get next, if last line we finish here so no trailing -# empty lines are written -n -# also empty? then ignore it, and get next... this will -# remove ALL empty lines -/./!bz - -# all empty lines were deleted/ignored, but we have a non empty.
As -# what we want to do is to squeeze, insert a blank line artificially -i\ - -bx -@end example -@c end--------------------------------------------- - -@node Limitations -@chapter @value{SSED}'s Limitations and Non-limitations - -@cindex @acronym{GNU} extensions, unlimited line length -@cindex Portability, line length limitations -For those who want to write portable @command{sed} scripts, -be aware that some implementations have been known to -limit line lengths (for the pattern and hold spaces) -to be no more than 4000 bytes. -The @sc{posix} standard specifies that conforming @command{sed} -implementations shall support at least 8192 byte line lengths. -@value{SSED} has no built-in limit on line length; -as long as it can @code{malloc()} more (virtual) memory, -you can feed or construct lines as long as you like. - -However, recursion is used to handle subpatterns and indefinite -repetition. This means that the available stack space may limit -the size of the buffer that can be processed by certain patterns. - - -@node Other Resources -@chapter Other Resources for Learning About @command{sed} - -@cindex Additional reading about @command{sed} -In addition to several books that have been written about @command{sed} -(either specifically or as chapters in books which discuss -shell programming), one can find out more about @command{sed} -(including suggestions of a few books) from the FAQ -for the @code{sed-users} mailing list, available from: -@display -@uref{http://sed.sourceforge.net/sedfaq.html} -@end display - -Also of interest are -@uref{http://www.student.northpark.edu/pemente/sed/index.htm} -and @uref{http://sed.sf.net/grabbag}, -which include @command{sed} tutorials and other @command{sed}-related goodies. - -The @code{sed-users} mailing list itself maintained by Sven Guckes.
-To subscribe, visit @uref{http://groups.yahoo.com} and search -for the @code{sed-users} mailing list. - -@node Reporting Bugs -@chapter Reporting Bugs - -@cindex Bugs, reporting -Email bug reports to @email{bug-sed@@gnu.org}. -Also, please include the output of @samp{sed --version} in the body -of your report if at all possible. - -Please do not send a bug report like this: - -@example -@i{@i{@r{while building frobme-1.3.4}}} -$ configure -@error{} sed: file sedscr line 1: Unknown option to 's' -@end example - -If @value{SSED} doesn't configure your favorite package, take a -few extra minutes to identify the specific problem and make a stand-alone -test case. Unlike other programs such as C compilers, making such test -cases for @command{sed} is quite simple. - -A stand-alone test case includes all the data necessary to perform the -test, and the specific invocation of @command{sed} that causes the problem. -The smaller a stand-alone test case is, the better. A test case should -not involve something as far removed from @command{sed} as ``try to configure -frobme-1.3.4''. Yes, that is in principle enough information to look -for the bug, but that is not a very practical prospect. - -Here are a few commonly reported bugs that are not bugs. - -@table @asis -@anchor{N_command_last_line} -@item @code{N} command on the last line -@cindex Portability, @code{N} command on the last line -@cindex Non-bugs, @code{N} command on the last line - -Most versions of @command{sed} exit without printing anything when -the @command{N} command is issued on the last line of a file. -@value{SSED} prints pattern space before exiting unless of course -the @command{-n} command switch has been specified. This choice is -by design.
- -Default behavior (gnu extension, non-POSIX conforming): -@example -$ seq 3 | sed N -1 -2 -3 -@end example -@noindent -To force POSIX-conforming behavior: -@example -$ seq 3 | sed --posix N -1 -2 -@end example - -For example, the behavior of -@example -sed N foo bar -@end example -@noindent -would depend on whether foo has an even or an odd number of -lines@footnote{which is the actual ``bug'' that prompted the -change in behavior}. Or, when writing a script to read the -next few lines following a pattern match, traditional -implementations of @code{sed} would force you to write -something like -@example -/foo/@{ $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N; $!N @} -@end example -@noindent -instead of just -@example -/foo/@{ N;N;N;N;N;N;N;N;N; @} -@end example - -@cindex @code{POSIXLY_CORRECT} behavior, @code{N} command -In any case, the simplest workaround is to use @code{$d;N} in -scripts that rely on the traditional behavior, or to set -the @code{POSIXLY_CORRECT} variable to a non-empty value. - -@item Regex syntax clashes (problems with backslashes) -@cindex @acronym{GNU} extensions, to basic regular expressions -@cindex Non-bugs, regex syntax clashes -@command{sed} uses the @sc{posix} basic regular expression syntax. According to -the standard, the meaning of some escape sequences is undefined in -this syntax; notable in the case of @command{sed} are @code{\|}, -@code{\+}, @code{\?}, @code{\`}, @code{\'}, @code{\<}, -@code{\>}, @code{\b}, @code{\B}, @code{\w}, and @code{\W}. - -As in all @acronym{GNU} programs that use @sc{posix} basic regular -expressions, @command{sed} interprets these escape sequences as special -characters. So, @code{x\+} matches one or more occurrences of @samp{x}. -@code{abc\|def} matches either @samp{abc} or @samp{def}.
- -This syntax may cause problems when running scripts written for other -@command{sed}s. Some @command{sed} programs have been written with the -assumption that @code{\|} and @code{\+} match the literal characters -@code{|} and @code{+}. Such scripts must be modified by removing the -spurious backslashes if they are to be used with modern implementations -of @command{sed}, like -@acronym{GNU} @command{sed}. - -On the other hand, some scripts use s|abc\|def||g to remove occurrences -of @emph{either} @code{abc} or @code{def}. While this worked until -@command{sed} 4.0.x, newer versions interpret this as removing the -string @code{abc|def}. This is again undefined behavior according to address@hidden, and this interpretation is arguably more robust: older -@command{sed}s, for example, required that the regex matcher parsed -@code{\/} as @code{/} in the common case of escaping a slash, which is -again undefined behavior; the new behavior avoids this, and this is good -because the regex matcher is only partially under our control. - -@cindex @acronym{GNU} extensions, special escapes -In addition, this version of @command{sed} supports several escape characters -(some of which are multi-character) to insert non-printable characters -in scripts (@code{\a}, @code{\c}, @code{\d}, @code{\o}, @code{\r}, -@code{\t}, @code{\v}, @code{\x}). These can cause similar problems -with scripts written for other @command{sed}s. - -@item @option{-i} clobbers read-only files -@cindex In-place editing -@cindex @value{SSEDEXT}, in-place editing -@cindex Non-bugs, in-place editing - -In short, @samp{sed -i} will let you delete the contents of -a read-only file, and in general the @option{-i} option -(@pxref{Invoking sed, , Invocation}) lets you clobber -protected files. This is not a bug, but rather a consequence -of how the Unix file system works.
- -The permissions on a file say what can happen to the data -in that file, while the permissions on a directory say what can -happen to the list of files in that directory. @samp{sed -i} -will not ever open for writing a file that is already on disk. -Rather, it will work on a temporary file that is finally renamed -to the original name: if you rename or delete files, you're actually -modifying the contents of the directory, so the operation depends on -the permissions of the directory, not of the file. For this same -reason, @command{sed} does not let you use @option{-i} on a writable file -in a read-only directory, and will break hard or symbolic links when -@option{-i} is used on such a file. - -@item @code{0a} does not work (gives an error) -@cindex @code{0} address -@cindex @acronym{GNU} extensions, @code{0} address -@cindex Non-bugs, @code{0} address - -There is no line 0. 0 is a special address that is only used to treat -addresses like @code{0,/@var{RE}/} as active when the script starts: if -you write @code{1,/abc/d} and the first line includes the word @samp{abc}, -then that match would be ignored because address ranges must span at least -two lines (barring the end of the file); but what you probably wanted is -to delete every line up to the first one including @samp{abc}, and this -is obtained with @code{0,/abc/d}. - -@ifclear PERL -@item @code{[a-z]} is case insensitive -@cindex Non-bugs, localization-related - -You are encountering problems with locales. POSIX mandates that @code{[a-z]} -uses the current locale's collation order -- in C parlance, that means using -@code{strcoll(3)} instead of @code{strcmp(3)}. Some locales have a -case-insensitive collation order, others don't. - -Another problem is that @code{[a-z]} tries to use collation symbols.
-This only happens if you are on the @acronym{GNU} system, using -@acronym{GNU} libc's regular expression matcher instead of compiling the -one supplied with @acronym{GNU} sed. In a Danish locale, for example, -the regular expression @code{^[a-z]$} matches the string @samp{aa}, -because this is a single collating symbol that comes after @samp{a} -and before @samp{b}; @samp{ll} behaves similarly in Spanish -locales, or @samp{ij} in Dutch locales. - -To work around these problems, which may cause bugs in shell scripts, set -the @env{LC_COLLATE} and @env{LC_CTYPE} environment variables to @samp{C}. - -@item @code{s/.*//} does not clear pattern space -@cindex Non-bugs, localization-related -@cindex @value{SSEDEXT}, emptying pattern space -@cindex Emptying pattern space - -This happens if your input stream includes invalid multibyte -sequences. @sc{posix} mandates that such sequences -are @emph{not} matched by @samp{.}, so that @samp{s/.*//} will not clear -pattern space as you would expect. In fact, there is no way to clear -sed's buffers in the middle of the script in most multibyte locales -(including UTF-8 locales). For this reason, @value{SSED} provides a `z' -command (for `zap') as an extension. - -To work around these problems, which may cause bugs in shell scripts, set -the @env{LC_COLLATE} and @env{LC_CTYPE} environment variables to @samp{C}. -@end ifclear -@end table - - - - -@page -@node GNU Free Documentation License -@appendix GNU Free Documentation License - -@include fdl.texi - - -@page -@node Concept Index -@unnumbered Concept Index - -This is a general index of all issues discussed in this manual, with the -exception of the @command{sed} commands and command-line options.
- -@printindex cp - -@page -@node Command and Option Index -@unnumbered Command and Option Index - -This is an alphabetical list of all @command{sed} commands and command-line -options. - -@printindex fn - -@contents -@bye - -@c XXX FIXME: the term "cycle" is never defined... diff --git a/doc/sed.texi b/doc/sed.texi index 33ded0b..fdea20b 100644 --- a/doc/sed.texi +++ b/doc/sed.texi @@ -1,5 +1,4 @@ \input texinfo @c -*-texinfo-*- -@c Do not edit this file!! It is automatically generated from sed-in.texi. @c @c -- Stuff that needs adding: ---------------------------------------------- @c (nothing!) -- 2.9.3