[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Emacs-diffs] master a122a02: Make [:graph:] act like [:print:] sans space
From: |
Paul Eggert |
Subject: |
[Emacs-diffs] master a122a02: Make [:graph:] act like [:print:] sans space |
Date: |
Wed, 15 Apr 2015 07:27:24 +0000 |
branch: master
commit a122a0276bddbda8ca84f9b94250a5a5f4e0582a
Author: Paul Eggert <address@hidden>
Commit: Paul Eggert <address@hidden>
Make [:graph:] act like [:print:] sans space
In POSIX [[:print:]] is equivalent to [ [:graph:]], so change
[:graph:] so that it matches everything that [:print:] does,
except for space.
* doc/lispref/searching.texi (Char Classes):
* etc/NEWS:
* lisp/emacs-lisp/rx.el (rx):
Document [:graph:] to be [:print:] sans ' '.
* src/character.c, src/character.h (graphicp): New function.
* src/regex.c (ISGRAPH) [emacs]: Use it.
(BIT_GRAPH): New macro.
(BIT_PRINT): Increase to 0x200, to make room for BIT_GRAPH.
(re_wctype_to_bit) [! WIDE_CHAR_SUPPORT]:
Return BIT_GRAPH for RECC_GRAPH.
(re_match_2_internal) [emacs]: Use ISGRAPH if BIT_GRAPH,
and ISPRINT if BIT_PRINT.
---
doc/lispref/searching.texi | 14 +++++++-------
etc/NEWS | 10 +++++-----
lisp/emacs-lisp/rx.el | 8 ++++----
src/character.c | 8 ++++++++
src/character.h | 1 +
src/regex.c | 12 ++++++++----
6 files changed, 33 insertions(+), 20 deletions(-)
diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi
index 238d814..10ea411 100644
--- a/doc/lispref/searching.texi
+++ b/doc/lispref/searching.texi
@@ -558,8 +558,11 @@ This matches any @acronym{ASCII} control character.
This matches @samp{0} through @samp{9}. Thus, @samp{[-+[:digit:]]}
matches any digit, as well as @samp{+} and @samp{-}.
@item [:graph:]
-This matches graphic characters---everything except @acronym{ASCII} control
-characters, space, and the delete character.
+This matches graphic characters---everything except space,
+@acronym{ASCII} and non-@acronym{ASCII} control characters,
+surrogates, and codepoints unassigned by Unicode, as indicated by the
+Unicode @samp{general-category} property (@pxref{Character
+Properties}).
@item [:lower:]
This matches any lower-case letter, as determined by the current case
table (@pxref{Case Tables}). If @code{case-fold-search} is
@@ -569,11 +572,8 @@ This matches any multibyte character (@pxref{Text
Representations}).
@item [:nonascii:]
This matches any non-@acronym{ASCII} character.
@item [:print:]
-This matches printing characters---everything except @acronym{ASCII}
-and non-@acronym{ASCII} control characters (including the delete
-character), surrogates, and codepoints unassigned by Unicode, as
-indicated by the Unicode @samp{general-category} property
-(@pxref{Character Properties}).
+This matches any printing character---either space, or a graphic
+character matched by @samp{[:graph:]}.
@item [:punct:]
This matches any punctuation character. (At present, for multibyte
characters, it matches anything that has non-word syntax.)
diff --git a/etc/NEWS b/etc/NEWS
index 907787a..d97e80a 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -629,12 +629,12 @@ notifications, if Emacs is compiled with file
notification support.
*** gulp.el
+++
-** The character class [:print:] in regular expressions
-no longer matches any multibyte character. Instead, Emacs now
+** The character classes [:graph:] and [:print:] in regular expressions
+no longer match every multibyte character. Instead, Emacs now
consults the Unicode character properties to determine which
-characters are printable. In particular, surrogates and unassigned
-codepoints are now rejected by this class. If you want the old
-behavior, use [:multibyte:] instead.
+characters are graphic or printable. In particular, surrogates and
+unassigned codepoints are now rejected. If you want the old behavior,
+use [:multibyte:] instead.
* New Modes and Packages in Emacs 25.1
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el
index a5a228e..ab9beb6 100644
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -965,12 +965,12 @@ CHAR
matches space and tab only.
`graphic', `graph'
- matches graphic characters--everything except ASCII control chars,
- space, and DEL.
+ matches graphic characters--everything except space, ASCII
+ and non-ASCII control characters, surrogates, and codepoints
+ unassigned by Unicode.
`printing', `print'
- matches printing characters--everything except ASCII and non-ASCII
- control characters, surrogates, and codepoints unassigned by Unicode.
+ matches space and graphic characters.
`alphanumeric', `alnum'
matches alphabetic characters and digits. (For multibyte characters,
diff --git a/src/character.c b/src/character.c
index b357dd5..ea98cf6 100644
--- a/src/character.c
+++ b/src/character.c
@@ -1022,6 +1022,14 @@ decimalnump (int c)
return gen_cat == UNICODE_CATEGORY_Nd;
}
+/* Return 'true' if C is a graphic character as defined by its
+ Unicode properties. */
+bool
+graphicp (int c)
+{
+ return c == ' ' || printablep (c);
+}
+
/* Return 'true' if C is a printable character as defined by its
Unicode properties. */
bool
diff --git a/src/character.h b/src/character.h
index 1a5d2c8..859d717 100644
--- a/src/character.h
+++ b/src/character.h
@@ -662,6 +662,7 @@ extern Lisp_Object string_escape_byte8 (Lisp_Object);
extern bool alphabeticp (int);
extern bool decimalnump (int);
+extern bool graphicp (int);
extern bool printablep (int);
/* Return a translation table of id number ID. */
diff --git a/src/regex.c b/src/regex.c
index b9d09d0..4af70c6 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -314,7 +314,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \
- : 1)
+ : graphicp (c))
# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \
@@ -1875,7 +1875,8 @@ struct range_table_work_area
#define BIT_MULTIBYTE 0x20
#define BIT_ALPHA 0x40
#define BIT_ALNUM 0x80
-#define BIT_PRINT 0x100
+#define BIT_GRAPH 0x100
+#define BIT_PRINT 0x200
/* Set the bit for character C in a list. */
@@ -2074,7 +2075,7 @@ re_wctype_to_bit (re_wctype_t cc)
{
switch (cc)
{
- case RECC_NONASCII: case RECC_GRAPH:
+ case RECC_NONASCII:
case RECC_MULTIBYTE: return BIT_MULTIBYTE;
case RECC_ALPHA: return BIT_ALPHA;
case RECC_ALNUM: return BIT_ALNUM;
@@ -2083,6 +2084,7 @@ re_wctype_to_bit (re_wctype_t cc)
case RECC_UPPER: return BIT_UPPER;
case RECC_PUNCT: return BIT_PUNCT;
case RECC_SPACE: return BIT_SPACE;
+ case RECC_GRAPH: return BIT_GRAPH;
case RECC_PRINT: return BIT_PRINT;
case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
@@ -5522,7 +5524,9 @@ re_match_2_internal (struct re_pattern_buffer *bufp,
const_re_char *string1,
| (class_bits & BIT_UPPER && ISUPPER (c))
| (class_bits & BIT_WORD && ISWORD (c))
| (class_bits & BIT_ALPHA && ISALPHA (c))
- | (class_bits & BIT_ALNUM && ISALNUM (c)))
+ | (class_bits & BIT_ALNUM && ISALNUM (c))
+ | (class_bits & BIT_GRAPH && ISGRAPH (c))
+ | (class_bits & BIT_PRINT && ISPRINT (c)))
not = !not;
else
CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Emacs-diffs] master a122a02: Make [:graph:] act like [:print:] sans space,
Paul Eggert <=