[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Emacs-diffs] master 01/01: 'libxml-parse(html|xml)-region': new optional param 'discard-comments'.
From: |
Ulf Jasper |
Subject: |
[Emacs-diffs] master 01/01: 'libxml-parse(html|xml)-region': new optional param 'discard-comments'. |
Date: |
Fri, 21 Nov 2014 15:35:54 +0000 |
branch: master
commit c39443c1d651bab2eb023f4c38db418c3dc04160
Author: Ulf Jasper <address@hidden>
Date: Fri Nov 21 16:31:30 2014 +0100
'libxml-parse(html|xml)-region': new optional param 'discard-comments'.
* doc/lispref/text.texi (Parsing HTML/XML): Document new optional parameter
'discard-comments' of 'libxml-parse(html|xml)-region'.
* src/xml.c (parse_region): Take care of new optional parameter
'discard-comments' of 'libxml-parse(html|xml)-region'.
(Flibxml_parse_html_region, Flibxml_parse_xml_region): New
optional parameter 'discard-comments'.
* test/automated/libxml-tests.el
(libxml-tests--data-comments-preserved): Renamed from
'libxml-tests--data'.
(libxml-tests--data-comments-discarded): New.
(libxml-tests): Check whether 'libxml-parse-xml-region' is
discarding comments correctly.
---
doc/lispref/ChangeLog | 5 ++++
doc/lispref/text.texi | 7 ++++-
src/ChangeLog | 7 ++++++
src/xml.c | 47 ++++++++++++++++++++++-----------------
test/ChangeLog | 9 +++++++
test/automated/libxml-tests.el | 26 ++++++++++++++++++---
6 files changed, 74 insertions(+), 27 deletions(-)
diff --git a/doc/lispref/ChangeLog b/doc/lispref/ChangeLog
index 6706f93..0c8792a 100644
--- a/doc/lispref/ChangeLog
+++ b/doc/lispref/ChangeLog
@@ -1,3 +1,8 @@
+2014-11-21 Ulf Jasper <address@hidden>
+
+ * text.texi (Parsing HTML/XML): Document new optional parameter
+ 'discard-comments' of 'libxml-parse(html|xml)-region'.
+
2014-11-18 Leo Liu <address@hidden>
* functions.texi (Advising Named Functions): Document
diff --git a/doc/lispref/text.texi b/doc/lispref/text.texi
index d1a1e6f..7c88a5b 100644
--- a/doc/lispref/text.texi
+++ b/doc/lispref/text.texi
@@ -4324,7 +4324,7 @@ coding instead.
When Emacs is compiled with libxml2 support, the following functions
are available to parse HTML or XML text into Lisp object trees.
-@defun libxml-parse-html-region start end &optional base-url
+@defun libxml-parse-html-region start end &optional base-url discard-comments
This function parses the text between @var{start} and @var{end} as
HTML, and returns a list representing the HTML @dfn{parse tree}. It
attempts to handle ``real world'' HTML by robustly coping with syntax
@@ -4333,6 +4333,9 @@ mistakes.
The optional argument @var{base-url}, if non-@code{nil}, should be a
string specifying the base URL for relative URLs occurring in links.
+If the optional argument @var{discard-comments} is non-@code{nil},
+then the parse tree is created without any comments.
+
In the parse tree, each HTML node is represented by a list in which
the first element is a symbol representing the node name, the second
element is an alist of node attributes, and the remaining elements are
@@ -4368,7 +4371,7 @@ buffer. The argument @var{dom} should be a list as
generated by
@end defun
@cindex parsing xml
-@defun libxml-parse-xml-region start end &optional base-url
+@defun libxml-parse-xml-region start end &optional base-url discard-comments
This function is the same as @code{libxml-parse-html-region}, except
that it parses the text as XML rather than HTML (so it is stricter
about syntax).
diff --git a/src/ChangeLog b/src/ChangeLog
index b169479..f8c9c5a 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,10 @@
+2014-11-21 Ulf Jasper <address@hidden>
+
+ * xml.c (parse_region): Take care of new optional parameter
+ 'discard-comments' of 'libxml-parse(html|xml)-region'.
+ (Flibxml_parse_html_region, Flibxml_parse_xml_region): New
+ optional parameter 'discard-comments'.
+
2014-11-17 Paul Eggert <address@hidden>
Improve time stamp handling, and be more consistent about it.
diff --git a/src/xml.c b/src/xml.c
index 7e99beb..d418202 100644
--- a/src/xml.c
+++ b/src/xml.c
@@ -175,7 +175,7 @@ make_dom (xmlNode *node)
}
static Lisp_Object
-parse_region (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, int
htmlp)
+parse_region (Lisp_Object start, Lisp_Object end, Lisp_Object base_url,
Lisp_Object discard_comments, int htmlp)
{
xmlDoc *doc;
Lisp_Object result = Qnil;
@@ -214,21 +214,24 @@ parse_region (Lisp_Object start, Lisp_Object end,
Lisp_Object base_url, int html
if (doc != NULL)
{
- /* If the document is just comments, then this should get us the
- nodes anyway. */
- xmlNode *n = doc->children;
Lisp_Object r = Qnil;
-
- while (n) {
- if (!NILP (r))
- result = Fcons (r, result);
- r = make_dom (n);
- n = n->next;
- }
+ if (NILP(discard_comments))
+ {
+ /* If the document has toplevel comments, then this should
+ get us the nodes and the comments. */
+ xmlNode *n = doc->children;
+
+ while (n) {
+ if (!NILP (r))
+ result = Fcons (r, result);
+ r = make_dom (n);
+ n = n->next;
+ }
+ }
if (NILP (result)) {
- /* The document isn't just comments, so get the tree the
- proper way. */
+ /* The document doesn't have toplevel comments or we discarded
+ them. Get the tree the proper way. */
xmlNode *node = fn_xmlDocGetRootElement (doc);
if (node != NULL)
result = make_dom (node);
@@ -251,25 +254,27 @@ xml_cleanup_parser (void)
DEFUN ("libxml-parse-html-region", Flibxml_parse_html_region,
Slibxml_parse_html_region,
- 2, 3, 0,
+ 2, 4, 0,
doc: /* Parse the region as an HTML document and return the parse tree.
-If BASE-URL is non-nil, it is used to expand relative URLs. */)
- (Lisp_Object start, Lisp_Object end, Lisp_Object base_url)
+If BASE-URL is non-nil, it is used to expand relative URLs.
+If DISCARD-COMMENTS is non-nil, all HTML comments are discarded. */)
+ (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, Lisp_Object
discard_comments)
{
if (init_libxml2_functions ())
- return parse_region (start, end, base_url, 1);
+ return parse_region (start, end, base_url, discard_comments, 1);
return Qnil;
}
DEFUN ("libxml-parse-xml-region", Flibxml_parse_xml_region,
Slibxml_parse_xml_region,
- 2, 3, 0,
+ 2, 4, 0,
doc: /* Parse the region as an XML document and return the parse tree.
-If BASE-URL is non-nil, it is used to expand relative URLs. */)
- (Lisp_Object start, Lisp_Object end, Lisp_Object base_url)
+If BASE-URL is non-nil, it is used to expand relative URLs.
+If DISCARD-COMMENTS is non-nil, all HTML comments are discarded. */)
+ (Lisp_Object start, Lisp_Object end, Lisp_Object base_url, Lisp_Object
discard_comments)
{
if (init_libxml2_functions ())
- return parse_region (start, end, base_url, 0);
+ return parse_region (start, end, base_url, discard_comments, 0);
return Qnil;
}
diff --git a/test/ChangeLog b/test/ChangeLog
index 475b6a3..d0988e4 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,12 @@
+2014-11-21 Ulf Jasper <address@hidden>
+
+ * automated/libxml-tests.el
+ (libxml-tests--data-comments-preserved): Renamed from
+ 'libxml-tests--data'.
+ (libxml-tests--data-comments-discarded): New.
+ (libxml-tests): Check whether 'libxml-parse-xml-region' is
+ discarding comments correctly.
+
2014-11-17 Michal Nazarewicz <address@hidden>
* automated/tildify-tests.el (tildify-test-html, tildify-test-xml):
diff --git a/test/automated/libxml-tests.el b/test/automated/libxml-tests.el
index ced0df7..6b6d017 100644
--- a/test/automated/libxml-tests.el
+++ b/test/automated/libxml-tests.el
@@ -27,7 +27,7 @@
(require 'ert)
-(defvar libxml-tests--data
+(defvar libxml-tests--data-comments-preserved
`(;; simple case
("<?xml version=\"1.0\"?><foo baz=\"true\">bar</foo>"
. (foo ((baz . "true")) "bar"))
@@ -40,17 +40,35 @@
"<bar>blub</bar></foo><!--comment-b--><!--comment-c-->")
. (top nil (comment nil "comment-a") (foo ((a . "b")) (bar nil "blub"))
(comment nil "comment-b") (comment nil "comment-c"))))
- "Alist of XML strings and their expected parse trees.")
+ "Alist of XML strings and their expected parse trees for preserved
comments.")
+
+(defvar libxml-tests--data-comments-discarded
+ `(;; simple case
+ ("<?xml version=\"1.0\"?><foo baz=\"true\">bar</foo>"
+ . (foo ((baz . "true")) "bar"))
+ ;; toplevel comments -- first document child must not get lost
+ (,(concat "<?xml version=\"1.0\"?><foo>bar</foo><!--comment-1-->"
+ "<!--comment-2-->")
+ . (foo nil "bar"))
+ (,(concat "<?xml version=\"1.0\"?><!--comment-a--><foo a=\"b\">"
+ "<bar>blub</bar></foo><!--comment-b--><!--comment-c-->")
+ . (foo ((a . "b")) (bar nil "blub"))))
+ "Alist of XML strings and their expected parse trees for discarded
comments.")
(ert-deftest libxml-tests ()
"Test libxml."
(when (fboundp 'libxml-parse-xml-region)
(with-temp-buffer
- (dolist (test libxml-tests--data)
+ (dolist (test libxml-tests--data-comments-preserved)
+ (erase-buffer)
+ (insert (car test))
+ (should (equal (cdr test)
+ (libxml-parse-xml-region (point-min) (point-max)))))
+ (dolist (test libxml-tests--data-comments-discarded)
(erase-buffer)
(insert (car test))
(should (equal (cdr test)
- (libxml-parse-xml-region (point-min) (point-max))))))))
+ (libxml-parse-xml-region (point-min) (point-max) nil
t)))))))
;;; libxml-tests.el ends here
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Emacs-diffs] master 01/01: 'libxml-parse(html|xml)-region': new optional param 'discard-comments'.,
Ulf Jasper <=