summary | refs | log | tree | commit | diff
path: root/lisp/xml.el
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/xml.el')
-rw-r--r-- lisp/xml.el 23
1 files changed, 20 insertions, 3 deletions
diff --git a/lisp/xml.el b/lisp/xml.el
index dc774a202cf..c96ff80446a 100644
--- a/lisp/xml.el
+++ b/lisp/xml.el
@@ -655,7 +655,7 @@ Leave point at the first non-blank character after the tag."
(setq name (xml-maybe-do-ns (match-string-no-properties 1) nil xml-ns))
(goto-char end-pos)
- ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize
+ ;; See also: https://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize
;; Do we have a string between quotes (or double-quotes),
;; or a simple word ?
@@ -1015,7 +1015,10 @@ The first line is indented with the optional INDENT-STRING."
(defalias 'xml-print 'xml-debug-print)
-(defun xml-escape-string (string)
+(defconst xml-invalid-characters-re
+ "[^\u0009\u000A\u000D\u0020-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]")
+
+(defun xml-escape-string (string &optional noerror)
"Convert STRING into a string containing valid XML character data.
Replace occurrences of &<>\\='\" in STRING with their default XML
entity references (e.g., replace each & with &amp;).
@@ -1023,9 +1026,20 @@ entity references (e.g., replace each & with &amp;).
XML character data must not contain & or < characters, nor the >
character under some circumstances. The XML spec does not impose
restriction on \" or \\=', but we just substitute for these too
-\(as is permitted by the spec)."
+\(as is permitted by the spec).
+
+If STRING contains characters that are invalid in XML (as defined
+by https://www.w3.org/TR/xml/#charsets), operate depending on the
+value of NOERROR: if it is non-nil, remove them; else, signal an
+error of type `xml-invalid-character'."
(with-temp-buffer
(insert string)
+ (goto-char (point-min))
+ (while (re-search-forward xml-invalid-characters-re nil t)
+ (if noerror
+ (replace-match "")
+ (signal 'xml-invalid-character
+ (list (char-before) (match-beginning 0)))))
(dolist (substitution '(("&" . "&amp;")
("<" . "&lt;")
(">" . "&gt;")
@@ -1036,6 +1050,9 @@ restriction on \" or \\=', but we just substitute for these too
(replace-match (cdr substitution) t t nil)))
(buffer-string)))
+(define-error 'xml-invalid-character "Invalid XML character"
+ 'wrong-type-argument)
+
(defun xml-debug-print-internal (xml indent-string)
"Outputs the XML tree in the current buffer.
The first line is indented with INDENT-STRING."