diff options
Diffstat (limited to 'lisp/xml.el')
-rw-r--r-- | lisp/xml.el | 23 |
1 files changed, 20 insertions, 3 deletions
diff --git a/lisp/xml.el b/lisp/xml.el index dc774a202cf..c96ff80446a 100644 --- a/lisp/xml.el +++ b/lisp/xml.el @@ -655,7 +655,7 @@ Leave point at the first non-blank character after the tag." (setq name (xml-maybe-do-ns (match-string-no-properties 1) nil xml-ns)) (goto-char end-pos) - ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize + ;; See also: https://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize ;; Do we have a string between quotes (or double-quotes), ;; or a simple word ? @@ -1015,7 +1015,10 @@ The first line is indented with the optional INDENT-STRING." (defalias 'xml-print 'xml-debug-print) -(defun xml-escape-string (string) +(defconst xml-invalid-characters-re + "[^\u0009\u000A\u000D\u0020-\uD7FF\uE000-\uFFFD\U00010000-\U0010FFFF]") + +(defun xml-escape-string (string &optional noerror) "Convert STRING into a string containing valid XML character data. Replace occurrences of &<>\\='\" in STRING with their default XML entity references (e.g., replace each & with &amp;). @@ -1023,9 +1026,20 @@ entity references (e.g., replace each & with &amp;). XML character data must not contain & or < characters, nor the > character under some circumstances. The XML spec does not impose restriction on \" or \\=', but we just substitute for these too -\(as is permitted by the spec)." +\(as is permitted by the spec). + +If STRING contains characters that are invalid in XML (as defined +by https://www.w3.org/TR/xml/#charsets), operate depending on the +value of NOERROR: if it is non-nil, remove them; else, signal an +error of type `xml-invalid-character'." (with-temp-buffer (insert string) + (goto-char (point-min)) + (while (re-search-forward xml-invalid-characters-re nil t) + (if noerror + (replace-match "") + (signal 'xml-invalid-character + (list (char-before) (match-beginning 0))))) (dolist (substitution '(("&" . "&amp;") ("<" . "&lt;") (">" . 
"&gt;") @@ -1036,6 +1050,9 @@ restriction on \" or \\=', but we just substitute for these too (replace-match (cdr substitution) t t nil))) (buffer-string))) +(define-error 'xml-invalid-character "Invalid XML character" + 'wrong-type-argument) + (defun xml-debug-print-internal (xml indent-string) "Outputs the XML tree in the current buffer. The first line is indented with INDENT-STRING." |