diff options
Diffstat (limited to 'lisp/xml.el')
-rw-r--r-- | lisp/xml.el | 32 |
1 files changed, 22 insertions, 10 deletions
diff --git a/lisp/xml.el b/lisp/xml.el index cec1f8a4e16..833eb8aced2 100644 --- a/lisp/xml.el +++ b/lisp/xml.el @@ -2,7 +2,7 @@ ;; Copyright (C) 2000-2019 Free Software Foundation, Inc. -;; Author: Emmanuel Briot <briot@gnat.com> +;; Author: Emmanuel Briot <briot@gnat.com> ;; Maintainer: Mark A. Hershberger <mah@everybody.org> ;; Keywords: xml, data @@ -176,11 +176,11 @@ See also `xml-get-attribute-or-nil'." ;; [4] NameStartChar ;; See the definition of word syntax in `xml-syntax-table'. -(defconst xml-name-start-char-re (concat "[[:word:]:_]")) +(defconst xml-name-start-char-re "[[:word:]:_]") ;; [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 ;; | [#x0300-#x036F] | [#x203F-#x2040] -(defconst xml-name-char-re (concat "[-0-9.[:word:]:_·̀-ͯ‿-⁀]")) +(defconst xml-name-char-re "[[:word:]:_.0-9\u00B7\u0300-\u036F\u203F\u2040-]") ;; [5] Name ::= NameStartChar (NameChar)* (defconst xml-name-re (concat xml-name-start-char-re xml-name-char-re "*")) @@ -194,13 +194,13 @@ See also `xml-get-attribute-or-nil'." ;; [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* (defconst xml-nmtokens-re (concat xml-nmtoken-re "\\(?: " xml-name-re "\\)*")) -;; [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' -(defconst xml-char-ref-re "\\(?:&#[0-9]+;\\|&#x[0-9a-fA-F]+;\\)") +;; [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [[:xdigit:]]+ ';' +(defconst xml-char-ref-re "\\(?:&#[0-9]+;\\|&#x[[:xdigit:]]+;\\)") ;; [68] EntityRef ::= '&' Name ';' (defconst xml-entity-ref (concat "&" xml-name-re ";")) -(defconst xml-entity-or-char-ref-re (concat "&\\(?:#\\(x\\)?\\([0-9a-fA-F]+\\)\\|\\(" +(defconst xml-entity-or-char-ref-re (concat "&\\(?:#\\(x\\)?\\([[:xdigit:]]+\\)\\|\\(" xml-name-re "\\)\\);")) ;; [69] PEReference ::= '%' Name ';' @@ -245,7 +245,6 @@ See also `xml-get-attribute-or-nil'." ;; [54] AttType ::= StringType | TokenizedType | EnumeratedType ;; [55] StringType ::= 'CDATA' (defconst xml-att-type-re (concat "\\(?:CDATA\\|" xml-tokenized-type-re - "\\|" xml-notation-type-re "\\|" xml-enumerated-type-re "\\)")) ;; [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) @@ -718,10 +717,10 @@ This follows the rule [28] in the XML specifications." (cond ((looking-at "PUBLIC\\s-+") (goto-char (match-end 0)) (unless (or (re-search-forward - "\\=\"\\([[:space:][:alnum:]-'()+,./:=?;!*#@$_%]*\\)\"" + "\\=\"\\([[:space:][:alnum:]'()+,./:=?;!*#@$_%-]*\\)\"" nil t) (re-search-forward - "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'" + "\\='\\([[:space:][:alnum:]()+,./:=?;!*#@$_%-]*\\)'" nil t)) (error "XML: Missing Public ID")) (let ((pubid (match-string-no-properties 1))) @@ -890,7 +889,7 @@ This follows the rule [28] in the XML specifications." The replacement text is obtained by replacing character references and parameter-entity references." (let ((ref-re (eval-when-compile - (concat "\\(?:&#\\([0-9]+\\)\\|&#x\\([0-9a-fA-F]+\\)\\|%\\(" + (concat "\\(?:&#\\([0-9]+\\)\\|&#x\\([[:xdigit:]]+\\)\\|%\\(" xml-name-re "\\)\\);"))) children) (while (string-match ref-re string) @@ -1073,6 +1072,19 @@ The first line is indented with INDENT-STRING." (insert ?\n indent-string)) (insert ?< ?/ (symbol-name (xml-node-name xml)) ?>)))) +;;;###autoload +(defun xml-remove-comments (beg end) + "Remove XML/HTML comments in the region between BEG and END. +All text between the <!-- ... --> markers will be removed." + (save-excursion + (save-restriction + (narrow-to-region beg end) + (goto-char beg) + (while (search-forward "<!--" nil t) + (let ((start (match-beginning 0))) + (when (search-forward "-->" nil t) + (delete-region start (point)))))))) + (provide 'xml) ;;; xml.el ends here |