diff options
Diffstat (limited to 'lisp/xml.el')
-rw-r--r-- | lisp/xml.el | 240 |
1 files changed, 132 insertions, 108 deletions
diff --git a/lisp/xml.el b/lisp/xml.el index d1e824c4ece..f135bdfabe4 100644 --- a/lisp/xml.el +++ b/lisp/xml.el @@ -95,10 +95,13 @@ ("apos" . "'") ("quot" . "\"") ("amp" . "&")) - "The defined entities. Entities are added to this when the DTD is parsed.") + "Alist of defined XML entities.") + +(defvar xml-parameter-entity-alist nil + "Alist of defined XML parametric entities.") (defvar xml-sub-parser nil - "Dynamically set this to a non-nil value if you want to parse an XML fragment.") + "Non-nil when the XML parser is parsing an XML fragment.") (defvar xml-validating-parser nil "Set to non-nil to get validity checking.") @@ -308,6 +311,9 @@ If PARSE-NS is non-nil, then QNAMES are expanded." ;; specs DTRT. (with-syntax-table (standard-syntax-table) (let ((case-fold-search nil) ; XML is case-sensitive. + ;; Prevent entity definitions from changing the defaults + (xml-entity-alist xml-entity-alist) + (xml-parameter-entity-alist xml-parameter-entity-alist) xml result dtd) (save-excursion (if buffer @@ -366,6 +372,9 @@ specify that the name shouldn't be given a namespace." (defun xml-parse-fragment (&optional parse-dtd parse-ns) "Parse xml-like fragments." (let ((xml-sub-parser t) + ;; Prevent entity definitions from changing the defaults + (xml-entity-alist xml-entity-alist) + (xml-parameter-entity-alist xml-parameter-entity-alist) children) (while (not (eobp)) (let ((bit (xml-parse-tag @@ -413,7 +422,7 @@ Returns one of: (buffer-substring-no-properties pos (match-beginning 0)) (xml-parse-string)))) ;; DTD for the document - ((looking-at "<!DOCTYPE") + ((looking-at "<!DOCTYPE[ \t\n\r]") (let ((dtd (xml-parse-dtd parse-ns))) (skip-syntax-forward " ") (if xml-validating-parser @@ -580,11 +589,11 @@ This follows the rule [28] in the XML specifications." ;; Get the name of the document (looking-at xml-name-regexp) (let ((dtd (list (match-string-no-properties 0) 'dtd)) - type element end-pos) + (xml-parameter-entity-alist xml-parameter-entity-alist)) (goto-char (match-end 0)) - (skip-syntax-forward " ") - ;; XML [75] + + ;; External subset (XML [75]) (cond ((looking-at "PUBLIC\\s-+") (goto-char (match-end 0)) (unless (or (re-search-forward @@ -607,119 +616,137 @@ This follows the rule [28] in the XML specifications." (error "XML: Missing System ID")) (push (list (match-string-no-properties 1) 'system) dtd))) (skip-syntax-forward " ") - (if (eq ?> (char-after)) - (forward-char) - (if (not (eq (char-after) ?\[)) - (error "XML: Bad DTD") + + (if (eq (char-after) ?>) + + ;; No internal subset (forward-char) - ;; Parse the rest of the DTD - ;; Fixme: Deal with NOTATION, PIs. - (while (not (looking-at "\\s-*\\]")) - (skip-syntax-forward " ") - (cond - - ;; Translation of rule [45] of XML specifications - ((looking-at - "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") - - (setq element (match-string-no-properties 1) - type (match-string-no-properties 2)) - (setq end-pos (match-end 0)) - ;; Translation of rule [46] of XML specifications + ;; Internal subset (XML [28b]) + (unless (eq (char-after) ?\[) + (error "XML: Bad DTD")) + (forward-char) + + ;; Parse the rest of the DTD + ;; Fixme: Deal with NOTATION, PIs. + (while (not (looking-at "\\s-*\\]")) + (skip-syntax-forward " ") + (cond + ;; Element declaration [45]: + ((looking-at "<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>") + (let ((element (match-string-no-properties 1)) + (type (match-string-no-properties 2)) + (end-pos (match-end 0))) + ;; Translation of rule [46] of XML specifications (cond - ((string-match "^EMPTY[ \t\n\r]*$" type) ;; empty declaration + ((string-match "^EMPTY[ \t\n\r]*$" type) ; empty declaration (setq type 'empty)) - ((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents + ((string-match "^ANY[ \t\n\r]*$" type) ; any type of contents (setq type 'any)) - ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47]) + ((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ; children ([47]) (setq type (xml-parse-elem-type (match-string-no-properties 1 type)))) - ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution + ((string-match "^%[^;]+;[ \t\n\r]*$" type) ; substitution nil) - (t - (if xml-validating-parser - (error "XML: (Validity) Invalid element type in the DTD")))) + (xml-validating-parser + (error "XML: (Validity) Invalid element type in the DTD"))) - ;; rule [45]: the element declaration must be unique - (if (and (assoc element dtd) - xml-validating-parser) - (error "XML: (Validity) Element declarations must be unique in a DTD (<%s>)" - element)) + ;; rule [45]: the element declaration must be unique + (and (assoc element dtd) + xml-validating-parser + (error "XML: (Validity) DTD element declarations must be unique (<%s>)" + element)) ;; Store the element in the DTD (push (list element type) dtd) - (goto-char end-pos)) - - ;; Translation of rule [52] of XML specifications - ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re - "\\)[ \t\n\r]*\\(" xml-att-def-re - "\\)*[ \t\n\r]*>")) - - ;; We don't do anything with ATTLIST currently - (goto-char (match-end 0))) - - ((looking-at "<!--") - (search-forward "-->")) - ((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re - "\\)[ \t\n\r]*\\(" xml-entity-value-re - "\\)[ \t\n\r]*>")) - (let ((name (match-string-no-properties 1)) - (value (substring (match-string-no-properties 2) 1 - (- (length (match-string-no-properties 2)) 1)))) - (goto-char (match-end 0)) - (setq xml-entity-alist - (append xml-entity-alist - (list (cons name - (with-temp-buffer - (insert value) - (goto-char (point-min)) - (xml-parse-fragment - xml-validating-parser - parse-ns)))))))) - ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re - "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+" - "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")) - (looking-at (concat "<!ENTITY[ \t\n\r]+\\(" xml-name-re - "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+" - "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\"" - "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" - "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" - "[ \t\n\r]*>"))) - (let ((name (match-string-no-properties 1)) - (file (substring (match-string-no-properties 2) 1 - (- (length (match-string-no-properties 2)) 1)))) - (goto-char (match-end 0)) - (setq xml-entity-alist - (append xml-entity-alist - (list (cons name (with-temp-buffer - (insert-file-contents file) - (goto-char (point-min)) - (xml-parse-fragment - xml-validating-parser - parse-ns)))))))) - ;; skip parameter entity declarations - ((or (looking-at (concat "<!ENTITY[ \t\n\r]+%[ \t\n\r]+\\(" xml-name-re - "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+" - "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")) - (looking-at (concat "<!ENTITY[ \t\n\r]+" - "%[ \t\n\r]+" - "\\(" xml-name-re "\\)[ \t\n\r]+" - "PUBLIC[ \t\n\r]+" - "\\(\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\"" - "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'\\)[ \t\n\r]+" - "\\(\"[^\"]+\"\\|'[^']+'\\)" - "[ \t\n\r]*>"))) - (goto-char (match-end 0))) - ;; skip parameter entities - ((looking-at (concat "%" xml-name-re ";")) - (goto-char (match-end 0))) - (t - (when xml-validating-parser - (error "XML: (Validity) Invalid DTD item")))))) + (goto-char end-pos))) + + ;; Attribute-list declaration [52] (currently unsupported): + ((looking-at (concat "<!ATTLIST[ \t\n\r]*\\(" xml-name-re + "\\)[ \t\n\r]*\\(" xml-att-def-re + "\\)*[ \t\n\r]*>")) + (goto-char (match-end 0))) + + ;; Comments (skip to end): + ((looking-at "<!--") + (search-forward "-->")) + + ;; Internal entity declarations: + ((looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" + xml-name-re "\\)[ \t\n\r]*\\(" + xml-entity-value-re "\\)[ \t\n\r]*>")) + (let* ((name (prog1 (match-string-no-properties 2) + (goto-char (match-end 0)))) + (alist (if (match-string 1) + 'xml-parameter-entity-alist + 'xml-entity-alist)) + ;; Retrieve the deplacement text: + (value (xml--entity-replacement-text + ;; Entity value, sans quotation marks: + (substring (match-string-no-properties 3) 1 -1)))) + ;; If the same entity is declared more than once, the + ;; first declaration is binding. + (unless (assoc name (symbol-value alist)) + (set alist (cons (cons name value) (symbol-value alist)))))) + + ;; External entity declarations (currently unsupported): + ((or (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" + xml-name-re "\\)[ \t\n\r]+SYSTEM[ \t\n\r]+" + "\\(\"[^\"]*\"\\|'[^']*'\\)[ \t\n\r]*>")) + (looking-at (concat "<!ENTITY[ \t\n\r]+\\(%[ \t\n\r]+\\)?\\(" + xml-name-re "\\)[ \t\n\r]+PUBLIC[ \t\n\r]+" + "\"[- \r\na-zA-Z0-9'()+,./:=?;!*#@$_%]*\"" + "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" + "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" + "[ \t\n\r]*>"))) + (goto-char (match-end 0))) + + ;; Parameter entity: + ((looking-at (concat "%\\(" xml-name-re "\\);")) + (goto-char (match-end 0)) + (let* ((entity (match-string 1)) + (end (point-marker)) + (elt (assoc entity xml-parameter-entity-alist))) + (when elt + (replace-match (cdr elt) t t) + (goto-char end)))) + + ;; Anything else: + (xml-validating-parser + (error "XML: (Validity) Invalid DTD item")))) + (if (looking-at "\\s-*]>") (goto-char (match-end 0)))) (nreverse dtd))) +(defun xml--entity-replacement-text (string) + "Return the replacement text for the entity value STRING. +The replacement text is obtained by replacing character +references and parameter-entity references." + (let ((ref-re (eval-when-compile + (concat "\\(?:&#\\([0-9]+\\)\\|&#x\\([0-9a-fA-F]+\\)\\|%\\(" + xml-name-re "\\)\\);"))) + children) + (while (string-match ref-re string) + (push (substring string 0 (match-beginning 0)) children) + (let ((remainder (substring string (match-end 0))) + ref val) + (cond ((setq ref (match-string 1 string)) + ;; Decimal character reference + (setq val (decode-char 'ucs (string-to-number ref))) + (if val (push (string val) children))) + ;; Hexadecimal character reference + ((setq ref (match-string 2 string)) + (setq val (decode-char 'ucs (string-to-number ref 16))) + (if val (push (string val) children))) + ;; Parameter entity reference + ((setq ref (match-string 3 string)) + (setq val (assoc ref xml-parameter-entity-alist)) + (if val + (push (cdr val) children) + (push (concat "%" ref ";") children)))) + (setq string remainder))) + (mapconcat 'identity (nreverse (cons string children)) ""))) + (defun xml-parse-elem-type (string) "Convert element type STRING into a Lisp structure." @@ -864,15 +891,12 @@ The first line is indented with the optional INDENT-STRING." (defalias 'xml-print 'xml-debug-print) (defun xml-escape-string (string) - "Return the string with entity substitutions made from -xml-entity-alist." + "Return STRING with entity substitutions made from `xml-entity-alist'." (mapconcat (lambda (byte) (let ((char (char-to-string byte))) (if (rassoc char xml-entity-alist) (concat "&" (car (rassoc char xml-entity-alist)) ";") char))) - ;; This differs from the non-unicode branch. Just - ;; grabbing the string works here. string "")) (defun xml-debug-print-internal (xml indent-string) |