diff options
author | Mark A. Hershberger <mah@everybody.org> | 2007-11-23 06:58:00 +0000 |
---|---|---|
committer | Mark A. Hershberger <mah@everybody.org> | 2007-11-23 06:58:00 +0000 |
commit | 8cd39fb3c4cf47d2464f00eaa69c587e17dd11cc (patch) | |
tree | 7bcd47a7dcbbad100dd3e8f8a7e08b48353c58a8 /lisp/nxml/rng-cmpct.el | |
parent | f7cf8b2009b0bc2526d50c3455f737a543122dd4 (diff) | |
download | emacs-8cd39fb3c4cf47d2464f00eaa69c587e17dd11cc.tar.gz emacs-8cd39fb3c4cf47d2464f00eaa69c587e17dd11cc.tar.bz2 emacs-8cd39fb3c4cf47d2464f00eaa69c587e17dd11cc.zip |
Initial merge of nxml
Diffstat (limited to 'lisp/nxml/rng-cmpct.el')
-rw-r--r-- | lisp/nxml/rng-cmpct.el | 937 |
1 files changed, 937 insertions, 0 deletions
diff --git a/lisp/nxml/rng-cmpct.el b/lisp/nxml/rng-cmpct.el new file mode 100644 index 00000000000..29699b60122 --- /dev/null +++ b/lisp/nxml/rng-cmpct.el @@ -0,0 +1,937 @@ +;;; rng-cmpct.el --- parsing of RELAX NG Compact Syntax schemas + +;; Copyright (C) 2003 Free Software Foundation, Inc. + +;; Author: James Clark +;; Keywords: XML, RelaxNG + +;; This program is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 2 of +;; the License, or (at your option) any later version. + +;; This program is distributed in the hope that it will be +;; useful, but WITHOUT ANY WARRANTY; without even the implied +;; warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. See the GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public +;; License along with this program; if not, write to the Free +;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +;; MA 02111-1307 USA + +;;; Commentary: + +;; This parses a RELAX NG Compact Syntax schema into the form +;; specified in rng-pttrn.el. +;; +;; RELAX NG Compact Syntax is specified by +;; http://relaxng.org/compact.html +;; +;; This file uses the prefix "rng-c-". + +;;; Code: + +(require 'nxml-util) +(require 'rng-util) +(require 'rng-uri) +(require 'rng-pttrn) + +;;;###autoload +(defun rng-c-load-schema (filename) + "Load a schema in RELAX NG compact syntax from FILENAME. +Return a pattern." + (rng-c-parse-file filename)) + +;;; Error handling + +(put 'rng-c-incorrect-schema + 'error-conditions + '(error rng-error nxml-file-parse-error rng-c-incorrect-schema)) + +(put 'rng-c-incorrect-schema + 'error-message + "Incorrect schema") + +(defun rng-c-signal-incorrect-schema (filename pos message) + (nxml-signal-file-parse-error filename + pos + message + 'rng-c-incorrect-schema)) + +;;; Lexing + +(defconst rng-c-keywords + '("attribute" + "default" + "datatypes" + "div" + "element" + "empty" + "external" + "grammar" + "include" + "inherit" + "list" + "mixed" + "namespace" + "notAllowed" + "parent" + "start" + "string" + "text" + "token") + "List of strings that are keywords in the compact syntax.") + +(defconst rng-c-anchored-keyword-re + (concat "\\`\\(" (regexp-opt rng-c-keywords) "\\)\\'") + "Regular expression to match a keyword in the compact syntax.") + +(defvar rng-c-syntax-table nil + "Syntax table for parsing the compact syntax.") + +(if rng-c-syntax-table + () + (setq rng-c-syntax-table (make-syntax-table)) + (modify-syntax-entry ?# "<" rng-c-syntax-table) + (modify-syntax-entry ?\n ">" rng-c-syntax-table) + (modify-syntax-entry ?- "w" rng-c-syntax-table) + (modify-syntax-entry ?. "w" rng-c-syntax-table) + (modify-syntax-entry ?_ "w" rng-c-syntax-table) + (modify-syntax-entry ?: "_" rng-c-syntax-table)) + +(defconst rng-c-literal-1-re + "'\\(''\\([^']\\|'[^']\\|''[^']\\)*''\\|[^'\n]*\\)'" + "Regular expression to match a single-quoted literal.") + +(defconst rng-c-literal-2-re + (replace-regexp-in-string "'" "\"" rng-c-literal-1-re) + "Regular expression to match a double-quoted literal.") + +(defconst rng-c-ncname-re "\\w+") + +(defconst rng-c-anchored-ncname-re + (concat "\\`" rng-c-ncname-re "\\'")) + +(defconst rng-c-token-re + (concat "[&|]=" "\\|" + "[][()|&,*+?{}~=-]" "\\|" + rng-c-literal-1-re "\\|" + rng-c-literal-2-re "\\|" + rng-c-ncname-re "\\(:\\(\\*\\|" rng-c-ncname-re "\\)\\)?" "\\|" + "\\\\" rng-c-ncname-re "\\|" + ">>") + "Regular expression to match a token in the compact syntax.") + +(defun rng-c-init-buffer () + (setq case-fold-search nil) ; automatically becomes buffer-local when set + (set-buffer-multibyte t) + (set-syntax-table rng-c-syntax-table)) + +(defvar rng-c-current-token nil) +(make-variable-buffer-local 'rng-c-current-token) + +(defun rng-c-advance () + (cond ((looking-at rng-c-token-re) + (setq rng-c-current-token (match-string 0)) + (goto-char (match-end 0)) + (forward-comment (point-max))) + ((= (point) (point-max)) + (setq rng-c-current-token "")) + (t (rng-c-error "Invalid token")))) + +(defconst rng-c-anchored-datatype-name-re + (concat "\\`" rng-c-ncname-re ":" rng-c-ncname-re "\\'")) + +(defsubst rng-c-current-token-keyword-p () + (string-match rng-c-anchored-keyword-re rng-c-current-token)) + +(defsubst rng-c-current-token-prefixed-name-p () + (string-match rng-c-anchored-datatype-name-re rng-c-current-token)) + +(defsubst rng-c-current-token-literal-p () + (string-match "\\`['\"]" rng-c-current-token)) + +(defsubst rng-c-current-token-quoted-identifier-p () + (string-match "\\`\\\\" rng-c-current-token)) + +(defsubst rng-c-current-token-ncname-p () + (string-match rng-c-anchored-ncname-re rng-c-current-token)) + +(defsubst rng-c-current-token-ns-name-p () + (let ((len (length rng-c-current-token))) + (and (> len 0) + (= (aref rng-c-current-token (- len 1)) ?*)))) + +;;; Namespaces + +(defvar rng-c-inherit-namespace nil) + +(defvar rng-c-default-namespace nil) + +(defvar rng-c-default-namespace-declared nil) + +(defvar rng-c-namespace-decls nil + "Alist of namespace declarations.") + +(defconst rng-c-no-namespace nil) + +(defun rng-c-declare-standard-namespaces () + (setq rng-c-namespace-decls + (cons (cons "xml" nxml-xml-namespace-uri) + rng-c-namespace-decls)) + (when (and (not rng-c-default-namespace-declared) + rng-c-inherit-namespace) + (setq rng-c-default-namespace rng-c-inherit-namespace))) + +(defun rng-c-expand-name (prefixed-name) + (let ((i (string-match ":" prefixed-name))) + (rng-make-name (rng-c-lookup-prefix (substring prefixed-name + 0 + i)) + (substring prefixed-name (+ i 1))))) + +(defun rng-c-lookup-prefix (prefix) + (let ((binding (assoc prefix rng-c-namespace-decls))) + (or binding (rng-c-error "Undefined prefix %s" prefix)) + (cdr binding))) + +(defun rng-c-unqualified-namespace (attribute) + (if attribute + rng-c-no-namespace + rng-c-default-namespace)) + +(defun rng-c-make-context () + (cons rng-c-default-namespace rng-c-namespace-decls)) + +;;; Datatypes + +(defconst rng-string-datatype + (rng-make-datatype rng-builtin-datatypes-uri "string")) + +(defconst rng-token-datatype + (rng-make-datatype rng-builtin-datatypes-uri "token")) + +(defvar rng-c-datatype-decls nil + "Alist of datatype declarations. +Contains a list of pairs (PREFIX . URI) where PREFIX is a string +and URI is a symbol.") + +(defun rng-c-declare-standard-datatypes () + (setq rng-c-datatype-decls + (cons (cons "xsd" rng-xsd-datatypes-uri) + rng-c-datatype-decls))) + +(defun rng-c-lookup-datatype-prefix (prefix) + (let ((binding (assoc prefix rng-c-datatype-decls))) + (or binding (rng-c-error "Undefined prefix %s" prefix)) + (cdr binding))) + +(defun rng-c-expand-datatype (prefixed-name) + (let ((i (string-match ":" prefixed-name))) + (rng-make-datatype + (rng-c-lookup-datatype-prefix (substring prefixed-name 0 i)) + (substring prefixed-name (+ i 1))))) + +;;; Grammars + +(defvar rng-c-current-grammar nil) +(defvar rng-c-parent-grammar nil) + +(defun rng-c-make-grammar () + (make-hash-table :test 'equal)) + +(defconst rng-c-about-override-slot 0) +(defconst rng-c-about-combine-slot 1) + +(defun rng-c-lookup-create (name grammar) + "Return a def object for NAME. A def object is a pair +\(ABOUT . REF) where REF is returned by `rng-make-ref'. ABOUT is a +two-element vector [OVERRIDE COMBINE]. COMBINE is either nil, choice +or interleave. OVERRIDE is either nil, require or t." + (let ((def (gethash name grammar))) + (if def + def + (progn + (setq def (cons (vector nil nil) (rng-make-ref name))) + (puthash name def grammar) + def)))) + +(defun rng-c-make-ref (name) + (or rng-c-current-grammar + (rng-c-error "Reference not in a grammar")) + (cdr (rng-c-lookup-create name rng-c-current-grammar))) + +(defun rng-c-make-parent-ref (name) + (or rng-c-parent-grammar + (rng-c-error "Reference to non-existent parent grammar")) + (cdr (rng-c-lookup-create name rng-c-parent-grammar))) + +(defvar rng-c-overrides nil + "Contains a list of (NAME . DEF) pairs.") + +(defun rng-c-merge-combine (def combine name) + (let* ((about (car def)) + (current-combine (aref about rng-c-about-combine-slot))) + (if combine + (if current-combine + (or (eq combine current-combine) + (rng-c-error "Inconsistent combine for %s" name)) + (aset about rng-c-about-combine-slot combine)) + current-combine))) + +(defun rng-c-prepare-define (name combine in-include) + (let* ((def (rng-c-lookup-create name rng-c-current-grammar)) + (about (car def)) + (overridden (aref about rng-c-about-override-slot))) + (and in-include + (setq rng-c-overrides (cons (cons name def) rng-c-overrides))) + (cond (overridden (and (eq overridden 'require) + (aset about rng-c-about-override-slot t)) + nil) + (t (setq combine (rng-c-merge-combine def combine name)) + (and (rng-ref-get (cdr def)) + (not combine) + (rng-c-error "Duplicate definition of %s" name)) + def)))) + +(defun rng-c-start-include (overrides) + (mapcar (lambda (name-def) + (let* ((def (cdr name-def)) + (about (car def)) + (save (aref about rng-c-about-override-slot))) + (aset about rng-c-about-override-slot 'require) + (cons save name-def))) + overrides)) + +(defun rng-c-end-include (overrides) + (mapcar (lambda (o) + (let* ((saved (car o)) + (name-def (cdr o)) + (name (car name-def)) + (def (cdr name-def)) + (about (car def))) + (and (eq (aref about rng-c-about-override-slot) 'require) + (rng-c-error "Definition of %s in include did not override definition in included file" name)) + (aset about rng-c-about-override-slot saved))) + overrides)) + +(defun rng-c-define (def value) + (and def + (let ((current-value (rng-ref-get (cdr def)))) + (rng-ref-set (cdr def) + (if current-value + (if (eq (aref (car def) rng-c-about-combine-slot) + 'choice) + (rng-make-choice (list current-value value)) + (rng-make-interleave (list current-value value))) + value))))) + +(defun rng-c-finish-grammar () + (maphash (lambda (key def) + (or (rng-ref-get (cdr def)) + (rng-c-error "Reference to undefined pattern %s" key))) + rng-c-current-grammar) + (rng-ref-get (cdr (or (gethash 'start rng-c-current-grammar) + (rng-c-error "No definition of start"))))) + +;;; Parsing + +(defvar rng-c-escape-positions nil) +(make-variable-buffer-local 'rng-c-escape-positions) + +(defvar rng-c-file-name nil) +(make-variable-buffer-local 'rng-c-file-name) + +(defvar rng-c-file-index nil) + +(defun rng-c-parse-file (filename &optional context) + (save-excursion + (set-buffer (get-buffer-create (rng-c-buffer-name context))) + (erase-buffer) + (rng-c-init-buffer) + (setq rng-c-file-name + (car (insert-file-contents filename))) + (setq rng-c-escape-positions nil) + (rng-c-process-escapes) + (rng-c-parse-top-level context))) + +(defun rng-c-buffer-name (context) + (concat " *RNC Input" + (if context + (concat "<" + (number-to-string (setq rng-c-file-index + (1+ rng-c-file-index))) + ">*") + (setq rng-c-file-index 1) + "*"))) + +(defun rng-c-process-escapes () + ;; Check for any nuls, since we will use nul chars + ;; for internal purposes. + (let ((pos (search-forward "\C-@" nil t))) + (and pos + (rng-c-error "Nul character found (binary file?)"))) + (let ((offset 0)) + (while (re-search-forward "\\\\x+{\\([0-9a-fA-F]+\\)}" + (point-max) + t) + (let* ((ch (decode-char 'ucs (string-to-number (match-string 1) 16)))) + (if (and ch (> ch 0)) + (let ((begin (match-beginning 0)) + (end (match-end 0))) + (delete-region begin end) + ;; Represent an escaped newline by nul, so + ;; that we can distinguish it from a literal newline. + ;; We will translate it back into a real newline later. + (insert (if (eq ch ?\n) 0 ch)) + (setq offset (+ offset (- end begin 1))) + (setq rng-c-escape-positions + (cons (cons (point) offset) + rng-c-escape-positions))) + (rng-c-error "Invalid character escape"))))) + (goto-char 1)) + +(defun rng-c-translate-position (pos) + (let ((tem rng-c-escape-positions)) + (while (and tem + (> (caar tem) pos)) + (setq tem (cdr tem))) + (if tem + (+ pos (cdar tem)) + pos))) + +(defun rng-c-error (&rest args) + (rng-c-signal-incorrect-schema rng-c-file-name + (rng-c-translate-position (point)) + (apply 'format args))) + +(defun rng-c-parse-top-level (context) + (let ((rng-c-namespace-decls nil) + (rng-c-default-namespace nil) + (rng-c-datatype-decls nil)) + (goto-char (point-min)) + (forward-comment (point-max)) + (rng-c-advance) + (rng-c-parse-decls) + (let ((p (if (eq context 'include) + (if (rng-c-implicit-grammar-p) + (rng-c-parse-grammar-body "") + (rng-c-parse-included-grammar)) + (if (rng-c-implicit-grammar-p) + (rng-c-parse-implicit-grammar) + (rng-c-parse-pattern))))) + (or (string-equal rng-c-current-token "") + (rng-c-error "Unexpected characters after pattern")) + p))) + +(defun rng-c-parse-included-grammar () + (or (string-equal rng-c-current-token "grammar") + (rng-c-error "Included schema is not a grammar")) + (rng-c-advance) + (rng-c-expect "{") + (rng-c-parse-grammar-body "}")) + +(defun rng-c-implicit-grammar-p () + (or (and (or (rng-c-current-token-prefixed-name-p) + (rng-c-current-token-quoted-identifier-p) + (and (rng-c-current-token-ncname-p) + (not (rng-c-current-token-keyword-p)))) + (looking-at "\\[")) + (and (string-equal rng-c-current-token "[") + (rng-c-parse-lead-annotation) + nil) + (member rng-c-current-token '("div" "include" "")) + (looking-at "[|&]?="))) + +(defun rng-c-parse-decls () + (setq rng-c-default-namespace-declared nil) + (while (progn + (let ((binding + (assoc rng-c-current-token + '(("namespace" . rng-c-parse-namespace) + ("datatypes" . rng-c-parse-datatypes) + ("default" . rng-c-parse-default))))) + (if binding + (progn + (rng-c-advance) + (funcall (cdr binding)) + t) + nil)))) + (rng-c-declare-standard-datatypes) + (rng-c-declare-standard-namespaces)) + +(defun rng-c-parse-datatypes () + (let ((prefix (rng-c-parse-identifier-or-keyword))) + (or (not (assoc prefix rng-c-datatype-decls)) + (rng-c-error "Duplicate datatypes declaration for prefix %s" prefix)) + (rng-c-expect "=") + (setq rng-c-datatype-decls + (cons (cons prefix + (rng-make-datatypes-uri (rng-c-parse-literal))) + rng-c-datatype-decls)))) + +(defun rng-c-parse-namespace () + (rng-c-declare-namespace nil + (rng-c-parse-identifier-or-keyword))) + +(defun rng-c-parse-default () + (rng-c-expect "namespace") + (rng-c-declare-namespace t + (if (string-equal rng-c-current-token "=") + nil + (rng-c-parse-identifier-or-keyword)))) + +(defun rng-c-declare-namespace (declare-default prefix) + (rng-c-expect "=") + (let ((ns (cond ((string-equal rng-c-current-token "inherit") + (rng-c-advance) + rng-c-inherit-namespace) + (t + (nxml-make-namespace (rng-c-parse-literal)))))) + (and prefix + (or (not (assoc prefix rng-c-namespace-decls)) + (rng-c-error "Duplicate namespace declaration for prefix %s" + prefix)) + (setq rng-c-namespace-decls + (cons (cons prefix ns) rng-c-namespace-decls))) + (and declare-default + (or (not rng-c-default-namespace-declared) + (rng-c-error "Duplicate default namespace declaration")) + (setq rng-c-default-namespace-declared t) + (setq rng-c-default-namespace ns)))) + +(defun rng-c-parse-implicit-grammar () + (let* ((rng-c-parent-grammar rng-c-current-grammar) + (rng-c-current-grammar (rng-c-make-grammar))) + (rng-c-parse-grammar-body "") + (rng-c-finish-grammar))) + +(defun rng-c-parse-grammar-body (close-token &optional in-include) + (while (not (string-equal rng-c-current-token close-token)) + (cond ((rng-c-current-token-keyword-p) + (let ((kw (intern rng-c-current-token))) + (cond ((eq kw 'start) + (rng-c-parse-define 'start in-include)) + ((eq kw 'div) + (rng-c-advance) + (rng-c-parse-div in-include)) + ((eq kw 'include) + (and in-include + (rng-c-error "Nested include")) + (rng-c-advance) + (rng-c-parse-include)) + (t (rng-c-error "Invalid grammar keyword"))))) + ((rng-c-current-token-ncname-p) + (if (looking-at "\\[") + (rng-c-parse-annotation-element) + (rng-c-parse-define rng-c-current-token + in-include))) + ((rng-c-current-token-quoted-identifier-p) + (if (looking-at "\\[") + (rng-c-parse-annotation-element) + (rng-c-parse-define (substring rng-c-current-token 1) + in-include))) + ((rng-c-current-token-prefixed-name-p) + (rng-c-parse-annotation-element)) + ((string-equal rng-c-current-token "[") + (rng-c-parse-lead-annotation) + (and (string-equal rng-c-current-token close-token) + (rng-c-error "Missing annotation subject")) + (and (looking-at "\\[") + (rng-c-error "Leading annotation applied to annotation"))) + (t (rng-c-error "Invalid grammar content")))) + (or (string-equal rng-c-current-token "") + (rng-c-advance))) + +(defun rng-c-parse-div (in-include) + (rng-c-expect "{") + (rng-c-parse-grammar-body "}" in-include)) + +(defun rng-c-parse-include () + (let* ((filename (rng-c-expand-file (rng-c-parse-literal))) + (rng-c-inherit-namespace (rng-c-parse-opt-inherit)) + overrides) + (cond ((string-equal rng-c-current-token "{") + (rng-c-advance) + (let ((rng-c-overrides nil)) + (rng-c-parse-grammar-body "}" t) + (setq overrides rng-c-overrides)) + (setq overrides (rng-c-start-include overrides)) + (rng-c-parse-file filename 'include) + (rng-c-end-include overrides)) + (t (rng-c-parse-file filename 'include))))) + +(defun rng-c-parse-define (name in-include) + (rng-c-advance) + (let ((assign (assoc rng-c-current-token + '(("=" . nil) + ("|=" . choice) + ("&=" . interleave))))) + (or assign + (rng-c-error "Expected assignment operator")) + (rng-c-advance) + (let ((ref (rng-c-prepare-define name (cdr assign) in-include))) + (rng-c-define ref (rng-c-parse-pattern))))) + +(defvar rng-c-had-except nil) + +(defun rng-c-parse-pattern () + (let* ((rng-c-had-except nil) + (p (rng-c-parse-repeated)) + (op (assoc rng-c-current-token + '(("|" . rng-make-choice) + ("," . rng-make-group) + ("&" . rng-make-interleave))))) + (if op + (if rng-c-had-except + (rng-c-error "Parentheses required around pattern using -") + (let* ((patterns (cons p nil)) + (tail patterns) + (connector rng-c-current-token)) + (while (progn + (rng-c-advance) + (let ((newcdr (cons (rng-c-parse-repeated) nil))) + (setcdr tail newcdr) + (setq tail newcdr)) + (string-equal rng-c-current-token connector))) + (funcall (cdr op) patterns))) + p))) + +(defun rng-c-parse-repeated () + (let ((p (rng-c-parse-follow-annotations + (rng-c-parse-primary))) + (op (assoc rng-c-current-token + '(("*" . rng-make-zero-or-more) + ("+" . rng-make-one-or-more) + ("?" . rng-make-optional))))) + (if op + (if rng-c-had-except + (rng-c-error "Parentheses required around pattern using -") + (rng-c-parse-follow-annotations + (progn + (rng-c-advance) + (funcall (cdr op) p)))) + p))) + +(defun rng-c-parse-primary () + "Parse a primary expression. The current token must be the first +token of the expression. After parsing the current token should be +token following the primary expression." + (cond ((rng-c-current-token-keyword-p) + (let ((parse-function (get (intern rng-c-current-token) + 'rng-c-pattern))) + (or parse-function + (rng-c-error "Keyword %s does not introduce a pattern" + rng-c-current-token)) + (rng-c-advance) + (funcall parse-function))) + ((rng-c-current-token-ncname-p) + (rng-c-advance-with (rng-c-make-ref rng-c-current-token))) + ((string-equal rng-c-current-token "(") + (rng-c-advance) + (let ((p (rng-c-parse-pattern))) + (rng-c-expect ")") + p)) + ((rng-c-current-token-prefixed-name-p) + (let ((name (rng-c-expand-datatype rng-c-current-token))) + (rng-c-advance) + (rng-c-parse-data name))) + ((rng-c-current-token-literal-p) + (rng-make-value rng-token-datatype (rng-c-parse-literal) nil)) + ((rng-c-current-token-quoted-identifier-p) + (rng-c-advance-with + (rng-c-make-ref (substring rng-c-current-token 1)))) + ((string-equal rng-c-current-token "[") + (rng-c-parse-lead-annotation) + (rng-c-parse-primary)) + (t (rng-c-error "Invalid pattern")))) + +(defun rng-c-parse-parent () + (and (rng-c-current-token-keyword-p) + (rng-c-error "Keyword following parent was not quoted" + rng-c-current-token)) + (rng-c-make-parent-ref (rng-c-parse-identifier-or-keyword))) + +(defun rng-c-parse-literal () + (rng-c-fix-escaped-newlines + (apply 'concat (rng-c-parse-literal-segments)))) + +(defun rng-c-parse-literal-segments () + (let ((str (rng-c-parse-literal-segment))) + (cons str + (cond ((string-equal rng-c-current-token "~") + (rng-c-advance) + (rng-c-parse-literal-segments)) + (t nil))))) + +(defun rng-c-parse-literal-segment () + (or (rng-c-current-token-literal-p) + (rng-c-error "Expected a literal")) + (rng-c-advance-with + (let ((n (if (and (>= (length rng-c-current-token) 6) + (eq (aref rng-c-current-token 0) + (aref rng-c-current-token 1))) + 3 + 1))) + (substring rng-c-current-token n (- n))))) + +(defun rng-c-fix-escaped-newlines (str) + (let ((pos 0)) + (while (progn + (let ((n (string-match "\C-@" str pos))) + (and n + (aset str n ?\n) + (setq pos (1+ n))))))) + str) + +(defun rng-c-parse-identifier-or-keyword () + (cond ((rng-c-current-token-ncname-p) + (rng-c-advance-with rng-c-current-token)) + ((rng-c-current-token-quoted-identifier-p) + (rng-c-advance-with (substring rng-c-current-token 1))) + (t (rng-c-error "Expected identifier or keyword")))) + +(put 'string 'rng-c-pattern 'rng-c-parse-string) +(put 'token 'rng-c-pattern 'rng-c-parse-token) +(put 'element 'rng-c-pattern 'rng-c-parse-element) +(put 'attribute 'rng-c-pattern 'rng-c-parse-attribute) +(put 'list 'rng-c-pattern 'rng-c-parse-list) +(put 'mixed 'rng-c-pattern 'rng-c-parse-mixed) +(put 'text 'rng-c-pattern 'rng-c-parse-text) +(put 'empty 'rng-c-pattern 'rng-c-parse-empty) +(put 'notAllowed 'rng-c-pattern 'rng-c-parse-not-allowed) +(put 'grammar 'rng-c-pattern 'rng-c-parse-grammar) +(put 'parent 'rng-c-pattern 'rng-c-parse-parent) +(put 'external 'rng-c-pattern 'rng-c-parse-external) + +(defun rng-c-parse-element () + (let ((name-class (rng-c-parse-name-class nil))) + (rng-c-expect "{") + (let ((pattern (rng-c-parse-pattern))) + (rng-c-expect "}") + (rng-make-element name-class pattern)))) + +(defun rng-c-parse-attribute () + (let ((name-class (rng-c-parse-name-class 'attribute))) + (rng-c-expect "{") + (let ((pattern (rng-c-parse-pattern))) + (rng-c-expect "}") + (rng-make-attribute name-class pattern)))) + +(defun rng-c-parse-name-class (attribute) + (let* ((rng-c-had-except nil) + (name-class + (rng-c-parse-follow-annotations + (rng-c-parse-primary-name-class attribute)))) + (if (string-equal rng-c-current-token "|") + (let* ((name-classes (cons name-class nil)) + (tail name-classes)) + (or (not rng-c-had-except) + (rng-c-error "Parentheses required around name-class using - operator")) + (while (progn + (rng-c-advance) + (let ((newcdr + (cons (rng-c-parse-follow-annotations + (rng-c-parse-primary-name-class attribute)) + nil))) + (setcdr tail newcdr) + (setq tail newcdr)) + (string-equal rng-c-current-token "|"))) + (rng-make-choice-name-class name-classes)) + name-class))) + +(defun rng-c-parse-primary-name-class (attribute) + (cond ((rng-c-current-token-ncname-p) + (rng-c-advance-with + (rng-make-name-name-class + (rng-make-name (rng-c-unqualified-namespace attribute) + rng-c-current-token)))) + ((rng-c-current-token-prefixed-name-p) + (rng-c-advance-with + (rng-make-name-name-class + (rng-c-expand-name rng-c-current-token)))) + ((string-equal rng-c-current-token "*") + (let ((except (rng-c-parse-opt-except-name-class attribute))) + (if except + (rng-make-any-name-except-name-class except) + (rng-make-any-name-name-class)))) + ((rng-c-current-token-ns-name-p) + (let* ((ns + (rng-c-lookup-prefix (substring rng-c-current-token + 0 + -2))) + (except (rng-c-parse-opt-except-name-class attribute))) + (if except + (rng-make-ns-name-except-name-class ns except) + (rng-make-ns-name-name-class ns)))) + ((string-equal rng-c-current-token "(") + (rng-c-advance) + (let ((name-class (rng-c-parse-name-class attribute))) + (rng-c-expect ")") + name-class)) + ((rng-c-current-token-quoted-identifier-p) + (rng-c-advance-with + (rng-make-name-name-class + (rng-make-name (rng-c-unqualified-namespace attribute) + (substring rng-c-current-token 1))))) + ((string-equal rng-c-current-token "[") + (rng-c-parse-lead-annotation) + (rng-c-parse-primary-name-class attribute)) + (t (rng-c-error "Bad name class")))) + +(defun rng-c-parse-opt-except-name-class (attribute) + (rng-c-advance) + (and (string-equal rng-c-current-token "-") + (or (not rng-c-had-except) + (rng-c-error "Parentheses required around name-class using - operator")) + (setq rng-c-had-except t) + (progn + (rng-c-advance) + (rng-c-parse-primary-name-class attribute)))) + +(defun rng-c-parse-mixed () + (rng-c-expect "{") + (let ((pattern (rng-make-mixed (rng-c-parse-pattern)))) + (rng-c-expect "}") + pattern)) + +(defun rng-c-parse-list () + (rng-c-expect "{") + (let ((pattern (rng-make-list (rng-c-parse-pattern)))) + (rng-c-expect "}") + pattern)) + +(defun rng-c-parse-text () + (rng-make-text)) + +(defun rng-c-parse-empty () + (rng-make-empty)) + +(defun rng-c-parse-not-allowed () + (rng-make-not-allowed)) + +(defun rng-c-parse-string () + (rng-c-parse-data rng-string-datatype)) + +(defun rng-c-parse-token () + (rng-c-parse-data rng-token-datatype)) + +(defun rng-c-parse-data (name) + (if (rng-c-current-token-literal-p) + (rng-make-value name + (rng-c-parse-literal) + (and (car name) + (rng-c-make-context))) + (let ((params (rng-c-parse-optional-params))) + (if (string-equal rng-c-current-token "-") + (progn + (if rng-c-had-except + (rng-c-error "Parentheses required around pattern using -") + (setq rng-c-had-except t)) + (rng-c-advance) + (rng-make-data-except name + params + (rng-c-parse-primary))) + (rng-make-data name params))))) + +(defun rng-c-parse-optional-params () + (and (string-equal rng-c-current-token "{") + (let* ((head (cons nil nil)) + (tail head)) + (rng-c-advance) + (while (not (string-equal rng-c-current-token "}")) + (and (string-equal rng-c-current-token "[") + (rng-c-parse-lead-annotation)) + (let ((name (rng-c-parse-identifier-or-keyword))) + (rng-c-expect "=") + (let ((newcdr (cons (cons (intern name) + (rng-c-parse-literal)) + nil))) + (setcdr tail newcdr) + (setq tail newcdr)))) + (rng-c-advance) + (cdr head)))) + +(defun rng-c-parse-external () + (let* ((filename (rng-c-expand-file (rng-c-parse-literal))) + (rng-c-inherit-namespace (rng-c-parse-opt-inherit))) + (rng-c-parse-file filename 'external))) + +(defun rng-c-expand-file (uri) + (condition-case err + (rng-uri-file-name (rng-uri-resolve uri + (rng-file-name-uri rng-c-file-name))) + (rng-uri-error + (rng-c-error (cadr err))))) + +(defun rng-c-parse-opt-inherit () + (cond ((string-equal rng-c-current-token "inherit") + (rng-c-advance) + (rng-c-expect "=") + (rng-c-lookup-prefix (rng-c-parse-identifier-or-keyword))) + (t rng-c-default-namespace))) + +(defun rng-c-parse-grammar () + (rng-c-expect "{") + (let* ((rng-c-parent-grammar rng-c-current-grammar) + (rng-c-current-grammar (rng-c-make-grammar))) + (rng-c-parse-grammar-body "}") + (rng-c-finish-grammar))) + +(defun rng-c-parse-lead-annotation () + (rng-c-parse-annotation-body) + (and (string-equal rng-c-current-token "[") + (rng-c-error "Multiple leading annotations"))) + +(defun rng-c-parse-follow-annotations (obj) + (while (string-equal rng-c-current-token ">>") + (rng-c-advance) + (if (rng-c-current-token-prefixed-name-p) + (rng-c-advance) + (rng-c-parse-identifier-or-keyword)) + (rng-c-parse-annotation-body t)) + obj) + +(defun rng-c-parse-annotation-element () + (rng-c-advance) + (rng-c-parse-annotation-body t)) + +;; XXX need stricter checking of attribute names +;; XXX don't allow attributes after text + +(defun rng-c-parse-annotation-body (&optional allow-text) + "Current token is [. Parse up to matching ]. Current token after +parse is token following ]." + (or (string-equal rng-c-current-token "[") + (rng-c-error "Expected [")) + (rng-c-advance) + (while (not (string-equal rng-c-current-token "]")) + (cond ((rng-c-current-token-literal-p) + (or allow-text + (rng-c-error "Out of place text within annotation")) + (rng-c-parse-literal)) + (t + (if (rng-c-current-token-prefixed-name-p) + (rng-c-advance) + (rng-c-parse-identifier-or-keyword)) + (cond ((string-equal rng-c-current-token "[") + (rng-c-parse-annotation-body t)) + ((string-equal rng-c-current-token "=") + (rng-c-advance) + (rng-c-parse-literal)) + (t (rng-c-error "Expected = or [")))))) + (rng-c-advance)) + +(defun rng-c-advance-with (pattern) + (rng-c-advance) + pattern) + +(defun rng-c-expect (str) + (or (string-equal rng-c-current-token str) + (rng-c-error "Expected `%s' but got `%s'" str rng-c-current-token)) + (rng-c-advance)) + +(provide 'rng-cmpct) + +;;; rng-cmpct.el |