diff options
Diffstat (limited to 'lisp/nxml/rng-uri.el')
-rw-r--r-- | lisp/nxml/rng-uri.el | 355 |
1 files changed, 355 insertions, 0 deletions
diff --git a/lisp/nxml/rng-uri.el b/lisp/nxml/rng-uri.el new file mode 100644 index 00000000000..7ed8befc90e --- /dev/null +++ b/lisp/nxml/rng-uri.el @@ -0,0 +1,355 @@ +;;; rng-uri.el --- URI parsing and manipulation + +;; Copyright (C) 2003 Free Software Foundation, Inc. + +;; Author: James Clark +;; Keywords: XML + +;; This program is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 2 of +;; the License, or (at your option) any later version. + +;; This program is distributed in the hope that it will be +;; useful, but WITHOUT ANY WARRANTY; without even the implied +;; warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR +;; PURPOSE. See the GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public +;; License along with this program; if not, write to the Free +;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +;; MA 02111-1307 USA + +;;; Commentary: + +;;; Code: + +(defun rng-file-name-uri (f) + "Return a URI for the filename F. +Multibyte characters are left as is. Use `rng-uri-escape-multibyte' to +escape them using %HH." + (setq f (expand-file-name f)) + (let ((url + (replace-regexp-in-string "[\000-\032\177<>#%\"{}|\\^[]`%?;]" + 'rng-percent-encode + f))) + (concat "file:" + (if (and (> (length url) 0) + (= (aref url 0) ?/)) + "//" + "///") + url))) + +(defun rng-uri-escape-multibyte (uri) + "Escape multibyte characters in URI." + (replace-regexp-in-string "[:nonascii:]" + 'rng-percent-encode + (encode-coding-string uri 'utf-8))) + +(defun rng-percent-encode (str) + (apply 'concat + (mapcar (lambda (ch) + (format "%%%x%x" (/ ch 16) (% ch 16))) + (string-to-list str)))) + + +(defun rng-uri-file-name (uri) + "Return the filename represented by a URI. +Signal an error if URI is not a valid file URL." + (rng-uri-file-name-1 uri nil)) + +(defun rng-uri-pattern-file-name-regexp (pattern) + "Return a regexp for filenames represented by URIs that match PATTERN." + (rng-uri-file-name-1 pattern 'match)) + +(defun rng-uri-pattern-file-name-replace-match (pattern) + (rng-uri-file-name-1 pattern 'replace)) + +;; pattern is either nil or match or replace +(defun rng-uri-file-name-1 (uri pattern) + (unless (string-match "\\`\\(?:[^%]\\|%[0-9a-fA-F]{2}\\)*\\'" uri) + (rng-uri-error "Bad escapes in URI `%s'" uri)) + (setq uri (rng-uri-unescape-multibyte uri)) + (let* ((components + (or (rng-uri-split uri) + (rng-uri-error "Cannot split URI `%s' into its components" uri))) + (scheme (nth 0 components)) + (authority (nth 1 components)) + (path (nth 2 components)) + (absolutep (string-match "\\`/" path)) + (query (nth 3 components)) + (fragment-id (nth 4 components))) + (cond ((not scheme) + (unless pattern + (rng-uri-error "URI `%s' does not have a scheme" uri))) + ((not (string= (downcase scheme) "file")) + (rng-uri-error "URI `%s' does not use the `file:' scheme" uri))) + (when (not (member authority + (cons system-name '(nil "" "localhost")))) + (rng-uri-error "URI `%s' does not start with `file:///' or `file://localhost/'" + uri)) + (when query + (rng-uri-error "`?' not escaped in file URI `%s'" uri)) + (when fragment-id + (rng-uri-error "URI `%s' has a fragment identifier" uri)) + (when (string-match ";" path) + (rng-uri-error "`;' not escaped in URI `%s'" uri)) + (when (string-match "%2[fF]" path) ;; 2f is hex code of slash + (rng-uri-error "Escaped slash in URI `%s'" uri)) + (when (and (eq system-type 'windows-nt) + absolutep + (file-name-absolute-p (substring path 1))) + (setq path (substring path 1))) + (when (and pattern (string-match "\\`\\./" path)) + (setq path (substring path 2))) + (setq path + (cond ((eq pattern 'match) + (rng-uri-unescape-unibyte-match path)) + ((eq pattern 'replace) + (rng-uri-unescape-unibyte-replace path 2)) + (t + (rng-uri-unescape-unibyte path)))) + (when (string-match "\000" path) + (rng-uri-error "URI `%s' has NUL character in path" uri)) + (when (eq pattern 'match) + (setq path + (concat (if absolutep + "\\(\\)" + "\\(\\(?:[^/]*/\\)*\\)") + path))) + (cond ((eq pattern 'match) + (concat "\\`" path "\\'")) + ((and (eq pattern 'replace) + (not absolutep)) + (concat "\\1" path)) + (t path)))) + +(defun rng-uri-error (&rest args) + (signal 'rng-uri-error (list (apply 'format args)))) + +(put 'rng-uri-error 'error-conditions '(error rng-uri-error)) +(put 'rng-uri-error 'error-message "Invalid URI") + +(defun rng-uri-split (str) + (and (string-match "\\`\\(?:\\([^:/?#]+\\):\\)?\ +\\(?://\\([^/?#]*\\)\\)?\ +\\([^?#]*\\)\ +\\(?:\\?\\([^#]*\\)\\)?\ +\\(?:#\\(\\(?:.\\|\n\\)*\\)\\)?\\'" + str) + (list (match-string 1 str) + (match-string 2 str) + (match-string 3 str) + (match-string 4 str) + (match-string 5 str)))) + +(defun rng-uri-join (scheme authority path &optional query fragment-id) + (when path + (let (parts) + (when fragment-id + (setq parts (list "#" fragment-id))) + (when query + (setq parts + (cons "?" + (cons query parts)))) + (setq parts (cons path parts)) + (when authority + (setq parts + (cons "//" + (cons authority parts)))) + (when scheme + (setq parts + (cons scheme + (cons ":" parts)))) + (apply 'concat parts)))) + +(defun rng-uri-resolve (uri-ref base-uri) + "Resolve a possibly relative URI reference into absolute form. +URI-REF is the URI reference to be resolved. +BASE-URI is the base URI to use for resolving it. +The algorithm is specified by RFC 2396. +If there is some problem with URI-REF or BASE-URI, then +URI-REF will be returned." + (let* ((components (rng-uri-split uri-ref)) + (scheme (nth 0 components)) + (authority (nth 1 components)) + (path (nth 2 components)) + (query (nth 3 components)) + (fragment-id (nth 4 components)) + (base-components (rng-uri-split base-uri))) + (if (or (not components) + scheme + (not base-components) + (not (nth 0 base-components))) + uri-ref + (setq scheme (nth 0 base-components)) + (when (not authority) + (setq authority (nth 1 base-components)) + (if (and (equal path "") (not query)) + ;; Handle same document reference by returning + ;; same URI (RFC 2396bis does this too). + (setq path (nth 2 base-components) + query (nth 3 base-components)) + (setq path (rng-resolve-path path (nth 2 base-components))))) + (rng-uri-join scheme + authority + path + query + fragment-id)))) + +;; See RFC 2396 5.2, steps 5 and 6 +(defun rng-resolve-path (path base-path) + ;; Step 5 + (if (or (string-match "\\`/" path) + (not (string-match "\\`/" base-path))) + path + ;; Step 6 + ;; (a), (b) + (let ((segments (rng-split-path path)) + (base-segments (rng-split-path base-path))) + (if (> (length base-segments) 1) + (setq segments (nconc (nbutlast base-segments) + segments)) + (setcar segments + (concat (car base-segments) (car segments)))) + ;; (d) + (let ((last-segment (last segments))) + (when (equal (car last-segment) ".") + (setcar last-segment ""))) + ;; (c) + (setq segments (delete "." segments)) + ;; (e) + (let (iter matched) + (while (progn + (setq matched nil) + (setq iter (cdr segments)) + (while (and iter (not matched)) + (if (or (not (equal (cadr iter) "..")) + (equal (car iter) "..")) + (setq iter (cdr iter)) + (setcar iter nil) + (setcar (cdr iter) + ;; (f) + (if (cddr iter) nil "")) + (setq matched t) + (setq segments (delq nil segments)))) + matched))) + (rng-join-path segments)))) + +(defun rng-relative-uri (full base) + "Return a URI that relative to BASE is equivalent to FULL. +The returned URI will be relative if possible. +Both FULL and BASE must be absolute URIs." + (let* ((components (rng-uri-split full)) + (scheme (nth 0 components)) + (authority (nth 1 components)) + (path (nth 2 components)) + (query (nth 3 components)) + (fragment-id (nth 4 components)) + (base-components (rng-uri-split base))) + (if (and components + base-components + scheme + (equal scheme + (nth 0 base-components))) + (progn + (setq scheme nil) + (when (and authority + (equal authority + (nth 1 base-components))) + (setq authority nil) + (setq path (rng-relative-path path (nth 2 base-components)))) + (rng-uri-join scheme authority path query fragment-id)) + full))) + +(defun rng-relative-path (path base-path) + (let ((segments (rng-split-path path)) + (base-segments (rng-split-path base-path))) + (when (> (length base-segments) 1) + (setq base-segments (nbutlast base-segments))) + (if (or (member "." segments) + (member ".." segments) + (member "." base-segments) + (member ".." base-segments)) + path + (while (and segments + base-segments + (string= (car segments) + (car base-segments))) + (setq segments (cdr segments)) + (setq base-segments (cdr base-segments))) + (while base-segments + (setq base-segments (cdr base-segments)) + (setq segments (cons ".." segments))) + (when (equal (car segments) "") + (setq segments (cons "." segments))) + (rng-join-path segments)))) + +(defun rng-split-path (path) + (let ((start 0) + segments) + (while (string-match "/" path start) + (setq segments (cons (substring path start (match-beginning 0)) + segments)) + (setq start (match-end 0))) + (nreverse (cons (substring path start) segments)))) + +(defun rng-join-path (segments) + (and segments + (mapconcat 'identity segments "/"))) + +(defun rng-uri-unescape-multibyte (str) + (replace-regexp-in-string "\\(?:%[89a-fA-F][0-9a-fA-F]\\)+" + 'rng-multibyte-percent-decode + str)) + +(defun rng-multibyte-percent-decode (str) + (decode-coding-string (apply 'string + (mapcar (lambda (h) (string-to-number h 16)) + (split-string str "%"))) + 'utf-8)) + +(defun rng-uri-unescape-unibyte (str) + (replace-regexp-in-string "%[0-7][0-9a-fA-F]" + (lambda (h) + (string-to-number (substring h 1) 16)) + str + t + t)) + +(defun rng-uri-unescape-unibyte-match (str) + (replace-regexp-in-string "%[0-7][0-9a-fA-F]\\|[^%]" + (lambda (match) + (if (string= match "*") + "\\([^/]*\\)" + (regexp-quote + (if (= (length match) 1) + match + (string-to-number (substring match 1) + 16))))) + str + t + t)) + +(defun rng-uri-unescape-unibyte-replace (str next-match-index) + (replace-regexp-in-string + "%[0-7][0-9a-fA-F]\\|[^%]" + (lambda (match) + (if (string= match "*") + (let ((n next-match-index)) + (setq next-match-index (1+ n)) + (format "\\%s" n)) + (let ((ch (if (= (length match) 1) + (aref match 0) + (string-to-number (substring match 1) + 16)))) + (if (eq ch ?\\) + (string ?\\ ?\\) + (string ch))))) + str + t + t)) + +(provide 'rng-uri) + +;;; rng-uri.el ends here |