diff options
Diffstat (limited to 'lisp/net/shr.el')
-rw-r--r-- | lisp/net/shr.el | 745 |
1 files changed, 407 insertions, 338 deletions
diff --git a/lisp/net/shr.el b/lisp/net/shr.el index dcc1654aba8..a0c9eba4144 100644 --- a/lisp/net/shr.el +++ b/lisp/net/shr.el @@ -33,11 +33,13 @@ (eval-when-compile (require 'cl)) (eval-when-compile (require 'url)) ;For url-filename's setf handler. (require 'browse-url) +(require 'subr-x) +(require 'dom) (defgroup shr nil "Simple HTML Renderer" - :version "24.1" - :group 'hypermedia) + :version "25.1" + :group 'web) (defcustom shr-max-image-proportion 0.9 "How big pictures displayed are in relation to the window they're in. @@ -76,11 +78,12 @@ If nil, don't draw horizontal table lines." :group 'shr :type 'character) -(defcustom shr-width fill-column +(defcustom shr-width nil "Frame width to use for rendering. May either be an integer specifying a fixed width in characters, or nil, meaning that the full width of the window should be used." + :version "25.1" :type '(choice (integer :tag "Fixed width in characters") (const :tag "Use the width of the window" nil)) :group 'shr) @@ -123,19 +126,24 @@ cid: URL as the argument.") "Font for link elements." :group 'shr) +(defvar shr-inhibit-images nil + "If non-nil, inhibit loading images.") + ;;; Internal variables. (defvar shr-folding-mode nil) (defvar shr-state nil) (defvar shr-start nil) (defvar shr-indentation 0) -(defvar shr-inhibit-images nil) +(defvar shr-internal-width (or shr-width (1- (window-width)))) (defvar shr-list-mode nil) (defvar shr-content-cache nil) (defvar shr-kinsoku-shorten nil) (defvar shr-table-depth 0) (defvar shr-stylesheet nil) (defvar shr-base nil) +(defvar shr-depth 0) +(defvar shr-warning nil) (defvar shr-ignore-cache nil) (defvar shr-external-rendering-functions nil) (defvar shr-target-id nil) @@ -197,9 +205,13 @@ DOM should be a parse tree as generated by (shr-state nil) (shr-start nil) (shr-base nil) - (shr-width (or shr-width (1- (window-width))))) - (shr-descend (shr-transform-dom dom)) - (shr-remove-trailing-whitespace start (point)))) + (shr-depth 0) + (shr-warning nil) + (shr-internal-width (or shr-width (1- (window-width))))) + (shr-descend dom) + (shr-remove-trailing-whitespace start (point)) + (when shr-warning + (message "%s" shr-warning)))) (defun shr-remove-trailing-whitespace (start end) (let ((width (window-width))) @@ -214,12 +226,16 @@ DOM should be a parse tree as generated by (overlay-put overlay 'before-string nil)))) (forward-line 1))))) -(defun shr-copy-url () +(defun shr-copy-url (&optional image-url) "Copy the URL under point to the kill ring. +If IMAGE-URL (the prefix) is non-nil, or there is no link under +point, but there is an image under point then copy the URL of the +image under point instead. If called twice, then try to fetch the URL and see whether it redirects somewhere else." - (interactive) - (let ((url (get-text-property (point) 'shr-url))) + (interactive "P") + (let ((url (or (get-text-property (point) 'shr-url) + (get-text-property (point) 'image-url)))) (cond ((not url) (message "No URL under point")) @@ -242,16 +258,17 @@ redirects somewhere else." ;; Copy the URL to the kill ring. (t (with-temp-buffer - (insert url) + (insert (url-encode-url url)) (copy-region-as-kill (point-min) (point-max)) - (message "Copied %s" url)))))) + (message "Copied %s" (buffer-string))))))) (defun shr-next-link () "Skip to the next link." (interactive) (let ((skip (text-property-any (point) (point-max) 'help-echo nil))) - (if (not (setq skip (text-property-not-all skip (point-max) - 'help-echo nil))) + (if (or (eobp) + (not (setq skip (text-property-not-all skip (point-max) + 'help-echo nil)))) (message "No next link") (goto-char skip) (message "%s" (get-text-property (point) 'help-echo))))) @@ -286,7 +303,7 @@ redirects somewhere else." (let ((text (get-text-property (point) 'shr-alt))) (if (not text) (message "No image under point") - (message "%s" text)))) + (message "%s" (shr-fold-text text))))) (defun shr-browse-image (&optional copy-url) "Browse the image under point. @@ -353,78 +370,80 @@ size, and full-buffer size." ;;; Utility functions. -(defun shr-transform-dom (dom) - (let ((result (list (pop dom)))) - (dolist (arg (pop dom)) - (push (cons (intern (concat ":" (symbol-name (car arg))) obarray) - (cdr arg)) - result)) - (dolist (sub dom) - (if (stringp sub) - (push (cons 'text sub) result) - (push (shr-transform-dom sub) result))) - (nreverse result))) - -(defsubst shr-generic (cont) - (dolist (sub cont) - (cond - ((eq (car sub) 'text) - (shr-insert (cdr sub))) - ((listp (cdr sub)) - (shr-descend sub))))) +(defsubst shr-generic (dom) + (dolist (sub (dom-children dom)) + (if (stringp sub) + (shr-insert sub) + (shr-descend sub)))) (defun shr-descend (dom) (let ((function (or ;; Allow other packages to override (or provide) rendering ;; of elements. - (cdr (assq (car dom) shr-external-rendering-functions)) - (intern (concat "shr-tag-" (symbol-name (car dom))) obarray))) - (style (cdr (assq :style (cdr dom)))) + (cdr (assq (dom-tag dom) shr-external-rendering-functions)) + (intern (concat "shr-tag-" (symbol-name (dom-tag dom))) obarray))) + (style (dom-attr dom 'style)) (shr-stylesheet shr-stylesheet) + (shr-depth (1+ shr-depth)) (start (point))) - (when style - (if (string-match "color\\|display\\|border-collapse" style) - (setq shr-stylesheet (nconc (shr-parse-style style) - shr-stylesheet)) - (setq style nil))) - ;; If we have a display:none, then just ignore this part of the DOM. - (unless (equal (cdr (assq 'display shr-stylesheet)) "none") - (if (fboundp function) - (funcall function (cdr dom)) - (shr-generic (cdr dom))) - (when (and shr-target-id - (equal (cdr (assq :id (cdr dom))) shr-target-id)) - ;; If the element was empty, we don't have anything to put the - ;; anchor on. So just insert a dummy character. - (when (= start (point)) - (insert "*")) - (put-text-property start (1+ start) 'shr-target-id shr-target-id)) - ;; If style is set, then this node has set the color. + ;; shr uses about 12 frames per nested node. + (if (> shr-depth (/ max-specpdl-size 12)) + (setq shr-warning "Too deeply nested to render properly; consider increasing `max-specpdl-size'") (when style - (shr-colorize-region start (point) - (cdr (assq 'color shr-stylesheet)) - (cdr (assq 'background-color shr-stylesheet))))))) - -(defmacro shr-char-breakable-p (char) + (if (string-match "color\\|display\\|border-collapse" style) + (setq shr-stylesheet (nconc (shr-parse-style style) + shr-stylesheet)) + (setq style nil))) + ;; If we have a display:none, then just ignore this part of the DOM. + (unless (equal (cdr (assq 'display shr-stylesheet)) "none") + (if (fboundp function) + (funcall function dom) + (shr-generic dom)) + (when (and shr-target-id + (equal (dom-attr dom 'id) shr-target-id)) + ;; If the element was empty, we don't have anything to put the + ;; anchor on. So just insert a dummy character. + (when (= start (point)) + (insert "*")) + (put-text-property start (1+ start) 'shr-target-id shr-target-id)) + ;; If style is set, then this node has set the color. + (when style + (shr-colorize-region + start (point) + (cdr (assq 'color shr-stylesheet)) + (cdr (assq 'background-color shr-stylesheet)))))))) + +(defun shr-fold-text (text) + (if (zerop (length text)) + text + (with-temp-buffer + (let ((shr-indentation 0) + (shr-state nil) + (shr-start nil) + (shr-internal-width (window-width))) + (shr-insert text) + (buffer-string))))) + +(define-inline shr-char-breakable-p (char) "Return non-nil if a line can be broken before and after CHAR." - `(aref fill-find-break-point-function-table ,char)) -(defmacro shr-char-nospace-p (char) + (inline-quote (aref fill-find-break-point-function-table ,char))) +(define-inline shr-char-nospace-p (char) "Return non-nil if no space is required before and after CHAR." - `(aref fill-nospace-between-words-table ,char)) + (inline-quote (aref fill-nospace-between-words-table ,char))) ;; KINSOKU is a Japanese word meaning a rule that should not be violated. ;; In Emacs, it is a term used for characters, e.g. punctuation marks, ;; parentheses, and so on, that should not be placed in the beginning ;; of a line or the end of a line. -(defmacro shr-char-kinsoku-bol-p (char) +(define-inline shr-char-kinsoku-bol-p (char) "Return non-nil if a line ought not to begin with CHAR." - `(let ((char ,char)) - (and (not (eq char ?')) - (aref (char-category-set char) ?>)))) -(defmacro shr-char-kinsoku-eol-p (char) + (inline-letevals (char) + (inline-quote (and (not (eq ,char ?')) + (aref (char-category-set ,char) ?>))))) +(define-inline shr-char-kinsoku-eol-p (char) "Return non-nil if a line ought not to end with CHAR." - `(aref (char-category-set ,char) ?<)) + (inline-quote (aref (char-category-set ,char) ?<))) (unless (shr-char-kinsoku-bol-p (make-char 'japanese-jisx0208 33 35)) (load "kinsoku" nil t)) @@ -467,8 +486,8 @@ size, and full-buffer size." (insert elem) (setq shr-state nil) (let (found) - (while (and (> (current-column) shr-width) - (> shr-width 0) + (while (and (> (current-column) shr-internal-width) + (> shr-internal-width 0) (progn (setq found (shr-find-fill-point)) (not (eolp)))) @@ -482,10 +501,10 @@ size, and full-buffer size." (when (> shr-indentation 0) (shr-indent)) (end-of-line)) - (if (<= (current-column) shr-width) + (if (<= (current-column) shr-internal-width) (insert " ") ;; In case we couldn't get a valid break point (because of a - ;; word that's longer than `shr-width'), just break anyway. + ;; word that's longer than `shr-internal-width'), just break anyway. (insert "\n") (when (> shr-indentation 0) (shr-indent))))) @@ -493,7 +512,7 @@ size, and full-buffer size." (delete-char -1))))) (defun shr-find-fill-point () - (when (> (move-to-column shr-width) shr-width) + (when (> (move-to-column shr-internal-width) shr-internal-width) (backward-char 1)) (let ((bp (point)) failed) @@ -533,7 +552,7 @@ size, and full-buffer size." ;; so we look for the second best position. (while (and (progn (forward-char 1) - (<= (current-column) shr-width)) + (<= (current-column) shr-internal-width)) (progn (setq bp (point)) (shr-char-kinsoku-eol-p (following-char))))) @@ -570,6 +589,8 @@ size, and full-buffer size." ;; Always chop off anchors. (when (string-match "#.*" url) (setq url (substring url 0 (match-beginning 0)))) + ;; NB: <base href="" > URI may itself be relative to the document s URI + (setq url (shr-expand-url url)) (let* ((parsed (url-generic-parse-url url)) (local (url-filename parsed))) (setf (url-filename parsed) "") @@ -592,6 +613,7 @@ size, and full-buffer size." (defun shr-expand-url (url &optional base) (setq base (if base + ;; shr-parse-base should never call this with non-nil base! (shr-parse-base base) ;; Bound by the parser. shr-base)) @@ -600,8 +622,8 @@ size, and full-buffer size." (cond ((or (not url) (not base) (string-match "\\`[a-z]*:" url)) - ;; Absolute URL. - (or url (car base))) + ;; Absolute or empty URI + (or url (nth 3 base))) ((eq (aref url 0) ?/) (if (and (> (length url) 1) (eq (aref url 1) ?/)) @@ -644,9 +666,9 @@ size, and full-buffer size." (when (> shr-indentation 0) (insert (make-string shr-indentation ? )))) -(defun shr-fontize-cont (cont &rest types) +(defun shr-fontize-dom (dom &rest types) (let (shr-start) - (shr-generic cont) + (shr-generic dom) (dolist (type types) (shr-add-font (or shr-start (point)) (point) type)))) @@ -759,6 +781,8 @@ element is the data blob and the second element is the content-type." ((eq size 'original) (create-image data nil t :ascent 100 :format content-type)) + ((eq content-type 'image/svg+xml) + (create-image data 'svg t :ascent 100)) ((eq size 'full) (ignore-errors (shr-rescale-image data content-type))) @@ -821,14 +845,24 @@ Return a string with image data." (shr-parse-image-data))))) (defun shr-parse-image-data () - (list - (buffer-substring (point) (point-max)) - (save-excursion - (save-restriction - (narrow-to-region (point-min) (point)) - (let ((content-type (mail-fetch-field "content-type"))) - (and content-type - (intern content-type obarray))))))) + (let ((data (buffer-substring (point) (point-max))) + (content-type + (save-excursion + (save-restriction + (narrow-to-region (point-min) (point)) + (let ((content-type (mail-fetch-field "content-type"))) + (and content-type + ;; Remove any comments in the type string. + (intern (replace-regexp-in-string ";.*" "" content-type) + obarray))))))) + ;; SVG images may contain references to further images that we may + ;; want to block. So special-case these by parsing the XML data + ;; and remove the blocked bits. + (when (eq content-type 'image/svg+xml) + (setq data + (shr-dom-to-xml + (libxml-parse-xml-region (point) (point-max))))) + (list data content-type))) (defun shr-image-displayer (content-function) "Return a function to display an image. @@ -850,9 +884,9 @@ START, and END. Note that START and END should be markers." (list (current-buffer) start end) t t))))) -(defun shr-heading (cont &rest types) +(defun shr-heading (dom &rest types) (shr-ensure-paragraph) - (apply #'shr-fontize-cont cont types) + (apply #'shr-fontize-dom dom types) (shr-ensure-paragraph)) (defun shr-urlify (start url &optional title) @@ -860,7 +894,12 @@ START, and END. Note that START and END should be markers." (add-text-properties start (point) (list 'shr-url url - 'help-echo (if title (format "%s (%s)" url title) url) + 'help-echo (let ((iri (or (ignore-errors + (decode-coding-string + (url-unhex-string url) + 'utf-8 t)) + url))) + (if title (format "%s (%s)" iri title) iri)) 'follow-link t 'mouse-face 'highlight 'keymap shr-map))) @@ -961,105 +1000,106 @@ ones, in case fg and bg are nil." ;;; Tag-specific rendering rules. -(defun shr-tag-body (cont) +(defun shr-tag-body (dom) (let* ((start (point)) - (fgcolor (cdr (or (assq :fgcolor cont) - (assq :text cont)))) - (bgcolor (cdr (assq :bgcolor cont))) + (fgcolor (or (dom-attr dom 'fgcolor) (dom-attr dom 'text))) + (bgcolor (dom-attr dom 'bgcolor)) (shr-stylesheet (list (cons 'color fgcolor) (cons 'background-color bgcolor)))) - (shr-generic cont) + (shr-generic dom) (shr-colorize-region start (point) fgcolor bgcolor))) -(defun shr-tag-style (_cont) +(defun shr-tag-style (_dom) ) -(defun shr-tag-script (_cont) +(defun shr-tag-script (_dom) ) -(defun shr-tag-comment (_cont) +(defun shr-tag-comment (_dom) ) (defun shr-dom-to-xml (dom) + (with-temp-buffer + (shr-dom-print dom) + (buffer-string))) + +(defun shr-dom-print (dom) "Convert DOM into a string containing the xml representation." - (let ((arg " ") - (text "") - url) - (dolist (sub (cdr dom)) + (insert (format "<%s" (dom-tag dom))) + (dolist (attr (dom-attributes dom)) + ;; Ignore attributes that start with a colon because they are + ;; private elements. + (unless (= (aref (format "%s" (car attr)) 0) ?:) + (insert (format " %s=\"%s\"" (car attr) (cdr attr))))) + (insert ">") + (let (url) + (dolist (elem (dom-children dom)) (cond - ((listp (cdr sub)) - ;; Ignore external image definitions if required. - ;; <image xlink:href="http://TRACKING_URL/"/> - (when (or (not (eq (car sub) 'image)) - (not (setq url (cdr (assq ':xlink:href (cdr sub))))) - (not shr-blocked-images) - (not (string-match shr-blocked-images url))) - (setq text (concat text (shr-dom-to-xml sub))))) - ((eq (car sub) 'text) - (setq text (concat text (cdr sub)))) - (t - (setq arg (concat arg (format "%s=\"%s\" " - (substring (symbol-name (car sub)) 1) - (cdr sub))))))) - (format "<%s%s>%s</%s>" - (car dom) - (substring arg 0 (1- (length arg))) - text - (car dom)))) - -(defun shr-tag-svg (cont) + ((stringp elem) + (insert elem)) + ((eq (dom-tag elem) 'comment) + ) + ((or (not (eq (dom-tag elem) 'image)) + ;; Filter out blocked elements inside the SVG image. + (not (setq url (dom-attr elem ':xlink:href))) + (not shr-blocked-images) + (not (string-match shr-blocked-images url))) + (insert " ") + (shr-dom-print elem))))) + (insert (format "</%s>" (dom-tag dom)))) + +(defun shr-tag-svg (dom) (when (and (image-type-available-p 'svg) (not shr-inhibit-images)) - (funcall shr-put-image-function - (shr-dom-to-xml (cons 'svg cont)) - "SVG Image"))) + (funcall shr-put-image-function (list (shr-dom-to-xml dom) 'image/svg+xml) + "SVG Image"))) -(defun shr-tag-sup (cont) +(defun shr-tag-sup (dom) (let ((start (point))) - (shr-generic cont) + (shr-generic dom) (put-text-property start (point) 'display '(raise 0.5)))) -(defun shr-tag-sub (cont) +(defun shr-tag-sub (dom) (let ((start (point))) - (shr-generic cont) + (shr-generic dom) (put-text-property start (point) 'display '(raise -0.5)))) -(defun shr-tag-label (cont) - (shr-generic cont) +(defun shr-tag-label (dom) + (shr-generic dom) (shr-ensure-paragraph)) -(defun shr-tag-p (cont) +(defun shr-tag-p (dom) (shr-ensure-paragraph) (shr-indent) - (shr-generic cont) + (shr-generic dom) (shr-ensure-paragraph)) -(defun shr-tag-div (cont) +(defun shr-tag-div (dom) (shr-ensure-newline) (shr-indent) - (shr-generic cont) + (shr-generic dom) (shr-ensure-newline)) -(defun shr-tag-s (cont) - (shr-fontize-cont cont 'shr-strike-through)) +(defun shr-tag-s (dom) + (shr-fontize-dom dom 'shr-strike-through)) -(defun shr-tag-del (cont) - (shr-fontize-cont cont 'shr-strike-through)) +(defun shr-tag-del (dom) + (shr-fontize-dom dom 'shr-strike-through)) -(defun shr-tag-b (cont) - (shr-fontize-cont cont 'bold)) +(defun shr-tag-b (dom) + (shr-fontize-dom dom 'bold)) -(defun shr-tag-i (cont) - (shr-fontize-cont cont 'italic)) +(defun shr-tag-i (dom) + (shr-fontize-dom dom 'italic)) -(defun shr-tag-em (cont) - (shr-fontize-cont cont 'italic)) +(defun shr-tag-em (dom) + (shr-fontize-dom dom 'italic)) -(defun shr-tag-strong (cont) - (shr-fontize-cont cont 'bold)) +(defun shr-tag-strong (dom) + (shr-fontize-dom dom 'bold)) -(defun shr-tag-u (cont) - (shr-fontize-cont cont 'underline)) +(defun shr-tag-u (dom) + (shr-fontize-dom dom 'underline)) (defun shr-parse-style (style) (when style @@ -1081,20 +1121,19 @@ ones, in case fg and bg are nil." plist))))) plist))) -(defun shr-tag-base (cont) - (let ((base (cdr (assq :href cont)))) - (when base - (setq shr-base (shr-parse-base base)))) - (shr-generic cont)) +(defun shr-tag-base (dom) + (when-let (base (dom-attr dom 'href)) + (setq shr-base (shr-parse-base base))) + (shr-generic dom)) -(defun shr-tag-a (cont) - (let ((url (cdr (assq :href cont))) - (title (cdr (assq :title cont))) +(defun shr-tag-a (dom) + (let ((url (dom-attr dom 'href)) + (title (dom-attr dom 'title)) (start (point)) shr-start) - (shr-generic cont) + (shr-generic dom) (when (and shr-target-id - (equal (cdr (assq :name cont)) shr-target-id)) + (equal (dom-attr dom 'name) shr-target-id)) ;; We have a zero-length <a name="foo"> element, so just ;; insert... something. (when (= start (point)) @@ -1105,19 +1144,33 @@ ones, in case fg and bg are nil." (not shr-inhibit-decoration)) (shr-urlify (or shr-start start) (shr-expand-url url) title)))) -(defun shr-tag-object (cont) - (let ((start (point)) - url) - (dolist (elem cont) - (when (eq (car elem) 'embed) - (setq url (or url (cdr (assq :src (cdr elem)))))) - (when (and (eq (car elem) 'param) - (equal (cdr (assq :name (cdr elem))) "movie")) - (setq url (or url (cdr (assq :value (cdr elem))))))) - (when url - (shr-insert " [multimedia] ") - (shr-urlify start (shr-expand-url url))) - (shr-generic cont))) +(defun shr-tag-object (dom) + (unless shr-inhibit-images + (let ((start (point)) + url multimedia image) + (when-let (type (dom-attr dom 'type)) + (when (string-match "\\`image/svg" type) + (setq url (dom-attr dom 'data) + image t))) + (dolist (child (dom-non-text-children dom)) + (cond + ((eq (dom-tag child) 'embed) + (setq url (or url (dom-attr child 'src)) + multimedia t)) + ((and (eq (dom-tag child) 'param) + (equal (dom-attr child 'name) "movie")) + (setq url (or url (dom-attr child 'value)) + multimedia t)))) + (when url + (cond + (image + (shr-tag-img dom url) + (setq dom nil)) + (multimedia + (shr-insert " [multimedia] ") + (shr-urlify start (shr-expand-url url))))) + (when dom + (shr-generic dom))))) (defcustom shr-prefer-media-type-alist '(("webm" . 1.0) ("ogv" . 1.0) @@ -1136,10 +1189,10 @@ url if no type is specified. The value should be a float in the range 0.0 to (defun shr--get-media-pref (elem) "Determine the preference for ELEM. The preference is a float determined from `shr-prefer-media-type'." - (let ((type (cdr (assq :type elem))) + (let ((type (dom-attr elem 'type)) (p 0.0)) (unless type - (setq type (cdr (assq :src elem)))) + (setq type (dom-attr elem 'src))) (when type (dolist (pref shr-prefer-media-type-alist) (when (and @@ -1148,61 +1201,61 @@ The preference is a float determined from `shr-prefer-media-type'." (setq p (cdr pref))))) p)) -(defun shr--extract-best-source (cont &optional url pref) - "Extract the best `:src' property from <source> blocks in CONT." +(defun shr--extract-best-source (dom &optional url pref) + "Extract the best `:src' property from <source> blocks in DOM." (setq pref (or pref -1.0)) (let (new-pref) - (dolist (elem cont) - (when (and (eq (car elem) 'source) + (dolist (elem (dom-non-text-children dom)) + (when (and (eq (dom-tag elem) 'source) (< pref (setq new-pref (shr--get-media-pref elem)))) (setq pref new-pref - url (cdr (assq :src elem))) + url (dom-attr elem 'src)) ;; libxml's html parser isn't HTML5 compliant and non terminated ;; source tags might end up as children. So recursion it is... - (dolist (child (cdr elem)) - (when (eq (car child) 'source) + (dolist (child (dom-non-text-children elem)) + (when (eq (dom-tag child) 'source) (let ((ret (shr--extract-best-source (list child) url pref))) (when (< pref (cdr ret)) (setq url (car ret) pref (cdr ret))))))))) (cons url pref)) -(defun shr-tag-video (cont) - (let ((image (cdr (assq :poster cont))) - (url (cdr (assq :src cont))) +(defun shr-tag-video (dom) + (let ((image (dom-attr dom 'poster)) + (url (dom-attr dom 'src)) (start (point))) (unless url - (setq url (car (shr--extract-best-source cont)))) + (setq url (car (shr--extract-best-source dom)))) (if image (shr-tag-img nil image) (shr-insert " [video] ")) (shr-urlify start (shr-expand-url url)))) -(defun shr-tag-audio (cont) - (let ((url (cdr (assq :src cont))) +(defun shr-tag-audio (dom) + (let ((url (dom-attr dom 'src)) (start (point))) (unless url - (setq url (car (shr--extract-best-source cont)))) + (setq url (car (shr--extract-best-source dom)))) (shr-insert " [audio] ") (shr-urlify start (shr-expand-url url)))) -(defun shr-tag-img (cont &optional url) +(defun shr-tag-img (dom &optional url) (when (or url - (and cont - (> (length (cdr (assq :src cont))) 0))) + (and dom + (> (length (dom-attr dom 'src)) 0))) (when (and (> (current-column) 0) (not (eq shr-state 'image))) (insert "\n")) - (let ((alt (cdr (assq :alt cont))) - (url (shr-expand-url (or url (cdr (assq :src cont)))))) + (let ((alt (dom-attr dom 'alt)) + (url (shr-expand-url (or url (dom-attr dom 'src))))) (let ((start (point-marker))) (when (zerop (length alt)) (setq alt "*")) (cond - ((or (member (cdr (assq :height cont)) '("0" "1")) - (member (cdr (assq :width cont)) '("0" "1"))) + ((or (member (dom-attr dom 'height) '("0" "1")) + (member (dom-attr dom 'width) '("0" "1"))) ;; Ignore zero-sized or single-pixel images. ) ((and (not shr-inhibit-images) @@ -1248,52 +1301,51 @@ The preference is a float determined from `shr-prefer-media-type'." (put-text-property start (point) 'image-displayer (shr-image-displayer shr-content-function)) (put-text-property start (point) 'help-echo - (or (cdr (assq :title cont)) - alt))) + (shr-fold-text (or (dom-attr dom 'title) alt)))) (setq shr-state 'image))))) -(defun shr-tag-pre (cont) +(defun shr-tag-pre (dom) (let ((shr-folding-mode 'none)) (shr-ensure-newline) (shr-indent) - (shr-generic cont) + (shr-generic dom) (shr-ensure-newline))) -(defun shr-tag-blockquote (cont) +(defun shr-tag-blockquote (dom) (shr-ensure-paragraph) (shr-indent) (let ((shr-indentation (+ shr-indentation 4))) - (shr-generic cont)) + (shr-generic dom)) (shr-ensure-paragraph)) -(defun shr-tag-dl (cont) +(defun shr-tag-dl (dom) (shr-ensure-paragraph) - (shr-generic cont) + (shr-generic dom) (shr-ensure-paragraph)) -(defun shr-tag-dt (cont) +(defun shr-tag-dt (dom) (shr-ensure-newline) - (shr-generic cont) + (shr-generic dom) (shr-ensure-newline)) -(defun shr-tag-dd (cont) +(defun shr-tag-dd (dom) (shr-ensure-newline) (let ((shr-indentation (+ shr-indentation 4))) - (shr-generic cont))) + (shr-generic dom))) -(defun shr-tag-ul (cont) +(defun shr-tag-ul (dom) (shr-ensure-paragraph) (let ((shr-list-mode 'ul)) - (shr-generic cont)) + (shr-generic dom)) (shr-ensure-paragraph)) -(defun shr-tag-ol (cont) +(defun shr-tag-ol (dom) (shr-ensure-paragraph) (let ((shr-list-mode 1)) - (shr-generic cont)) + (shr-generic dom)) (shr-ensure-paragraph)) -(defun shr-tag-li (cont) +(defun shr-tag-li (dom) (shr-ensure-newline) (shr-indent) (let* ((bullet @@ -1304,9 +1356,9 @@ The preference is a float determined from `shr-prefer-media-type'." shr-bullet)) (shr-indentation (+ shr-indentation (length bullet)))) (insert bullet) - (shr-generic cont))) + (shr-generic dom))) -(defun shr-tag-br (cont) +(defun shr-tag-br (dom) (when (and (not (bobp)) ;; Only add a newline if we break the current line, or ;; the previous line isn't a blank line. @@ -1315,42 +1367,42 @@ The preference is a float determined from `shr-prefer-media-type'." (not (= (char-after (- (point) 2)) ?\n))))) (insert "\n") (shr-indent)) - (shr-generic cont)) + (shr-generic dom)) -(defun shr-tag-span (cont) - (shr-generic cont)) +(defun shr-tag-span (dom) + (shr-generic dom)) -(defun shr-tag-h1 (cont) - (shr-heading cont 'bold 'underline)) +(defun shr-tag-h1 (dom) + (shr-heading dom 'bold 'underline)) -(defun shr-tag-h2 (cont) - (shr-heading cont 'bold)) +(defun shr-tag-h2 (dom) + (shr-heading dom 'bold)) -(defun shr-tag-h3 (cont) - (shr-heading cont 'italic)) +(defun shr-tag-h3 (dom) + (shr-heading dom 'italic)) -(defun shr-tag-h4 (cont) - (shr-heading cont)) +(defun shr-tag-h4 (dom) + (shr-heading dom)) -(defun shr-tag-h5 (cont) - (shr-heading cont)) +(defun shr-tag-h5 (dom) + (shr-heading dom)) -(defun shr-tag-h6 (cont) - (shr-heading cont)) +(defun shr-tag-h6 (dom) + (shr-heading dom)) -(defun shr-tag-hr (_cont) +(defun shr-tag-hr (_dom) (shr-ensure-newline) - (insert (make-string shr-width shr-hr-line) "\n")) + (insert (make-string shr-internal-width shr-hr-line) "\n")) -(defun shr-tag-title (cont) - (shr-heading cont 'bold 'underline)) +(defun shr-tag-title (dom) + (shr-heading dom 'bold 'underline)) -(defun shr-tag-font (cont) +(defun shr-tag-font (dom) (let* ((start (point)) - (color (cdr (assq :color cont))) + (color (dom-attr dom 'color)) (shr-stylesheet (nconc (list (cons 'color color)) shr-stylesheet))) - (shr-generic cont) + (shr-generic dom) (when color (shr-colorize-region start (point) color (cdr (assq 'background-color shr-stylesheet)))))) @@ -1365,23 +1417,22 @@ The preference is a float determined from `shr-prefer-media-type'." ;; main buffer). Now we know how much space each TD really takes, so ;; we then render everything again with the new widths, and finally ;; insert all these boxes into the main buffer. -(defun shr-tag-table-1 (cont) - (setq cont (or (cdr (assq 'tbody cont)) - cont)) +(defun shr-tag-table-1 (dom) + (setq dom (or (dom-child-by-tag dom 'tbody) dom)) (let* ((shr-inhibit-images t) (shr-table-depth (1+ shr-table-depth)) (shr-kinsoku-shorten t) ;; Find all suggested widths. - (columns (shr-column-specs cont)) + (columns (shr-column-specs dom)) ;; Compute how many characters wide each TD should be. (suggested-widths (shr-pro-rate-columns columns)) ;; Do a "test rendering" to see how big each TD is (this can ;; be smaller (if there's little text) or bigger (if there's ;; unbreakable text). - (sketch (shr-make-table cont suggested-widths)) + (sketch (shr-make-table dom suggested-widths)) ;; Compute the "natural" width by setting each column to 500 ;; characters and see how wide they really render. - (natural (shr-make-table cont (make-vector (length columns) 500))) + (natural (shr-make-table dom (make-vector (length columns) 500))) (sketch-widths (shr-table-widths sketch natural suggested-widths))) ;; This probably won't work very well. (when (> (+ (loop for width across sketch-widths @@ -1390,15 +1441,16 @@ The preference is a float determined from `shr-prefer-media-type'." (frame-width)) (setq truncate-lines t)) ;; Then render the table again with these new "hard" widths. - (shr-insert-table (shr-make-table cont sketch-widths t) sketch-widths))) + (shr-insert-table (shr-make-table dom sketch-widths t) sketch-widths))) -(defun shr-tag-table (cont) +(defun shr-tag-table (dom) (shr-ensure-paragraph) - (let* ((caption (cdr (assq 'caption cont))) - (header (cdr (assq 'thead cont))) - (body (or (cdr (assq 'tbody cont)) cont)) - (footer (cdr (assq 'tfoot cont))) - (bgcolor (cdr (assq :bgcolor cont))) + (let* ((caption (dom-children (dom-child-by-tag dom 'caption))) + (header (dom-non-text-children (dom-child-by-tag dom 'thead))) + (body (dom-non-text-children (or (dom-child-by-tag dom 'tbody) + dom))) + (footer (dom-non-text-children (dom-child-by-tag dom 'tfoot))) + (bgcolor (dom-attr dom 'bgcolor)) (start (point)) (shr-stylesheet (nconc (list (cons 'background-color bgcolor)) shr-stylesheet)) @@ -1407,51 +1459,71 @@ The preference is a float determined from `shr-prefer-media-type'." (nfooter (if footer (shr-max-columns footer)))) (if (and (not caption) (not header) - (not (cdr (assq 'tbody cont))) - (not (cdr (assq 'tr cont))) + (not (dom-child-by-tag dom 'tbody)) + (not (dom-child-by-tag dom 'tr)) (not footer)) ;; The table is totally invalid and just contains random junk. ;; Try to output it anyway. - (shr-generic cont) + (shr-generic dom) ;; It's a real table, so render it. (shr-tag-table-1 (nconc - (if caption `((tr (td ,@caption)))) - (if header - (if footer - ;; header + body + footer - (if (= nheader nbody) - (if (= nbody nfooter) - `((tr (td (table (tbody ,@header ,@body ,@footer))))) - (nconc `((tr (td (table (tbody ,@header ,@body))))) - (if (= nfooter 1) - footer - `((tr (td (table (tbody ,@footer)))))))) - (nconc `((tr (td (table (tbody ,@header))))) - (if (= nbody nfooter) - `((tr (td (table (tbody ,@body ,@footer))))) - (nconc `((tr (td (table (tbody ,@body))))) - (if (= nfooter 1) - footer - `((tr (td (table (tbody ,@footer)))))))))) - ;; header + body - (if (= nheader nbody) - `((tr (td (table (tbody ,@header ,@body))))) - (if (= nheader 1) - `(,@header (tr (td (table (tbody ,@body))))) - `((tr (td (table (tbody ,@header)))) - (tr (td (table (tbody ,@body)))))))) - (if footer - ;; body + footer - (if (= nbody nfooter) - `((tr (td (table (tbody ,@body ,@footer))))) - (nconc `((tr (td (table (tbody ,@body))))) - (if (= nfooter 1) - footer - `((tr (td (table (tbody ,@footer)))))))) - (if caption - `((tr (td (table (tbody ,@body))))) - body)))))) + (list 'table nil) + (if caption `((tr nil (td nil ,@caption)))) + (cond (header + (if footer + ;; header + body + footer + (if (= nheader nbody) + (if (= nbody nfooter) + `((tr nil (td nil (table nil + (tbody nil ,@header + ,@body ,@footer))))) + (nconc `((tr nil (td nil (table nil + (tbody nil ,@header + ,@body))))) + (if (= nfooter 1) + footer + `((tr nil (td nil (table + nil (tbody + nil ,@footer)))))))) + (nconc `((tr nil (td nil (table nil (tbody + nil ,@header))))) + (if (= nbody nfooter) + `((tr nil (td nil (table + nil (tbody nil ,@body + ,@footer))))) + (nconc `((tr nil (td nil (table + nil (tbody nil + ,@body))))) + (if (= nfooter 1) + footer + `((tr nil (td nil (table + nil + (tbody + nil + ,@footer)))))))))) + ;; header + body + (if (= nheader nbody) + `((tr nil (td nil (table nil (tbody nil ,@header + ,@body))))) + (if (= nheader 1) + `(,@header (tr nil (td nil (table + nil (tbody nil ,@body))))) + `((tr nil (td nil (table nil (tbody nil ,@header)))) + (tr nil (td nil (table nil (tbody nil ,@body))))))))) + (footer + ;; body + footer + (if (= nbody nfooter) + `((tr nil (td nil (table + nil (tbody nil ,@body ,@footer))))) + (nconc `((tr nil (td nil (table nil (tbody nil ,@body))))) + (if (= nfooter 1) + footer + `((tr nil (td nil (table + nil (tbody nil ,@footer))))))))) + (caption + `((tr nil (td nil (table nil (tbody nil ,@body)))))) + (body))))) (when bgcolor (shr-colorize-region start (point) (cdr (assq 'color shr-stylesheet)) bgcolor)) @@ -1459,17 +1531,10 @@ The preference is a float determined from `shr-prefer-media-type'." ;; model isn't strong enough to allow us to put the images actually ;; into the tables. (when (zerop shr-table-depth) - (dolist (elem (shr-find-elements cont 'img)) - (shr-tag-img (cdr elem)))))) - -(defun shr-find-elements (cont type) - (let (result) - (dolist (elem cont) - (cond ((eq (car elem) type) - (push elem result)) - ((consp (cdr elem)) - (setq result (nconc (shr-find-elements (cdr elem) type) result))))) - (nreverse result))) + (dolist (elem (dom-by-tag dom 'object)) + (shr-tag-object elem)) + (dolist (elem (dom-by-tag dom 'img)) + (shr-tag-img elem))))) (defun shr-insert-table (table widths) (let* ((collapse (equal (cdr (assq 'border-collapse shr-stylesheet)) @@ -1552,22 +1617,22 @@ The preference is a float determined from `shr-prefer-media-type'." (aref widths i)))))))) widths)) -(defun shr-make-table (cont widths &optional fill) - (or (cadr (assoc (list cont widths fill) shr-content-cache)) - (let ((data (shr-make-table-1 cont widths fill))) - (push (list (list cont widths fill) data) +(defun shr-make-table (dom widths &optional fill) + (or (cadr (assoc (list dom widths fill) shr-content-cache)) + (let ((data (shr-make-table-1 dom widths fill))) + (push (list (list dom widths fill) data) shr-content-cache) data))) -(defun shr-make-table-1 (cont widths &optional fill) +(defun shr-make-table-1 (dom widths &optional fill) (let ((trs nil) (shr-inhibit-decoration (not fill)) (rowspans (make-vector (length widths) 0)) width colspan) - (dolist (row cont) - (when (eq (car row) 'tr) + (dolist (row (dom-non-text-children dom)) + (when (eq (dom-tag row) 'tr) (let ((tds nil) - (columns (cdr row)) + (columns (dom-children row)) (i 0) (width-column 0) column) @@ -1581,12 +1646,12 @@ The preference is a float determined from `shr-prefer-media-type'." (pop columns) (aset rowspans i (1- (aref rowspans i))) '(td))) - (when (or (memq (car column) '(td th)) - (not column)) - (when (cdr (assq :rowspan (cdr column))) + (when (and (not (stringp column)) + (or (memq (dom-tag column) '(td th)) + (not column))) + (when-let (span (dom-attr column 'rowspan)) (aset rowspans i (+ (aref rowspans i) - (1- (string-to-number - (cdr (assq :rowspan (cdr column)))))))) + (1- (string-to-number span))))) ;; Sanity check for invalid column-spans. (when (>= width-column (length widths)) (setq width-column 0)) @@ -1595,7 +1660,7 @@ The preference is a float determined from `shr-prefer-media-type'." (aref widths width-column) 10)) (when (and fill - (setq colspan (cdr (assq :colspan (cdr column))))) + (setq colspan (dom-attr column 'colspan))) (setq colspan (min (string-to-number colspan) ;; The colspan may be wrong, so ;; truncate it to the length of the @@ -1610,18 +1675,18 @@ The preference is a float determined from `shr-prefer-media-type'." (setq width-column (+ width-column (1- colspan)))) (when (or column (not fill)) - (push (shr-render-td (cdr column) width fill) + (push (shr-render-td column width fill) tds)) (setq i (1+ i) width-column (1+ width-column)))) (push (nreverse tds) trs)))) (nreverse trs))) -(defun shr-render-td (cont width fill) +(defun shr-render-td (dom width fill) (with-temp-buffer - (let ((bgcolor (cdr (assq :bgcolor cont))) - (fgcolor (cdr (assq :fgcolor cont))) - (style (cdr (assq :style cont))) + (let ((bgcolor (dom-attr dom 'bgcolor)) + (fgcolor (dom-attr dom 'fgcolor)) + (style (dom-attr dom 'style)) (shr-stylesheet shr-stylesheet) actual-colors) (when style @@ -1633,9 +1698,9 @@ The preference is a float determined from `shr-prefer-media-type'." (setq style (nconc (list (cons 'color fgcolor)) style))) (when style (setq shr-stylesheet (append style shr-stylesheet))) - (let ((shr-width width) + (let ((shr-internal-width width) (shr-indentation 0)) - (shr-descend (cons 'td cont))) + (shr-descend dom)) ;; Delete padding at the bottom of the TDs. (delete-region (point) @@ -1656,7 +1721,7 @@ The preference is a float determined from `shr-prefer-media-type'." (if (zerop (buffer-size)) (insert (make-string width ? )) ;; Otherwise, fill the buffer. - (let ((align (cdr (assq :align cont))) + (let ((align (dom-attr dom 'align)) length) (while (not (eobp)) (end-of-line) @@ -1705,19 +1770,21 @@ The preference is a float determined from `shr-prefer-media-type'." (dotimes (i (length columns)) (aset widths i (max (truncate (* (aref columns i) total-percentage - (- shr-width (1+ (length columns))))) + (- shr-internal-width + (1+ (length columns))))) 10))) widths)) ;; Return a summary of the number and shape of the TDs in the table. -(defun shr-column-specs (cont) - (let ((columns (make-vector (shr-max-columns cont) 1))) - (dolist (row cont) - (when (eq (car row) 'tr) +(defun shr-column-specs (dom) + (let ((columns (make-vector (shr-max-columns dom) 1))) + (dolist (row (dom-non-text-children dom)) + (when (eq (dom-tag row) 'tr) (let ((i 0)) - (dolist (column (cdr row)) - (when (memq (car column) '(td th)) - (let ((width (cdr (assq :width (cdr column))))) + (dolist (column (dom-children row)) + (when (and (not (stringp column)) + (memq (dom-tag column) '(td th))) + (let ((width (dom-attr column 'width))) (when (and width (string-match "\\([0-9]+\\)%" width) (not (zerop (setq width (string-to-number @@ -1726,19 +1793,21 @@ The preference is a float determined from `shr-prefer-media-type'." (setq i (1+ i))))))) columns)) -(defun shr-count (cont elem) +(defun shr-count (dom elem) (let ((i 0)) - (dolist (sub cont) - (when (eq (car sub) elem) + (dolist (sub (dom-children dom)) + (when (and (not (stringp sub)) + (eq (dom-tag sub) elem)) (setq i (1+ i)))) i)) -(defun shr-max-columns (cont) +(defun shr-max-columns (dom) (let ((max 0)) - (dolist (row cont) - (when (eq (car row) 'tr) - (setq max (max max (+ (shr-count (cdr row) 'td) - (shr-count (cdr row) 'th)))))) + (dolist (row (dom-children dom)) + (when (and (not (stringp row)) + (eq (dom-tag row) 'tr)) + (setq max (max max (+ (shr-count row 'td) + (shr-count row 'th)))))) max)) (provide 'shr) |