summary | refs | log | tree | commit | diff
path: root/lisp/net/shr.el
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/net/shr.el')
-rw-r--r-- lisp/net/shr.el 745
1 files changed, 407 insertions, 338 deletions
diff --git a/lisp/net/shr.el b/lisp/net/shr.el
index dcc1654aba8..a0c9eba4144 100644
--- a/lisp/net/shr.el
+++ b/lisp/net/shr.el
@@ -33,11 +33,13 @@
(eval-when-compile (require 'cl))
(eval-when-compile (require 'url)) ;For url-filename's setf handler.
(require 'browse-url)
+(require 'subr-x)
+(require 'dom)
(defgroup shr nil
"Simple HTML Renderer"
- :version "24.1"
- :group 'hypermedia)
+ :version "25.1"
+ :group 'web)
(defcustom shr-max-image-proportion 0.9
"How big pictures displayed are in relation to the window they're in.
@@ -76,11 +78,12 @@ If nil, don't draw horizontal table lines."
:group 'shr
:type 'character)
-(defcustom shr-width fill-column
+(defcustom shr-width nil
"Frame width to use for rendering.
May either be an integer specifying a fixed width in characters,
or nil, meaning that the full width of the window should be
used."
+ :version "25.1"
:type '(choice (integer :tag "Fixed width in characters")
(const :tag "Use the width of the window" nil))
:group 'shr)
@@ -123,19 +126,24 @@ cid: URL as the argument.")
"Font for link elements."
:group 'shr)
+(defvar shr-inhibit-images nil
+ "If non-nil, inhibit loading images.")
+
;;; Internal variables.
(defvar shr-folding-mode nil)
(defvar shr-state nil)
(defvar shr-start nil)
(defvar shr-indentation 0)
-(defvar shr-inhibit-images nil)
+(defvar shr-internal-width (or shr-width (1- (window-width))))
(defvar shr-list-mode nil)
(defvar shr-content-cache nil)
(defvar shr-kinsoku-shorten nil)
(defvar shr-table-depth 0)
(defvar shr-stylesheet nil)
(defvar shr-base nil)
+(defvar shr-depth 0)
+(defvar shr-warning nil)
(defvar shr-ignore-cache nil)
(defvar shr-external-rendering-functions nil)
(defvar shr-target-id nil)
@@ -197,9 +205,13 @@ DOM should be a parse tree as generated by
(shr-state nil)
(shr-start nil)
(shr-base nil)
- (shr-width (or shr-width (1- (window-width)))))
- (shr-descend (shr-transform-dom dom))
- (shr-remove-trailing-whitespace start (point))))
+ (shr-depth 0)
+ (shr-warning nil)
+ (shr-internal-width (or shr-width (1- (window-width)))))
+ (shr-descend dom)
+ (shr-remove-trailing-whitespace start (point))
+ (when shr-warning
+ (message "%s" shr-warning))))
(defun shr-remove-trailing-whitespace (start end)
(let ((width (window-width)))
@@ -214,12 +226,16 @@ DOM should be a parse tree as generated by
(overlay-put overlay 'before-string nil))))
(forward-line 1)))))
-(defun shr-copy-url ()
+(defun shr-copy-url (&optional image-url)
"Copy the URL under point to the kill ring.
+If IMAGE-URL (the prefix) is non-nil, or there is no link under
+point, but there is an image under point then copy the URL of the
+image under point instead.
If called twice, then try to fetch the URL and see whether it
redirects somewhere else."
- (interactive)
- (let ((url (get-text-property (point) 'shr-url)))
+ (interactive "P")
+ (let ((url (or (get-text-property (point) 'shr-url)
+ (get-text-property (point) 'image-url))))
(cond
((not url)
(message "No URL under point"))
@@ -242,16 +258,17 @@ redirects somewhere else."
;; Copy the URL to the kill ring.
(t
(with-temp-buffer
- (insert url)
+ (insert (url-encode-url url))
(copy-region-as-kill (point-min) (point-max))
- (message "Copied %s" url))))))
+ (message "Copied %s" (buffer-string)))))))
(defun shr-next-link ()
"Skip to the next link."
(interactive)
(let ((skip (text-property-any (point) (point-max) 'help-echo nil)))
- (if (not (setq skip (text-property-not-all skip (point-max)
- 'help-echo nil)))
+ (if (or (eobp)
+ (not (setq skip (text-property-not-all skip (point-max)
+ 'help-echo nil))))
(message "No next link")
(goto-char skip)
(message "%s" (get-text-property (point) 'help-echo)))))
@@ -286,7 +303,7 @@ redirects somewhere else."
(let ((text (get-text-property (point) 'shr-alt)))
(if (not text)
(message "No image under point")
- (message "%s" text))))
+ (message "%s" (shr-fold-text text)))))
(defun shr-browse-image (&optional copy-url)
"Browse the image under point.
@@ -353,78 +370,80 @@ size, and full-buffer size."
;;; Utility functions.
-(defun shr-transform-dom (dom)
- (let ((result (list (pop dom))))
- (dolist (arg (pop dom))
- (push (cons (intern (concat ":" (symbol-name (car arg))) obarray)
- (cdr arg))
- result))
- (dolist (sub dom)
- (if (stringp sub)
- (push (cons 'text sub) result)
- (push (shr-transform-dom sub) result)))
- (nreverse result)))
-
-(defsubst shr-generic (cont)
- (dolist (sub cont)
- (cond
- ((eq (car sub) 'text)
- (shr-insert (cdr sub)))
- ((listp (cdr sub))
- (shr-descend sub)))))
+(defsubst shr-generic (dom)
+ (dolist (sub (dom-children dom))
+ (if (stringp sub)
+ (shr-insert sub)
+ (shr-descend sub))))
(defun shr-descend (dom)
(let ((function
(or
;; Allow other packages to override (or provide) rendering
;; of elements.
- (cdr (assq (car dom) shr-external-rendering-functions))
- (intern (concat "shr-tag-" (symbol-name (car dom))) obarray)))
- (style (cdr (assq :style (cdr dom))))
+ (cdr (assq (dom-tag dom) shr-external-rendering-functions))
+ (intern (concat "shr-tag-" (symbol-name (dom-tag dom))) obarray)))
+ (style (dom-attr dom 'style))
(shr-stylesheet shr-stylesheet)
+ (shr-depth (1+ shr-depth))
(start (point)))
- (when style
- (if (string-match "color\\|display\\|border-collapse" style)
- (setq shr-stylesheet (nconc (shr-parse-style style)
- shr-stylesheet))
- (setq style nil)))
- ;; If we have a display:none, then just ignore this part of the DOM.
- (unless (equal (cdr (assq 'display shr-stylesheet)) "none")
- (if (fboundp function)
- (funcall function (cdr dom))
- (shr-generic (cdr dom)))
- (when (and shr-target-id
- (equal (cdr (assq :id (cdr dom))) shr-target-id))
- ;; If the element was empty, we don't have anything to put the
- ;; anchor on. So just insert a dummy character.
- (when (= start (point))
- (insert "*"))
- (put-text-property start (1+ start) 'shr-target-id shr-target-id))
- ;; If style is set, then this node has set the color.
+ ;; shr uses about 12 frames per nested node.
+ (if (> shr-depth (/ max-specpdl-size 12))
+ (setq shr-warning "Too deeply nested to render properly; consider increasing `max-specpdl-size'")
(when style
- (shr-colorize-region start (point)
- (cdr (assq 'color shr-stylesheet))
- (cdr (assq 'background-color shr-stylesheet)))))))
-
-(defmacro shr-char-breakable-p (char)
+ (if (string-match "color\\|display\\|border-collapse" style)
+ (setq shr-stylesheet (nconc (shr-parse-style style)
+ shr-stylesheet))
+ (setq style nil)))
+ ;; If we have a display:none, then just ignore this part of the DOM.
+ (unless (equal (cdr (assq 'display shr-stylesheet)) "none")
+ (if (fboundp function)
+ (funcall function dom)
+ (shr-generic dom))
+ (when (and shr-target-id
+ (equal (dom-attr dom 'id) shr-target-id))
+ ;; If the element was empty, we don't have anything to put the
+ ;; anchor on. So just insert a dummy character.
+ (when (= start (point))
+ (insert "*"))
+ (put-text-property start (1+ start) 'shr-target-id shr-target-id))
+ ;; If style is set, then this node has set the color.
+ (when style
+ (shr-colorize-region
+ start (point)
+ (cdr (assq 'color shr-stylesheet))
+ (cdr (assq 'background-color shr-stylesheet))))))))
+
+(defun shr-fold-text (text)
+ (if (zerop (length text))
+ text
+ (with-temp-buffer
+ (let ((shr-indentation 0)
+ (shr-state nil)
+ (shr-start nil)
+ (shr-internal-width (window-width)))
+ (shr-insert text)
+ (buffer-string)))))
+
+(define-inline shr-char-breakable-p (char)
"Return non-nil if a line can be broken before and after CHAR."
- `(aref fill-find-break-point-function-table ,char))
-(defmacro shr-char-nospace-p (char)
+ (inline-quote (aref fill-find-break-point-function-table ,char)))
+(define-inline shr-char-nospace-p (char)
"Return non-nil if no space is required before and after CHAR."
- `(aref fill-nospace-between-words-table ,char))
+ (inline-quote (aref fill-nospace-between-words-table ,char)))
;; KINSOKU is a Japanese word meaning a rule that should not be violated.
;; In Emacs, it is a term used for characters, e.g. punctuation marks,
;; parentheses, and so on, that should not be placed in the beginning
;; of a line or the end of a line.
-(defmacro shr-char-kinsoku-bol-p (char)
+(define-inline shr-char-kinsoku-bol-p (char)
"Return non-nil if a line ought not to begin with CHAR."
- `(let ((char ,char))
- (and (not (eq char ?'))
- (aref (char-category-set char) ?>))))
-(defmacro shr-char-kinsoku-eol-p (char)
+ (inline-letevals (char)
+ (inline-quote (and (not (eq ,char ?'))
+ (aref (char-category-set ,char) ?>)))))
+(define-inline shr-char-kinsoku-eol-p (char)
"Return non-nil if a line ought not to end with CHAR."
- `(aref (char-category-set ,char) ?<))
+ (inline-quote (aref (char-category-set ,char) ?<)))
(unless (shr-char-kinsoku-bol-p (make-char 'japanese-jisx0208 33 35))
(load "kinsoku" nil t))
@@ -467,8 +486,8 @@ size, and full-buffer size."
(insert elem)
(setq shr-state nil)
(let (found)
- (while (and (> (current-column) shr-width)
- (> shr-width 0)
+ (while (and (> (current-column) shr-internal-width)
+ (> shr-internal-width 0)
(progn
(setq found (shr-find-fill-point))
(not (eolp))))
@@ -482,10 +501,10 @@ size, and full-buffer size."
(when (> shr-indentation 0)
(shr-indent))
(end-of-line))
- (if (<= (current-column) shr-width)
+ (if (<= (current-column) shr-internal-width)
(insert " ")
;; In case we couldn't get a valid break point (because of a
- ;; word that's longer than `shr-width'), just break anyway.
+ ;; word that's longer than `shr-internal-width'), just break anyway.
(insert "\n")
(when (> shr-indentation 0)
(shr-indent)))))
@@ -493,7 +512,7 @@ size, and full-buffer size."
(delete-char -1)))))
(defun shr-find-fill-point ()
- (when (> (move-to-column shr-width) shr-width)
+ (when (> (move-to-column shr-internal-width) shr-internal-width)
(backward-char 1))
(let ((bp (point))
failed)
@@ -533,7 +552,7 @@ size, and full-buffer size."
;; so we look for the second best position.
(while (and (progn
(forward-char 1)
- (<= (current-column) shr-width))
+ (<= (current-column) shr-internal-width))
(progn
(setq bp (point))
(shr-char-kinsoku-eol-p (following-char)))))
@@ -570,6 +589,8 @@ size, and full-buffer size."
;; Always chop off anchors.
(when (string-match "#.*" url)
(setq url (substring url 0 (match-beginning 0))))
+ ;; NB: <base href="" > URI may itself be relative to the document's URI.
+ (setq url (shr-expand-url url))
(let* ((parsed (url-generic-parse-url url))
(local (url-filename parsed)))
(setf (url-filename parsed) "")
@@ -592,6 +613,7 @@ size, and full-buffer size."
(defun shr-expand-url (url &optional base)
(setq base
(if base
+ ;; shr-parse-base should never call this with non-nil base!
(shr-parse-base base)
;; Bound by the parser.
shr-base))
@@ -600,8 +622,8 @@ size, and full-buffer size."
(cond ((or (not url)
(not base)
(string-match "\\`[a-z]*:" url))
- ;; Absolute URL.
- (or url (car base)))
+ ;; Absolute or empty URI
+ (or url (nth 3 base)))
((eq (aref url 0) ?/)
(if (and (> (length url) 1)
(eq (aref url 1) ?/))
@@ -644,9 +666,9 @@ size, and full-buffer size."
(when (> shr-indentation 0)
(insert (make-string shr-indentation ? ))))
-(defun shr-fontize-cont (cont &rest types)
+(defun shr-fontize-dom (dom &rest types)
(let (shr-start)
- (shr-generic cont)
+ (shr-generic dom)
(dolist (type types)
(shr-add-font (or shr-start (point)) (point) type))))
@@ -759,6 +781,8 @@ element is the data blob and the second element is the content-type."
((eq size 'original)
(create-image data nil t :ascent 100
:format content-type))
+ ((eq content-type 'image/svg+xml)
+ (create-image data 'svg t :ascent 100))
((eq size 'full)
(ignore-errors
(shr-rescale-image data content-type)))
@@ -821,14 +845,24 @@ Return a string with image data."
(shr-parse-image-data)))))
(defun shr-parse-image-data ()
- (list
- (buffer-substring (point) (point-max))
- (save-excursion
- (save-restriction
- (narrow-to-region (point-min) (point))
- (let ((content-type (mail-fetch-field "content-type")))
- (and content-type
- (intern content-type obarray)))))))
+ (let ((data (buffer-substring (point) (point-max)))
+ (content-type
+ (save-excursion
+ (save-restriction
+ (narrow-to-region (point-min) (point))
+ (let ((content-type (mail-fetch-field "content-type")))
+ (and content-type
+ ;; Strip everything from the first ";" on (e.g. a charset parameter).
+ (intern (replace-regexp-in-string ";.*" "" content-type)
+ obarray)))))))
+ ;; SVG images may contain references to further images that we may
+ ;; want to block. So special-case these by parsing the XML data
+ ;; and removing the blocked bits.
+ (when (eq content-type 'image/svg+xml)
+ (setq data
+ (shr-dom-to-xml
+ (libxml-parse-xml-region (point) (point-max)))))
+ (list data content-type)))
(defun shr-image-displayer (content-function)
"Return a function to display an image.
@@ -850,9 +884,9 @@ START, and END. Note that START and END should be markers."
(list (current-buffer) start end)
t t)))))
-(defun shr-heading (cont &rest types)
+(defun shr-heading (dom &rest types)
(shr-ensure-paragraph)
- (apply #'shr-fontize-cont cont types)
+ (apply #'shr-fontize-dom dom types)
(shr-ensure-paragraph))
(defun shr-urlify (start url &optional title)
@@ -860,7 +894,12 @@ START, and END. Note that START and END should be markers."
(add-text-properties
start (point)
(list 'shr-url url
- 'help-echo (if title (format "%s (%s)" url title) url)
+ 'help-echo (let ((iri (or (ignore-errors
+ (decode-coding-string
+ (url-unhex-string url)
+ 'utf-8 t))
+ url)))
+ (if title (format "%s (%s)" iri title) iri))
'follow-link t
'mouse-face 'highlight
'keymap shr-map)))
@@ -961,105 +1000,106 @@ ones, in case fg and bg are nil."
;;; Tag-specific rendering rules.
-(defun shr-tag-body (cont)
+(defun shr-tag-body (dom)
(let* ((start (point))
- (fgcolor (cdr (or (assq :fgcolor cont)
- (assq :text cont))))
- (bgcolor (cdr (assq :bgcolor cont)))
+ (fgcolor (or (dom-attr dom 'fgcolor) (dom-attr dom 'text)))
+ (bgcolor (dom-attr dom 'bgcolor))
(shr-stylesheet (list (cons 'color fgcolor)
(cons 'background-color bgcolor))))
- (shr-generic cont)
+ (shr-generic dom)
(shr-colorize-region start (point) fgcolor bgcolor)))
-(defun shr-tag-style (_cont)
+(defun shr-tag-style (_dom)
)
-(defun shr-tag-script (_cont)
+(defun shr-tag-script (_dom)
)
-(defun shr-tag-comment (_cont)
+(defun shr-tag-comment (_dom)
)
(defun shr-dom-to-xml (dom)
+ (with-temp-buffer
+ (shr-dom-print dom)
+ (buffer-string)))
+
+(defun shr-dom-print (dom)
"Convert DOM into a string containing the xml representation."
- (let ((arg " ")
- (text "")
- url)
- (dolist (sub (cdr dom))
+ (insert (format "<%s" (dom-tag dom)))
+ (dolist (attr (dom-attributes dom))
+ ;; Ignore attributes that start with a colon because they are
+ ;; private elements.
+ (unless (= (aref (format "%s" (car attr)) 0) ?:)
+ (insert (format " %s=\"%s\"" (car attr) (cdr attr)))))
+ (insert ">")
+ (let (url)
+ (dolist (elem (dom-children dom))
(cond
- ((listp (cdr sub))
- ;; Ignore external image definitions if required.
- ;; <image xlink:href="http://TRACKING_URL/"/>
- (when (or (not (eq (car sub) 'image))
- (not (setq url (cdr (assq ':xlink:href (cdr sub)))))
- (not shr-blocked-images)
- (not (string-match shr-blocked-images url)))
- (setq text (concat text (shr-dom-to-xml sub)))))
- ((eq (car sub) 'text)
- (setq text (concat text (cdr sub))))
- (t
- (setq arg (concat arg (format "%s=\"%s\" "
- (substring (symbol-name (car sub)) 1)
- (cdr sub)))))))
- (format "<%s%s>%s</%s>"
- (car dom)
- (substring arg 0 (1- (length arg)))
- text
- (car dom))))
-
-(defun shr-tag-svg (cont)
+ ((stringp elem)
+ (insert elem))
+ ((eq (dom-tag elem) 'comment)
+ )
+ ((or (not (eq (dom-tag elem) 'image))
+ ;; Filter out blocked elements inside the SVG image.
+ (not (setq url (dom-attr elem ':xlink:href)))
+ (not shr-blocked-images)
+ (not (string-match shr-blocked-images url)))
+ (insert " ")
+ (shr-dom-print elem)))))
+ (insert (format "</%s>" (dom-tag dom))))
+
+(defun shr-tag-svg (dom)
(when (and (image-type-available-p 'svg)
(not shr-inhibit-images))
- (funcall shr-put-image-function
- (shr-dom-to-xml (cons 'svg cont))
- "SVG Image")))
+ (funcall shr-put-image-function (list (shr-dom-to-xml dom) 'image/svg+xml)
+ "SVG Image")))
-(defun shr-tag-sup (cont)
+(defun shr-tag-sup (dom)
(let ((start (point)))
- (shr-generic cont)
+ (shr-generic dom)
(put-text-property start (point) 'display '(raise 0.5))))
-(defun shr-tag-sub (cont)
+(defun shr-tag-sub (dom)
(let ((start (point)))
- (shr-generic cont)
+ (shr-generic dom)
(put-text-property start (point) 'display '(raise -0.5))))
-(defun shr-tag-label (cont)
- (shr-generic cont)
+(defun shr-tag-label (dom)
+ (shr-generic dom)
(shr-ensure-paragraph))
-(defun shr-tag-p (cont)
+(defun shr-tag-p (dom)
(shr-ensure-paragraph)
(shr-indent)
- (shr-generic cont)
+ (shr-generic dom)
(shr-ensure-paragraph))
-(defun shr-tag-div (cont)
+(defun shr-tag-div (dom)
(shr-ensure-newline)
(shr-indent)
- (shr-generic cont)
+ (shr-generic dom)
(shr-ensure-newline))
-(defun shr-tag-s (cont)
- (shr-fontize-cont cont 'shr-strike-through))
+(defun shr-tag-s (dom)
+ (shr-fontize-dom dom 'shr-strike-through))
-(defun shr-tag-del (cont)
- (shr-fontize-cont cont 'shr-strike-through))
+(defun shr-tag-del (dom)
+ (shr-fontize-dom dom 'shr-strike-through))
-(defun shr-tag-b (cont)
- (shr-fontize-cont cont 'bold))
+(defun shr-tag-b (dom)
+ (shr-fontize-dom dom 'bold))
-(defun shr-tag-i (cont)
- (shr-fontize-cont cont 'italic))
+(defun shr-tag-i (dom)
+ (shr-fontize-dom dom 'italic))
-(defun shr-tag-em (cont)
- (shr-fontize-cont cont 'italic))
+(defun shr-tag-em (dom)
+ (shr-fontize-dom dom 'italic))
-(defun shr-tag-strong (cont)
- (shr-fontize-cont cont 'bold))
+(defun shr-tag-strong (dom)
+ (shr-fontize-dom dom 'bold))
-(defun shr-tag-u (cont)
- (shr-fontize-cont cont 'underline))
+(defun shr-tag-u (dom)
+ (shr-fontize-dom dom 'underline))
(defun shr-parse-style (style)
(when style
@@ -1081,20 +1121,19 @@ ones, in case fg and bg are nil."
plist)))))
plist)))
-(defun shr-tag-base (cont)
- (let ((base (cdr (assq :href cont))))
- (when base
- (setq shr-base (shr-parse-base base))))
- (shr-generic cont))
+(defun shr-tag-base (dom)
+ (when-let (base (dom-attr dom 'href))
+ (setq shr-base (shr-parse-base base)))
+ (shr-generic dom))
-(defun shr-tag-a (cont)
- (let ((url (cdr (assq :href cont)))
- (title (cdr (assq :title cont)))
+(defun shr-tag-a (dom)
+ (let ((url (dom-attr dom 'href))
+ (title (dom-attr dom 'title))
(start (point))
shr-start)
- (shr-generic cont)
+ (shr-generic dom)
(when (and shr-target-id
- (equal (cdr (assq :name cont)) shr-target-id))
+ (equal (dom-attr dom 'name) shr-target-id))
;; We have a zero-length <a name="foo"> element, so just
;; insert... something.
(when (= start (point))
@@ -1105,19 +1144,33 @@ ones, in case fg and bg are nil."
(not shr-inhibit-decoration))
(shr-urlify (or shr-start start) (shr-expand-url url) title))))
-(defun shr-tag-object (cont)
- (let ((start (point))
- url)
- (dolist (elem cont)
- (when (eq (car elem) 'embed)
- (setq url (or url (cdr (assq :src (cdr elem))))))
- (when (and (eq (car elem) 'param)
- (equal (cdr (assq :name (cdr elem))) "movie"))
- (setq url (or url (cdr (assq :value (cdr elem)))))))
- (when url
- (shr-insert " [multimedia] ")
- (shr-urlify start (shr-expand-url url)))
- (shr-generic cont)))
+(defun shr-tag-object (dom)
+ (unless shr-inhibit-images
+ (let ((start (point))
+ url multimedia image)
+ (when-let (type (dom-attr dom 'type))
+ (when (string-match "\\`image/svg" type)
+ (setq url (dom-attr dom 'data)
+ image t)))
+ (dolist (child (dom-non-text-children dom))
+ (cond
+ ((eq (dom-tag child) 'embed)
+ (setq url (or url (dom-attr child 'src))
+ multimedia t))
+ ((and (eq (dom-tag child) 'param)
+ (equal (dom-attr child 'name) "movie"))
+ (setq url (or url (dom-attr child 'value))
+ multimedia t))))
+ (when url
+ (cond
+ (image
+ (shr-tag-img dom url)
+ (setq dom nil))
+ (multimedia
+ (shr-insert " [multimedia] ")
+ (shr-urlify start (shr-expand-url url)))))
+ (when dom
+ (shr-generic dom)))))
(defcustom shr-prefer-media-type-alist '(("webm" . 1.0)
("ogv" . 1.0)
@@ -1136,10 +1189,10 @@ url if no type is specified. The value should be a float in the range 0.0 to
(defun shr--get-media-pref (elem)
"Determine the preference for ELEM.
The preference is a float determined from `shr-prefer-media-type'."
- (let ((type (cdr (assq :type elem)))
+ (let ((type (dom-attr elem 'type))
(p 0.0))
(unless type
- (setq type (cdr (assq :src elem))))
+ (setq type (dom-attr elem 'src)))
(when type
(dolist (pref shr-prefer-media-type-alist)
(when (and
@@ -1148,61 +1201,61 @@ The preference is a float determined from `shr-prefer-media-type'."
(setq p (cdr pref)))))
p))
-(defun shr--extract-best-source (cont &optional url pref)
- "Extract the best `:src' property from <source> blocks in CONT."
+(defun shr--extract-best-source (dom &optional url pref)
+ "Extract the best `src' attribute from <source> elements in DOM."
(setq pref (or pref -1.0))
(let (new-pref)
- (dolist (elem cont)
- (when (and (eq (car elem) 'source)
+ (dolist (elem (dom-non-text-children dom))
+ (when (and (eq (dom-tag elem) 'source)
(< pref
(setq new-pref
(shr--get-media-pref elem))))
(setq pref new-pref
- url (cdr (assq :src elem)))
+ url (dom-attr elem 'src))
;; libxml's html parser isn't HTML5 compliant and non terminated
;; source tags might end up as children. So recursion it is...
- (dolist (child (cdr elem))
- (when (eq (car child) 'source)
+ (dolist (child (dom-non-text-children elem))
+ (when (eq (dom-tag child) 'source)
(let ((ret (shr--extract-best-source (list child) url pref)))
(when (< pref (cdr ret))
(setq url (car ret)
pref (cdr ret)))))))))
(cons url pref))
-(defun shr-tag-video (cont)
- (let ((image (cdr (assq :poster cont)))
- (url (cdr (assq :src cont)))
+(defun shr-tag-video (dom)
+ (let ((image (dom-attr dom 'poster))
+ (url (dom-attr dom 'src))
(start (point)))
(unless url
- (setq url (car (shr--extract-best-source cont))))
+ (setq url (car (shr--extract-best-source dom))))
(if image
(shr-tag-img nil image)
(shr-insert " [video] "))
(shr-urlify start (shr-expand-url url))))
-(defun shr-tag-audio (cont)
- (let ((url (cdr (assq :src cont)))
+(defun shr-tag-audio (dom)
+ (let ((url (dom-attr dom 'src))
(start (point)))
(unless url
- (setq url (car (shr--extract-best-source cont))))
+ (setq url (car (shr--extract-best-source dom))))
(shr-insert " [audio] ")
(shr-urlify start (shr-expand-url url))))
-(defun shr-tag-img (cont &optional url)
+(defun shr-tag-img (dom &optional url)
(when (or url
- (and cont
- (> (length (cdr (assq :src cont))) 0)))
+ (and dom
+ (> (length (dom-attr dom 'src)) 0)))
(when (and (> (current-column) 0)
(not (eq shr-state 'image)))
(insert "\n"))
- (let ((alt (cdr (assq :alt cont)))
- (url (shr-expand-url (or url (cdr (assq :src cont))))))
+ (let ((alt (dom-attr dom 'alt))
+ (url (shr-expand-url (or url (dom-attr dom 'src)))))
(let ((start (point-marker)))
(when (zerop (length alt))
(setq alt "*"))
(cond
- ((or (member (cdr (assq :height cont)) '("0" "1"))
- (member (cdr (assq :width cont)) '("0" "1")))
+ ((or (member (dom-attr dom 'height) '("0" "1"))
+ (member (dom-attr dom 'width) '("0" "1")))
;; Ignore zero-sized or single-pixel images.
)
((and (not shr-inhibit-images)
@@ -1248,52 +1301,51 @@ The preference is a float determined from `shr-prefer-media-type'."
(put-text-property start (point) 'image-displayer
(shr-image-displayer shr-content-function))
(put-text-property start (point) 'help-echo
- (or (cdr (assq :title cont))
- alt)))
+ (shr-fold-text (or (dom-attr dom 'title) alt))))
(setq shr-state 'image)))))
-(defun shr-tag-pre (cont)
+(defun shr-tag-pre (dom)
(let ((shr-folding-mode 'none))
(shr-ensure-newline)
(shr-indent)
- (shr-generic cont)
+ (shr-generic dom)
(shr-ensure-newline)))
-(defun shr-tag-blockquote (cont)
+(defun shr-tag-blockquote (dom)
(shr-ensure-paragraph)
(shr-indent)
(let ((shr-indentation (+ shr-indentation 4)))
- (shr-generic cont))
+ (shr-generic dom))
(shr-ensure-paragraph))
-(defun shr-tag-dl (cont)
+(defun shr-tag-dl (dom)
(shr-ensure-paragraph)
- (shr-generic cont)
+ (shr-generic dom)
(shr-ensure-paragraph))
-(defun shr-tag-dt (cont)
+(defun shr-tag-dt (dom)
(shr-ensure-newline)
- (shr-generic cont)
+ (shr-generic dom)
(shr-ensure-newline))
-(defun shr-tag-dd (cont)
+(defun shr-tag-dd (dom)
(shr-ensure-newline)
(let ((shr-indentation (+ shr-indentation 4)))
- (shr-generic cont)))
+ (shr-generic dom)))
-(defun shr-tag-ul (cont)
+(defun shr-tag-ul (dom)
(shr-ensure-paragraph)
(let ((shr-list-mode 'ul))
- (shr-generic cont))
+ (shr-generic dom))
(shr-ensure-paragraph))
-(defun shr-tag-ol (cont)
+(defun shr-tag-ol (dom)
(shr-ensure-paragraph)
(let ((shr-list-mode 1))
- (shr-generic cont))
+ (shr-generic dom))
(shr-ensure-paragraph))
-(defun shr-tag-li (cont)
+(defun shr-tag-li (dom)
(shr-ensure-newline)
(shr-indent)
(let* ((bullet
@@ -1304,9 +1356,9 @@ The preference is a float determined from `shr-prefer-media-type'."
shr-bullet))
(shr-indentation (+ shr-indentation (length bullet))))
(insert bullet)
- (shr-generic cont)))
+ (shr-generic dom)))
-(defun shr-tag-br (cont)
+(defun shr-tag-br (dom)
(when (and (not (bobp))
;; Only add a newline if we break the current line, or
;; the previous line isn't a blank line.
@@ -1315,42 +1367,42 @@ The preference is a float determined from `shr-prefer-media-type'."
(not (= (char-after (- (point) 2)) ?\n)))))
(insert "\n")
(shr-indent))
- (shr-generic cont))
+ (shr-generic dom))
-(defun shr-tag-span (cont)
- (shr-generic cont))
+(defun shr-tag-span (dom)
+ (shr-generic dom))
-(defun shr-tag-h1 (cont)
- (shr-heading cont 'bold 'underline))
+(defun shr-tag-h1 (dom)
+ (shr-heading dom 'bold 'underline))
-(defun shr-tag-h2 (cont)
- (shr-heading cont 'bold))
+(defun shr-tag-h2 (dom)
+ (shr-heading dom 'bold))
-(defun shr-tag-h3 (cont)
- (shr-heading cont 'italic))
+(defun shr-tag-h3 (dom)
+ (shr-heading dom 'italic))
-(defun shr-tag-h4 (cont)
- (shr-heading cont))
+(defun shr-tag-h4 (dom)
+ (shr-heading dom))
-(defun shr-tag-h5 (cont)
- (shr-heading cont))
+(defun shr-tag-h5 (dom)
+ (shr-heading dom))
-(defun shr-tag-h6 (cont)
- (shr-heading cont))
+(defun shr-tag-h6 (dom)
+ (shr-heading dom))
-(defun shr-tag-hr (_cont)
+(defun shr-tag-hr (_dom)
(shr-ensure-newline)
- (insert (make-string shr-width shr-hr-line) "\n"))
+ (insert (make-string shr-internal-width shr-hr-line) "\n"))
-(defun shr-tag-title (cont)
- (shr-heading cont 'bold 'underline))
+(defun shr-tag-title (dom)
+ (shr-heading dom 'bold 'underline))
-(defun shr-tag-font (cont)
+(defun shr-tag-font (dom)
(let* ((start (point))
- (color (cdr (assq :color cont)))
+ (color (dom-attr dom 'color))
(shr-stylesheet (nconc (list (cons 'color color))
shr-stylesheet)))
- (shr-generic cont)
+ (shr-generic dom)
(when color
(shr-colorize-region start (point) color
(cdr (assq 'background-color shr-stylesheet))))))
@@ -1365,23 +1417,22 @@ The preference is a float determined from `shr-prefer-media-type'."
;; main buffer). Now we know how much space each TD really takes, so
;; we then render everything again with the new widths, and finally
;; insert all these boxes into the main buffer.
-(defun shr-tag-table-1 (cont)
- (setq cont (or (cdr (assq 'tbody cont))
- cont))
+(defun shr-tag-table-1 (dom)
+ (setq dom (or (dom-child-by-tag dom 'tbody) dom))
(let* ((shr-inhibit-images t)
(shr-table-depth (1+ shr-table-depth))
(shr-kinsoku-shorten t)
;; Find all suggested widths.
- (columns (shr-column-specs cont))
+ (columns (shr-column-specs dom))
;; Compute how many characters wide each TD should be.
(suggested-widths (shr-pro-rate-columns columns))
;; Do a "test rendering" to see how big each TD is (this can
;; be smaller (if there's little text) or bigger (if there's
;; unbreakable text).
- (sketch (shr-make-table cont suggested-widths))
+ (sketch (shr-make-table dom suggested-widths))
;; Compute the "natural" width by setting each column to 500
;; characters and see how wide they really render.
- (natural (shr-make-table cont (make-vector (length columns) 500)))
+ (natural (shr-make-table dom (make-vector (length columns) 500)))
(sketch-widths (shr-table-widths sketch natural suggested-widths)))
;; This probably won't work very well.
(when (> (+ (loop for width across sketch-widths
@@ -1390,15 +1441,16 @@ The preference is a float determined from `shr-prefer-media-type'."
(frame-width))
(setq truncate-lines t))
;; Then render the table again with these new "hard" widths.
- (shr-insert-table (shr-make-table cont sketch-widths t) sketch-widths)))
+ (shr-insert-table (shr-make-table dom sketch-widths t) sketch-widths)))
-(defun shr-tag-table (cont)
+(defun shr-tag-table (dom)
(shr-ensure-paragraph)
- (let* ((caption (cdr (assq 'caption cont)))
- (header (cdr (assq 'thead cont)))
- (body (or (cdr (assq 'tbody cont)) cont))
- (footer (cdr (assq 'tfoot cont)))
- (bgcolor (cdr (assq :bgcolor cont)))
+ (let* ((caption (dom-children (dom-child-by-tag dom 'caption)))
+ (header (dom-non-text-children (dom-child-by-tag dom 'thead)))
+ (body (dom-non-text-children (or (dom-child-by-tag dom 'tbody)
+ dom)))
+ (footer (dom-non-text-children (dom-child-by-tag dom 'tfoot)))
+ (bgcolor (dom-attr dom 'bgcolor))
(start (point))
(shr-stylesheet (nconc (list (cons 'background-color bgcolor))
shr-stylesheet))
@@ -1407,51 +1459,71 @@ The preference is a float determined from `shr-prefer-media-type'."
(nfooter (if footer (shr-max-columns footer))))
(if (and (not caption)
(not header)
- (not (cdr (assq 'tbody cont)))
- (not (cdr (assq 'tr cont)))
+ (not (dom-child-by-tag dom 'tbody))
+ (not (dom-child-by-tag dom 'tr))
(not footer))
;; The table is totally invalid and just contains random junk.
;; Try to output it anyway.
- (shr-generic cont)
+ (shr-generic dom)
;; It's a real table, so render it.
(shr-tag-table-1
(nconc
- (if caption `((tr (td ,@caption))))
- (if header
- (if footer
- ;; header + body + footer
- (if (= nheader nbody)
- (if (= nbody nfooter)
- `((tr (td (table (tbody ,@header ,@body ,@footer)))))
- (nconc `((tr (td (table (tbody ,@header ,@body)))))
- (if (= nfooter 1)
- footer
- `((tr (td (table (tbody ,@footer))))))))
- (nconc `((tr (td (table (tbody ,@header)))))
- (if (= nbody nfooter)
- `((tr (td (table (tbody ,@body ,@footer)))))
- (nconc `((tr (td (table (tbody ,@body)))))
- (if (= nfooter 1)
- footer
- `((tr (td (table (tbody ,@footer))))))))))
- ;; header + body
- (if (= nheader nbody)
- `((tr (td (table (tbody ,@header ,@body)))))
- (if (= nheader 1)
- `(,@header (tr (td (table (tbody ,@body)))))
- `((tr (td (table (tbody ,@header))))
- (tr (td (table (tbody ,@body))))))))
- (if footer
- ;; body + footer
- (if (= nbody nfooter)
- `((tr (td (table (tbody ,@body ,@footer)))))
- (nconc `((tr (td (table (tbody ,@body)))))
- (if (= nfooter 1)
- footer
- `((tr (td (table (tbody ,@footer))))))))
- (if caption
- `((tr (td (table (tbody ,@body)))))
- body))))))
+ (list 'table nil)
+ (if caption `((tr nil (td nil ,@caption))))
+ (cond (header
+ (if footer
+ ;; header + body + footer
+ (if (= nheader nbody)
+ (if (= nbody nfooter)
+ `((tr nil (td nil (table nil
+ (tbody nil ,@header
+ ,@body ,@footer)))))
+ (nconc `((tr nil (td nil (table nil
+ (tbody nil ,@header
+ ,@body)))))
+ (if (= nfooter 1)
+ footer
+ `((tr nil (td nil (table
+ nil (tbody
+ nil ,@footer))))))))
+ (nconc `((tr nil (td nil (table nil (tbody
+ nil ,@header)))))
+ (if (= nbody nfooter)
+ `((tr nil (td nil (table
+ nil (tbody nil ,@body
+ ,@footer)))))
+ (nconc `((tr nil (td nil (table
+ nil (tbody nil
+ ,@body)))))
+ (if (= nfooter 1)
+ footer
+ `((tr nil (td nil (table
+ nil
+ (tbody
+ nil
+ ,@footer))))))))))
+ ;; header + body
+ (if (= nheader nbody)
+ `((tr nil (td nil (table nil (tbody nil ,@header
+ ,@body)))))
+ (if (= nheader 1)
+ `(,@header (tr nil (td nil (table
+ nil (tbody nil ,@body)))))
+ `((tr nil (td nil (table nil (tbody nil ,@header))))
+ (tr nil (td nil (table nil (tbody nil ,@body)))))))))
+ (footer
+ ;; body + footer
+ (if (= nbody nfooter)
+ `((tr nil (td nil (table
+ nil (tbody nil ,@body ,@footer)))))
+ (nconc `((tr nil (td nil (table nil (tbody nil ,@body)))))
+ (if (= nfooter 1)
+ footer
+ `((tr nil (td nil (table
+ nil (tbody nil ,@footer)))))))))
+ (caption
+ `((tr nil (td nil (table nil (tbody nil ,@body))))))
+ (body)))))
(when bgcolor
(shr-colorize-region start (point) (cdr (assq 'color shr-stylesheet))
bgcolor))
@@ -1459,17 +1531,10 @@ The preference is a float determined from `shr-prefer-media-type'."
;; model isn't strong enough to allow us to put the images actually
;; into the tables.
(when (zerop shr-table-depth)
- (dolist (elem (shr-find-elements cont 'img))
- (shr-tag-img (cdr elem))))))
-
-(defun shr-find-elements (cont type)
- (let (result)
- (dolist (elem cont)
- (cond ((eq (car elem) type)
- (push elem result))
- ((consp (cdr elem))
- (setq result (nconc (shr-find-elements (cdr elem) type) result)))))
- (nreverse result)))
+ (dolist (elem (dom-by-tag dom 'object))
+ (shr-tag-object elem))
+ (dolist (elem (dom-by-tag dom 'img))
+ (shr-tag-img elem)))))
(defun shr-insert-table (table widths)
(let* ((collapse (equal (cdr (assq 'border-collapse shr-stylesheet))
@@ -1552,22 +1617,22 @@ The preference is a float determined from `shr-prefer-media-type'."
(aref widths i))))))))
widths))
;; Caching front end for `shr-make-table-1'.  The list (DOM WIDTHS FILL)
;; is the lookup key into `shr-content-cache' (compared with `assoc').
;; On a miss the table is built by `shr-make-table-1', pushed onto the
;; cache together with its key, and returned.  Because the lookup goes
;; through `or', a cached nil result is treated like a miss and rebuilt.
;; This hunk only renames the first parameter from CONT to DOM.
-(defun shr-make-table (cont widths &optional fill)
- (or (cadr (assoc (list cont widths fill) shr-content-cache))
- (let ((data (shr-make-table-1 cont widths fill)))
- (push (list (list cont widths fill) data)
+(defun shr-make-table (dom widths &optional fill)
+ (or (cadr (assoc (list dom widths fill) shr-content-cache))
+ (let ((data (shr-make-table-1 dom widths fill)))
+ (push (list (list dom widths fill) data)
shr-content-cache)
data)))
;; Walk the tr children of DOM and collect, per row, the cells returned
;; by `shr-render-td'.  Returns the list of rows in document order (both
;; the per-row cell list and the row list are built by `push' and then
;; `nreverse'd).  WIDTHS is a vector indexed by column; FILL is handed
;; through to `shr-render-td', and `shr-inhibit-decoration' is bound to
;; (not FILL) while the rows are processed.
;; NOTE(review): parts of this function fall between hunks and are not
;; shown here; the comments below describe only the visible lines.
-(defun shr-make-table-1 (cont widths &optional fill)
+(defun shr-make-table-1 (dom widths &optional fill)
(let ((trs nil)
(shr-inhibit-decoration (not fill))
(rowspans (make-vector (length widths) 0))
width colspan)
- (dolist (row cont)
- (when (eq (car row) 'tr)
+ (dolist (row (dom-non-text-children dom))
+ (when (eq (dom-tag row) 'tr)
(let ((tds nil)
- (columns (cdr row))
+ (columns (dom-children row))
(i 0)
(width-column 0)
column)
@@ -1581,12 +1646,12 @@ The preference is a float determined from `shr-prefer-media-type'."
(pop columns)
(aset rowspans i (1- (aref rowspans i)))
'(td)))
;; String children of the row fail this test; only td/th nodes, or a
;; nil COLUMN, get past it.
- (when (or (memq (car column) '(td th))
- (not column))
- (when (cdr (assq :rowspan (cdr column)))
+ (when (and (not (stringp column))
+ (or (memq (dom-tag column) '(td th))
+ (not column)))
;; A rowspan attribute of N adds N-1 to this column's ROWSPANS counter.
+ (when-let (span (dom-attr column 'rowspan))
(aset rowspans i (+ (aref rowspans i)
- (1- (string-to-number
- (cdr (assq :rowspan (cdr column))))))))
+ (1- (string-to-number span)))))
;; Sanity check for invalid column-spans.
(when (>= width-column (length widths))
(setq width-column 0))
@@ -1595,7 +1660,7 @@ The preference is a float determined from `shr-prefer-media-type'."
(aref widths width-column)
10))
(when (and fill
- (setq colspan (cdr (assq :colspan (cdr column)))))
+ (setq colspan (dom-attr column 'colspan)))
(setq colspan (min (string-to-number colspan)
;; The colspan may be wrong, so
;; truncate it to the length of the
@@ -1610,18 +1675,18 @@ The preference is a float determined from `shr-prefer-media-type'."
(setq width-column (+ width-column (1- colspan))))
(when (or column
(not fill))
;; The new code hands the whole cell node to `shr-render-td'; the old
;; code passed only its cdr.
- (push (shr-render-td (cdr column) width fill)
+ (push (shr-render-td column width fill)
tds))
(setq i (1+ i)
width-column (1+ width-column))))
(push (nreverse tds) trs))))
(nreverse trs)))
-(defun shr-render-td (cont width fill)
+(defun shr-render-td (dom width fill)
(with-temp-buffer
- (let ((bgcolor (cdr (assq :bgcolor cont)))
- (fgcolor (cdr (assq :fgcolor cont)))
- (style (cdr (assq :style cont)))
+ (let ((bgcolor (dom-attr dom 'bgcolor))
+ (fgcolor (dom-attr dom 'fgcolor))
+ (style (dom-attr dom 'style))
(shr-stylesheet shr-stylesheet)
actual-colors)
(when style
@@ -1633,9 +1698,9 @@ The preference is a float determined from `shr-prefer-media-type'."
(setq style (nconc (list (cons 'color fgcolor)) style)))
(when style
(setq shr-stylesheet (append style shr-stylesheet)))
- (let ((shr-width width)
+ (let ((shr-internal-width width)
(shr-indentation 0))
- (shr-descend (cons 'td cont)))
+ (shr-descend dom))
;; Delete padding at the bottom of the TDs.
(delete-region
(point)
@@ -1656,7 +1721,7 @@ The preference is a float determined from `shr-prefer-media-type'."
(if (zerop (buffer-size))
(insert (make-string width ? ))
;; Otherwise, fill the buffer.
- (let ((align (cdr (assq :align cont)))
+ (let ((align (dom-attr dom 'align))
length)
(while (not (eobp))
(end-of-line)
@@ -1705,19 +1770,21 @@ The preference is a float determined from `shr-prefer-media-type'."
(dotimes (i (length columns))
(aset widths i (max (truncate (* (aref columns i)
total-percentage
- (- shr-width (1+ (length columns)))))
+ (- shr-internal-width
+ (1+ (length columns)))))
10)))
widths))
;; Return a summary of the number and shape of the TDs in the table.
;; The result is a vector with one slot per column, sized by
;; `shr-max-columns' and with every slot initialised to 1.  Each tr
;; child of DOM is scanned; for every td/th child (string children are
;; skipped in the new code) the width attribute is read, and cells whose
;; width matches "NN%" with a non-zero number are singled out.
;; NOTE(review): the lines that record the percentage are hidden by the
;; hunk break below -- presumably it is stored in slot I; confirm
;; against the full file.
-(defun shr-column-specs (cont)
- (let ((columns (make-vector (shr-max-columns cont) 1)))
- (dolist (row cont)
- (when (eq (car row) 'tr)
+(defun shr-column-specs (dom)
+ (let ((columns (make-vector (shr-max-columns dom) 1)))
+ (dolist (row (dom-non-text-children dom))
+ (when (eq (dom-tag row) 'tr)
(let ((i 0))
- (dolist (column (cdr row))
- (when (memq (car column) '(td th))
- (let ((width (cdr (assq :width (cdr column)))))
+ (dolist (column (dom-children row))
+ (when (and (not (stringp column))
+ (memq (dom-tag column) '(td th)))
+ (let ((width (dom-attr column 'width)))
(when (and width
(string-match "\\([0-9]+\\)%" width)
(not (zerop (setq width (string-to-number
@@ -1726,19 +1793,21 @@ The preference is a float determined from `shr-prefer-media-type'."
(setq i (1+ i)))))))
columns))
;; Return how many direct children of DOM have the tag ELEM.
;; The old code compared the car of every element of CONT; the new code
;; iterates over `dom-children' and skips string children before
;; looking at `dom-tag'.
-(defun shr-count (cont elem)
+(defun shr-count (dom elem)
(let ((i 0))
- (dolist (sub cont)
- (when (eq (car sub) elem)
+ (dolist (sub (dom-children dom))
+ (when (and (not (stringp sub))
+ (eq (dom-tag sub) elem))
(setq i (1+ i))))
i))
;; Return the largest cell count found in any tr child of DOM, where a
;; row's cell count is its number of td children plus its number of th
;; children (both obtained from `shr-count').  Returns 0 when DOM has no
;; tr children.  The new code passes the row node itself to `shr-count'
;; (the old code passed its cdr) and skips string children of DOM.
-(defun shr-max-columns (cont)
+(defun shr-max-columns (dom)
(let ((max 0))
- (dolist (row cont)
- (when (eq (car row) 'tr)
- (setq max (max max (+ (shr-count (cdr row) 'td)
- (shr-count (cdr row) 'th))))))
+ (dolist (row (dom-children dom))
+ (when (and (not (stringp row))
+ (eq (dom-tag row) 'tr))
+ (setq max (max max (+ (shr-count row 'td)
+ (shr-count row 'th))))))
max))
(provide 'shr)