summaryrefslogtreecommitdiff
path: root/lisp/international/textsec.el
diff options
context:
space:
mode:
authorLars Ingebrigtsen <larsi@gnus.org>2022-01-18 13:19:55 +0100
committerLars Ingebrigtsen <larsi@gnus.org>2022-01-18 13:20:04 +0100
commitce63f9102545fa50abbe08a4083b332a9101c243 (patch)
tree82f332d6822cc27a7e5e4d405496694632a616d6 /lisp/international/textsec.el
parent4f23dbaa67183097f2aba1b93ace3646466faca9 (diff)
downloademacs-ce63f9102545fa50abbe08a4083b332a9101c243.tar.gz
emacs-ce63f9102545fa50abbe08a4083b332a9101c243.tar.bz2
emacs-ce63f9102545fa50abbe08a4083b332a9101c243.zip
Add textsec functions for verifying email addresses
* lisp/international/characters.el (bidi-control-characters): Rename from glyphless--bidi-control-characters for use in textsec, and add LRM/RLM/ALM. (update-glyphless-char-display): Adjust the code. * lisp/international/textsec.el (textsec-local-address-suspicious-p) (textsec-name-suspicious-p, textsec-suspicious-nonspacing-p) (textsec-email-suspicious-p): New functions.
Diffstat (limited to 'lisp/international/textsec.el')
-rw-r--r--lisp/international/textsec.el77
1 files changed, 77 insertions, 0 deletions
diff --git a/lisp/international/textsec.el b/lisp/international/textsec.el
index d0d435ed7dc..55e4ce9d86c 100644
--- a/lisp/international/textsec.el
+++ b/lisp/international/textsec.el
@@ -28,6 +28,7 @@
(require 'ucs-normalize)
(require 'idna-mapping)
(require 'puny)
+(require 'mail-parse)
(defvar textsec--char-scripts nil)
@@ -225,6 +226,9 @@ STRING isn't a single script string."
(textsec-single-script-p string2)))
(defun textsec-domain-suspicious-p (domain)
+ "Say whether DOMAIN looks suspicious.
+If it isn't, nil is returned. If it is, a string explaining the
+problem is returned."
(catch 'found
(seq-do
(lambda (char)
@@ -236,6 +240,79 @@ STRING isn't a single script string."
(throw 'found "%s is not highly restrictive"))
nil))
+(defun textsec-local-address-suspicious-p (local)
+ "Say whether LOCAL looks suspicious.
+LOCAL is the bit before \"@\" in an email address.
+
+If it suspicious, nil is returned. If it is, a string explaining
+the problem is returned."
+ (cond
+ ((not (equal local (ucs-normalize-NFKC-string local)))
+ (format "`%s' is not in normalized format `%s'"
+ local (ucs-normalize-NFKC-string local)))
+ ((textsec-mixed-numbers-p local)
+ (format "`%s' contains numbers from different number systems" local))
+ ((eq (textsec-restriction-level local) 'unrestricted)
+ (format "`%s' isn't restrictive enough" local))
+ ((string-match-p "\\`\\.\\|\\.\\'\\|\\.\\." local)
+ (format "`%s' contains invalid dots" local))))
+
+(defun textsec-name-suspicious-p (name)
+ "Say whether NAME looks suspicious.
+NAME is (for instance) the free-text name from an email address.
+
+If it suspicious, nil is returned. If it is, a string explaining
+the problem is returned."
+ (cond
+ ((not (equal name (ucs-normalize-NFC-string name)))
+ (format "`%s' is not in normalized format `%s'"
+ name (ucs-normalize-NFC-string name)))
+ ((seq-find (lambda (char)
+ (and (member char bidi-control-characters)
+ (not (member char
+ '( ?\N{left-to-right mark}
+ ?\N{right-to-left mark}
+ ?\N{arabic letter mark})))))
+ name)
+ (format "The string contains bidirectional control characters"))
+ ((textsec-suspicious-nonspacing-p name))))
+
+(defun textsec-suspicious-nonspacing-p (string)
+ "Say whether STRING has a suspicious use of nonspacing characters.
+If it suspicious, nil is returned. If it is, a string explaining
+the problem is returned."
+ (let ((prev nil)
+ (nonspace-count 0))
+ (catch 'found
+ (seq-do
+ (lambda (char)
+ (let ((nonspacing
+ (memq (get-char-code-property char 'general-category)
+ '(Cf Cc Mn))))
+ (when (and nonspacing
+ (equal char prev))
+ (throw 'found "Two identical nonspacing characters in a row"))
+ (setq nonspace-count (if nonspacing
+ (1+ nonspace-count)
+ 0))
+ (when (> nonspace-count 4)
+ (throw 'found
+ "Excessive number of nonspacing characters in a row"))
+ (setq prev char)))
+ string)
+ nil)))
+
+(defun textsec-email-suspicious-p (email)
+ "Say whether EMAIL looks suspicious.
+If it isn't, nil is returned. If it is, a string explaining the
+problem is returned."
+ (pcase-let* ((`(,address . ,name) (mail-header-parse-address email t))
+ (`(,local ,domain) (split-string address "@")))
+ (or
+ (textsec-domain-suspicious-p domain)
+ (textsec-local-address-suspicious-p local)
+ (textsec-name-suspicious-p name))))
+
(provide 'textsec)
;;; textsec.el ends here