diff options
author | Lars Ingebrigtsen <larsi@gnus.org> | 2022-01-18 13:19:55 +0100 |
---|---|---|
committer | Lars Ingebrigtsen <larsi@gnus.org> | 2022-01-18 13:20:04 +0100 |
commit | ce63f9102545fa50abbe08a4083b332a9101c243 (patch) | |
tree | 82f332d6822cc27a7e5e4d405496694632a616d6 /lisp/international/textsec.el | |
parent | 4f23dbaa67183097f2aba1b93ace3646466faca9 (diff) | |
download | emacs-ce63f9102545fa50abbe08a4083b332a9101c243.tar.gz emacs-ce63f9102545fa50abbe08a4083b332a9101c243.tar.bz2 emacs-ce63f9102545fa50abbe08a4083b332a9101c243.zip |
Add textsec functions for verifying email addresses
* lisp/international/characters.el (bidi-control-characters):
Rename from glyphless--bidi-control-characters for use in textsec,
and add LRM/RLM/ALM.
(update-glyphless-char-display): Adjust the code.
* lisp/international/textsec.el (textsec-local-address-suspicious-p)
(textsec-name-suspicious-p, textsec-suspicious-nonspacing-p)
(textsec-email-suspicious-p): New functions.
Diffstat (limited to 'lisp/international/textsec.el')
-rw-r--r-- | lisp/international/textsec.el | 77 |
1 files changed, 77 insertions, 0 deletions
diff --git a/lisp/international/textsec.el b/lisp/international/textsec.el index d0d435ed7dc..55e4ce9d86c 100644 --- a/lisp/international/textsec.el +++ b/lisp/international/textsec.el @@ -28,6 +28,7 @@ (require 'ucs-normalize) (require 'idna-mapping) (require 'puny) +(require 'mail-parse) (defvar textsec--char-scripts nil) @@ -225,6 +226,9 @@ STRING isn't a single script string." (textsec-single-script-p string2))) (defun textsec-domain-suspicious-p (domain) + "Say whether DOMAIN looks suspicious. +If it isn't, nil is returned. If it is, a string explaining the +problem is returned." (catch 'found (seq-do (lambda (char) @@ -236,6 +240,79 @@ STRING isn't a single script string." (throw 'found "%s is not highly restrictive")) nil)) +(defun textsec-local-address-suspicious-p (local) + "Say whether LOCAL looks suspicious. +LOCAL is the bit before \"@\" in an email address. + +If it suspicious, nil is returned. If it is, a string explaining +the problem is returned." + (cond + ((not (equal local (ucs-normalize-NFKC-string local))) + (format "`%s' is not in normalized format `%s'" + local (ucs-normalize-NFKC-string local))) + ((textsec-mixed-numbers-p local) + (format "`%s' contains numbers from different number systems" local)) + ((eq (textsec-restriction-level local) 'unrestricted) + (format "`%s' isn't restrictive enough" local)) + ((string-match-p "\\`\\.\\|\\.\\'\\|\\.\\." local) + (format "`%s' contains invalid dots" local)))) + +(defun textsec-name-suspicious-p (name) + "Say whether NAME looks suspicious. +NAME is (for instance) the free-text name from an email address. + +If it suspicious, nil is returned. If it is, a string explaining +the problem is returned." + (cond + ((not (equal name (ucs-normalize-NFC-string name))) + (format "`%s' is not in normalized format `%s'" + name (ucs-normalize-NFC-string name))) + ((seq-find (lambda (char) + (and (member char bidi-control-characters) + (not (member char + '( ?\N{left-to-right mark} + ?\N{right-to-left mark} + ?\N{arabic letter mark}))))) + name) + (format "The string contains bidirectional control characters")) + ((textsec-suspicious-nonspacing-p name)))) + +(defun textsec-suspicious-nonspacing-p (string) + "Say whether STRING has a suspicious use of nonspacing characters. +If it suspicious, nil is returned. If it is, a string explaining +the problem is returned." + (let ((prev nil) + (nonspace-count 0)) + (catch 'found + (seq-do + (lambda (char) + (let ((nonspacing + (memq (get-char-code-property char 'general-category) + '(Cf Cc Mn)))) + (when (and nonspacing + (equal char prev)) + (throw 'found "Two identical nonspacing characters in a row")) + (setq nonspace-count (if nonspacing + (1+ nonspace-count) + 0)) + (when (> nonspace-count 4) + (throw 'found + "Excessive number of nonspacing characters in a row")) + (setq prev char))) + string) + nil))) + +(defun textsec-email-suspicious-p (email) + "Say whether EMAIL looks suspicious. +If it isn't, nil is returned. If it is, a string explaining the +problem is returned." + (pcase-let* ((`(,address . ,name) (mail-header-parse-address email t)) + (`(,local ,domain) (split-string address "@"))) + (or + (textsec-domain-suspicious-p domain) + (textsec-local-address-suspicious-p local) + (textsec-name-suspicious-p name)))) + (provide 'textsec) ;;; textsec.el ends here |