summaryrefslogtreecommitdiff
path: root/test/lisp/international
diff options
context:
space:
mode:
Diffstat (limited to 'test/lisp/international')
-rw-r--r--test/lisp/international/ccl-tests.el252
-rw-r--r--test/lisp/international/mule-tests.el39
-rw-r--r--test/lisp/international/mule-util-resources/utf-8.txt2
-rw-r--r--test/lisp/international/mule-util-tests.el51
-rw-r--r--test/lisp/international/textsec-tests.el214
-rw-r--r--test/lisp/international/ucs-normalize-tests.el180
6 files changed, 690 insertions, 48 deletions
diff --git a/test/lisp/international/ccl-tests.el b/test/lisp/international/ccl-tests.el
new file mode 100644
index 00000000000..cf472415c7a
--- /dev/null
+++ b/test/lisp/international/ccl-tests.el
@@ -0,0 +1,252 @@
+;;; ccl-tests.el --- unit tests for ccl.el -*- lexical-binding:t -*-
+
+;; Copyright (C) 2018-2022 Free Software Foundation, Inc.
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
+
+;;; Code:
+
+(require 'ert)
+(require 'ccl)
+(require 'seq)
+
+
+(ert-deftest shift ()
+ (with-suppressed-warnings ((suspicious lsh))
+
+ ;; shift left +ve 5628 #x00000000000015fc
+ (should (= (ash 5628 8) 1440768)) ; #x000000000015fc00
+ (should (= (lsh 5628 8) 1440768)) ; #x000000000015fc00
+
+ ;; shift left -ve -5628 #x3fffffffffffea04
+ (should (= (ash -5628 8) -1440768)) ; #x3fffffffffea0400
+ (should (= (lsh -5628 8) -1440768)) ; #x3fffffffffea0400
+
+ ;; shift right +ve 5628 #x00000000000015fc
+ (should (= (ash 5628 -8) 21)) ; #x0000000000000015
+ (should (= (lsh 5628 -8) 21)) ; #x0000000000000015
+
+ ;; shift right -ve -5628 #x3fffffffffffea04
+ (should (= (ash -5628 -8) -22)) ; #x3fffffffffffffea
+ (should (= (lsh -5628 -8)
+ (ash (- -5628 (ash most-negative-fixnum 1)) -8)
+ (ash (logand (ash -5628 -1) most-positive-fixnum) -7)))))
+
+;; CCl program from `pgg-parse-crc24' in lisp/obsolete/pgg-parse.el
+(defconst prog-pgg-source
+ '(1
+ ((loop
+ (read r0) (r1 ^= r0) (r2 ^= 0)
+ (r5 = 0)
+ (loop
+ (r1 <<= 1)
+ (r1 += ((r2 >> 15) & 1))
+ (r2 <<= 1)
+ (if (r1 & 256)
+ ((r1 ^= 390) (r2 ^= 19707)))
+ (if (r5 < 7)
+ ((r5 += 1)
+ (repeat))))
+ (repeat)))))
+
+(defconst prog-pgg-code
+ [1 30 14 114744 114775 0 161 131127 1 148217 15 82167
+ 1 1848 131159 1 1595 5 256 114743 390 114775 19707
+ 1467 16 7 183 1 -5628 -7164 22])
+
+(defconst prog-pgg-dump
+"Out-buffer must be as large as in-buffer.
+Main-body:
+ 2:[read-register] read r0 (0 remaining)
+ 3:[set-assign-expr-register] r1 ^= r0
+ 4:[set-assign-expr-const] r2 ^= 0
+ 6:[set-short-const] r5 = 0
+ 7:[set-assign-expr-const] r1 <<= 1
+ 9:[set-expr-const] r7 = r2 >> 15
+ 11:[set-assign-expr-const] r7 &= 1
+ 13:[set-assign-expr-register] r1 += r7
+ 14:[set-assign-expr-const] r2 <<= 1
+ 16:[jump-cond-expr-const] if !(r1 & 256), jump to 23(+7)
+ 19:[set-assign-expr-const] r1 ^= 390
+ 21:[set-assign-expr-const] r2 ^= 19707
+ 23:[jump-cond-expr-const] if !(r5 < 7), jump to 29(+6)
+ 26:[set-assign-expr-const] r5 += 1
+ 28:[jump] jump to 7(-21)
+ 29:[jump] jump to 2(-27)
+At EOF:
+ 30:[end] end
+")
+
+(ert-deftest ccl-compile-pgg ()
+ (should (equal (ccl-compile prog-pgg-source) prog-pgg-code)))
+
+(ert-deftest ccl-dump-pgg ()
+ (with-temp-buffer
+ (ccl-dump prog-pgg-code)
+ (should (equal (buffer-string) prog-pgg-dump))))
+
+(defvar pgg-parse-crc24)
+(declare-function pgg-parse-crc24-string "pgg-parse" (string))
+
+(ert-deftest pgg-parse-crc24 ()
+ ;; Compiler
+ (require 'pgg)
+ (should (equal pgg-parse-crc24 prog-pgg-code))
+ ;; Interpreter
+ (should (equal (pgg-parse-crc24-string "foo") (concat [#x4f #xc2 #x55])))
+ (should (equal (pgg-parse-crc24-string "bar") (concat [#x51 #xd9 #x53])))
+ (should (equal (pgg-parse-crc24-string "baz") (concat [#xf0 #x58 #x6a]))))
+
+(ert-deftest pgg-parse-crc24-dump ()
+ ;; Disassembler
+ (require 'pgg)
+ (with-temp-buffer
+ (ccl-dump pgg-parse-crc24)
+ (should (equal (buffer-string) prog-pgg-dump))))
+
+;;----------------------------------------------------------------------------
+;; Program from 'midikbd-decoder in midi-kbd-0.2.el GNU ELPA package
+(defconst prog-midi-source
+ '(2
+ (loop
+ (loop
+ ;; central message receiver loop here.
+ ;; When it exits, the command to deal with is in r0
+ ;; Any arguments are in r1 and r2
+ ;; r3 contains: 0 if no arguments are accepted
+ ;; 1 if 1 argument can be accepted
+ ;; 2 if 2 arguments can be accepted
+ ;; 3 if the first of two arguments has been accepted
+ ;; Arguments are read into r1 and r2.
+ ;; r4 contains the current running status byte if any.
+ (read-if (r0 < #x80)
+ (branch r3
+ (repeat)
+ ((r1 = r0) (r0 = r4) (break))
+ ((r1 = r0) (r3 = 3) (repeat))
+ ((r2 = r0) (r3 = 2) (r0 = r4) (break))))
+ (if (r0 >= #xf8) ; real time message
+ (break))
+ (if (r0 < #xf0) ; channel command
+ ((r4 = r0)
+ (if ((r0 & #xe0) == #xc0)
+ ;; program change and channel pressure take only 1 argument
+ (r3 = 1)
+ (r3 = 2))
+ (repeat)))
+ ;; system common message, we swallow those for now
+ (r3 = 0)
+ (repeat))
+ (if ((r0 & #xf0) == #x90)
+ (if (r2 == 0) ; Some Midi devices use velocity 0
+ ; for switching notes off,
+ ; so translate into note-off
+ ; and fall through
+ (r0 -= #x10)
+ ((r0 &= #xf)
+ (write 0)
+ (write r0 r1 r2)
+ (repeat))))
+ (if ((r0 & #xf0) == #x80)
+ ((r0 &= #xf)
+ (write 1)
+ (write r0 r1 r2)
+ (repeat)))
+ (repeat))))
+
+(defconst prog-midi-code
+ [2 72 4893 16 128 1133 5 6 9 12 16 -2556 32 1024 6660 32 865
+ -4092 64 609 1024 4868 795 20 248 3844 3099 16 240 128 82169
+ 224 1275 18 192 353 260 609 -9468 97 -9980 82169 240 4091
+ 18 144 1371 18 0 16407 16 1796 81943 15 20 529 305 81 -14588
+ 82169 240 2555 18 128 81943 15 276 529 305 81 -17660 -17916 22])
+
+(defconst prog-midi-dump
+(concat "Out-buffer must be 2 times bigger than in-buffer.
+Main-body:
+ 2:[read-jump-cond-expr-const] read r0, if !(r0 < 128), jump to 22(+20)
+ 5:[branch] jump to array[r3] of length 4
+ 11 12 15 18 22 ""
+ 11:[jump] jump to 2(-9)
+ 12:[set-register] r1 = r0
+ 13:[set-register] r0 = r4
+ 14:[jump] jump to 41(+27)
+ 15:[set-register] r1 = r0
+ 16:[set-short-const] r3 = 3
+ 17:[jump] jump to 2(-15)
+ 18:[set-register] r2 = r0
+ 19:[set-short-const] r3 = 2
+ 20:[set-register] r0 = r4
+ 21:[jump] jump to 41(+20)
+ 22:[jump-cond-expr-const] if !(r0 >= 248), jump to 26(+4)
+ 25:[jump] jump to 41(+16)
+ 26:[jump-cond-expr-const] if !(r0 < 240), jump to 39(+13)
+ 29:[set-register] r4 = r0
+ 30:[set-expr-const] r7 = r0 & 224
+ 32:[jump-cond-expr-const] if !(r7 == 192), jump to 37(+5)
+ 35:[set-short-const] r3 = 1
+ 36:[jump] jump to 38(+2)
+ 37:[set-short-const] r3 = 2
+ 38:[jump] jump to 2(-36)
+ 39:[set-short-const] r3 = 0
+ 40:[jump] jump to 2(-38)
+ 41:[set-expr-const] r7 = r0 & 240
+ 43:[jump-cond-expr-const] if !(r7 == 144), jump to 59(+16)
+ 46:[jump-cond-expr-const] if !(r2 == 0), jump to 52(+6)
+ 49:[set-assign-expr-const] r0 -= 16
+ 51:[jump] jump to 59(+8)
+ 52:[set-assign-expr-const] r0 &= 15
+ 54:[write-const-string] write char \"\x00\"
+ 55:[write-register] write r0 (2 remaining)
+ 56:[write-register] write r1 (1 remaining)
+ 57:[write-register] write r2 (0 remaining)
+ 58:[jump] jump to 2(-56)
+ 59:[set-expr-const] r7 = r0 & 240
+ 61:[jump-cond-expr-const] if !(r7 == 128), jump to 71(+10)
+ 64:[set-assign-expr-const] r0 &= 15
+ 66:[write-const-string] write char \"\x01\"
+ 67:[write-register] write r0 (2 remaining)
+ 68:[write-register] write r1 (1 remaining)
+ 69:[write-register] write r2 (0 remaining)
+ 70:[jump] jump to 2(-68)
+ 71:[jump] jump to 2(-69)
+At EOF:
+ 72:[end] end
+"))
+
+(ert-deftest ccl-compile-midi ()
+ (should (equal (ccl-compile prog-midi-source) prog-midi-code)))
+
+(ert-deftest ccl-dump-midi ()
+ (with-temp-buffer
+ (ccl-dump prog-midi-code)
+ (should (equal (buffer-string) prog-midi-dump))))
+
+(ert-deftest ccl-hash-table ()
+ (let ((sym (gensym))
+ (table (make-hash-table :test 'eq)))
+ (puthash 16 17 table)
+ (puthash 17 16 table)
+ (define-translation-hash-table sym table)
+ (let* ((prog `(2
+ ((loop
+ (lookup-integer ,sym r0 r1)))))
+ (compiled (ccl-compile prog))
+ (registers [17 0 0 0 0 0 0 0]))
+ (ccl-execute compiled registers)
+ (should (equal registers [2 16 0 0 0 0 0 1])))))
+
+;;; ccl-tests.el ends here
diff --git a/test/lisp/international/mule-tests.el b/test/lisp/international/mule-tests.el
index 4265cec14af..94e864817f0 100644
--- a/test/lisp/international/mule-tests.el
+++ b/test/lisp/international/mule-tests.el
@@ -1,6 +1,6 @@
;;; mule-tests.el --- unit tests for mule.el -*- lexical-binding: t; -*-
-;; Copyright (C) 2017 Free Software Foundation, Inc.
+;; Copyright (C) 2017-2022 Free Software Foundation, Inc.
;; This file is part of GNU Emacs.
@@ -23,6 +23,8 @@
;;; Code:
+(require 'ert-x) ;For `ert-simulate-keys'.
+
(ert-deftest find-auto-coding--bug27391 ()
"Check that Bug#27391 is fixed."
(with-temp-buffer
@@ -36,4 +38,39 @@
(find-auto-coding "" (buffer-size)))
'(utf-8 . :coding)))))
+(ert-deftest mule-cmds-tests--encode-ebcdic ()
+ (should (equal (encode-coding-char ?a 'ebcdic-int) "\201"))
+ (should (not (multibyte-string-p (encode-coding-char ?a 'utf-8)))))
+
+(ert-deftest mule-cmds--test-universal-coding-system-argument ()
+ (should (equal "ccccccccccccccccab"
+ (let ((enable-recursive-minibuffers t))
+ (ert-simulate-keys
+ (kbd "C-x RET c u t f - 8 RET C-u C-u c a b RET")
+ (read-string "prompt:"))))))
+
+(ert-deftest mule-utf-7 ()
+ ;; utf-7 and utf-7-imap are not ASCII-compatible.
+ (should-not (coding-system-get 'utf-7 :ascii-compatible-p))
+ (should-not (coding-system-get 'utf-7-imap :ascii-compatible-p))
+ ;; Invariant ASCII subset.
+ (let ((s (apply #'string (append (number-sequence #x20 #x25)
+ (number-sequence #x27 #x7e)))))
+ (should (equal (encode-coding-string s 'utf-7-imap) s))
+ (should (equal (decode-coding-string s 'utf-7-imap) s)))
+ ;; Escaped ampersand.
+ (should (equal (encode-coding-string "a&bcd" 'utf-7-imap) "a&-bcd"))
+ (should (equal (decode-coding-string "a&-bcd" 'utf-7-imap) "a&bcd"))
+ ;; Ability to encode Unicode.
+ (should (equal (check-coding-systems-region "あ" nil '(utf-7-imap)) nil))
+ (should (equal (encode-coding-string "あ" 'utf-7-imap) "&MEI-"))
+ (should (equal (decode-coding-string "&MEI-" 'utf-7-imap) "あ")))
+
+(ert-deftest mule-hz ()
+ ;; The chinese-hz encoding is not ASCII compatible.
+ (should-not (coding-system-get 'chinese-hz :ascii-compatible-p)))
+
+;; Stop "Local Variables" above causing confusion when visiting this file.
+
+
;;; mule-tests.el ends here
diff --git a/test/lisp/international/mule-util-resources/utf-8.txt b/test/lisp/international/mule-util-resources/utf-8.txt
new file mode 100644
index 00000000000..385bbb4ba80
--- /dev/null
+++ b/test/lisp/international/mule-util-resources/utf-8.txt
@@ -0,0 +1,2 @@
+Thís is a test line 1.
+Line 2.
diff --git a/test/lisp/international/mule-util-tests.el b/test/lisp/international/mule-util-tests.el
index 01f40a227ca..cf29e0e290e 100644
--- a/test/lisp/international/mule-util-tests.el
+++ b/test/lisp/international/mule-util-tests.el
@@ -1,6 +1,6 @@
-;;; mule-util --- tests for international/mule-util.el
+;;; mule-util-tests.el --- tests for international/mule-util.el -*- lexical-binding:t -*-
-;; Copyright (C) 2002-2017 Free Software Foundation, Inc.
+;; Copyright (C) 2002-2022 Free Software Foundation, Inc.
;; This file is part of GNU Emacs.
@@ -22,6 +22,7 @@
;;; Code:
(require 'ert)
+(require 'ert-x)
(require 'mule-util)
(defconst mule-util-test-truncate-data
@@ -75,10 +76,50 @@
(eval
`(ert-deftest ,testname ()
,testdoc
- (should (equal (apply 'truncate-string-to-width ',(car testdata))
- ,(cdr testdata)))))))
+ (let ((truncate-string-ellipsis "..."))
+ (should (equal (apply 'truncate-string-to-width ',(car testdata))
+ ,(cdr testdata))))))))
(dotimes (i (length mule-util-test-truncate-data))
(mule-util-test-truncate-create i))
-;;; mule-util.el ends here
+(ert-deftest filepos/bufferpos-tests-utf-8 ()
+ (let ((coding-system-for-read 'utf-8-unix))
+ (with-temp-buffer
+ (insert-file-contents (ert-resource-file "utf-8.txt"))
+ (should (eq buffer-file-coding-system 'utf-8-unix))
+ ;; First line is "Thís is a test line 1.".
+ ;; Bytes start counting at 0; chars at 1.
+ (should (= (filepos-to-bufferpos 1 'exact) 2))
+ (should (= (bufferpos-to-filepos 2 'exact) 1))
+ ;; After non-ASCII.
+ (should (= (filepos-to-bufferpos 4 'exact) 4))
+ (should (= (bufferpos-to-filepos 4 'exact) 4)))))
+
+(ert-deftest filepos/bufferpos-tests-binary ()
+ (let ((coding-system-for-read 'binary))
+ (with-temp-buffer
+ (insert-file-contents (ert-resource-file "utf-8.txt"))
+ (should (eq buffer-file-coding-system 'no-conversion))
+ ;; First line is "Thís is a test line 1.".
+ ;; Bytes start counting at 0; chars at 1.
+ (should (= (filepos-to-bufferpos 1 'exact) 2))
+ (should (= (bufferpos-to-filepos 2 'exact) 1))
+ ;; After non-ASCII.
+ (should (= (filepos-to-bufferpos 4 'exact) 5))
+ (should (= (bufferpos-to-filepos 5 'exact) 4)))))
+
+(ert-deftest filepos/bufferpos-tests-undecided ()
+ (let ((coding-system-for-read 'binary))
+ (with-temp-buffer
+ (insert-file-contents (ert-resource-file "utf-8.txt"))
+ (setq buffer-file-coding-system 'undecided)
+ (should-error (filepos-to-bufferpos 1 'exact))
+ (should-error (bufferpos-to-filepos 2 'exact))
+ (should (= (filepos-to-bufferpos 1 'approximate) 2))
+ (should (= (bufferpos-to-filepos 2 'approximate) 1))
+ ;; After non-ASCII.
+ (should (= (filepos-to-bufferpos 4 'approximate) 5))
+ (should (= (bufferpos-to-filepos 5 'approximate) 4)))))
+
+;;; mule-util-tests.el ends here
diff --git a/test/lisp/international/textsec-tests.el b/test/lisp/international/textsec-tests.el
new file mode 100644
index 00000000000..6b0773dc407
--- /dev/null
+++ b/test/lisp/international/textsec-tests.el
@@ -0,0 +1,214 @@
+;;; textsec-tests.el --- Tests for textsec.el -*- lexical-binding: t; -*-
+
+;; Copyright (C) 2022 Free Software Foundation, Inc.
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;;
+
+;;; Code:
+
+(require 'textsec)
+(require 'ert)
+(require 'ert-x)
+
+(ert-deftest test-scripts ()
+ (should (equal (textsec-scripts "Circle")
+ '((latin) (latin) (latin) (latin) (latin) (latin))))
+ (should (textsec-single-script-p "Circle"))
+
+ (should (equal (textsec-scripts "СігсӀе")
+ '((cyrillic) (cyrillic) (cyrillic)
+ (cyrillic) (cyrillic) (cyrillic))))
+ (should (textsec-single-script-p "СігсӀе"))
+
+ (should (equal (textsec-scripts "Сirсlе")
+ '((cyrillic) (latin) (latin) (cyrillic) (latin) (cyrillic))))
+ (should-not (textsec-single-script-p "Сirсlе"))
+
+ (should (equal (textsec-scripts "Circ1e")
+ '((latin) (latin) (latin) (latin) (common) (latin))))
+ (should (textsec-single-script-p "Circ1e"))
+
+ (should (equal (textsec-scripts "C𝗂𝗋𝖼𝗅𝖾")
+ '((latin) (common) (common) (common) (common) (common))))
+ (should (textsec-single-script-p "C𝗂𝗋𝖼𝗅𝖾"))
+
+ (should (equal (textsec-scripts "𝖢𝗂𝗋𝖼𝗅𝖾")
+ '((common) (common) (common) (common) (common) (common))))
+ (should (textsec-single-script-p "𝖢𝗂𝗋𝖼𝗅𝖾"))
+
+ (should (equal (textsec-scripts "〆切")
+ '((common han) (han))))
+ (should (textsec-single-script-p "〆切"))
+
+ (should (equal (textsec-scripts "ねガ")
+ '((hiragana) (katakana))))
+ (should (textsec-single-script-p "ねガ")))
+
+(ert-deftest test-minimal-scripts ()
+ (should (equal (textsec-covering-scripts "Circle")
+ '(latin)))
+ (should (equal (textsec-covering-scripts "Сirсlе")
+ '(cyrillic latin)))
+ (should (equal (textsec-covering-scripts "〆切")
+ '(han))))
+
+(ert-deftest test-restriction-level ()
+ (should (eq (textsec-restriction-level "foo")
+ 'ascii-only))
+ (should (eq (textsec-restriction-level "C𝗂𝗋𝖼𝗅𝖾")
+ 'single-script))
+ (should (eq (textsec-restriction-level "切foo")
+ 'highly-restrictive))
+ (should (eq (textsec-restriction-level "հfoo")
+ 'moderately-retrictive))
+ (should (eq (textsec-restriction-level "Сirсlе")
+ 'unrestricted)))
+
+(ert-deftest test-mixed-numbers ()
+ (should-not (textsec-mixed-numbers-p "foo"))
+ (should-not (textsec-mixed-numbers-p "8foo8"))
+ (should-not (textsec-mixed-numbers-p "foo20@foo.org"))
+ (should (textsec-mixed-numbers-p "8foo৪")))
+
+(ert-deftest test-resolved ()
+ (should (equal (textsec-resolved-script-set "ljeto")
+ '(latin)))
+ (should-not (textsec-resolved-script-set "Сirсlе")))
+
+(ert-deftest test-confusable ()
+ (should (equal (textsec-unconfuse-string "ljeto") "ljeto"))
+ (should (textsec-ascii-confusable-p "ljeto"))
+ (should-not (textsec-ascii-confusable-p "ljeto"))
+ (should (equal (textsec-unconfuse-string "~") "〜"))
+ (should-not (textsec-ascii-confusable-p "~"))
+
+ (should (textsec-single-script-confusable-p "ljeto" "ljeto"))
+ (should-not (textsec-single-script-confusable-p "paypal" "pаypаl"))
+ (should-not (textsec-single-script-confusable-p "scope""ѕсоре"))
+
+ (should-not (textsec-mixed-script-confusable-p "ljeto" "ljeto"))
+ (should (textsec-mixed-script-confusable-p "paypal" "pаypаl"))
+ (should (textsec-mixed-script-confusable-p "scope""ѕсоре"))
+
+ (should-not (textsec-whole-script-confusable-p "ljeto" "ljeto"))
+ (should-not (textsec-whole-script-confusable-p "paypal" "pаypаl"))
+ (should (textsec-whole-script-confusable-p "scope""ѕсоре")))
+
+(ert-deftest test-suspiction-domain ()
+ (should (textsec-domain-suspicious-p "foo/bar.org"))
+ (should-not (textsec-domain-suspicious-p "foo.org"))
+ (should (textsec-domain-suspicious-p "f\N{LEFT-TO-RIGHT ISOLATE}oo.org"))
+
+ (should (textsec-domain-suspicious-p "Сгсе.ru"))
+ (should-not (textsec-domain-suspicious-p "фСгсе.ru"))
+
+ (should-not (textsec-domain-suspicious-p
+ "21a:34aa:c782:3ad2:1bf8:73f8:141:66e8"))
+ (should (textsec-domain-suspicious-p
+ "21a:34aa:c782:3ad2:1bf8:73f8:141:66e8:66e8"))
+ (should-not (textsec-domain-suspicious-p
+ "[21a:34aa:c782:3ad2:1bf8:73f8:141:66e8]"))
+ (should (textsec-domain-suspicious-p
+ "[21a:34aa:c782:3ad2:1bf8:73f8:141:66e8"))
+ (should-not (textsec-domain-suspicious-p "138.25.106.12"))
+ (should-not (textsec-domain-suspicious-p "2001:db8::ff00:42:8329"))
+ (should-not (textsec-domain-suspicious-p "::ffff:129.55.2.201")))
+
+(ert-deftest test-suspicious-local ()
+ (should-not (textsec-local-address-suspicious-p "larsi"))
+ (should (textsec-local-address-suspicious-p ".larsi"))
+ (should (textsec-local-address-suspicious-p "larsi."))
+ (should-not (textsec-local-address-suspicious-p "la.rsi"))
+ (should (textsec-local-address-suspicious-p "lar..si"))
+
+ (should-not (textsec-local-address-suspicious-p "LÅRSI"))
+ (should (textsec-local-address-suspicious-p "LÅRSI"))
+
+ (should (textsec-local-address-suspicious-p "larsi8৪")))
+
+(ert-deftest test-suspicious-name ()
+ (should-not (textsec-name-suspicious-p "Lars Ingebrigtsen"))
+ (should (textsec-name-suspicious-p "LÅRS INGEBRIGTSEN"))
+ (should-not (textsec-name-suspicious-p "LÅRS INGEBRIGTSEN"))
+
+ (should (textsec-name-suspicious-p
+ "Lars Ingebrigtsen\N{LEFT-TO-RIGHT OVERRIDE}"))
+ (should (textsec-name-suspicious-p
+ "Lars Ingebrigtsen\N{LEFT-TO-RIGHT OVERRIDE}f"))
+ (should-not (textsec-name-suspicious-p
+ "Lars Ingebrigtsen\N{LEFT-TO-RIGHT MARK}"))
+ (should-not (textsec-name-suspicious-p "אבגד ⁧שונה⁩ מרגיל"))
+
+ (should (textsec-name-suspicious-p
+ "\N{COMBINING GRAVE ACCENT}\N{COMBINING GRAVE ACCENT}Lars Ingebrigtsen"))
+ (should-not (textsec-name-suspicious-p
+ "\N{COMBINING GRAVE ACCENT}\N{COMBINING ENCLOSING CIRCLE}Lars Ingebrigtsen"))
+ (should (textsec-name-suspicious-p
+ "\N{COMBINING GRAVE ACCENT}\N{COMBINING ENCLOSING CIRCLE}\N{COMBINING GRAVE ACCENT}\N{COMBINING ENCLOSING CIRCLE}\N{COMBINING GRAVE ACCENT}Lars Ingebrigtsen")))
+
+(ert-deftest test-suspicious-email ()
+ (should-not
+ (textsec-email-address-header-suspicious-p
+ "Lars Ingebrigtsen <larsi@gnus.org>"))
+ (should
+ (textsec-email-address-header-suspicious-p
+ "LÅrs Ingebrigtsen <larsi@gnus.org>"))
+ (should
+ (textsec-email-address-header-suspicious-p
+ "Lars Ingebrigtsen <.larsi@gnus.org>"))
+ (should
+ (textsec-email-address-header-suspicious-p
+ "Lars Ingebrigtsen <larsi@gn\N{LEFT-TO-RIGHT ISOLATE}us.org>"))
+
+ (should
+ (textsec-email-address-header-suspicious-p
+ "Lars Ingebrigtsen <larsi@\N{RIGHT-TO-LEFT OVERRIDE}gnus.org>"))
+
+ (should-not (textsec-email-address-header-suspicious-p
+ "דגבא <foo@bar.com>"))
+
+ (should (textsec-email-address-suspicious-p
+ "Bob_Norbolwits@GCSsafetyACE.com​")))
+
+(ert-deftest test-suspicious-url ()
+ (should-not (textsec-url-suspicious-p "http://example.ru/bar"))
+ (should (textsec-url-suspicious-p "http://Сгсе.ru/bar")))
+
+(ert-deftest test-suspicious-link ()
+ (should-not (textsec-link-suspicious-p
+ (cons "https://gnu.org/" "Hello")))
+ (should-not (textsec-link-suspicious-p
+ (cons "https://gnu.org/" "https://gnu.org/")))
+ (should-not (textsec-link-suspicious-p
+ (cons "https://gnu.org/" "https://www.gnu.org/")))
+ (should-not (textsec-link-suspicious-p
+ (cons "https://www.gnu.org/" "https://gnu.org/")))
+ (should (textsec-link-suspicious-p
+ (cons "https://www.gnu.org/" "https://org/")))
+ (should (textsec-link-suspicious-p
+ (cons "https://www.gnu.org/" "https://fsf.org/")))
+ (should (textsec-link-suspicious-p
+ (cons "https://www.gnu.org/" "http://fsf.org/")))
+
+ (should (textsec-link-suspicious-p
+ (cons "https://www.gn\N{LEFT-TO-RIGHT ISOLATE}u.org/"
+ "https://gn\N{LEFT-TO-RIGHT ISOLATE}u.org"))))
+
+;;; textsec-tests.el ends here
diff --git a/test/lisp/international/ucs-normalize-tests.el b/test/lisp/international/ucs-normalize-tests.el
index 94bf77633e1..9e359d5022f 100644
--- a/test/lisp/international/ucs-normalize-tests.el
+++ b/test/lisp/international/ucs-normalize-tests.el
@@ -1,6 +1,6 @@
-;;; ucs-normalize --- tests for international/ucs-normalize.el -*- lexical-binding: t -*-
+;;; ucs-normalize-tests.el --- tests for international/ucs-normalize.el -*- lexical-binding: t -*-
-;; Copyright (C) 2002-2017 Free Software Foundation, Inc.
+;; Copyright (C) 2002-2022 Free Software Foundation, Inc.
;; This file is part of GNU Emacs.
@@ -123,9 +123,9 @@ The following invariants must be true for all conformant implementations..."
(defsubst ucs-normalize-tests--rule2-holds-p (X)
"Check 2nd conformance rule.
-For every code point X assigned in this version of Unicode that is not specifically
-listed in Part 1, the following invariants must be true for all conformant
-implementations:
+For every code point X assigned in this version of Unicode that
+is not specifically listed in Part 1, the following invariants
+must be true for all conformant implementations:
X == toNFC(X) == toNFD(X) == toNFKC(X) == toNFKD(X)"
(and (ucs-normalize-tests--normalization-chareq-p NFC X X)
@@ -181,26 +181,43 @@ implementations:
(should-not (ucs-normalize-tests--rule1-failing-for-partX 0)))
(defconst ucs-normalize-tests--failing-lines-part1
- (list 15131 15132 15133 15134 15135 15136 15137 15138
- 15139
- 16149 16150 16151 16152 16153 16154 16155 16156
- 16157 16158 16159 16160 16161 16162 16163 16164
- 16165 16166 16167 16168 16169 16170 16171 16172
- 16173 16174 16175 16176 16177 16178 16179 16180
- 16181 16182 16183 16184 16185 16186 16187 16188
- 16189 16190 16191 16192 16193 16194 16195 16196
- 16197 16198 16199 16200 16201 16202 16203 16204
- 16205 16206 16207 16208 16209 16210 16211 16212
- 16213 16214 16215 16216 16217 16218 16219 16220
- 16221 16222 16223 16224 16225 16226 16227 16228
- 16229 16230 16231 16232 16233 16234 16235 16236
- 16237 16238 16239 16240 16241 16242 16243 16244
- 16245 16246 16247 16248 16249 16250 16251 16252
- 16253 16254 16255 16256 16257 16258 16259 16260
- 16261 16262 16263 16264 16265 16266 16267 16268
- 16269 16270 16271 16272 16273 16274 16275 16276
- 16277 16278 16279 16280 16281 16282 16283 16284
- 16285 16286 16287 16288 16289))
+ (list 2412 2413 2414 15133 15134 15135 15136 15137
+ 15138 15139 15140 15141 15142 15143 15144 15145
+ 15146 15147 15148 15149 15150 15151 15152 15153
+ 15154 15155 15156 15157 15158 15159 15160 15161
+ 15162 15163 15164 15165 15166 15167 15168 15169
+ 15170 15171 15172 15173 15174 15175 15176 15177
+ 15178 15179 15180 15181 15182 15183 15184 15185
+ 15186 15187 15188 15192 15193 15194 15195 15196
+ 15197 15198 15199 15200 15201 16211 16212 16213
+ 16214 16215 16216 16217 16218 16219 16220 16221
+ 16222 16223 16224 16225 16226 16227 16228 16229
+ 16230 16231 16232 16233 16234 16235 16236 16237
+ 16238 16239 16240 16241 16242 16243 16244 16245
+ 16246 16247 16248 16249 16250 16251 16252 16253
+ 16254 16255 16256 16257 16258 16259 16260 16261
+ 16262 16263 16264 16265 16266 16267 16268 16269
+ 16270 16271 16272 16273 16274 16275 16276 16277
+ 16278 16279 16280 16281 16282 16283 16284 16285
+ 16286 16287 16288 16289 16290 16291 16292 16293
+ 16294 16295 16296 16297 16298 16299 16300 16301
+ 16302 16303 16304 16305 16306 16307 16308 16309
+ 16310 16311 16312 16313 16314 16315 16316 16317
+ 16318 16319 16320 16321 16322 16323 16324 16325
+ 16326 16327 16328 16329 16330 16331 16332 16333
+ 16334 16335 16336 16337 16338 16339 16340 16341
+ 16342 16343 16344 16345 16346 16347 16348 16349
+ 16350 16351 16352 16353 16354 16355 16356 16357
+ 16358 16359 16360 16361 16362 16363 16364 16365
+ 16366 16367 16368 16369 16370 16371 16372 16373
+ 16374 16375 16376 16377 16378 16379 16380 16381
+ 16382 16383 16384 16385 16386 16387 16388 16389
+ 16390 16391 16392 16393 16394 16395 16396 16397
+ 16398 16399 16400 16401 16402 16403 16404 16405
+ 16406 16407 16408 16409 16410 16411 16412 16413
+ 16550 16551 16552 16553 16554 16555 16556 16557
+ 16488 16489 16490 16491 16492 16493 16494 16495
+ 16496 16497 16558 16559))
;; Keep a record of failures, for consulting afterwards (the ert
;; backtrace only shows a truncated version of these lists).
@@ -232,12 +249,13 @@ implementations:
(ert-deftest ucs-normalize-part1 ()
:tags '(:expensive-test)
+ (skip-unless (not (getenv "EMACS_HYDRA_CI"))) ; SLOW ~ 1800s
;; This takes a long time, so make sure we're compiled.
(dolist (fun '(ucs-normalize-tests--part1-rule2
ucs-normalize-tests--rule1-failing-for-partX
ucs-normalize-tests--rule1-holds-p
ucs-normalize-tests--rule2-holds-p))
- (or (byte-code-function-p (symbol-function fun))
+ (or (compiled-function-p (symbol-function fun))
(byte-compile fun)))
(let ((ucs-normalize-tests--chars-part1 (make-char-table 'ucs-normalize-tests t)))
(setq ucs-normalize-tests--part1-rule1-failed-lines
@@ -258,21 +276,88 @@ implementations:
ucs-normalize-tests--failing-lines-part1)))
(defconst ucs-normalize-tests--failing-lines-part2
- (list 17656 17658 18006 18007 18008 18009 18010 18011
- 18012 18340 18342 18344 18346 18348 18350 18352
- 18354 18356 18358 18360 18362 18364 18366 18368
- 18370 18372 18374 18376 18378 18380 18382 18384
- 18386 18388 18390 18392 18394 18396 18398 18400
- 18402 18404 18406 18408 18410 18412 18414 18416
- 18418 18420 18422 18424 18426 18428 18430 18432
- 18434 18436 18438 18440 18442 18444 18446 18448
- 18450 18518 18520 18522 18524 18526 18528 18530
- 18532 18534 18536 18538 18540 18542 18544 18546
- 18548 18550 18552 18554 18556 18558 18560 18562
- 18564 18566 18568 18570 18572 18574 18576 18578
- 18580 18582 18584 18586 18588 18590 18592 18594
- 18596 18598 18600 18602 18604 18606 18608 18610
- 18612 18614 18616 18618 18620))
+ (list 17087 17088 17089 17090 17091 17092 17093 17094
+ 17098 17099 17100 17101 17102 17103 17104 17105
+ 17106 17107 17108 17113 17114 17115 17116 17117
+ 17118 17119 17120 17125 17126 17127 17128 17129
+ 17130 17131 17132 17133 17134 17135 17136 17137
+ 17138 17139 17140 17141 17142 17143 17144 17145
+ 17146 17157 17158 17159 17160 17161 17162 17163
+ 17164 17185 17186 17187 17188 17189 17190 17197
+ 17198 17199 17200 17207 17208 17209 17210 17211
+ 17212 17213 17214 17219 17220 17221 17222 17275
+ 17276 17285 17286 17295 17296 17309 17310 17311
+ 17312 17313 17314 17315 17316 17317 17318 17319
+ 17320 17325 17326 17373 17374 17419 17420 17421
+ 17422 17433 17434 17439 17440 17465 17466 17473
+ 17474 17479 17480 17485 17486 17491 17492 17497
+ 17498 17499 17500 17501 17502 17505 17506 17507
+ 17508 17511 17512 17519 17520 17523 17524 17527
+ 17528 17531 17532 17551 17552 17555 17556 17599
+ 17600 17601 17602 17603 17604 17605 17607 17608
+ 17609 17610 17611 17612 17613 17615 17617 17619
+ 17621 17623 17625 17627 17629 17631 17632 17633
+ 17634 17635 17636 17637 17638 17639 17640 17669
+ 17670 17675 17676 17681 17682 17689 17690 17691
+ 17692 17693 17694 17707 17708 17713 17714 17715
+ 17716 17727 17728 17733 17734 17739 17740 17745
+ 17746 17749 17750 17753 17754 17759 17760 17767
+ 17768 17789 17790 17801 17802 17807 17808 17809
+ 17810 17811 17812 17813 17814 17815 17816 17821
+ 17822 17829 17830 17843 17844 17845 17846 17851
+ 17852 17861 17875 17876 17879 17880 17899 17900
+ 17097 17907 17908 17911 17912 17913 17914 17915
+ 17916 17917 17918 17919 17920 17921 17922 17927
+ 17928 17929 17930 17931 17932 17933 17935 17937
+ 17938 17939 17940 17941 17943 17945 17947 17949
+ 17951 17952 17953 17955 17957 17959 17961 17962
+ 17967 17968 17987 17988 17993 17994 18003 18004
+ 18005 18006 18007 18008 18009 18010 18011 18012
+ 18017 18018 18019 18020 18021 18022 18023 18024
+ 18041 18042 18049 18050 18053 18054 18055 18056
+ 18069 18070 18079 18080 18163 18164 18165 18166
+ 18171 18172 18175 18176 18211 18212 18219 18220
+ 18221 18222 18223 18224 18225 18226 18301 18302
+ 18389 18390 18391 18392 18393 18394 18397 18398
+ 18407 18408 18439 18440 18441 18442 18443 18444
+ 18445 18446 18447 18448 18449 18450 18451 18452
+ 18457 18458 18459 18460 18471 18472 18479 18480
+ 18485 18486 18499 18500 18501 18502 18509 18510
+ 18513 18514 18515 18516 18517 18518 18519 18520
+ 18521 18523 18524 18525 18527 18528 18531 18537
+ 18538 18539 18541 18543 18545 18547 18549 18550
+ 18551 18553 18554 18555 18557 18558 18559 18560
+ 18561 18562 18563 18564 18565 18566 18567 18569
+ 18571 18573 18575 18577 18579 18581 18583 18585
+ 18587 18589 18591 18593 18595 18596 18597 18599
+ 18601 18602 18603 18605 18606 18607 18609 18611
+ 18612 18613 18615 18617 18618 18619 18621 18622
+ 18623 18624 18625 18626 18627 18628 18629 18631
+ 18632 18633 18634 18635 18636 18637 18639 18641
+ 18643 18645 18647 18649 18651 18653 18655 18657
+ 18659 18661 18663 18664 18665 18667 18668 18669
+ 18670 18671 18673 18674 18675 18676 18677 18679
+ 18680 18681 18683 18685 18686 18687 18688 18689
+ 18690 18691 18692 18693 18694 18695 18696 18697
+ 18698 18699 18700 18701 18702 18703 18704 18705
+ 18706 18707 18708 18709 18710 18711 18712 18713
+ 18714 18715 18717 18719 18721 18722 18723 18724
+ 18725 18727 18729 18731 18733 18735 18737 18739
+ 18740 18741 18742 18743 18745 18747 18749 18751
+ 18753 18755 18757 18759 18761 18763 18765 18767
+ 18769 18771 18773 18775 18777 18779 18781 18783
+ 18785 18787 18789 18791 18793 18795 18797 18799
+ 18801 18803 18805 18807 18809 18811 18813 18815
+ 18817 18819 18821 18823 18825 18827 18829 18831
+ 18833 18835 18837 18839 18840 18841 18842 18843
+ 18844 18845 18846 18847 18848 18849 18850 18851
+ 18852 18853 18855 18857 18859 18861 18863 18865
+ 18866 18867 18869 18871 18873 18875 18877 18879
+ 18881 18883 18885 18887 18888 18889 18891 18893
+ 18895 18897 18899 18901 18903 18905 18907 18909
+ 18911 18913 18914 18915 18916 18917 18918 18919
+ 18920 18921 18923 18925 18927 18929 18931 18933
+ 18935 18937 18939 18941 18943 18945 18947 18948))
(ert-deftest ucs-normalize-part2 ()
:tags '(:expensive-test)
@@ -299,7 +384,7 @@ implementations:
(list " var var))
(dolist (linos (seq-partition newval 8))
(insert (mapconcat #'number-to-string linos " ") "\n"))
- (insert ")\)"))
+ (insert "))"))
(defun ucs-normalize-check-failing-lines ()
(interactive)
@@ -333,4 +418,15 @@ implementations:
(display-buffer (current-buffer)))
(message "No changes to failing lines needed"))))
+(ert-deftest ucs-normalize-save-match-data ()
+ "Verify that match data isn't clobbered (bug#41445)"
+ (string-match (rx (+ digit)) "a47b")
+ (should (equal (match-data t) '(1 3)))
+ (should (equal
+ (decode-coding-string
+ (encode-coding-string "Käsesoßenrührlöffel" 'utf-8-hfs)
+ 'utf-8-hfs)
+ "Käsesoßenrührlöffel"))
+ (should (equal (match-data t) '(1 3))))
+
;;; ucs-normalize-tests.el ends here