diff options
Diffstat (limited to 'test/lisp/legacy/decoder-tests.el')
-rw-r--r-- | test/lisp/legacy/decoder-tests.el | 349 |
1 files changed, 349 insertions, 0 deletions
diff --git a/test/lisp/legacy/decoder-tests.el b/test/lisp/legacy/decoder-tests.el new file mode 100644 index 00000000000..80ff5205ac5 --- /dev/null +++ b/test/lisp/legacy/decoder-tests.el @@ -0,0 +1,349 @@ +;;; decoder-tests.el --- test for text decoder + +;; Copyright (C) 2013-2015 Free Software Foundation, Inc. + +;; Author: Kenichi Handa <handa@gnu.org> + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. + +;;; Code: + +(require 'ert) + +;; Directory to hold test data files. +(defvar decoder-tests-workdir + (expand-file-name "decoder-tests" temporary-file-directory)) + +;; Remove all generated test files. +(defun decoder-tests-remove-files () + (delete-directory decoder-tests-workdir t)) + +;; Return the contents (specified by CONTENT-TYPE; ascii, latin, or +;; binary) of a test file. +(defun decoder-tests-file-contents (content-type) + (let* ((ascii "ABCDEFGHIJKLMNOPQRSTUVWXYZ\n") + (latin (concat ascii "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ\n")) + (binary (string-to-multibyte + (concat (string-as-unibyte latin) + (unibyte-string #xC0 #xC1 ?\n))))) + (cond ((eq content-type 'ascii) ascii) + ((eq content-type 'latin) latin) + ((eq content-type 'binary) binary) + (t + (error "Invalid file content type: %s" content-type))))) + +;; Generate FILE with CONTENTS encoded by CODING-SYSTEM. +;; whose encoding specified by CODING-SYSTEM. +(defun decoder-tests-gen-file (file contents coding-system) + (or (file-directory-p decoder-tests-workdir) + (mkdir decoder-tests-workdir t)) + (setq file (expand-file-name file decoder-tests-workdir)) + (with-temp-file file + (set-buffer-file-coding-system coding-system) + (insert contents)) + file) + +;;; The following three functions are filters for contents of a test +;;; file. + +;; Convert all LFs to CR LF sequences in the string STR. +(defun decoder-tests-lf-to-crlf (str) + (with-temp-buffer + (insert str) + (goto-char (point-min)) + (while (search-forward "\n" nil t) + (delete-char -1) + (insert "\r\n")) + (buffer-string))) + +;; Convert all LFs to CRs in the string STR. +(defun decoder-tests-lf-to-cr (str) + (with-temp-buffer + (insert str) + (subst-char-in-region (point-min) (point-max) ?\n ?\r) + (buffer-string))) + +;; Convert all LFs to LF LF sequences in the string STR. +(defun decoder-tests-lf-to-lflf (str) + (with-temp-buffer + (insert str) + (goto-char (point-min)) + (while (search-forward "\n" nil t) + (insert "\n")) + (buffer-string))) + +;; Prepend the UTF-8 BOM to STR. +(defun decoder-tests-add-bom (str) + (concat "\xfeff" str)) + +;; Return the name of test file whose contents specified by +;; CONTENT-TYPE and whose encoding specified by CODING-SYSTEM. +(defun decoder-tests-filename (content-type coding-system &optional ext) + (if ext + (expand-file-name (format "%s-%s.%s" content-type coding-system ext) + decoder-tests-workdir) + (expand-file-name (format "%s-%s" content-type coding-system) + decoder-tests-workdir))) + + +;;; Check ASCII optimizing decoder + +;; Generate a test file whose contents specified by CONTENT-TYPE and +;; whose encoding specified by CODING-SYSTEM. +(defun decoder-tests-ao-gen-file (content-type coding-system) + (let ((file (decoder-tests-filename content-type coding-system))) + (decoder-tests-gen-file file + (decoder-tests-file-contents content-type) + coding-system))) + +;; Test the decoding of a file whose contents and encoding are +;; specified by CONTENT-TYPE and WRITE-CODING. The test passes if the +;; file is read by READ-CODING and detected as DETECTED-CODING and the +;; contents is correctly decoded. +;; Optional 5th arg TRANSLATOR is a function to translate the original +;; file contents to match with the expected result of decoding. For +;; instance, when a file of dos eol-type is read by unix eol-type, +;; `decode-test-lf-to-crlf' must be specified. + +(defun decoder-tests (content-type write-coding read-coding detected-coding + &optional translator) + (prefer-coding-system 'utf-8-auto) + (let ((filename (decoder-tests-filename content-type write-coding))) + (with-temp-buffer + (let ((coding-system-for-read read-coding) + (contents (decoder-tests-file-contents content-type)) + (disable-ascii-optimization nil)) + (if translator + (setq contents (funcall translator contents))) + (insert-file-contents filename) + (if (and (coding-system-equal buffer-file-coding-system detected-coding) + (string= (buffer-string) contents)) + nil + (list buffer-file-coding-system + (string-to-list (buffer-string)) + (string-to-list contents))))))) + +(ert-deftest ert-test-decoder-ascii () + (unwind-protect + (progn + (dolist (eol-type '(unix dos mac)) + (decoder-tests-ao-gen-file 'ascii eol-type)) + (should-not (decoder-tests 'ascii 'unix 'undecided 'unix)) + (should-not (decoder-tests 'ascii 'dos 'undecided 'dos)) + (should-not (decoder-tests 'ascii 'dos 'dos 'dos)) + (should-not (decoder-tests 'ascii 'mac 'undecided 'mac)) + (should-not (decoder-tests 'ascii 'mac 'mac 'mac)) + (should-not (decoder-tests 'ascii 'dos 'utf-8 'utf-8-dos)) + (should-not (decoder-tests 'ascii 'dos 'unix 'unix + 'decoder-tests-lf-to-crlf)) + (should-not (decoder-tests 'ascii 'mac 'dos 'dos + 'decoder-tests-lf-to-cr)) + (should-not (decoder-tests 'ascii 'dos 'mac 'mac + 'decoder-tests-lf-to-lflf))) + (decoder-tests-remove-files))) + +(ert-deftest ert-test-decoder-latin () + (unwind-protect + (progn + (dolist (coding '("utf-8" "utf-8-with-signature")) + (dolist (eol-type '("unix" "dos" "mac")) + (decoder-tests-ao-gen-file 'latin + (intern (concat coding "-" eol-type))))) + (should-not (decoder-tests 'latin 'utf-8-unix 'undecided 'utf-8-unix)) + (should-not (decoder-tests 'latin 'utf-8-unix 'utf-8-unix 'utf-8-unix)) + (should-not (decoder-tests 'latin 'utf-8-dos 'undecided 'utf-8-dos)) + (should-not (decoder-tests 'latin 'utf-8-dos 'utf-8-dos 'utf-8-dos)) + (should-not (decoder-tests 'latin 'utf-8-mac 'undecided 'utf-8-mac)) + (should-not (decoder-tests 'latin 'utf-8-mac 'utf-8-mac 'utf-8-mac)) + (should-not (decoder-tests 'latin 'utf-8-dos 'unix 'utf-8-unix + 'decoder-tests-lf-to-crlf)) + (should-not (decoder-tests 'latin 'utf-8-mac 'dos 'utf-8-dos + 'decoder-tests-lf-to-cr)) + (should-not (decoder-tests 'latin 'utf-8-dos 'mac 'utf-8-mac + 'decoder-tests-lf-to-lflf)) + (should-not (decoder-tests 'latin 'utf-8-with-signature-unix 'undecided + 'utf-8-with-signature-unix)) + (should-not (decoder-tests 'latin 'utf-8-with-signature-unix 'utf-8-auto + 'utf-8-with-signature-unix)) + (should-not (decoder-tests 'latin 'utf-8-with-signature-dos 'undecided + 'utf-8-with-signature-dos)) + (should-not (decoder-tests 'latin 'utf-8-with-signature-unix 'utf-8 + 'utf-8-unix 'decoder-tests-add-bom)) + (should-not (decoder-tests 'latin 'utf-8-with-signature-unix 'utf-8 + 'utf-8-unix 'decoder-tests-add-bom))) + (decoder-tests-remove-files))) + +(ert-deftest ert-test-decoder-binary () + (unwind-protect + (progn + (dolist (eol-type '("unix" "dos" "mac")) + (decoder-tests-ao-gen-file 'binary + (intern (concat "raw-text" "-" eol-type)))) + (should-not (decoder-tests 'binary 'raw-text-unix 'undecided + 'raw-text-unix)) + (should-not (decoder-tests 'binary 'raw-text-dos 'undecided + 'raw-text-dos)) + (should-not (decoder-tests 'binary 'raw-text-mac 'undecided + 'raw-text-mac)) + (should-not (decoder-tests 'binary 'raw-text-dos 'unix + 'raw-text-unix 'decoder-tests-lf-to-crlf)) + (should-not (decoder-tests 'binary 'raw-text-mac 'dos + 'raw-text-dos 'decoder-tests-lf-to-cr)) + (should-not (decoder-tests 'binary 'raw-text-dos 'mac + 'raw-text-mac 'decoder-tests-lf-to-lflf))) + (decoder-tests-remove-files))) + + +;;; Check the coding system `prefer-utf-8'. + +;; Read FILE. Check if the encoding was detected as DETECT. If +;; PREFER is non-nil, prefer that coding system before reading. + +(defun decoder-tests-prefer-utf-8-read (file detect prefer) + (with-temp-buffer + (with-coding-priority (if prefer (list prefer)) + (insert-file-contents file)) + (if (eq buffer-file-coding-system detect) + nil + (format "Invalid detection: %s" buffer-file-coding-system)))) + +;; Read FILE, modify it, and write it. Check if the coding system +;; used for writing was CODING. If CODING-TAG is non-nil, insert +;; coding tag with it before writing. If STR is non-nil, insert it +;; before writing. + +(defun decoder-tests-prefer-utf-8-write (file coding-tag coding + &optional str) + (with-temp-buffer + (insert-file-contents file) + (goto-char (point-min)) + (if coding-tag + (insert (format ";; -*- coding: %s; -*-\n" coding-tag)) + (insert ";;\n")) + (if str + (insert str)) + (write-file (decoder-tests-filename 'test 'test "el")) + (if (coding-system-equal buffer-file-coding-system coding) + nil + (format "Incorrect encoding: %s" last-coding-system-used)))) + +(ert-deftest ert-test-decoder-prefer-utf-8 () + (unwind-protect + (let ((ascii (decoder-tests-gen-file "ascii.el" + (decoder-tests-file-contents 'ascii) + 'unix)) + (latin (decoder-tests-gen-file "utf-8.el" + (decoder-tests-file-contents 'latin) + 'utf-8-unix))) + (should-not (decoder-tests-prefer-utf-8-read + ascii 'prefer-utf-8-unix nil)) + (should-not (decoder-tests-prefer-utf-8-read + latin 'utf-8-unix nil)) + (should-not (decoder-tests-prefer-utf-8-read + latin 'utf-8-unix 'iso-8859-1)) + (should-not (decoder-tests-prefer-utf-8-read + latin 'utf-8-unix 'sjis)) + (should-not (decoder-tests-prefer-utf-8-write + ascii nil 'prefer-utf-8-unix)) + (should-not (decoder-tests-prefer-utf-8-write + ascii 'iso-8859-1 'iso-8859-1-unix)) + (should-not (decoder-tests-prefer-utf-8-write + ascii nil 'utf-8-unix "À"))) + (decoder-tests-remove-files))) + + +;;; The following is for benchmark testing of the new optimized +;;; decoder, not for regression testing. + +(defun generate-ascii-file () + (dotimes (i 100000) + (insert-char ?a 80) + (insert "\n"))) + +(defun generate-rarely-nonascii-file () + (dotimes (i 100000) + (if (/= i 50000) + (insert-char ?a 80) + (insert ?À) + (insert-char ?a 79)) + (insert "\n"))) + +(defun generate-mostly-nonascii-file () + (dotimes (i 30000) + (insert-char ?a 80) + (insert "\n")) + (dotimes (i 20000) + (insert-char ?À 80) + (insert "\n")) + (dotimes (i 10000) + (insert-char ?あ 80) + (insert "\n"))) + + +(defvar test-file-list + '((generate-ascii-file + ("~/ascii-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" unix) + ("~/ascii-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" unix) + ("~/ascii-tag-none.unix" "" unix) + ("~/ascii-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" dos) + ("~/ascii-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" dos) + ("~/ascii-tag-none.dos" "" dos)) + (generate-rarely-nonascii-file + ("~/utf-8-r-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" utf-8-unix) + ("~/utf-8-r-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" utf-8-unix) + ("~/utf-8-r-tag-none.unix" "" utf-8-unix) + ("~/utf-8-r-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" utf-8-dos) + ("~/utf-8-r-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" utf-8-dos) + ("~/utf-8-r-tag-none.dos" "" utf-8-dos)) + (generate-mostly-nonascii-file + ("~/utf-8-m-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" utf-8-unix) + ("~/utf-8-m-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" utf-8-unix) + ("~/utf-8-m-tag-none.unix" "" utf-8-unix) + ("~/utf-8-m-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" utf-8-dos) + ("~/utf-8-m-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" utf-8-dos) + ("~/utf-8-m-tag-none.dos" "" utf-8-dos)))) + +(defun generate-benchmark-test-file () + (interactive) + (with-temp-buffer + (message "Generating data...") + (dolist (files test-file-list) + (delete-region (point-min) (point-max)) + (funcall (car files)) + (dolist (file (cdr files)) + (message "Writing %s..." (car file)) + (goto-char (point-min)) + (insert (nth 1 file) "\n") + (let ((coding-system-for-write (nth 2 file))) + (write-region (point-min) (point-max) (car file))) + (delete-region (point-min) (point)))))) + +(defun benchmark-decoder () + (let ((gc-cons-threshold 4000000)) + (insert "Without optimization:\n") + (dolist (files test-file-list) + (dolist (file (cdr files)) + (let* ((disable-ascii-optimization t) + (result (benchmark-run 10 + (with-temp-buffer (insert-file-contents (car file)))))) + (insert (format "%s: %s\n" (car file) result))))) + (insert "With optimization:\n") + (dolist (files test-file-list) + (dolist (file (cdr files)) + (let* ((disable-ascii-optimization nil) + (result (benchmark-run 10 + (with-temp-buffer (insert-file-contents (car file)))))) + (insert (format "%s: %s\n" (car file) result))))))) |