diff options
Diffstat (limited to 'test/automated/decoder-tests.el')
-rw-r--r-- | test/automated/decoder-tests.el | 349 |
1 files changed, 0 insertions, 349 deletions
;; Diff metadata carried by the page this text was taken from:
;; diff --git a/test/automated/decoder-tests.el b/test/automated/decoder-tests.el
;; deleted file mode 100644
;; index 5699fec7d17..00000000000
;; --- a/test/automated/decoder-tests.el
;; +++ /dev/null
;; @@ -1,349 +0,0 @@

;;; decoder-tests.el --- test for text decoder

;; Copyright (C) 2013-2016 Free Software Foundation, Inc.

;; Author: Kenichi Handa <handa@gnu.org>

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.

;;; Code:

(require 'ert)

(defvar decoder-tests-workdir
  (expand-file-name "decoder-tests" temporary-file-directory)
  "Directory to hold test data files.")

(defun decoder-tests-remove-files ()
  "Remove all generated test files.
Delete `decoder-tests-workdir' recursively.  Do nothing when that
directory does not exist: this function runs as the cleanup form
of `unwind-protect' in the tests, and signaling from there would
hide the error that made the test fail in the first place."
  (when (file-directory-p decoder-tests-workdir)
    (delete-directory decoder-tests-workdir t)))

(defun decoder-tests-file-contents (content-type)
  "Return the contents of a test file of type CONTENT-TYPE.
CONTENT-TYPE is one of the symbols `ascii', `latin' or `binary'.
Signal an error for any other value."
  (let* ((ascii "ABCDEFGHIJKLMNOPQRSTUVWXYZ\n")
         (latin (concat ascii "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏ\n"))
         ;; The bytes of LATIN followed by the bytes #xC0 #xC1 and a
         ;; newline, converted back to a multibyte string.
         (binary (string-to-multibyte
                  (concat (string-as-unibyte latin)
                          (unibyte-string #xC0 #xC1 ?\n)))))
    (cond ((eq content-type 'ascii) ascii)
          ((eq content-type 'latin) latin)
          ((eq content-type 'binary) binary)
          (t
           (error "Invalid file content type: %s" content-type)))))

;; Generate FILE with CONTENTS encoded by CODING-SYSTEM.
(defun decoder-tests-gen-file (file contents coding-system)
  "Write CONTENTS to FILE encoded by CODING-SYSTEM; return the file name.
FILE is expanded relative to `decoder-tests-workdir', which is
created first if it does not exist yet.  The value is the
expanded name."
  (unless (file-directory-p decoder-tests-workdir)
    (make-directory decoder-tests-workdir t))
  (let ((path (expand-file-name file decoder-tests-workdir)))
    (with-temp-file path
      (set-buffer-file-coding-system coding-system)
      (insert contents))
    path))

;;; The following three functions are filters for contents of a test
;;; file.

(defun decoder-tests--replace-lf (str replacement)
  "Return a copy of STR with every LF replaced by the string REPLACEMENT.
The replacement is done in a temporary buffer."
  (with-temp-buffer
    (insert str)
    (goto-char (point-min))
    (while (search-forward "\n" nil t)
      (replace-match replacement t t))
    (buffer-string)))

(defun decoder-tests-lf-to-crlf (str)
  "Convert all LFs to CR LF sequences in the string STR."
  (decoder-tests--replace-lf str "\r\n"))

(defun decoder-tests-lf-to-cr (str)
  "Convert all LFs to CRs in the string STR."
  (decoder-tests--replace-lf str "\r"))

(defun decoder-tests-lf-to-lflf (str)
  "Convert all LFs to LF LF sequences in the string STR."
  (decoder-tests--replace-lf str "\n\n"))

(defun decoder-tests-add-bom (str)
  "Prepend the BOM character (U+FEFF) to STR."
  (concat "\xfeff" str))

(defun decoder-tests-filename (content-type coding-system &optional ext)
  "Return the name of the test file for CONTENT-TYPE and CODING-SYSTEM.
The name is \"CONTENT-TYPE-CODING-SYSTEM\", with \".EXT\" appended
when EXT is non-nil, expanded in `decoder-tests-workdir'."
  (let ((base (format "%s-%s" content-type coding-system)))
    (expand-file-name (if ext
                          (format "%s.%s" base ext)
                        base)
                      decoder-tests-workdir)))


;;; Check ASCII optimizing decoder

;; Generate a test file whose contents specified by CONTENT-TYPE and
;; whose encoding specified by CODING-SYSTEM.
(defun decoder-tests-ao-gen-file (content-type coding-system)
  "Generate the test file for CONTENT-TYPE encoded by CODING-SYSTEM.
The file name comes from `decoder-tests-filename' and the
contents from `decoder-tests-file-contents'.  Return the value
of `decoder-tests-gen-file'."
  (let ((file (decoder-tests-filename content-type coding-system)))
    (decoder-tests-gen-file file
                            (decoder-tests-file-contents content-type)
                            coding-system)))

(defun decoder-tests (content-type write-coding read-coding detected-coding
                                   &optional translator)
  "Test decoding of the file for CONTENT-TYPE written with WRITE-CODING.
The file is read with `coding-system-for-read' bound to
READ-CODING.  The test passes, and the value is nil, if
`buffer-file-coding-system' is then `coding-system-equal' to
DETECTED-CODING and the buffer text equals the expected contents.
Otherwise the value is a list of the detected coding system, the
characters read, and the characters expected.

Optional 5th arg TRANSLATOR is a function to translate the
original file contents to match with the expected result of
decoding.  For instance, when a file of dos eol-type is read by
unix eol-type, `decoder-tests-lf-to-crlf' must be specified."
  ;; NOTE(review): this preference is not undone afterwards.
  (prefer-coding-system 'utf-8-auto)
  (let ((filename (decoder-tests-filename content-type write-coding)))
    (with-temp-buffer
      (let ((coding-system-for-read read-coding)
            (contents (decoder-tests-file-contents content-type))
            (disable-ascii-optimization nil))
        (when translator
          (setq contents (funcall translator contents)))
        (insert-file-contents filename)
        (unless (and (coding-system-equal buffer-file-coding-system
                                          detected-coding)
                     (string= (buffer-string) contents))
          (list buffer-file-coding-system
                (string-to-list (buffer-string))
                (string-to-list contents)))))))

(ert-deftest ert-test-decoder-ascii ()
  (unwind-protect
      (progn
        (dolist (eol-type '(unix dos mac))
          (decoder-tests-ao-gen-file 'ascii eol-type))
        (should-not (decoder-tests 'ascii 'unix 'undecided 'unix))
        (should-not (decoder-tests 'ascii 'dos 'undecided 'dos))
        (should-not (decoder-tests 'ascii 'dos 'dos 'dos))
        (should-not (decoder-tests 'ascii 'mac 'undecided 'mac))
        (should-not (decoder-tests 'ascii 'mac 'mac 'mac))
        (should-not (decoder-tests 'ascii 'dos 'utf-8 'utf-8-dos))
        (should-not (decoder-tests 'ascii 'dos 'unix 'unix
                                   'decoder-tests-lf-to-crlf))
        (should-not (decoder-tests 'ascii 'mac 'dos 'dos
                                   'decoder-tests-lf-to-cr))
        (should-not (decoder-tests 'ascii 'dos 'mac 'mac
                                   'decoder-tests-lf-to-lflf)))
    (decoder-tests-remove-files)))

(ert-deftest ert-test-decoder-latin ()
  (unwind-protect
      (progn
        (dolist (coding '("utf-8" "utf-8-with-signature"))
          (dolist (eol-type '("unix" "dos" "mac"))
            (decoder-tests-ao-gen-file 'latin
                                       (intern (concat coding "-" eol-type)))))
        (should-not (decoder-tests 'latin 'utf-8-unix 'undecided 'utf-8-unix))
        (should-not (decoder-tests 'latin 'utf-8-unix 'utf-8-unix 'utf-8-unix))
        (should-not (decoder-tests 'latin 'utf-8-dos 'undecided 'utf-8-dos))
        (should-not (decoder-tests 'latin 'utf-8-dos 'utf-8-dos 'utf-8-dos))
        (should-not (decoder-tests 'latin 'utf-8-mac 'undecided 'utf-8-mac))
        (should-not (decoder-tests 'latin 'utf-8-mac 'utf-8-mac 'utf-8-mac))
        (should-not (decoder-tests 'latin 'utf-8-dos 'unix 'utf-8-unix
                                   'decoder-tests-lf-to-crlf))
        (should-not (decoder-tests 'latin 'utf-8-mac 'dos 'utf-8-dos
                                   'decoder-tests-lf-to-cr))
        (should-not (decoder-tests 'latin 'utf-8-dos 'mac 'utf-8-mac
                                   'decoder-tests-lf-to-lflf))
        (should-not (decoder-tests 'latin 'utf-8-with-signature-unix 'undecided
                                   'utf-8-with-signature-unix))
        (should-not (decoder-tests 'latin 'utf-8-with-signature-unix 'utf-8-auto
                                   'utf-8-with-signature-unix))
        (should-not (decoder-tests 'latin 'utf-8-with-signature-dos 'undecided
                                   'utf-8-with-signature-dos))
        ;; NOTE(review): this assertion used to appear twice, verbatim.
        ;; The copy was dropped; if a different case was intended
        ;; there, it still needs to be written.
        (should-not (decoder-tests 'latin 'utf-8-with-signature-unix 'utf-8
                                   'utf-8-unix 'decoder-tests-add-bom)))
    (decoder-tests-remove-files)))

(ert-deftest ert-test-decoder-binary ()
  (unwind-protect
      (progn
        (dolist (eol-type '("unix" "dos" "mac"))
          (decoder-tests-ao-gen-file 'binary
                                     (intern (concat "raw-text" "-" eol-type))))
        (should-not (decoder-tests 'binary 'raw-text-unix 'undecided
                                   'raw-text-unix))
        (should-not (decoder-tests 'binary 'raw-text-dos 'undecided
                                   'raw-text-dos))
        (should-not (decoder-tests 'binary 'raw-text-mac 'undecided
                                   'raw-text-mac))
        (should-not (decoder-tests 'binary 'raw-text-dos 'unix
                                   'raw-text-unix 'decoder-tests-lf-to-crlf))
        (should-not (decoder-tests 'binary 'raw-text-mac 'dos
                                   'raw-text-dos 'decoder-tests-lf-to-cr))
        (should-not (decoder-tests 'binary 'raw-text-dos 'mac
                                   'raw-text-mac 'decoder-tests-lf-to-lflf)))
    (decoder-tests-remove-files)))


;;; Check the coding system `prefer-utf-8'.

(defun decoder-tests-prefer-utf-8-read (file detect prefer)
  "Read FILE and check that its encoding was detected as DETECT.
If PREFER is non-nil, prefer that coding system while reading.
Return nil on success, otherwise a string describing the coding
system that was detected instead."
  (with-temp-buffer
    (with-coding-priority (if prefer (list prefer))
      (insert-file-contents file))
    (unless (eq buffer-file-coding-system detect)
      (format "Invalid detection: %s" buffer-file-coding-system))))

;; Read FILE, modify it, and write it.  Check if the coding system
;; used for writing was CODING.  If CODING-TAG is non-nil, insert
;; coding tag with it before writing.  If STR is non-nil, insert it
;; before writing.
(defun decoder-tests-prefer-utf-8-write (file coding-tag coding
                                              &optional str)
  "Read FILE, modify it, write it, and check the coding system used.
If CODING-TAG is non-nil, insert a coding tag naming it at the
beginning of the buffer before writing; otherwise insert a plain
comment line.  If STR is non-nil, insert it after that line.
The buffer is written to the file named by
\(decoder-tests-filename \\='test \\='test \"el\").

Return nil if `buffer-file-coding-system' after writing is
`coding-system-equal' to CODING, otherwise a string describing
the coding system found."
  (with-temp-buffer
    (insert-file-contents file)
    (goto-char (point-min))
    (if coding-tag
        (insert (format ";; -*- coding: %s; -*-\n" coding-tag))
      (insert ";;\n"))
    (when str
      (insert str))
    (write-file (decoder-tests-filename 'test 'test "el"))
    (unless (coding-system-equal buffer-file-coding-system coding)
      ;; Report the value that was actually compared, so that the
      ;; failure message cannot disagree with the check.
      (format "Incorrect encoding: %s" buffer-file-coding-system))))

(ert-deftest ert-test-decoder-prefer-utf-8 ()
  (unwind-protect
      (let ((ascii (decoder-tests-gen-file "ascii.el"
                                           (decoder-tests-file-contents 'ascii)
                                           'unix))
            (latin (decoder-tests-gen-file "utf-8.el"
                                           (decoder-tests-file-contents 'latin)
                                           'utf-8-unix)))
        (should-not (decoder-tests-prefer-utf-8-read
                     ascii 'prefer-utf-8-unix nil))
        (should-not (decoder-tests-prefer-utf-8-read
                     latin 'utf-8-unix nil))
        (should-not (decoder-tests-prefer-utf-8-read
                     latin 'utf-8-unix 'iso-8859-1))
        (should-not (decoder-tests-prefer-utf-8-read
                     latin 'utf-8-unix 'sjis))
        (should-not (decoder-tests-prefer-utf-8-write
                     ascii nil 'prefer-utf-8-unix))
        (should-not (decoder-tests-prefer-utf-8-write
                     ascii 'iso-8859-1 'iso-8859-1-unix))
        (should-not (decoder-tests-prefer-utf-8-write
                     ascii nil 'utf-8-unix "À")))
    (decoder-tests-remove-files)))


;;; The following is for benchmark testing of the new optimized
;;; decoder, not for regression testing.
(defun generate-ascii-file ()
  "Insert 100000 lines of 80 `a' characters into the current buffer."
  (dotimes (_line 100000)
    (insert-char ?a 80)
    (insert "\n")))

(defun generate-rarely-nonascii-file ()
  "Insert 100000 lines of 80 characters into the current buffer.
Every line consists of `a' characters only, except the one
generated at index 50000, which starts with a single `À'."
  (dotimes (line 100000)
    (if (= line 50000)
        (progn
          (insert ?À)
          (insert-char ?a 79))
      (insert-char ?a 80))
    (insert "\n")))

(defun generate-mostly-nonascii-file ()
  "Insert 60000 lines of 80 characters into the current buffer.
The first 30000 lines consist of `a', the next 20000 of `À', and
the last 10000 of `あ'."
  (dolist (spec '((30000 . ?a) (20000 . ?À) (10000 . ?あ)))
    (dotimes (_line (car spec))
      (insert-char (cdr spec) 80)
      (insert "\n"))))


(defvar test-file-list
  '((generate-ascii-file
     ("~/ascii-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" unix)
     ("~/ascii-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" unix)
     ("~/ascii-tag-none.unix" "" unix)
     ("~/ascii-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" dos)
     ("~/ascii-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" dos)
     ("~/ascii-tag-none.dos" "" dos))
    (generate-rarely-nonascii-file
     ("~/utf-8-r-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" utf-8-unix)
     ("~/utf-8-r-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" utf-8-unix)
     ("~/utf-8-r-tag-none.unix" "" utf-8-unix)
     ("~/utf-8-r-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" utf-8-dos)
     ("~/utf-8-r-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" utf-8-dos)
     ("~/utf-8-r-tag-none.dos" "" utf-8-dos))
    (generate-mostly-nonascii-file
     ("~/utf-8-m-tag-utf-8-unix.unix" ";; -*- coding: utf-8-unix; -*-" utf-8-unix)
     ("~/utf-8-m-tag-utf-8.unix" ";; -*- coding: utf-8; -*-" utf-8-unix)
     ("~/utf-8-m-tag-none.unix" "" utf-8-unix)
     ("~/utf-8-m-tag-utf-8-dos.dos" ";; -*- coding: utf-8-dos; -*-" utf-8-dos)
     ("~/utf-8-m-tag-utf-8.dos" ";; -*- coding: utf-8; -*-" utf-8-dos)
     ("~/utf-8-m-tag-none.dos" "" utf-8-dos)))
  "Benchmark files to generate and read.
Each element has the form (GENERATOR ENTRY...), where GENERATOR
is a function that inserts the file body into the current buffer
and each ENTRY is a list (FILE TAG CODING): the file to write,
the first line to put in it, and the coding system to write it
with.")

(defun generate-benchmark-test-file ()
  "Write every file described by `test-file-list'."
  (interactive)
  (with-temp-buffer
    (message "Generating data...")
    (dolist (group test-file-list)
      (let ((generator (car group))
            (entries (cdr group)))
        (delete-region (point-min) (point-max))
        (funcall generator)
        (dolist (entry entries)
          (let ((path (nth 0 entry))
                (tag (nth 1 entry))
                (coding (nth 2 entry)))
            (message "Writing %s..." path)
            ;; Put this entry's tag line in front of the shared body.
            (goto-char (point-min))
            (insert tag "\n")
            (let ((coding-system-for-write coding))
              (write-region (point-min) (point-max) path))
            ;; Point is still just after the tag line; remove the tag
            ;; again so the next entry starts from the bare body.
            (delete-region (point-min) (point))))))))

(defun benchmark-decoder ()
  "Insert benchmark results for reading the files of `test-file-list'.
Each file is read 10 times with `disable-ascii-optimization'
bound to t, then 10 times with it bound to nil, and the result of
`benchmark-run' is inserted into the current buffer after each."
  (let ((gc-cons-threshold 4000000))
    (dolist (pass '(("Without optimization:\n" . t)
                    ("With optimization:\n" . nil)))
      (insert (car pass))
      (let ((disable-ascii-optimization (cdr pass)))
        (dolist (group test-file-list)
          (dolist (entry (cdr group))
            (let ((result (benchmark-run 10
                            (with-temp-buffer
                              (insert-file-contents (car entry))))))
              (insert (format "%s: %s\n" (car entry) result)))))))))