From 98e5639c3c3caf2424f35e4a9f9c53ff48f43897 Mon Sep 17 00:00:00 2001
From: Eli Zaretskii <eliz@gnu.org>
Date: Wed, 12 May 2021 16:41:03 +0300
Subject: Fix the tests for 'string-limit'

* test/lisp/emacs-lisp/subr-x-tests.el (subr-string-limit-coding):
Fix the expected results of string-limit when encoding with
UTF-16.  Add tests for UTF-8 with BOM.  (Bug#48324)

* lisp/emacs-lisp/subr-x.el (string-limit): Add FIXME comment
about the current implementation, which is faulty by design.
---
 lisp/emacs-lisp/subr-x.el | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'lisp/emacs-lisp/subr-x.el')

diff --git a/lisp/emacs-lisp/subr-x.el b/lisp/emacs-lisp/subr-x.el
index 9c8c967ee9c..5a8885c0427 100644
--- a/lisp/emacs-lisp/subr-x.el
+++ b/lisp/emacs-lisp/subr-x.el
@@ -289,6 +289,18 @@ than this function."
       (let ((result nil)
             (result-length 0)
             (index (if end (1- (length string)) 0)))
+        ;; FIXME: This implementation, which uses encode-coding-char
+        ;; to encode the string one character at a time, is in general
+        ;; incorrect: coding-systems that produce prefix or suffix
+        ;; bytes, such as ISO-2022-based or UTF-8/16 with BOM, will
+        ;; produce those bytes for each character, instead of just
+        ;; once for the entire string.  encode-coding-char attempts to
+        ;; remove those extra bytes at least in some situations, but
+        ;; it cannot do that in all cases.  And in any case, producing
+        ;; what is supposed to be a UTF-16 or ISO-2022-CN encoded
+        ;; string which lacks the BOM bytes at the beginning and the
+        ;; charset designation sequences at the head and tail of the
+        ;; result will definitely surprise the callers in some cases.
         (while (let ((encoded (encode-coding-char
                                (aref string index) coding-system)))
                  (and (<= (+ (length encoded) result-length) length)
-- 
cgit v1.2.3