summaryrefslogtreecommitdiff
path: root/lisp/emacs-lisp
diff options
context:
space:
mode:
author: Eli Zaretskii <eliz@gnu.org> 2021-05-12 16:41:03 +0300
committer: Eli Zaretskii <eliz@gnu.org> 2021-05-12 16:41:03 +0300
commit: 98e5639c3c3caf2424f35e4a9f9c53ff48f43897 (patch)
tree: a1ba556a1bae6974fea6453919cd09ad48cdcf48 /lisp/emacs-lisp
parent: d2034296a906bf444472c02b958dba21cbaae762 (diff)
download: emacs-98e5639c3c3caf2424f35e4a9f9c53ff48f43897.tar.gz
emacs-98e5639c3c3caf2424f35e4a9f9c53ff48f43897.tar.bz2
emacs-98e5639c3c3caf2424f35e4a9f9c53ff48f43897.zip
Fix the tests for 'string-limit'
* test/lisp/emacs-lisp/subr-x-tests.el (subr-string-limit-coding):
Fix the expected results of string-limit when encoding with UTF-16.
Add tests for UTF-8 with BOM.  (Bug#48324)

* lisp/emacs-lisp/subr-x.el (string-limit): Add FIXME comment about
the current implementation, which is faulty by design.
Diffstat (limited to 'lisp/emacs-lisp')
-rw-r--r-- lisp/emacs-lisp/subr-x.el | 12
1 file changed, 12 insertions, 0 deletions
diff --git a/lisp/emacs-lisp/subr-x.el b/lisp/emacs-lisp/subr-x.el
index 9c8c967ee9c..5a8885c0427 100644
--- a/lisp/emacs-lisp/subr-x.el
+++ b/lisp/emacs-lisp/subr-x.el
@@ -289,6 +289,18 @@ than this function."
(let ((result nil)
(result-length 0)
(index (if end (1- (length string)) 0)))
+ ;; FIXME: This implementation, which uses encode-coding-char
+ ;; to encode the string one character at a time, is in general
+ ;; incorrect: coding-systems that produce prefix or suffix
+ ;; bytes, such as ISO-2022-based or UTF-8/16 with BOM, will
+ ;; produce those bytes for each character, instead of just
+ ;; once for the entire string. encode-coding-char attempts to
+ ;; remove those extra bytes at least in some situations, but
+ ;; it cannot do that in all cases. And in any case, producing
+ ;; what is supposed to be a UTF-16 or ISO-2022-CN encoded
+ ;; string which lacks the BOM bytes at the beginning and the
+ ;; charset designation sequences at the head and tail of the
+ ;; result will definitely surprise the callers in some cases.
(while (let ((encoded (encode-coding-char
(aref string index) coding-system)))
(and (<= (+ (length encoded) result-length) length)