From 269cec13a2fc6ac18b675d0dadd07a3d4e074a72 Mon Sep 17 00:00:00 2001 From: Lars Ingebrigtsen Date: Fri, 25 Dec 2020 05:16:46 +0100 Subject: Remove `string-slice' -- it's not very well defined * doc/lispref/strings.texi (Creating Strings): Ditto. * lisp/emacs-lisp/subr-x.el (string-slice): Remove. --- lisp/emacs-lisp/shortdoc.el | 3 --- 1 file changed, 3 deletions(-) (limited to 'lisp/emacs-lisp/shortdoc.el') diff --git a/lisp/emacs-lisp/shortdoc.el b/lisp/emacs-lisp/shortdoc.el index 0067495fea0..618465513da 100644 --- a/lisp/emacs-lisp/shortdoc.el +++ b/lisp/emacs-lisp/shortdoc.el @@ -154,9 +154,6 @@ There can be any number of :example/:result elements." :eval (split-string "foo bar") :eval (split-string "|foo|bar|" "|") :eval (split-string "|foo|bar|" "|" t)) - (string-slice - :eval (string-slice "foo-bar" "-") - :eval (string-slice "foo-bar--zot-" "-+")) (string-lines :eval (string-lines "foo\n\nbar") :eval (string-lines "foo\n\nbar" t)) -- cgit v1.2.3 From af359de91772478587f768300ca61d64a693fedb Mon Sep 17 00:00:00 2001 From: Lars Ingebrigtsen Date: Fri, 25 Dec 2020 05:58:09 +0100 Subject: Allow `string-limit' to work on encoded strings * doc/lispref/strings.texi (Creating Strings): Document it. * lisp/emacs-lisp/subr-x.el (string-limit): Allow limiting on encoded strings. --- doc/lispref/strings.texi | 9 ++++++++- lisp/emacs-lisp/shortdoc.el | 3 ++- lisp/emacs-lisp/subr-x.el | 34 +++++++++++++++++++++++++++++----- test/lisp/emacs-lisp/subr-x-tests.el | 20 ++++++++++++++++++++ 4 files changed, 59 insertions(+), 7 deletions(-) (limited to 'lisp/emacs-lisp/shortdoc.el') diff --git a/doc/lispref/strings.texi b/doc/lispref/strings.texi index 19b91471ed3..1e5f52ddfdd 100644 --- a/doc/lispref/strings.texi +++ b/doc/lispref/strings.texi @@ -394,12 +394,19 @@ there are individual words that are longer than @var{length}, these will not be shortened. 
@end defun -@defun string-limit string length &optional end +@defun string-limit string length &optional end coding-system If @var{string} is shorter than @var{length}, @var{string} is returned as is. Otherwise, return a substring of @var{string} consisting of the first @var{length} characters. If the optional @var{end} parameter is given, return a string of the @var{length} last characters instead. + +If @var{coding-system} is non-@code{nil}, @var{string} will be encoded +before limiting, and the result will be a unibyte string that's +no longer than @var{length} bytes. If @var{string} contains characters that +are encoded into several bytes (for instance, when using +@code{utf-8}), the resulting unibyte string is never truncated in the +middle of a character representation. @end defun @defun string-lines string &optional omit-nulls diff --git a/lisp/emacs-lisp/shortdoc.el b/lisp/emacs-lisp/shortdoc.el index 618465513da..9d183e0d4e9 100644 --- a/lisp/emacs-lisp/shortdoc.el +++ b/lisp/emacs-lisp/shortdoc.el @@ -146,7 +146,8 @@ There can be any number of :example/:result elements." (string-limit :eval (string-limit "foobar" 3) :eval (string-limit "foobar" 3 t) - :eval (string-limit "foobar" 10)) + :eval (string-limit "foobar" 10) + :eval (string-limit "fo好" 3 nil 'utf-8)) (truncate-string-to-width :eval (truncate-string-to-width "foobar" 3) :eval (truncate-string-to-width "你好bar" 5)) diff --git a/lisp/emacs-lisp/subr-x.el b/lisp/emacs-lisp/subr-x.el index dc5840a0865..9fbb0351af4 100644 --- a/lisp/emacs-lisp/subr-x.el +++ b/lisp/emacs-lisp/subr-x.el @@ -286,7 +286,7 @@ result will have lines that are longer than LENGTH." (fill-region (point-min) (point-max))) (buffer-string))) -(defun string-limit (string length &optional end) +(defun string-limit (string length &optional end coding-system) "Return (up to) a LENGTH substring of STRING. If STRING is shorter than or equal to LENGTH, the entire string is returned unchanged. 
@@ -295,15 +295,39 @@ If STRING is longer than LENGTH, return a substring consisting of the first LENGTH characters of STRING. If END is non-nil, return the last LENGTH characters instead. +If CODING-SYSTEM is non-nil, STRING will be encoded before +limiting, and LENGTH is interpreted as the number of bytes to +limit the string to. The result will be a unibyte string that is +no longer than LENGTH, but will not contain \"partial\" characters, +even if CODING-SYSTEM encodes characters with several bytes per +character. + When shortening strings for display purposes, `truncate-string-to-width' is almost always a better alternative than this function." (unless (natnump length) (signal 'wrong-type-argument (list 'natnump length))) - (cond - ((<= (length string) length) string) - (end (substring string (- (length string) length))) - (t (substring string 0 length)))) + (if coding-system + (let ((result nil) + (result-length 0) + (index (if end (1- (length string)) 0))) + (while (let ((encoded (encode-coding-char + (aref string index) coding-system))) + (and (<= (+ (length encoded) result-length) length) + (progn + (push encoded result) + (cl-incf result-length (length encoded)) + (setq index (if end (1- index) + (1+ index)))) + (if end (> index -1) + (< index (length string))))) + ;; No body. + ) + (apply #'concat (if end result (nreverse result)))) + (cond + ((<= (length string) length) string) + (end (substring string (- (length string) length))) + (t (substring string 0 length))))) (defun string-lines (string &optional omit-nulls) "Split STRING into a list of lines. 
diff --git a/test/lisp/emacs-lisp/subr-x-tests.el b/test/lisp/emacs-lisp/subr-x-tests.el index 2ae492ecf15..b17185ab0d3 100644 --- a/test/lisp/emacs-lisp/subr-x-tests.el +++ b/test/lisp/emacs-lisp/subr-x-tests.el @@ -600,6 +600,26 @@ (should (equal (string-limit "foo" 0) "")) (should-error (string-limit "foo" -1))) +(ert-deftest subr-string-limit-coding () + (should (not (multibyte-string-p (string-limit "foó" 10 nil 'utf-8)))) + (should (equal (string-limit "foó" 10 nil 'utf-8) "fo\303\263")) + (should (equal (string-limit "foó" 3 nil 'utf-8) "fo")) + (should (equal (string-limit "foó" 4 nil 'utf-8) "fo\303\263")) + (should (equal (string-limit "foóa" 4 nil 'utf-8) "fo\303\263")) + (should (equal (string-limit "foóá" 4 nil 'utf-8) "fo\303\263")) + (should (equal (string-limit "foóa" 4 nil 'iso-8859-1) "fo\363a")) + (should (equal (string-limit "foóá" 4 nil 'iso-8859-1) "fo\363\341")) + (should (equal (string-limit "foóá" 4 nil 'utf-16) "\376\377\000f")) + + (should (equal (string-limit "foó" 10 t 'utf-8) "fo\303\263")) + (should (equal (string-limit "foó" 3 t 'utf-8) "o\303\263")) + (should (equal (string-limit "foó" 4 t 'utf-8) "fo\303\263")) + (should (equal (string-limit "foóa" 4 t 'utf-8) "o\303\263a")) + (should (equal (string-limit "foóá" 4 t 'utf-8) "\303\263\303\241")) + (should (equal (string-limit "foóa" 4 t 'iso-8859-1) "fo\363a")) + (should (equal (string-limit "foóá" 4 t 'iso-8859-1) "fo\363\341")) + (should (equal (string-limit "foóá" 4 t 'utf-16) "\376\377\000\341"))) + (ert-deftest subr-string-lines () (should (equal (string-lines "foo") '("foo"))) (should (equal (string-lines "foo \nbar") '("foo " "bar")))) -- cgit v1.2.3 From 0f790464d547dd57a857d88dab309b286067ac45 Mon Sep 17 00:00:00 2001 From: Lars Ingebrigtsen Date: Sun, 27 Dec 2020 09:00:23 +0100 Subject: Add new predicates for sequence lengths * doc/lispref/sequences.texi (Sequence Functions): Document them. 
* lisp/emacs-lisp/byte-opt.el (side-effect-free-fns): Mark them as side-effect-free. * lisp/emacs-lisp/shortdoc.el (list): Mention them. * src/fns.c (Flength): Mention them in the doc string. (length_internal): New function. (Flength_less, Flength_greater, Flength_equal): New defuns. (syms_of_fns): Sym them. --- doc/lispref/sequences.texi | 15 +++++++++ etc/NEWS | 6 ++++ lisp/emacs-lisp/byte-opt.el | 4 ++- lisp/emacs-lisp/shortdoc.el | 6 ++++ src/fns.c | 76 ++++++++++++++++++++++++++++++++++++++++++++- test/src/fns-tests.el | 30 ++++++++++++++++++ 6 files changed, 135 insertions(+), 2 deletions(-) (limited to 'lisp/emacs-lisp/shortdoc.el') diff --git a/doc/lispref/sequences.texi b/doc/lispref/sequences.texi index 952834bd4e3..57b49847e7f 100644 --- a/doc/lispref/sequences.texi +++ b/doc/lispref/sequences.texi @@ -116,6 +116,21 @@ If you need to compute the width of a string on display, you should use since @code{length} only counts the number of characters, but does not account for the display width of each character. +@defun length< sequence length +Return non-@code{nil} if @var{sequence} is shorter than @var{length}. +This may be more efficient than computing the length of @var{sequence} +if @var{sequence} is a long list. +@end defun + +@defun length> sequence length +Return non-@code{nil} if @var{sequence} is longer than @var{length}. +@end defun + +@defun length= sequence length +Return non-@code{nil} if the length of @var{sequence} is equal to +@var{length}. +@end defun + @defun elt sequence index @anchor{Definition of elt} @cindex elements of sequences diff --git a/etc/NEWS b/etc/NEWS index d24d8b1f0a2..9ae8cc91d63 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1460,6 +1460,12 @@ that makes it a valid button. ** Miscellaneous ++++ +*** New predicate functions 'length<', 'length>' and 'length='. +Using these functions may be more efficient than using 'length' (if +the length of a (long) list is being computed just to compare this +length to a number). 
+ --- *** 'remove-hook' is now an interactive command. diff --git a/lisp/emacs-lisp/byte-opt.el b/lisp/emacs-lisp/byte-opt.el index 469bbe6c7c0..0eee6e9d015 100644 --- a/lisp/emacs-lisp/byte-opt.el +++ b/lisp/emacs-lisp/byte-opt.el @@ -1169,7 +1169,9 @@ hash-table-count int-to-string intern-soft isnan keymap-parent - lax-plist-get ldexp length line-beginning-position line-end-position + lax-plist-get ldexp + length length< length> length= + line-beginning-position line-end-position local-variable-if-set-p local-variable-p locale-info log log10 logand logb logcount logior lognot logxor lsh make-byte-code make-list make-string make-symbol marker-buffer max diff --git a/lisp/emacs-lisp/shortdoc.el b/lisp/emacs-lisp/shortdoc.el index 9d183e0d4e9..c6259f89711 100644 --- a/lisp/emacs-lisp/shortdoc.el +++ b/lisp/emacs-lisp/shortdoc.el @@ -618,6 +618,12 @@ There can be any number of :example/:result elements." "Data About Lists" (length :eval (length '(a b c))) + (length< + :eval (lenth< '(a b c) 1)) + (length> + :eval (lenth> '(a b c) 1)) + (length= + :eval (lenth> '(a b c) 3)) (safe-length :eval (safe-length '(a b c)))) diff --git a/src/fns.c b/src/fns.c index 646c3ed0834..0fded92aeb2 100644 --- a/src/fns.c +++ b/src/fns.c @@ -105,9 +105,14 @@ list_length (Lisp_Object list) DEFUN ("length", Flength, Slength, 1, 1, 0, doc: /* Return the length of vector, list or string SEQUENCE. A byte-code function object is also allowed. + If the string contains multibyte characters, this is not necessarily the number of bytes in the string; it is the number of characters. -To get the number of bytes, use `string-bytes'. */) +To get the number of bytes, use `string-bytes'. + +If the length of a list is being computed to compare to a (small) +number, the `length<', `length>' and `length=' functions may be more +efficient. */) (Lisp_Object sequence) { EMACS_INT val; @@ -145,6 +150,72 @@ least the number of distinct elements. 
*/) return make_fixnum (len); } +static inline +EMACS_INT length_internal (Lisp_Object sequence, int len) +{ + /* If LENGTH is short (arbitrarily chosen cut-off point), use a + fast loop that doesn't care about whether SEQUENCE is + circular or not. */ + if (len < 0xffff) + while (CONSP (sequence)) + { + if (--len == 0) + return -1; + sequence = XCDR (sequence); + } + /* Signal an error on circular lists. */ + else + FOR_EACH_TAIL (sequence) + if (--len == 0) + return -1; + return len; +} + +DEFUN ("length<", Flength_less, Slength_less, 2, 2, 0, + doc: /* Return non-nil if SEQUENCE is shorter than LENGTH. +See `length' for allowed values of SEQUENCE and how elements are +counted. */) + (Lisp_Object sequence, Lisp_Object length) +{ + CHECK_FIXNUM (length); + EMACS_INT len = XFIXNUM (length); + + if (CONSP (sequence)) + return length_internal (sequence, len) == -1? Qnil: Qt; + else + return XFIXNUM (Flength (sequence)) < len? Qt: Qnil; +} + +DEFUN ("length>", Flength_greater, Slength_greater, 2, 2, 0, + doc: /* Return non-nil if SEQUENCE is longer than LENGTH. +See `length' for allowed values of SEQUENCE and how elements are +counted. */) + (Lisp_Object sequence, Lisp_Object length) +{ + CHECK_FIXNUM (length); + EMACS_INT len = XFIXNUM (length); + + if (CONSP (sequence)) + return length_internal (sequence, len + 1) == -1? Qt: Qnil; + else + return XFIXNUM (Flength (sequence)) > len? Qt: Qnil; +} + +DEFUN ("length=", Flength_equal, Slength_equal, 2, 2, 0, + doc: /* Return non-nil if SEQUENCE is equal to LENGTH. +See `length' for allowed values of SEQUENCE and how elements are +counted. */) + (Lisp_Object sequence, Lisp_Object length) +{ + CHECK_FIXNUM (length); + EMACS_INT len = XFIXNUM (length); + + if (CONSP (sequence)) + return length_internal (sequence, len + 1) == 1? Qt: Qnil; + else + return XFIXNUM (Flength (sequence)) == len? 
Qt: Qnil; +} + DEFUN ("proper-list-p", Fproper_list_p, Sproper_list_p, 1, 1, 0, doc: /* Return OBJECT's length if it is a proper list, nil otherwise. A proper list is neither circular nor dotted (i.e., its last cdr is nil). */ @@ -5721,6 +5792,9 @@ this variable. */); defsubr (&Srandom); defsubr (&Slength); defsubr (&Ssafe_length); + defsubr (&Slength_less); + defsubr (&Slength_greater); + defsubr (&Slength_equal); defsubr (&Sproper_list_p); defsubr (&Sstring_bytes); defsubr (&Sstring_distance); diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el index eaa569e0d95..3486c745bf3 100644 --- a/test/src/fns-tests.el +++ b/test/src/fns-tests.el @@ -999,3 +999,33 @@ (object-intervals (current-buffer))) '((0 1 (foo 1)) (1 2 (zot 3 foo 1)) (2 4 (zot 3 bar 2)) (4 5 (bar 2)) (5 6 nil))))) + +(ert-deftest length-equals-tests () + (should-not (length< (list 1 2 3) 2)) + (should-not (length< (list 1 2 3) 3)) + (should (length< (list 1 2 3) 4)) + + (should-not (length< "abc" 2)) + (should-not (length< "abc" 3)) + (should (length< "abc" 4)) + + (should (length> (list 1 2 3) 2)) + (should-not (length> (list 1 2 3) 3)) + (should-not (length> (list 1 2 3) 4)) + + (should (length> "abc" 2)) + (should-not (length> "abc" 3)) + (should-not (length> "abc" 4)) + + (should-not (length= (list 1 2 3) 2)) + (should (length= (list 1 2 3) 3)) + (should-not (length= (list 1 2 3) 4)) + + (should-not (length= "abc" 2)) + (should (length= "abc" 3)) + (should-not (length= "abc" 4)) + + (should-error + (let ((list (list 1))) + (setcdr list list) + (length< list #x1fffe)))) -- cgit v1.2.3 From df882c9701755e2ae063f05d3381de14ae09951e Mon Sep 17 00:00:00 2001 From: "Basil L. Contovounesios" Date: Sun, 27 Dec 2020 13:14:30 +0000 Subject: ; Fix recent shortdoc.el and fns.c additions * lisp/emacs-lisp/shortdoc.el (list): Fix typos. * src/fns.c (Flength_equal): Fix docstring. 
--- lisp/emacs-lisp/shortdoc.el | 6 +++--- src/fns.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'lisp/emacs-lisp/shortdoc.el') diff --git a/lisp/emacs-lisp/shortdoc.el b/lisp/emacs-lisp/shortdoc.el index c6259f89711..7fb1a88b861 100644 --- a/lisp/emacs-lisp/shortdoc.el +++ b/lisp/emacs-lisp/shortdoc.el @@ -619,11 +619,11 @@ There can be any number of :example/:result elements." (length :eval (length '(a b c))) (length< - :eval (lenth< '(a b c) 1)) + :eval (length< '(a b c) 1)) (length> - :eval (lenth> '(a b c) 1)) + :eval (length> '(a b c) 1)) (length= - :eval (lenth> '(a b c) 3)) + :eval (length= '(a b c) 3)) (safe-length :eval (safe-length '(a b c)))) diff --git a/src/fns.c b/src/fns.c index 217e3b62cca..2de1d26dd31 100644 --- a/src/fns.c +++ b/src/fns.c @@ -202,7 +202,7 @@ counted. */) } DEFUN ("length=", Flength_equal, Slength_equal, 2, 2, 0, - doc: /* Return non-nil if SEQUENCE has length equal to LENGTH. + doc: /* Return non-nil if SEQUENCE has length equal to LENGTH. See `length' for allowed values of SEQUENCE and how elements are counted. */) (Lisp_Object sequence, Lisp_Object length) -- cgit v1.2.3