summaryrefslogtreecommitdiff
path: root/src/casefiddle.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/casefiddle.c')
-rw-r--r--src/casefiddle.c218
1 files changed, 127 insertions, 91 deletions
diff --git a/src/casefiddle.c b/src/casefiddle.c
index 8f564edeb95..2ea5f09b4c5 100644
--- a/src/casefiddle.c
+++ b/src/casefiddle.c
@@ -1,7 +1,7 @@
/* -*- coding: utf-8 -*- */
/* GNU Emacs case conversion functions.
-Copyright (C) 1985, 1994, 1997-1999, 2001-2017 Free Software Foundation,
+Copyright (C) 1985, 1994, 1997-1999, 2001-2022 Free Software Foundation,
Inc.
This file is part of GNU Emacs.
@@ -54,6 +54,9 @@ struct casing_context
/* Whether the context is within a word. */
bool inword;
+
+ /* What the last operation was. */
+ bool downcase_last;
};
/* Initialize CTX structure for casing characters. */
@@ -133,9 +136,9 @@ case_character_impl (struct casing_str_buf *buf,
struct Lisp_String *str = XSTRING (prop);
if (STRING_BYTES (str) <= sizeof buf->data)
{
- buf->len_chars = str->size;
+ buf->len_chars = str->u.s.size;
buf->len_bytes = STRING_BYTES (str);
- memcpy (buf->data, str->data, buf->len_bytes);
+ memcpy (buf->data, str->u.s.data, buf->len_bytes);
return 1;
}
}
@@ -143,16 +146,20 @@ case_character_impl (struct casing_str_buf *buf,
/* Handle simple, one-to-one case. */
if (flag == CASE_DOWN)
- cased = downcase (ch);
+ {
+ cased = downcase (ch);
+ ctx->downcase_last = true;
+ }
else
{
bool cased_is_set = false;
+ ctx->downcase_last = false;
if (!NILP (ctx->titlecase_char_table))
{
prop = CHAR_TABLE_REF (ctx->titlecase_char_table, ch);
if (CHARACTERP (prop))
{
- cased = XFASTINT (prop);
+ cased = XFIXNAT (prop);
cased_is_set = true;
}
}
@@ -220,16 +227,23 @@ case_character (struct casing_str_buf *buf, struct casing_context *ctx,
return changed;
}
+/* If C is not ASCII, make it unibyte. */
+static inline int
+make_char_unibyte (int c)
+{
+ return ASCII_CHAR_P (c) ? c : CHAR_TO_BYTE8 (c);
+}
+
static Lisp_Object
do_casify_natnum (struct casing_context *ctx, Lisp_Object obj)
{
int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
| CHAR_SHIFT | CHAR_CTL | CHAR_META);
- int ch = XFASTINT (obj);
+ int ch = XFIXNAT (obj);
/* If the character has higher bits set above the flags, return it unchanged.
It is not a real character. */
- if (UNSIGNED_CMP (ch, >, flagbits))
+ if (! (0 <= ch && ch <= flagbits))
return obj;
int flags = ch & flagbits;
@@ -243,14 +257,14 @@ do_casify_natnum (struct casing_context *ctx, Lisp_Object obj)
|| !NILP (BVAR (current_buffer,
enable_multibyte_characters)));
if (! multibyte)
- MAKE_CHAR_MULTIBYTE (ch);
+ ch = make_char_multibyte (ch);
int cased = case_single_character (ctx, ch);
if (cased == ch)
return obj;
if (! multibyte)
- MAKE_CHAR_UNIBYTE (cased);
- return make_natnum (cased | flags);
+ cased = make_char_unibyte (cased);
+ return make_fixed_natnum (cased | flags);
}
static Lisp_Object
@@ -278,7 +292,7 @@ do_casify_multibyte_string (struct casing_context *ctx, Lisp_Object obj)
{
if (dst_end - o < sizeof (struct casing_str_buf))
string_overflow ();
- int ch = STRING_CHAR_ADVANCE (src);
+ int ch = string_char_advance (&src);
case_character ((struct casing_str_buf *) o, ctx, ch,
size > 1 ? src : NULL);
n += ((struct casing_str_buf *) o)->len_chars;
@@ -290,6 +304,16 @@ do_casify_multibyte_string (struct casing_context *ctx, Lisp_Object obj)
return obj;
}
+static int
+ascii_casify_character (bool downcase, int c)
+{
+ Lisp_Object cased = CHAR_TABLE_REF (downcase?
+ uniprop_table (Qlowercase) :
+ uniprop_table (Quppercase),
+ c);
+ return FIXNATP (cased) ? XFIXNAT (cased) : c;
+}
+
static Lisp_Object
do_casify_unibyte_string (struct casing_context *ctx, Lisp_Object obj)
{
@@ -299,16 +323,16 @@ do_casify_unibyte_string (struct casing_context *ctx, Lisp_Object obj)
obj = Fcopy_sequence (obj);
for (i = 0; i < size; i++)
{
- ch = SREF (obj, i);
- MAKE_CHAR_MULTIBYTE (ch);
+ ch = make_char_multibyte (SREF (obj, i));
cased = case_single_character (ctx, ch);
if (ch == cased)
continue;
- MAKE_CHAR_UNIBYTE (cased);
- /* If the char can't be converted to a valid byte, just don't
- change it. */
- if (cased >= 0 && cased < 256)
- SSET (obj, i, cased);
+ /* If down/upcasing changed an ASCII character into a non-ASCII
+ character (this can happen in some locales, like the Turkish
+ "I"), downcase using the ASCII char table. */
+ if (ASCII_CHAR_P (ch) && !SINGLE_BYTE_CHAR_P (cased))
+ cased = ascii_casify_character (ctx->downcase_last, ch);
+ SSET (obj, i, make_char_unibyte (cased));
}
return obj;
}
@@ -319,7 +343,7 @@ casify_object (enum case_action flag, Lisp_Object obj)
struct casing_context ctx;
prepare_casing_context (&ctx, flag, false);
- if (NATNUMP (obj))
+ if (FIXNATP (obj))
return do_casify_natnum (&ctx, obj);
else if (!STRINGP (obj))
wrong_type_argument (Qchar_or_string_p, obj);
@@ -333,10 +357,13 @@ casify_object (enum case_action flag, Lisp_Object obj)
DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
doc: /* Convert argument to upper case and return that.
-The argument may be a character or string. The result has the same type.
+The argument may be a character or string. The result has the same
+type. (See `downcase' for further details about the type.)
+
The argument object is not altered--the value is a copy. If argument
is a character, characters which map to multiple code points when
cased, e.g. fi, are returned unchanged.
+
See also `capitalize', `downcase' and `upcase-initials'. */)
(Lisp_Object obj)
{
@@ -345,7 +372,15 @@ See also `capitalize', `downcase' and `upcase-initials'. */)
DEFUN ("downcase", Fdowncase, Sdowncase, 1, 1, 0,
doc: /* Convert argument to lower case and return that.
-The argument may be a character or string. The result has the same type.
+The argument may be a character or string. The result has the same type,
+including the multibyteness of the string.
+
+This means that if this function is called with a unibyte string
+argument, and downcasing it would turn it into a multibyte string
+(according to the current locale), the downcasing is done using ASCII
+\"C\" rules instead. To accurately downcase according to the current
+locale, the string must be converted into multibyte first.
+
The argument object is not altered--the value is a copy. */)
(Lisp_Object obj)
{
@@ -356,7 +391,10 @@ DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
doc: /* Convert argument to capitalized form and return that.
This means that each word's first character is converted to either
title case or upper case, and the rest to lower case.
-The argument may be a character or string. The result has the same type.
+
+The argument may be a character or string. The result has the same
+type. (See `downcase' for further details about the type.)
+
The argument object is not altered--the value is a copy. If argument
is a character, characters which map to multiple code points when
cased, e.g. fi, are returned unchanged. */)
@@ -371,7 +409,10 @@ DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
doc: /* Convert the initial of each word in the argument to upper case.
This means that each word's first character is converted to either
title case or upper case, and the rest are left unchanged.
-The argument may be a character or string. The result has the same type.
+
+The argument may be a character or string. The result has the same
+type. (See `downcase' for further details about the type.)
+
The argument object is not altered--the value is a copy. If argument
is a character, characters which map to multiple code points when
cased, e.g. fi, are returned unchanged. */)
@@ -397,9 +438,7 @@ do_casify_unibyte_region (struct casing_context *ctx,
for (ptrdiff_t pos = *startp; pos < end; ++pos)
{
- int ch = FETCH_BYTE (pos);
- MAKE_CHAR_MULTIBYTE (ch);
-
+ int ch = make_char_multibyte (FETCH_BYTE (pos));
int cased = case_single_character (ctx, ch);
if (cased == ch)
continue;
@@ -408,8 +447,7 @@ do_casify_unibyte_region (struct casing_context *ctx,
if (first < 0)
first = pos;
- MAKE_CHAR_UNIBYTE (cased);
- FETCH_BYTE (pos) = cased;
+ FETCH_BYTE (pos) = make_char_unibyte (cased);
}
*startp = first;
@@ -433,8 +471,7 @@ do_casify_multibyte_region (struct casing_context *ctx,
for (; size; --size)
{
- int len;
- int ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (pos_byte), len);
+ int len, ch = string_char_and_length (BYTE_POS_ADDR (pos_byte), &len);
struct casing_str_buf buf;
if (!case_character (&buf, ctx, ch,
size > 1 ? BYTE_POS_ADDR (pos_byte + len) : NULL))
@@ -485,8 +522,8 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
struct casing_context ctx;
validate_region (&b, &e);
- ptrdiff_t start = XFASTINT (b);
- ptrdiff_t end = XFASTINT (e);
+ ptrdiff_t start = XFIXNAT (b);
+ ptrdiff_t end = XFIXNAT (e);
if (start == end)
/* Not modifying because nothing marked. */
return end;
@@ -516,34 +553,43 @@ casify_region (enum case_action flag, Lisp_Object b, Lisp_Object e)
return orig_end + added;
}
-DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 3,
- "(list (region-beginning) (region-end) (region-noncontiguous-p))",
- doc: /* Convert the region to upper case. In programs, wants two arguments.
-These arguments specify the starting and ending character numbers of
-the region to operate on. When used as a command, the text between
-point and the mark is operated on.
-See also `capitalize-region'. */)
- (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
-{
- Lisp_Object bounds = Qnil;
+/* Casify a possibly noncontiguous region according to FLAG. BEG and
+ END specify the bounds, except that if REGION_NONCONTIGUOUS_P is
+ non-nil, the region's bounds are specified by (funcall
+ region-extract-function 'bounds) instead. */
+static Lisp_Object
+casify_pnc_region (enum case_action flag, Lisp_Object beg, Lisp_Object end,
+ Lisp_Object region_noncontiguous_p)
+{
if (!NILP (region_noncontiguous_p))
{
- bounds = call1 (Fsymbol_value (intern ("region-extract-function")),
- intern ("bounds"));
-
- while (CONSP (bounds))
+ Lisp_Object bounds = call1 (Vregion_extract_function, Qbounds);
+ FOR_EACH_TAIL (bounds)
{
- casify_region (CASE_UP, XCAR (XCAR (bounds)), XCDR (XCAR (bounds)));
- bounds = XCDR (bounds);
+ CHECK_CONS (XCAR (bounds));
+ casify_region (flag, XCAR (XCAR (bounds)), XCDR (XCAR (bounds)));
}
+ CHECK_LIST_END (bounds, bounds);
}
else
- casify_region (CASE_UP, beg, end);
+ casify_region (flag, beg, end);
return Qnil;
}
+DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 3,
+ "(list (region-beginning) (region-end) (region-noncontiguous-p))",
+ doc: /* Convert the region to upper case. In programs, wants two arguments.
+These arguments specify the starting and ending character numbers of
+the region to operate on. When used as a command, the text between
+point and the mark is operated on.
+See also `capitalize-region'. */)
+ (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
+{
+ return casify_pnc_region (CASE_UP, beg, end, region_noncontiguous_p);
+}
+
DEFUN ("downcase-region", Fdowncase_region, Sdowncase_region, 2, 3,
"(list (region-beginning) (region-end) (region-noncontiguous-p))",
doc: /* Convert the region to lower case. In programs, wants two arguments.
@@ -552,60 +598,45 @@ the region to operate on. When used as a command, the text between
point and the mark is operated on. */)
(Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
{
- Lisp_Object bounds = Qnil;
-
- if (!NILP (region_noncontiguous_p))
- {
- bounds = call1 (Fsymbol_value (intern ("region-extract-function")),
- intern ("bounds"));
-
- while (CONSP (bounds))
- {
- casify_region (CASE_DOWN, XCAR (XCAR (bounds)), XCDR (XCAR (bounds)));
- bounds = XCDR (bounds);
- }
- }
- else
- casify_region (CASE_DOWN, beg, end);
-
- return Qnil;
+ return casify_pnc_region (CASE_DOWN, beg, end, region_noncontiguous_p);
}
-DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r",
+DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 3,
+ "(list (region-beginning) (region-end) (region-noncontiguous-p))",
doc: /* Convert the region to capitalized form.
This means that each word's first character is converted to either
title case or upper case, and the rest to lower case.
In programs, give two arguments, the starting and ending
character positions to operate on. */)
- (Lisp_Object beg, Lisp_Object end)
+ (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
{
- casify_region (CASE_CAPITALIZE, beg, end);
- return Qnil;
+ return casify_pnc_region (CASE_CAPITALIZE, beg, end, region_noncontiguous_p);
}
/* Like Fcapitalize_region but change only the initials. */
DEFUN ("upcase-initials-region", Fupcase_initials_region,
- Supcase_initials_region, 2, 2, "r",
+ Supcase_initials_region, 2, 3,
+ "(list (region-beginning) (region-end) (region-noncontiguous-p))",
doc: /* Upcase the initial of each word in the region.
This means that each word's first character is converted to either
title case or upper case, and the rest are left unchanged.
In programs, give two arguments, the starting and ending
character positions to operate on. */)
- (Lisp_Object beg, Lisp_Object end)
+ (Lisp_Object beg, Lisp_Object end, Lisp_Object region_noncontiguous_p)
{
- casify_region (CASE_CAPITALIZE_UP, beg, end);
- return Qnil;
+ return casify_pnc_region (CASE_CAPITALIZE_UP, beg, end,
+ region_noncontiguous_p);
}
static Lisp_Object
casify_word (enum case_action flag, Lisp_Object arg)
{
- CHECK_NUMBER (arg);
- ptrdiff_t farend = scan_words (PT, XINT (arg));
+ CHECK_FIXNUM (arg);
+ ptrdiff_t farend = scan_words (PT, XFIXNUM (arg));
if (!farend)
- farend = XINT (arg) <= 0 ? BEGV : ZV;
- SET_PT (casify_region (flag, make_number (PT), make_number (farend)));
+ farend = XFIXNUM (arg) <= 0 ? BEGV : ZV;
+ SET_PT (casify_region (flag, make_fixnum (PT), make_fixnum (farend)));
return Qnil;
}
@@ -652,12 +683,30 @@ With negative argument, capitalize previous words but do not move. */)
void
syms_of_casefiddle (void)
{
+ DEFSYM (Qbounds, "bounds");
DEFSYM (Qidentity, "identity");
DEFSYM (Qtitlecase, "titlecase");
+ DEFSYM (Qlowercase, "lowercase");
+ DEFSYM (Quppercase, "uppercase");
DEFSYM (Qspecial_uppercase, "special-uppercase");
DEFSYM (Qspecial_lowercase, "special-lowercase");
DEFSYM (Qspecial_titlecase, "special-titlecase");
+ DEFVAR_LISP ("region-extract-function", Vregion_extract_function,
+ doc: /* Function to get the region's content.
+Called with one argument METHOD which can be:
+- nil: return the content as a string (list of strings for
+ non-contiguous regions).
+- `delete-only': delete the region; the return value is undefined.
+- `bounds': return the boundaries of the region as a list of one
+ or more cons cells of the form (START . END).
+- anything else: delete the region and return its content
+ as a string (or list of strings for non-contiguous regions),
+ after filtering it with `filter-buffer-substring', which
+ is called, for each contiguous sub-region, with METHOD as its
+ 3rd argument. */);
+ Vregion_extract_function = Qnil; /* simple.el sets this. */
+
defsubr (&Supcase);
defsubr (&Sdowncase);
defsubr (&Scapitalize);
@@ -670,16 +719,3 @@ syms_of_casefiddle (void)
defsubr (&Sdowncase_word);
defsubr (&Scapitalize_word);
}
-
-void
-keys_of_casefiddle (void)
-{
- initial_define_key (control_x_map, Ctl ('U'), "upcase-region");
- Fput (intern ("upcase-region"), Qdisabled, Qt);
- initial_define_key (control_x_map, Ctl ('L'), "downcase-region");
- Fput (intern ("downcase-region"), Qdisabled, Qt);
-
- initial_define_key (meta_map, 'u', "upcase-word");
- initial_define_key (meta_map, 'l', "downcase-word");
- initial_define_key (meta_map, 'c', "capitalize-word");
-}