summary | refs | log | tree | commit | diff
path: root/src/character.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/character.c')
-rw-r--r-- src/character.c | 164
1 file changed, 51 insertions, 113 deletions
diff --git a/src/character.c b/src/character.c
index 5d419a2e836..4902e564b1d 100644
--- a/src/character.c
+++ b/src/character.c
@@ -141,58 +141,6 @@ char_string (unsigned int c, unsigned char *p)
}
-/* Return a character whose multibyte form is at P. If LEN is not
- NULL, it must be a pointer to integer. In that case, set *LEN to
- the byte length of the multibyte form. If ADVANCED is not NULL, it
- must be a pointer to unsigned char. In that case, set *ADVANCED to
- the ending address (i.e., the starting address of the next
- character) of the multibyte form. */
-
-int
-string_char (const unsigned char *p, const unsigned char **advanced, int *len)
-{
- int c;
- const unsigned char *saved_p = p;
-
- if (*p < 0x80 || ! (*p & 0x20) || ! (*p & 0x10))
- {
- /* 1-, 2-, and 3-byte sequences can be handled by the macro. */
- c = STRING_CHAR_ADVANCE (p);
- }
- else if (! (*p & 0x08))
- {
- /* A 4-byte sequence of this form:
- 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
- c = ((((p)[0] & 0x7) << 18)
- | (((p)[1] & 0x3F) << 12)
- | (((p)[2] & 0x3F) << 6)
- | ((p)[3] & 0x3F));
- p += 4;
- }
- else
- {
- /* A 5-byte sequence of this form:
-
- 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
-
- Note that the top 4 `x's are always 0, so shifting p[1] can
- never exceed the maximum valid character codepoint. */
- c = (/* (((p)[0] & 0x3) << 24) ... always 0, so no need to shift. */
- (((p)[1] & 0x3F) << 18)
- | (((p)[2] & 0x3F) << 12)
- | (((p)[3] & 0x3F) << 6)
- | ((p)[4] & 0x3F));
- p += 5;
- }
-
- if (len)
- *len = p - saved_p;
- if (advanced)
- *advanced = p;
- return c;
-}
-
-
/* Translate character C by translation table TABLE. If no translation is
found in TABLE, return the untranslated character. If TABLE is a list,
elements are char tables. In that case, recursively translate C by all the
@@ -248,8 +196,7 @@ DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
c = XFIXNAT (ch);
if (c >= 0x100)
error ("Not a unibyte character: %d", c);
- MAKE_CHAR_MULTIBYTE (c);
- return make_fixnum (c);
+ return make_fixnum (make_char_multibyte (c));
}
DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
@@ -340,8 +287,7 @@ c_string_width (const unsigned char *str, ptrdiff_t len, int precision,
while (i_byte < len)
{
- int bytes;
- int c = STRING_CHAR_AND_LENGTH (str + i_byte, bytes);
+ int bytes, c = string_char_and_length (str + i_byte, &bytes);
ptrdiff_t thiswidth = char_width (c, dp);
if (0 < precision && precision - width < thiswidth)
@@ -418,7 +364,7 @@ lisp_string_width (Lisp_Object string, ptrdiff_t precision,
if (multibyte)
{
int cbytes;
- c = STRING_CHAR_AND_LENGTH (str + i_byte, cbytes);
+ c = string_char_and_length (str + i_byte, &cbytes);
bytes = cbytes;
}
else
@@ -495,7 +441,7 @@ multibyte_chars_in_text (const unsigned char *ptr, ptrdiff_t nbytes)
while (ptr < endp)
{
- int len = MULTIBYTE_LENGTH (ptr, endp);
+ int len = multibyte_length (ptr, endp, true, true);
if (len == 0)
emacs_abort ();
@@ -517,16 +463,15 @@ parse_str_as_multibyte (const unsigned char *str, ptrdiff_t len,
ptrdiff_t *nchars, ptrdiff_t *nbytes)
{
const unsigned char *endp = str + len;
- int n;
ptrdiff_t chars = 0, bytes = 0;
if (len >= MAX_MULTIBYTE_LENGTH)
{
- const unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
+ const unsigned char *adjusted_endp = endp - (MAX_MULTIBYTE_LENGTH - 1);
while (str < adjusted_endp)
{
- if (! CHAR_BYTE8_HEAD_P (*str)
- && (n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
+ int n = multibyte_length (str, NULL, false, false);
+ if (0 < n)
str += n, bytes += n;
else
str++, bytes += 2;
@@ -535,8 +480,8 @@ parse_str_as_multibyte (const unsigned char *str, ptrdiff_t len,
}
while (str < endp)
{
- if (! CHAR_BYTE8_HEAD_P (*str)
- && (n = MULTIBYTE_LENGTH (str, endp)) > 0)
+ int n = multibyte_length (str, endp, true, false);
+ if (0 < n)
str += n, bytes += n;
else
str++, bytes += 2;
@@ -563,20 +508,25 @@ str_as_multibyte (unsigned char *str, ptrdiff_t len, ptrdiff_t nbytes,
unsigned char *p = str, *endp = str + nbytes;
unsigned char *to;
ptrdiff_t chars = 0;
- int n;
if (nbytes >= MAX_MULTIBYTE_LENGTH)
{
- unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
- while (p < adjusted_endp
- && ! CHAR_BYTE8_HEAD_P (*p)
- && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
- p += n, chars++;
+ unsigned char *adjusted_endp = endp - (MAX_MULTIBYTE_LENGTH - 1);
+ while (p < adjusted_endp)
+ {
+ int n = multibyte_length (p, NULL, false, false);
+ if (n <= 0)
+ break;
+ p += n, chars++;
+ }
+ }
+ while (true)
+ {
+ int n = multibyte_length (p, endp, true, false);
+ if (n <= 0)
+ break;
+ p += n, chars++;
}
- while (p < endp
- && ! CHAR_BYTE8_HEAD_P (*p)
- && (n = MULTIBYTE_LENGTH (p, endp)) > 0)
- p += n, chars++;
if (nchars)
*nchars = chars;
if (p == endp)
@@ -590,11 +540,11 @@ str_as_multibyte (unsigned char *str, ptrdiff_t len, ptrdiff_t nbytes,
if (nbytes >= MAX_MULTIBYTE_LENGTH)
{
- unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
+ unsigned char *adjusted_endp = endp - (MAX_MULTIBYTE_LENGTH - 1);
while (p < adjusted_endp)
{
- if (! CHAR_BYTE8_HEAD_P (*p)
- && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
+ int n = multibyte_length (p, NULL, false, false);
+ if (0 < n)
{
while (n--)
*to++ = *p++;
@@ -610,8 +560,8 @@ str_as_multibyte (unsigned char *str, ptrdiff_t len, ptrdiff_t nbytes,
}
while (p < endp)
{
- if (! CHAR_BYTE8_HEAD_P (*p)
- && (n = MULTIBYTE_LENGTH (p, endp)) > 0)
+ int n = multibyte_length (p, endp, true, false);
+ if (0 < n)
{
while (n--)
*to++ = *p++;
@@ -706,7 +656,7 @@ str_as_unibyte (unsigned char *str, ptrdiff_t bytes)
len = BYTES_BY_CHAR_HEAD (c);
if (CHAR_BYTE8_HEAD_P (c))
{
- c = STRING_CHAR_ADVANCE (p);
+ c = string_char_advance (&p);
*to++ = CHAR_TO_BYTE8 (c);
}
else
@@ -730,7 +680,7 @@ str_to_unibyte (const unsigned char *src, unsigned char *dst, ptrdiff_t chars)
for (i = 0; i < chars; i++)
{
- int c = STRING_CHAR_ADVANCE (src);
+ int c = string_char_advance (&src);
if (CHAR_BYTE8_P (c))
c = CHAR_TO_BYTE8 (c);
@@ -823,7 +773,7 @@ string_escape_byte8 (Lisp_Object string)
if (CHAR_BYTE8_HEAD_P (c))
{
- c = STRING_CHAR_ADVANCE (src);
+ c = string_char_advance (&src);
c = CHAR_TO_BYTE8 (c);
dst += sprintf ((char *) dst, "\\%03o", c + 0u);
}
@@ -849,24 +799,22 @@ Concatenate all the argument characters and make the result a string.
usage: (string &rest CHARACTERS) */)
(ptrdiff_t n, Lisp_Object *args)
{
- ptrdiff_t i;
- int c;
- unsigned char *buf, *p;
- Lisp_Object str;
- USE_SAFE_ALLOCA;
-
- SAFE_NALLOCA (buf, MAX_MULTIBYTE_LENGTH, n);
- p = buf;
-
- for (i = 0; i < n; i++)
+ ptrdiff_t nbytes = 0;
+ for (ptrdiff_t i = 0; i < n; i++)
{
CHECK_CHARACTER (args[i]);
- c = XFIXNUM (args[i]);
+ nbytes += CHAR_BYTES (XFIXNUM (args[i]));
+ }
+ if (nbytes == n)
+ return Funibyte_string (n, args);
+ Lisp_Object str = make_uninit_multibyte_string (n, nbytes);
+ unsigned char *p = SDATA (str);
+ for (ptrdiff_t i = 0; i < n; i++)
+ {
+ eassume (CHARACTERP (args[i]));
+ int c = XFIXNUM (args[i]);
p += CHAR_STRING (c, p);
}
-
- str = make_string_from_bytes ((char *) buf, n, p - buf);
- SAFE_FREE ();
return str;
}
@@ -875,20 +823,10 @@ DEFUN ("unibyte-string", Funibyte_string, Sunibyte_string, 0, MANY, 0,
usage: (unibyte-string &rest BYTES) */)
(ptrdiff_t n, Lisp_Object *args)
{
- ptrdiff_t i;
- Lisp_Object str;
- USE_SAFE_ALLOCA;
- unsigned char *buf = SAFE_ALLOCA (n);
- unsigned char *p = buf;
-
- for (i = 0; i < n; i++)
- {
- CHECK_RANGED_INTEGER (args[i], 0, 255);
- *p++ = XFIXNUM (args[i]);
- }
-
- str = make_string_from_bytes ((char *) buf, n, p - buf);
- SAFE_FREE ();
+ Lisp_Object str = make_uninit_string (n);
+ unsigned char *p = SDATA (str);
+ for (ptrdiff_t i = 0; i < n; i++)
+ *p++ = check_integer_range (args[i], 0, 255);
return str;
}
@@ -931,10 +869,10 @@ character is not ASCII nor 8-bit character, an error is signaled. */)
}
else
{
- CHECK_FIXNUM_COERCE_MARKER (position);
- if (XFIXNUM (position) < BEGV || XFIXNUM (position) >= ZV)
+ EMACS_INT fixed_pos = fix_position (position);
+ if (! (BEGV <= fixed_pos && fixed_pos < ZV))
args_out_of_range_3 (position, make_fixnum (BEGV), make_fixnum (ZV));
- pos = XFIXNAT (position);
+ pos = fixed_pos;
p = CHAR_POS_ADDR (pos);
}
if (NILP (BVAR (current_buffer, enable_multibyte_characters)))