diff options
Diffstat (limited to 'src/character.c')
-rw-r--r-- | src/character.c | 164 |
1 files changed, 51 insertions, 113 deletions
diff --git a/src/character.c b/src/character.c index 5d419a2e836..4902e564b1d 100644 --- a/src/character.c +++ b/src/character.c @@ -141,58 +141,6 @@ char_string (unsigned int c, unsigned char *p) } -/* Return a character whose multibyte form is at P. If LEN is not - NULL, it must be a pointer to integer. In that case, set *LEN to - the byte length of the multibyte form. If ADVANCED is not NULL, it - must be a pointer to unsigned char. In that case, set *ADVANCED to - the ending address (i.e., the starting address of the next - character) of the multibyte form. */ - -int -string_char (const unsigned char *p, const unsigned char **advanced, int *len) -{ - int c; - const unsigned char *saved_p = p; - - if (*p < 0x80 || ! (*p & 0x20) || ! (*p & 0x10)) - { - /* 1-, 2-, and 3-byte sequences can be handled by the macro. */ - c = STRING_CHAR_ADVANCE (p); - } - else if (! (*p & 0x08)) - { - /* A 4-byte sequence of this form: - 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ - c = ((((p)[0] & 0x7) << 18) - | (((p)[1] & 0x3F) << 12) - | (((p)[2] & 0x3F) << 6) - | ((p)[3] & 0x3F)); - p += 4; - } - else - { - /* A 5-byte sequence of this form: - - 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx - - Note that the top 4 `x's are always 0, so shifting p[1] can - never exceed the maximum valid character codepoint. */ - c = (/* (((p)[0] & 0x3) << 24) ... always 0, so no need to shift. */ - (((p)[1] & 0x3F) << 18) - | (((p)[2] & 0x3F) << 12) - | (((p)[3] & 0x3F) << 6) - | ((p)[4] & 0x3F)); - p += 5; - } - - if (len) - *len = p - saved_p; - if (advanced) - *advanced = p; - return c; -} - - /* Translate character C by translation table TABLE. If no translation is found in TABLE, return the untranslated character. If TABLE is a list, elements are char tables. In that case, recursively translate C by all the @@ -248,8 +196,7 @@ DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte, c = XFIXNAT (ch); if (c >= 0x100) error ("Not a unibyte character: %d", c); - MAKE_CHAR_MULTIBYTE (c); - return make_fixnum (c); + return make_fixnum (make_char_multibyte (c)); } DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte, @@ -340,8 +287,7 @@ c_string_width (const unsigned char *str, ptrdiff_t len, int precision, while (i_byte < len) { - int bytes; - int c = STRING_CHAR_AND_LENGTH (str + i_byte, bytes); + int bytes, c = string_char_and_length (str + i_byte, &bytes); ptrdiff_t thiswidth = char_width (c, dp); if (0 < precision && precision - width < thiswidth) @@ -418,7 +364,7 @@ lisp_string_width (Lisp_Object string, ptrdiff_t precision, if (multibyte) { int cbytes; - c = STRING_CHAR_AND_LENGTH (str + i_byte, cbytes); + c = string_char_and_length (str + i_byte, &cbytes); bytes = cbytes; } else @@ -495,7 +441,7 @@ multibyte_chars_in_text (const unsigned char *ptr, ptrdiff_t nbytes) while (ptr < endp) { - int len = MULTIBYTE_LENGTH (ptr, endp); + int len = multibyte_length (ptr, endp, true, true); if (len == 0) emacs_abort (); @@ -517,16 +463,15 @@ parse_str_as_multibyte (const unsigned char *str, ptrdiff_t len, ptrdiff_t *nchars, ptrdiff_t *nbytes) { const unsigned char *endp = str + len; - int n; ptrdiff_t chars = 0, bytes = 0; if (len >= MAX_MULTIBYTE_LENGTH) { - const unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH; + const unsigned char *adjusted_endp = endp - (MAX_MULTIBYTE_LENGTH - 1); while (str < adjusted_endp) { - if (! CHAR_BYTE8_HEAD_P (*str) - && (n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0) + int n = multibyte_length (str, NULL, false, false); + if (0 < n) str += n, bytes += n; else str++, bytes += 2; @@ -535,8 +480,8 @@ parse_str_as_multibyte (const unsigned char *str, ptrdiff_t len, } while (str < endp) { - if (! CHAR_BYTE8_HEAD_P (*str) - && (n = MULTIBYTE_LENGTH (str, endp)) > 0) + int n = multibyte_length (str, endp, true, false); + if (0 < n) str += n, bytes += n; else str++, bytes += 2; @@ -563,20 +508,25 @@ str_as_multibyte (unsigned char *str, ptrdiff_t len, ptrdiff_t nbytes, unsigned char *p = str, *endp = str + nbytes; unsigned char *to; ptrdiff_t chars = 0; - int n; if (nbytes >= MAX_MULTIBYTE_LENGTH) { - unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH; - while (p < adjusted_endp - && ! CHAR_BYTE8_HEAD_P (*p) - && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0) - p += n, chars++; + unsigned char *adjusted_endp = endp - (MAX_MULTIBYTE_LENGTH - 1); + while (p < adjusted_endp) + { + int n = multibyte_length (p, NULL, false, false); + if (n <= 0) + break; + p += n, chars++; + } + } + while (true) + { + int n = multibyte_length (p, endp, true, false); + if (n <= 0) + break; + p += n, chars++; } - while (p < endp - && ! CHAR_BYTE8_HEAD_P (*p) - && (n = MULTIBYTE_LENGTH (p, endp)) > 0) - p += n, chars++; if (nchars) *nchars = chars; if (p == endp) @@ -590,11 +540,11 @@ str_as_multibyte (unsigned char *str, ptrdiff_t len, ptrdiff_t nbytes, if (nbytes >= MAX_MULTIBYTE_LENGTH) { - unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH; + unsigned char *adjusted_endp = endp - (MAX_MULTIBYTE_LENGTH - 1); while (p < adjusted_endp) { - if (! CHAR_BYTE8_HEAD_P (*p) - && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0) + int n = multibyte_length (p, NULL, false, false); + if (0 < n) { while (n--) *to++ = *p++; @@ -610,8 +560,8 @@ str_as_multibyte (unsigned char *str, ptrdiff_t len, ptrdiff_t nbytes, } while (p < endp) { - if (! CHAR_BYTE8_HEAD_P (*p) - && (n = MULTIBYTE_LENGTH (p, endp)) > 0) + int n = multibyte_length (p, endp, true, false); + if (0 < n) { while (n--) *to++ = *p++; @@ -706,7 +656,7 @@ str_as_unibyte (unsigned char *str, ptrdiff_t bytes) len = BYTES_BY_CHAR_HEAD (c); if (CHAR_BYTE8_HEAD_P (c)) { - c = STRING_CHAR_ADVANCE (p); + c = string_char_advance (&p); *to++ = CHAR_TO_BYTE8 (c); } else @@ -730,7 +680,7 @@ str_to_unibyte (const unsigned char *src, unsigned char *dst, ptrdiff_t chars) for (i = 0; i < chars; i++) { - int c = STRING_CHAR_ADVANCE (src); + int c = string_char_advance (&src); if (CHAR_BYTE8_P (c)) c = CHAR_TO_BYTE8 (c); @@ -823,7 +773,7 @@ string_escape_byte8 (Lisp_Object string) if (CHAR_BYTE8_HEAD_P (c)) { - c = STRING_CHAR_ADVANCE (src); + c = string_char_advance (&src); c = CHAR_TO_BYTE8 (c); dst += sprintf ((char *) dst, "\\%03o", c + 0u); } @@ -849,24 +799,22 @@ Concatenate all the argument characters and make the result a string. usage: (string &rest CHARACTERS) */) (ptrdiff_t n, Lisp_Object *args) { - ptrdiff_t i; - int c; - unsigned char *buf, *p; - Lisp_Object str; - USE_SAFE_ALLOCA; - - SAFE_NALLOCA (buf, MAX_MULTIBYTE_LENGTH, n); - p = buf; - - for (i = 0; i < n; i++) + ptrdiff_t nbytes = 0; + for (ptrdiff_t i = 0; i < n; i++) { CHECK_CHARACTER (args[i]); - c = XFIXNUM (args[i]); + nbytes += CHAR_BYTES (XFIXNUM (args[i])); + } + if (nbytes == n) + return Funibyte_string (n, args); + Lisp_Object str = make_uninit_multibyte_string (n, nbytes); + unsigned char *p = SDATA (str); + for (ptrdiff_t i = 0; i < n; i++) + { + eassume (CHARACTERP (args[i])); + int c = XFIXNUM (args[i]); p += CHAR_STRING (c, p); } - - str = make_string_from_bytes ((char *) buf, n, p - buf); - SAFE_FREE (); return str; } @@ -875,20 +823,10 @@ DEFUN ("unibyte-string", Funibyte_string, Sunibyte_string, 0, MANY, 0, usage: (unibyte-string &rest BYTES) */) (ptrdiff_t n, Lisp_Object *args) { - ptrdiff_t i; - Lisp_Object str; - USE_SAFE_ALLOCA; - unsigned char *buf = SAFE_ALLOCA (n); - unsigned char *p = buf; - - for (i = 0; i < n; i++) - { - CHECK_RANGED_INTEGER (args[i], 0, 255); - *p++ = XFIXNUM (args[i]); - } - - str = make_string_from_bytes ((char *) buf, n, p - buf); - SAFE_FREE (); + Lisp_Object str = make_uninit_string (n); + unsigned char *p = SDATA (str); + for (ptrdiff_t i = 0; i < n; i++) + *p++ = check_integer_range (args[i], 0, 255); return str; } @@ -931,10 +869,10 @@ character is not ASCII nor 8-bit character, an error is signaled. */) } else { - CHECK_FIXNUM_COERCE_MARKER (position); - if (XFIXNUM (position) < BEGV || XFIXNUM (position) >= ZV) + EMACS_INT fixed_pos = fix_position (position); + if (! (BEGV <= fixed_pos && fixed_pos < ZV)) args_out_of_range_3 (position, make_fixnum (BEGV), make_fixnum (ZV)); - pos = XFIXNAT (position); + pos = fixed_pos; p = CHAR_POS_ADDR (pos); } if (NILP (BVAR (current_buffer, enable_multibyte_characters))) |