summary refs log tree commit diff
path: root/src/coding.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/coding.c')
-rw-r--r-- src/coding.c 201
1 files changed, 117 insertions, 84 deletions
diff --git a/src/coding.c b/src/coding.c
index ed755b1afcf..071124b4ef1 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -643,7 +643,7 @@ growable_destination (struct coding_system *coding)
else \
{ \
src--; \
- c = - string_char (src, &src, NULL); \
+ c = - string_char_advance (&src); \
record_conversion_result \
(coding, CODING_RESULT_INVALID_SRC); \
} \
@@ -728,7 +728,7 @@ growable_destination (struct coding_system *coding)
unsigned ch = (c); \
if (ch >= 0x80) \
ch = BYTE8_TO_CHAR (ch); \
- CHAR_STRING_ADVANCE (ch, dst); \
+ dst += CHAR_STRING (ch, dst); \
} \
else \
*dst++ = (c); \
@@ -747,11 +747,11 @@ growable_destination (struct coding_system *coding)
ch = (c1); \
if (ch >= 0x80) \
ch = BYTE8_TO_CHAR (ch); \
- CHAR_STRING_ADVANCE (ch, dst); \
+ dst += CHAR_STRING (ch, dst); \
ch = (c2); \
if (ch >= 0x80) \
ch = BYTE8_TO_CHAR (ch); \
- CHAR_STRING_ADVANCE (ch, dst); \
+ dst += CHAR_STRING (ch, dst); \
} \
else \
{ \
@@ -884,18 +884,18 @@ record_conversion_result (struct coding_system *coding,
/* Store multibyte form of the character C in P, and advance P to the
- end of the multibyte form. This used to be like CHAR_STRING_ADVANCE
+ end of the multibyte form. This used to be like adding CHAR_STRING
without ever calling MAYBE_UNIFY_CHAR, but nowadays we don't call
- MAYBE_UNIFY_CHAR in CHAR_STRING_ADVANCE. */
+ MAYBE_UNIFY_CHAR in CHAR_STRING. */
-#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) CHAR_STRING_ADVANCE(c, p)
+#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) ((p) += CHAR_STRING (c, p))
/* Return the character code of character whose multibyte form is at
P, and advance P to the end of the multibyte form. This used to be
- like STRING_CHAR_ADVANCE without ever calling MAYBE_UNIFY_CHAR, but
- nowadays STRING_CHAR_ADVANCE doesn't call MAYBE_UNIFY_CHAR. */
+ like string_char_advance without ever calling MAYBE_UNIFY_CHAR, but
+ nowadays string_char_advance doesn't call MAYBE_UNIFY_CHAR. */
-#define STRING_CHAR_ADVANCE_NO_UNIFY(p) STRING_CHAR_ADVANCE(p)
+#define STRING_CHAR_ADVANCE_NO_UNIFY(p) string_char_advance (&(p))
/* Set coding->source from coding->src_object. */
@@ -5131,7 +5131,7 @@ decode_coding_ccl (struct coding_system *coding)
while (i < 1024 && p < src_end)
{
source_byteidx[i] = p - src;
- source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
+ source_charbuf[i++] = string_char_advance (&p);
}
source_byteidx[i] = p - src;
}
@@ -5308,15 +5308,10 @@ encode_coding_raw_text (struct coding_system *coding)
}
else
{
- unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
-
- CHAR_STRING_ADVANCE (c, p1);
- do
- {
- EMIT_ONE_BYTE (*p0);
- p0++;
- }
- while (p0 < p1);
+ unsigned char str[MAX_MULTIBYTE_LENGTH];
+ int len = CHAR_STRING (c, str);
+ for (int i = 0; i < len; i++)
+ EMIT_ONE_BYTE (str[i]);
}
}
else
@@ -5342,7 +5337,7 @@ encode_coding_raw_text (struct coding_system *coding)
else if (CHAR_BYTE8_P (c))
*dst++ = CHAR_TO_BYTE8 (c);
else
- CHAR_STRING_ADVANCE (c, dst);
+ dst += CHAR_STRING (c, dst);
}
}
else
@@ -7457,7 +7452,7 @@ decode_coding (struct coding_system *coding)
if (coding->src_multibyte
&& CHAR_BYTE8_HEAD_P (*src) && nbytes > 0)
{
- c = STRING_CHAR_ADVANCE (src);
+ c = string_char_advance (&src);
nbytes--;
}
else
@@ -7551,10 +7546,8 @@ handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit,
len = SCHARS (components);
i = i_byte = 0;
while (i < len)
- {
- FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
- buf++;
- }
+ *buf++ = fetch_string_char_advance (components,
+ &i, &i_byte);
}
else if (FIXNUMP (components))
{
@@ -7677,15 +7670,17 @@ consume_chars (struct coding_system *coding, Lisp_Object translation_table,
if (! multibytep)
{
- int bytes;
-
if (coding->encoder == encode_coding_raw_text
|| coding->encoder == encode_coding_ccl)
c = *src++, pos++;
- else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
- c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
else
- c = BYTE8_TO_CHAR (*src), src++, pos++;
+ {
+ int bytes = multibyte_length (src, src_end, true, true);
+ if (0 < bytes)
+ c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
+ else
+ c = BYTE8_TO_CHAR (*src), src++, pos++;
+ }
}
else
c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
@@ -7715,7 +7710,7 @@ consume_chars (struct coding_system *coding, Lisp_Object translation_table,
lookup_buf[0] = c;
for (i = 1; i < max_lookup && p < src_end; i++)
- lookup_buf[i] = STRING_CHAR_ADVANCE (p);
+ lookup_buf[i] = string_char_advance (&p);
lookup_buf_end = lookup_buf + i;
trans = get_translation (trans, lookup_buf, lookup_buf_end,
&from_nchars);
@@ -7734,7 +7729,7 @@ consume_chars (struct coding_system *coding, Lisp_Object translation_table,
for (i = 1; i < to_nchars; i++)
*buf++ = XFIXNUM (AREF (trans, i));
for (i = 1; i < from_nchars; i++, pos++)
- src += MULTIBYTE_LENGTH_NO_CHECK (src);
+ src += multibyte_length (src, NULL, false, true);
}
}
@@ -9023,23 +9018,23 @@ DEFUN ("find-coding-systems-region-internal",
}
else
{
- CHECK_FIXNUM_COERCE_MARKER (start);
- CHECK_FIXNUM_COERCE_MARKER (end);
- if (XFIXNUM (start) < BEG || XFIXNUM (end) > Z || XFIXNUM (start) > XFIXNUM (end))
+ EMACS_INT s = fix_position (start);
+ EMACS_INT e = fix_position (end);
+ if (! (BEG <= s && s <= e && e <= Z))
args_out_of_range (start, end);
if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
return Qt;
- start_byte = CHAR_TO_BYTE (XFIXNUM (start));
- end_byte = CHAR_TO_BYTE (XFIXNUM (end));
- if (XFIXNUM (end) - XFIXNUM (start) == end_byte - start_byte)
+ start_byte = CHAR_TO_BYTE (s);
+ end_byte = CHAR_TO_BYTE (e);
+ if (e - s == end_byte - start_byte)
return Qt;
- if (XFIXNUM (start) < GPT && XFIXNUM (end) > GPT)
+ if (s < GPT && GPT < e)
{
- if ((GPT - XFIXNUM (start)) < (XFIXNUM (end) - GPT))
- move_gap_both (XFIXNUM (start), start_byte);
+ if (GPT - s < e - GPT)
+ move_gap_both (s, start_byte);
else
- move_gap_both (XFIXNUM (end), end_byte);
+ move_gap_both (e, end_byte);
}
}
@@ -9075,7 +9070,7 @@ DEFUN ("find-coding-systems-region-internal",
p++;
else
{
- c = STRING_CHAR_ADVANCE (p);
+ c = string_char_advance (&p);
if (!NILP (char_table_ref (work_table, c)))
/* This character was already checked. Ignore it. */
continue;
@@ -9208,7 +9203,7 @@ to the string and treated as in `substring'. */)
p = GAP_END_ADDR;
}
- c = STRING_CHAR_ADVANCE (p);
+ c = string_char_advance (&p);
if (! (ASCII_CHAR_P (c) && ascii_compatible)
&& ! char_charset (translate_char (translation_table, c),
charset_list, NULL))
@@ -9277,32 +9272,35 @@ is nil. */)
}
else
{
- CHECK_FIXNUM_COERCE_MARKER (start);
- CHECK_FIXNUM_COERCE_MARKER (end);
- if (XFIXNUM (start) < BEG || XFIXNUM (end) > Z || XFIXNUM (start) > XFIXNUM (end))
+ EMACS_INT s = fix_position (start);
+ EMACS_INT e = fix_position (end);
+ if (! (BEG <= s && s <= e && e <= Z))
args_out_of_range (start, end);
if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
return Qnil;
- start_byte = CHAR_TO_BYTE (XFIXNUM (start));
- end_byte = CHAR_TO_BYTE (XFIXNUM (end));
- if (XFIXNUM (end) - XFIXNUM (start) == end_byte - start_byte)
+ start_byte = CHAR_TO_BYTE (s);
+ end_byte = CHAR_TO_BYTE (e);
+ if (e - s == end_byte - start_byte)
return Qnil;
- if (XFIXNUM (start) < GPT && XFIXNUM (end) > GPT)
+ if (s < GPT && GPT < e)
{
- if ((GPT - XFIXNUM (start)) < (XFIXNUM (end) - GPT))
- move_gap_both (XFIXNUM (start), start_byte);
+ if (GPT - s < e - GPT)
+ move_gap_both (s, start_byte);
else
- move_gap_both (XFIXNUM (end), end_byte);
+ move_gap_both (e, end_byte);
}
- pos = XFIXNUM (start);
+ pos = s;
}
list = Qnil;
for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
{
elt = XCAR (tail);
- attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
+ Lisp_Object spec = CODING_SYSTEM_SPEC (elt);
+ if (!VECTORP (spec))
+ xsignal1 (Qcoding_system_error, elt);
+ attrs = AREF (spec, 0);
ASET (attrs, coding_attr_trans_tbl,
get_translation_table (attrs, 1, NULL));
list = Fcons (list2 (elt, attrs), list);
@@ -9323,7 +9321,7 @@ is nil. */)
p++;
else
{
- c = STRING_CHAR_ADVANCE (p);
+ c = string_char_advance (&p);
charset_map_loaded = 0;
for (tail = list; CONSP (tail); tail = XCDR (tail))
@@ -9471,6 +9469,17 @@ not fully specified.) */)
return code_convert_region (start, end, coding_system, destination, 1, 0);
}
+/* Whether STRING only contains chars in the 0..127 range. */
+static bool
+string_ascii_p (Lisp_Object string)
+{
+ ptrdiff_t nbytes = SBYTES (string);
+ for (ptrdiff_t i = 0; i < nbytes; i++)
+ if (SREF (string, i) > 127)
+ return false;
+ return true;
+}
+
Lisp_Object
code_convert_string (Lisp_Object string, Lisp_Object coding_system,
Lisp_Object dst_object, bool encodep, bool nocopy,
@@ -9485,7 +9494,7 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
if (! norecord)
Vlast_coding_system_used = Qno_conversion;
if (NILP (dst_object))
- return (nocopy ? Fcopy_sequence (string) : string);
+ return nocopy ? string : Fcopy_sequence (string);
}
if (NILP (coding_system))
@@ -9502,7 +9511,28 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
chars = SCHARS (string);
bytes = SBYTES (string);
- if (BUFFERP (dst_object))
+ if (EQ (dst_object, Qt))
+ {
+ /* Fast path for ASCII-only input and an ASCII-compatible coding:
+ act as identity if no EOL conversion is needed. */
+ Lisp_Object attrs = CODING_ID_ATTRS (coding.id);
+ if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
+ && (STRING_MULTIBYTE (string)
+ ? (chars == bytes) : string_ascii_p (string))
+ && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix)
+ || inhibit_eol_conversion
+ || ! memchr (SDATA (string), encodep ? '\n' : '\r', bytes)))
+ {
+ if (! norecord)
+ Vlast_coding_system_used = coding_system;
+ return (nocopy
+ ? string
+ : (encodep
+ ? make_unibyte_string (SSDATA (string), bytes)
+ : make_multibyte_string (SSDATA (string), bytes, bytes)));
+ }
+ }
+ else if (BUFFERP (dst_object))
{
struct buffer *buf = XBUFFER (dst_object);
ptrdiff_t buf_pt = BUF_PT (buf);
@@ -9524,10 +9554,7 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
/* Encode or decode STRING according to CODING_SYSTEM.
- Do not set Vlast_coding_system_used.
-
- This function is called only from macros DECODE_FILE and
- ENCODE_FILE, thus we ignore character composition. */
+ Do not set Vlast_coding_system_used. */
Lisp_Object
code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
@@ -9696,7 +9723,7 @@ encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
|| (len == 2 ? ! CHAR_BYTE8_HEAD_P (c)
: (EQ (handle_over_uni, Qt)
|| (len == 4
- && string_char (p, NULL, NULL) <= MAX_UNICODE_CHAR))))
+ && STRING_CHAR (p) <= MAX_UNICODE_CHAR))))
{
p += len;
continue;
@@ -9978,8 +10005,7 @@ decode_string_utf_8 (Lisp_Object string, const char *str, ptrdiff_t str_len,
&& (len == 3
|| (UTF_8_EXTRA_OCTET_P (p[3])
&& len == 4
- && (string_char (p, NULL, NULL)
- <= MAX_UNICODE_CHAR))))))
+ && STRING_CHAR (p) <= MAX_UNICODE_CHAR)))))
{
p += len;
continue;
@@ -10116,8 +10142,7 @@ decode_string_utf_8 (Lisp_Object string, const char *str, ptrdiff_t str_len,
mlen++);
if (mlen == len
&& (len <= 3
- || (len == 4
- && string_char (p, NULL, NULL) <= MAX_UNICODE_CHAR)
+ || (len == 4 && STRING_CHAR (p) <= MAX_UNICODE_CHAR)
|| EQ (handle_over_uni, Qt)))
{
p += len;
@@ -10297,6 +10322,16 @@ DEFUN ("internal-decode-string-utf-8", Finternal_decode_string_utf_8,
#endif /* ENABLE_UTF_8_CONVERTER_TEST */
+/* Encode or decode STRING using CODING_SYSTEM, with the possibility of
+ returning STRING itself if it equals the result.
+ Do not set Vlast_coding_system_used. */
+static Lisp_Object
+convert_string_nocopy (Lisp_Object string, Lisp_Object coding_system,
+ bool encodep)
+{
+ return code_convert_string (string, coding_system, Qt, encodep, 1, 1);
+}
+
/* Encode or decode a file name, to or from a unibyte string suitable
for passing to C library functions. */
Lisp_Object
@@ -10307,14 +10342,13 @@ decode_file_name (Lisp_Object fname)
converts the file names either to UTF-16LE or to the system ANSI
codepage internally, depending on the underlying OS; see w32.c. */
if (! NILP (Fcoding_system_p (Qutf_8)))
- return code_convert_string_norecord (fname, Qutf_8, 0);
+ return convert_string_nocopy (fname, Qutf_8, 0);
return fname;
#else /* !WINDOWSNT */
if (! NILP (Vfile_name_coding_system))
- return code_convert_string_norecord (fname, Vfile_name_coding_system, 0);
+ return convert_string_nocopy (fname, Vfile_name_coding_system, 0);
else if (! NILP (Vdefault_file_name_coding_system))
- return code_convert_string_norecord (fname,
- Vdefault_file_name_coding_system, 0);
+ return convert_string_nocopy (fname, Vdefault_file_name_coding_system, 0);
else
return fname;
#endif
@@ -10334,14 +10368,13 @@ encode_file_name (Lisp_Object fname)
converts the file names either to UTF-16LE or to the system ANSI
codepage internally, depending on the underlying OS; see w32.c. */
if (! NILP (Fcoding_system_p (Qutf_8)))
- return code_convert_string_norecord (fname, Qutf_8, 1);
+ return convert_string_nocopy (fname, Qutf_8, 1);
return fname;
#else /* !WINDOWSNT */
if (! NILP (Vfile_name_coding_system))
- return code_convert_string_norecord (fname, Vfile_name_coding_system, 1);
+ return convert_string_nocopy (fname, Vfile_name_coding_system, 1);
else if (! NILP (Vdefault_file_name_coding_system))
- return code_convert_string_norecord (fname,
- Vdefault_file_name_coding_system, 1);
+ return convert_string_nocopy (fname, Vdefault_file_name_coding_system, 1);
else
return fname;
#endif
@@ -10362,7 +10395,7 @@ representation of the decoded text.
This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
-not fully specified.) */)
+not fully specified.) The function does not change the match data. */)
(Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
{
return code_convert_string (string, coding_system, buffer,
@@ -10382,7 +10415,7 @@ case, the return value is the length of the encoded text.
This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
-not fully specified.) */)
+not fully specified.) The function does not change the match data. */)
(Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
{
return code_convert_string (string, coding_system, buffer,
@@ -11061,10 +11094,8 @@ usage: (define-coding-system-internal ...) */)
else
{
CHECK_CONS (val);
- CHECK_RANGED_INTEGER (XCAR (val), 0, 255);
- from = XFIXNUM (XCAR (val));
- CHECK_RANGED_INTEGER (XCDR (val), from, 255);
- to = XFIXNUM (XCDR (val));
+ from = check_integer_range (XCAR (val), 0, 255);
+ to = check_integer_range (XCDR (val), from, 255);
}
for (int i = from; i <= to; i++)
SSET (valids, i, 1);
@@ -11149,7 +11180,7 @@ usage: (define-coding-system-internal ...) */)
val = XCAR (tail);
CHECK_CONS (val);
CHECK_CHARSET_GET_ID (XCAR (val), id);
- CHECK_RANGED_INTEGER (XCDR (val), 0, 3);
+ check_integer_range (XCDR (val), 0, 3);
XSETCAR (val, make_fixnum (id));
}
@@ -11745,6 +11776,8 @@ syms_of_coding (void)
DEFSYM (Qignored, "ignored");
+ DEFSYM (Qutf_8_string_p, "utf-8-string-p");
+
defsubr (&Scoding_system_p);
defsubr (&Sread_coding_system);
defsubr (&Sread_non_nil_coding_system);