1 files changed, 117 insertions, 84 deletions
diff --git a/src/coding.c b/src/coding.c
index ed755b1afcf..071124b4ef1 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -643,7 +643,7 @@ growable_destination (struct coding_system *coding)
 	else						\
 	  {						\
 	    src--;					\
-	    c = - string_char (src, &src, NULL);	\
+	    c = - string_char_advance (&src);		\
 	    record_conversion_result			\
 	      (coding, CODING_RESULT_INVALID_SRC);	\
 	  }						\
@@ -728,7 +728,7 @@ growable_destination (struct coding_system *coding)
 	unsigned ch = (c);		\
 	if (ch >= 0x80)			\
 	  ch = BYTE8_TO_CHAR (ch);	\
-	CHAR_STRING_ADVANCE (ch, dst);	\
+	dst += CHAR_STRING (ch, dst);	\
       }					\
     else				\
       *dst++ = (c);			\
@@ -747,11 +747,11 @@ growable_destination (struct coding_system *coding)
 	ch = (c1);			\
 	if (ch >= 0x80)			\
 	  ch = BYTE8_TO_CHAR (ch);	\
-	CHAR_STRING_ADVANCE (ch, dst);	\
+	dst += CHAR_STRING (ch, dst);	\
 	ch = (c2);			\
 	if (ch >= 0x80)			\
 	  ch = BYTE8_TO_CHAR (ch);	\
-	CHAR_STRING_ADVANCE (ch, dst);	\
+	dst += CHAR_STRING (ch, dst);	\
       }					\
     else				\
       {					\
@@ -884,18 +884,18 @@ record_conversion_result (struct coding_system *coding,
 
 
 /* Store multibyte form of the character C in P, and advance P to the
-   end of the multibyte form.  This used to be like CHAR_STRING_ADVANCE
+   end of the multibyte form.  This used to be like adding CHAR_STRING
    without ever calling MAYBE_UNIFY_CHAR, but nowadays we don't call
-   MAYBE_UNIFY_CHAR in CHAR_STRING_ADVANCE.  */
+   MAYBE_UNIFY_CHAR in CHAR_STRING.  */
 
-#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)  CHAR_STRING_ADVANCE(c, p)
+#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p) ((p) += CHAR_STRING (c, p))
 
 /* Return the character code of character whose multibyte form is at
    P, and advance P to the end of the multibyte form.  This used to be
-   like STRING_CHAR_ADVANCE without ever calling MAYBE_UNIFY_CHAR, but
-   nowadays STRING_CHAR_ADVANCE doesn't call MAYBE_UNIFY_CHAR.  */
+   like string_char_advance without ever calling MAYBE_UNIFY_CHAR, but
+   nowadays string_char_advance doesn't call MAYBE_UNIFY_CHAR.  */
 
-#define STRING_CHAR_ADVANCE_NO_UNIFY(p) STRING_CHAR_ADVANCE(p)
+#define STRING_CHAR_ADVANCE_NO_UNIFY(p) string_char_advance (&(p))
 
 /* Set coding->source from coding->src_object.  */
 
@@ -5131,7 +5131,7 @@ decode_coding_ccl (struct coding_system *coding)
 	  while (i < 1024 && p < src_end)
 	    {
 	      source_byteidx[i] = p - src;
-	      source_charbuf[i++] = STRING_CHAR_ADVANCE (p);
+	      source_charbuf[i++] = string_char_advance (&p);
 	    }
 	  source_byteidx[i] = p - src;
 	}
@@ -5308,15 +5308,10 @@ encode_coding_raw_text (struct coding_system *coding)
 	      }
 	    else
 	      {
-		unsigned char str[MAX_MULTIBYTE_LENGTH], *p0 = str, *p1 = str;
-
-		CHAR_STRING_ADVANCE (c, p1);
-		do
-		  {
-		    EMIT_ONE_BYTE (*p0);
-		    p0++;
-		  }
-		while (p0 < p1);
+		unsigned char str[MAX_MULTIBYTE_LENGTH];
+		int len = CHAR_STRING (c, str);
+		for (int i = 0; i < len; i++)
+		  EMIT_ONE_BYTE (str[i]);
 	      }
 	  }
       else
@@ -5342,7 +5337,7 @@ encode_coding_raw_text (struct coding_system *coding)
 	      else if (CHAR_BYTE8_P (c))
 		*dst++ = CHAR_TO_BYTE8 (c);
 	      else
-		CHAR_STRING_ADVANCE (c, dst);
+		dst += CHAR_STRING (c, dst);
 	    }
 	}
       else
@@ -7457,7 +7452,7 @@ decode_coding (struct coding_system *coding)
 	      if (coding->src_multibyte
 		  && CHAR_BYTE8_HEAD_P (*src) && nbytes > 0)
 		{
-		  c = STRING_CHAR_ADVANCE (src);
+		  c = string_char_advance (&src);
 		  nbytes--;
 		}
 	      else
@@ -7551,10 +7546,8 @@ handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit,
 		  len = SCHARS (components);
 		  i = i_byte = 0;
 		  while (i < len)
-		    {
-		      FETCH_STRING_CHAR_ADVANCE (*buf, components, i, i_byte);
-		      buf++;
-		    }
+		    *buf++ = fetch_string_char_advance (components,
+							&i, &i_byte);
 		}
 	      else if (FIXNUMP (components))
 		{
@@ -7677,15 +7670,17 @@ consume_chars (struct coding_system *coding, Lisp_Object translation_table,
 
       if (! multibytep)
 	{
-	  int bytes;
-
 	  if (coding->encoder == encode_coding_raw_text
 	      || coding->encoder == encode_coding_ccl)
 	    c = *src++, pos++;
-	  else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
-	    c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
 	  else
-	    c = BYTE8_TO_CHAR (*src), src++, pos++;
+	    {
+	      int bytes = multibyte_length (src, src_end, true, true);
+	      if (0 < bytes)
+		c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
+	      else
+		c = BYTE8_TO_CHAR (*src), src++, pos++;
+	    }
 	}
       else
 	c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
@@ -7715,7 +7710,7 @@ consume_chars (struct coding_system *coding, Lisp_Object translation_table,
 
 	  lookup_buf[0] = c;
 	  for (i = 1; i < max_lookup && p < src_end; i++)
-	    lookup_buf[i] = STRING_CHAR_ADVANCE (p);
+	    lookup_buf[i] = string_char_advance (&p);
 	  lookup_buf_end = lookup_buf + i;
 	  trans = get_translation (trans, lookup_buf, lookup_buf_end,
 				   &from_nchars);
@@ -7734,7 +7729,7 @@ consume_chars (struct coding_system *coding, Lisp_Object translation_table,
 	  for (i = 1; i < to_nchars; i++)
 	    *buf++ = XFIXNUM (AREF (trans, i));
 	  for (i = 1; i < from_nchars; i++, pos++)
-	    src += MULTIBYTE_LENGTH_NO_CHECK (src);
+	    src += multibyte_length (src, NULL, false, true);
 	}
     }
 
@@ -9023,23 +9018,23 @@ DEFUN ("find-coding-systems-region-internal",
     }
   else
     {
-      CHECK_FIXNUM_COERCE_MARKER (start);
-      CHECK_FIXNUM_COERCE_MARKER (end);
-      if (XFIXNUM (start) < BEG || XFIXNUM (end) > Z || XFIXNUM (start) > XFIXNUM (end))
+      EMACS_INT s = fix_position (start);
+      EMACS_INT e = fix_position (end);
+      if (! (BEG <= s && s <= e && e <= Z))
 	args_out_of_range (start, end);
       if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
 	return Qt;
-      start_byte = CHAR_TO_BYTE (XFIXNUM (start));
-      end_byte = CHAR_TO_BYTE (XFIXNUM (end));
-      if (XFIXNUM (end) - XFIXNUM (start) == end_byte - start_byte)
+      start_byte = CHAR_TO_BYTE (s);
+      end_byte = CHAR_TO_BYTE (e);
+      if (e - s == end_byte - start_byte)
 	return Qt;
 
-      if (XFIXNUM (start) < GPT && XFIXNUM (end) > GPT)
+      if (s < GPT && GPT < e)
 	{
-	  if ((GPT - XFIXNUM (start)) < (XFIXNUM (end) - GPT))
-	    move_gap_both (XFIXNUM (start), start_byte);
+	  if (GPT - s < e - GPT)
+	    move_gap_both (s, start_byte);
 	  else
-	    move_gap_both (XFIXNUM (end), end_byte);
+	    move_gap_both (e, end_byte);
 	}
     }
 
@@ -9075,7 +9070,7 @@ DEFUN ("find-coding-systems-region-internal",
 	p++;
       else
 	{
-	  c = STRING_CHAR_ADVANCE (p);
+	  c = string_char_advance (&p);
 	  if (!NILP (char_table_ref (work_table, c)))
 	    /* This character was already checked.  Ignore it.  */
 	    continue;
@@ -9208,7 +9203,7 @@ to the string and treated as in `substring'.  */)
 	  p = GAP_END_ADDR;
 	}
 
-      c = STRING_CHAR_ADVANCE (p);
+      c = string_char_advance (&p);
       if (! (ASCII_CHAR_P (c) && ascii_compatible)
 	  && ! char_charset (translate_char (translation_table, c),
 			     charset_list, NULL))
@@ -9277,32 +9272,35 @@ is nil.  */)
     }
   else
     {
-      CHECK_FIXNUM_COERCE_MARKER (start);
-      CHECK_FIXNUM_COERCE_MARKER (end);
-      if (XFIXNUM (start) < BEG || XFIXNUM (end) > Z || XFIXNUM (start) > XFIXNUM (end))
+      EMACS_INT s = fix_position (start);
+      EMACS_INT e = fix_position (end);
+      if (! (BEG <= s && s <= e && e <= Z))
 	args_out_of_range (start, end);
       if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
 	return Qnil;
-      start_byte = CHAR_TO_BYTE (XFIXNUM (start));
-      end_byte = CHAR_TO_BYTE (XFIXNUM (end));
-      if (XFIXNUM (end) - XFIXNUM (start) == end_byte - start_byte)
+      start_byte = CHAR_TO_BYTE (s);
+      end_byte = CHAR_TO_BYTE (e);
+      if (e - s == end_byte - start_byte)
 	return Qnil;
 
-      if (XFIXNUM (start) < GPT && XFIXNUM (end) > GPT)
+      if (s < GPT && GPT < e)
 	{
-	  if ((GPT - XFIXNUM (start)) < (XFIXNUM (end) - GPT))
-	    move_gap_both (XFIXNUM (start), start_byte);
+	  if (GPT - s < e - GPT)
+	    move_gap_both (s, start_byte);
 	  else
-	    move_gap_both (XFIXNUM (end), end_byte);
+	    move_gap_both (e, end_byte);
 	}
-      pos = XFIXNUM (start);
+      pos = s;
     }
 
   list = Qnil;
   for (tail = coding_system_list; CONSP (tail); tail = XCDR (tail))
     {
       elt = XCAR (tail);
-      attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
+      Lisp_Object spec = CODING_SYSTEM_SPEC (elt);
+      if (!VECTORP (spec))
+        xsignal1 (Qcoding_system_error, elt);
+      attrs = AREF (spec, 0);
       ASET (attrs, coding_attr_trans_tbl,
 	    get_translation_table (attrs, 1, NULL));
       list = Fcons (list2 (elt, attrs), list);
@@ -9323,7 +9321,7 @@ is nil.  */)
 	p++;
       else
 	{
-	  c = STRING_CHAR_ADVANCE (p);
+	  c = string_char_advance (&p);
 
 	  charset_map_loaded = 0;
 	  for (tail = list; CONSP (tail); tail = XCDR (tail))
@@ -9471,6 +9469,17 @@ not fully specified.)  */)
   return code_convert_region (start, end, coding_system, destination, 1, 0);
 }
 
+/* Return true if every byte of STRING lies in the range 0..127.  */
+static bool
+string_ascii_p (Lisp_Object string)
+{
+  ptrdiff_t len = SBYTES (string);
+  ptrdiff_t idx = 0;
+  while (idx < len && SREF (string, idx) <= 127)
+    idx++;
+  return idx == len;
+}
+
 Lisp_Object
 code_convert_string (Lisp_Object string, Lisp_Object coding_system,
 		     Lisp_Object dst_object, bool encodep, bool nocopy,
@@ -9485,7 +9494,7 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
       if (! norecord)
 	Vlast_coding_system_used = Qno_conversion;
       if (NILP (dst_object))
-	return (nocopy ? Fcopy_sequence (string) : string);
+	return nocopy ? string : Fcopy_sequence (string);
     }
 
   if (NILP (coding_system))
@@ -9502,7 +9511,28 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
   chars = SCHARS (string);
   bytes = SBYTES (string);
 
-  if (BUFFERP (dst_object))
+  if (EQ (dst_object, Qt))
+    {
+      /* Fast path for ASCII-only input and an ASCII-compatible coding:
+         act as identity if no EOL conversion is needed.  */
+      Lisp_Object attrs = CODING_ID_ATTRS (coding.id);
+      if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
+          && (STRING_MULTIBYTE (string)
+              ? (chars == bytes) : string_ascii_p (string))
+          && (EQ (CODING_ID_EOL_TYPE (coding.id), Qunix)
+              || inhibit_eol_conversion
+              || ! memchr (SDATA (string), encodep ? '\n' : '\r', bytes)))
+        {
+          if (! norecord)
+            Vlast_coding_system_used = coding_system;
+          return (nocopy
+                  ? string
+                  : (encodep
+                     ? make_unibyte_string (SSDATA (string), bytes)
+                     : make_multibyte_string (SSDATA (string), bytes, bytes)));
+        }
+    }
+  else if (BUFFERP (dst_object))
     {
       struct buffer *buf = XBUFFER (dst_object);
       ptrdiff_t buf_pt = BUF_PT (buf);
@@ -9524,10 +9554,7 @@ code_convert_string (Lisp_Object string, Lisp_Object coding_system,
 
 
 /* Encode or decode STRING according to CODING_SYSTEM.
-   Do not set Vlast_coding_system_used.
-
-   This function is called only from macros DECODE_FILE and
-   ENCODE_FILE, thus we ignore character composition.  */
+   Do not set Vlast_coding_system_used.  */
 
 Lisp_Object
 code_convert_string_norecord (Lisp_Object string, Lisp_Object coding_system,
@@ -9696,7 +9723,7 @@ encode_string_utf_8 (Lisp_Object string, Lisp_Object buffer,
 	      || (len == 2 ? ! CHAR_BYTE8_HEAD_P (c)
 		  : (EQ (handle_over_uni, Qt)
 		     || (len == 4
-			 && string_char (p, NULL, NULL) <= MAX_UNICODE_CHAR))))
+			 && STRING_CHAR (p) <= MAX_UNICODE_CHAR))))
 	    {
 	      p += len;
 	      continue;
@@ -9978,8 +10005,7 @@ decode_string_utf_8 (Lisp_Object string, const char *str, ptrdiff_t str_len,
 		  && (len == 3
 		      || (UTF_8_EXTRA_OCTET_P (p[3])
 			  && len == 4
-			  && (string_char (p, NULL, NULL)
-			      <= MAX_UNICODE_CHAR))))))
+			  && STRING_CHAR (p) <= MAX_UNICODE_CHAR)))))
 	{
 	  p += len;
 	  continue;
@@ -10116,8 +10142,7 @@ decode_string_utf_8 (Lisp_Object string, const char *str, ptrdiff_t str_len,
 		   mlen++);
 	      if (mlen == len
 		  && (len <= 3
-		      || (len == 4
-			  && string_char (p, NULL, NULL) <= MAX_UNICODE_CHAR)
+		      || (len == 4 && STRING_CHAR (p) <= MAX_UNICODE_CHAR)
 		      || EQ (handle_over_uni, Qt)))
 		{
 		  p += len;
@@ -10297,6 +10322,16 @@ DEFUN ("internal-decode-string-utf-8", Finternal_decode_string_utf_8,
 
 #endif	/* ENABLE_UTF_8_CONVERTER_TEST */
 
+/* Encode (if ENCODEP) or decode STRING using CODING_SYSTEM, possibly
+   returning STRING itself; Vlast_coding_system_used is not set.  */
+static Lisp_Object
+convert_string_nocopy (Lisp_Object string, Lisp_Object coding_system,
+                       bool encodep)
+{
+  return code_convert_string (string, coding_system, Qt, encodep,
+                              /* nocopy */ true, /* norecord */ true);
+}
+
 /* Encode or decode a file name, to or from a unibyte string suitable
    for passing to C library functions.  */
 Lisp_Object
@@ -10307,14 +10342,13 @@ decode_file_name (Lisp_Object fname)
      converts the file names either to UTF-16LE or to the system ANSI
      codepage internally, depending on the underlying OS; see w32.c.  */
   if (! NILP (Fcoding_system_p (Qutf_8)))
-    return code_convert_string_norecord (fname, Qutf_8, 0);
+    return convert_string_nocopy (fname, Qutf_8, 0);
   return fname;
 #else  /* !WINDOWSNT */
   if (! NILP (Vfile_name_coding_system))
-    return code_convert_string_norecord (fname, Vfile_name_coding_system, 0);
+    return convert_string_nocopy (fname, Vfile_name_coding_system, 0);
   else if (! NILP (Vdefault_file_name_coding_system))
-    return code_convert_string_norecord (fname,
-					 Vdefault_file_name_coding_system, 0);
+    return convert_string_nocopy (fname, Vdefault_file_name_coding_system, 0);
   else
     return fname;
 #endif
@@ -10334,14 +10368,13 @@ encode_file_name (Lisp_Object fname)
      converts the file names either to UTF-16LE or to the system ANSI
      codepage internally, depending on the underlying OS; see w32.c.  */
   if (! NILP (Fcoding_system_p (Qutf_8)))
-    return code_convert_string_norecord (fname, Qutf_8, 1);
+    return convert_string_nocopy (fname, Qutf_8, 1);
   return fname;
 #else  /* !WINDOWSNT */
   if (! NILP (Vfile_name_coding_system))
-    return code_convert_string_norecord (fname, Vfile_name_coding_system, 1);
+    return convert_string_nocopy (fname, Vfile_name_coding_system, 1);
   else if (! NILP (Vdefault_file_name_coding_system))
-    return code_convert_string_norecord (fname,
-					 Vdefault_file_name_coding_system, 1);
+    return convert_string_nocopy (fname, Vdefault_file_name_coding_system, 1);
   else
     return fname;
 #endif
@@ -10362,7 +10395,7 @@ representation of the decoded text.
 
 This function sets `last-coding-system-used' to the precise coding system
 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
-not fully specified.)  */)
+not fully specified.)  The function does not change the match data.  */)
   (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
 {
   return code_convert_string (string, coding_system, buffer,
@@ -10382,7 +10415,7 @@ case, the return value is the length of the encoded text.
 
 This function sets `last-coding-system-used' to the precise coding system
 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
-not fully specified.)  */)
+not fully specified.)  The function does not change the match data.  */)
   (Lisp_Object string, Lisp_Object coding_system, Lisp_Object nocopy, Lisp_Object buffer)
 {
   return code_convert_string (string, coding_system, buffer,
@@ -11061,10 +11094,8 @@ usage: (define-coding-system-internal ...)  */)
 	  else
 	    {
 	      CHECK_CONS (val);
-	      CHECK_RANGED_INTEGER (XCAR (val), 0, 255);
-	      from = XFIXNUM (XCAR (val));
-	      CHECK_RANGED_INTEGER (XCDR (val), from, 255);
-	      to = XFIXNUM (XCDR (val));
+	      from = check_integer_range (XCAR (val), 0, 255);
+	      to = check_integer_range (XCDR (val), from, 255);
 	    }
 	  for (int i = from; i <= to; i++)
 	    SSET (valids, i, 1);
@@ -11149,7 +11180,7 @@ usage: (define-coding-system-internal ...)  */)
 	  val = XCAR (tail);
 	  CHECK_CONS (val);
 	  CHECK_CHARSET_GET_ID (XCAR (val), id);
-	  CHECK_RANGED_INTEGER (XCDR (val), 0, 3);
+	  check_integer_range (XCDR (val), 0, 3);
 	  XSETCAR (val, make_fixnum (id));
 	}
 
@@ -11745,6 +11776,8 @@ syms_of_coding (void)
 
   DEFSYM (Qignored, "ignored");
 
+  DEFSYM (Qutf_8_string_p, "utf-8-string-p");
+
   defsubr (&Scoding_system_p);
   defsubr (&Sread_coding_system);
   defsubr (&Sread_non_nil_coding_system);