3 files changed, 34 insertions, 4 deletions
diff --git a/src/ChangeLog b/src/ChangeLog
index ea770969818..af65d38a33b 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,12 @@
+2012-04-06  Eli Zaretskii  <eliz@gnu.org>
+
+	* buffer.h (FETCH_CHAR, FETCH_MULTIBYTE_CHAR):
+	* character.h (STRING_CHAR, STRING_CHAR_AND_LENGTH): Add comments
+	about subtle differences between FETCH_CHAR* and STRING_CHAR*
+	macros related to unification of CJK characters.  For the details,
+	see the discussion following the message here:
+	http://debbugs.gnu.org/cgi/bugreport.cgi?bug=11073#14.
+
 2012-04-04  Chong Yidong  <cyd@gnu.org>
 
 	* keyboard.c (Vdelayed_warnings_list): Doc fix.
diff --git a/src/buffer.h b/src/buffer.h
index 3df4a95cf93..1635a847839 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -343,7 +343,8 @@ while (0)
  - (ptr - (current_buffer)->text->beg <= GPT_BYTE - BEG_BYTE ? 0 : GAP_SIZE) \
  + BEG_BYTE)
 
-/* Return character at byte position POS.  */
+/* Return character at byte position POS.  See the caveat WARNING for
+   FETCH_MULTIBYTE_CHAR below.  */
 
 #define FETCH_CHAR(pos)				      	\
   (!NILP (BVAR (current_buffer, enable_multibyte_characters))	\
@@ -359,7 +360,17 @@ extern unsigned char *_fetch_multibyte_char_p;
 
 /* Return character code of multi-byte form at byte position POS.  If POS
    doesn't point the head of valid multi-byte form, only the byte at
-   POS is returned.  No range checking.  */
+   POS is returned.  No range checking.
+
+   WARNING: The character returned by this macro could be "unified"
+   inside STRING_CHAR, if the original character in the buffer belongs
+   to one of the Private Use Areas (PUAs) of codepoints that Emacs
+   uses to support non-unified CJK characters.  If that happens,
+   CHAR_BYTES will return a value that is different from the length of
+   the original multibyte sequence stored in the buffer.  Therefore,
+   do _not_ use FETCH_MULTIBYTE_CHAR if you need to advance through
+   the buffer to the next character after fetching this one.  Instead,
+   use either FETCH_CHAR_ADVANCE or STRING_CHAR_AND_LENGTH.  */
 
 #define FETCH_MULTIBYTE_CHAR(pos)				 	\
   (_fetch_multibyte_char_p = (((pos) >= GPT_BYTE ? GAP_SIZE : 0) 	\
diff --git a/src/character.h b/src/character.h
index 5ae6cb8c49c..a829def428d 100644
--- a/src/character.h
+++ b/src/character.h
@@ -292,7 +292,9 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
   } while (0)
 
 /* Return the character code of character whose multibyte form is at
-   P.  */
+   P.  Note that this macro unifies CJK characters whose codepoints
+   are in the Private Use Areas (PUAs), so it might return a different
+   codepoint from the one actually stored at P.  */
 
 #define STRING_CHAR(p)						\
   (!((p)[0] & 0x80)						\
@@ -309,7 +311,15 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
 
 
 /* Like STRING_CHAR, but set ACTUAL_LEN to the length of multibyte
-   form.  */
+   form.
+
+   Note: This macro returns the actual length of the character's
+   multibyte sequence as it is stored in a buffer or string.  The
+   character it returns might have a different codepoint that has a
+   different multibyte sequence of a different legth, due to possible
+   unification of CJK characters inside string_char.  Therefore do NOT
+   assume that the length returned by this macro is identical to the
+   length of the multibyte sequence of the character it returns.  */
 
 #define STRING_CHAR_AND_LENGTH(p, actual_len)			\
   (!((p)[0] & 0x80)						\