Warning comments about subtleties of fetching characters from buffers/strings.

src/buffer.h (FETCH_CHAR, FETCH_MULTIBYTE_CHAR):
src/character.h (STRING_CHAR, STRING_CHAR_AND_LENGTH): Add comments about the subtle differences between the FETCH_CHAR* and STRING_CHAR* macros related to the unification of CJK characters. For details, see the discussion following this message: http://debbugs.gnu.org/cgi/bugreport.cgi?bug=11073#14
author: Eli Zaretskii <eliz@gnu.org> 2012-04-06 16:10:30 +0300
committer: Eli Zaretskii <eliz@gnu.org> 2012-04-06 16:10:30 +0300
commit: 2f8e16b2a3c5782a3c8266cc76fbba80d506b93d (patch)
tree: 208db075f932b76c4720ffd4de7d8ef732da8ae8 /src/buffer.h
parent: ea0ff31442804544d4096f1e7eaff9ecb10e479d (diff)
download: emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.tar.gz
emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.tar.bz2
emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.zip
1 file changed, 13 insertions, 2 deletions
diff --git a/src/buffer.h b/src/buffer.h
index 3df4a95cf93..1635a847839 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -343,7 +343,8 @@ while (0)
  - (ptr - (current_buffer)->text->beg <= GPT_BYTE - BEG_BYTE ? 0 : GAP_SIZE) \
  + BEG_BYTE)
 
-/* Return character at byte position POS.  */
+/* Return character at byte position POS.  See the caveat WARNING for
+   FETCH_MULTIBYTE_CHAR below.  */
 
 #define FETCH_CHAR(pos)				      	\
   (!NILP (BVAR (current_buffer, enable_multibyte_characters))	\
@@ -359,7 +360,17 @@ extern unsigned char *_fetch_multibyte_char_p;
 
 /* Return character code of multi-byte form at byte position POS.  If POS
    doesn't point the head of valid multi-byte form, only the byte at
-   POS is returned.  No range checking.  */
+   POS is returned.  No range checking.
+
+   WARNING: The character returned by this macro could be "unified"
+   inside STRING_CHAR, if the original character in the buffer belongs
+   to one of the Private Use Areas (PUAs) of codepoints that Emacs
+   uses to support non-unified CJK characters.  If that happens,
+   CHAR_BYTES will return a value that is different from the length of
+   the original multibyte sequence stored in the buffer.  Therefore,
+   do _not_ use FETCH_MULTIBYTE_CHAR if you need to advance through
+   the buffer to the next character after fetching this one.  Instead,
+   use either FETCH_CHAR_ADVANCE or STRING_CHAR_AND_LENGTH.  */
 
 #define FETCH_MULTIBYTE_CHAR(pos)				 	\
   (_fetch_multibyte_char_p = (((pos) >= GPT_BYTE ? GAP_SIZE : 0) 	\
author	Eli Zaretskii <eliz@gnu.org>	2012-04-06 16:10:30 +0300
committer	Eli Zaretskii <eliz@gnu.org>	2012-04-06 16:10:30 +0300
commit	2f8e16b2a3c5782a3c8266cc76fbba80d506b93d (patch)
tree	208db075f932b76c4720ffd4de7d8ef732da8ae8 /src/buffer.h
parent	ea0ff31442804544d4096f1e7eaff9ecb10e479d (diff)
download	emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.tar.gz emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.tar.bz2 emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.zip