summary | refs | log | tree | commit | diff
path: root/src/buffer.h
diff options
context:
space:
mode:
author: Eli Zaretskii <eliz@gnu.org> 2012-04-06 16:10:30 +0300
committer: Eli Zaretskii <eliz@gnu.org> 2012-04-06 16:10:30 +0300
commit: 2f8e16b2a3c5782a3c8266cc76fbba80d506b93d (patch)
tree: 208db075f932b76c4720ffd4de7d8ef732da8ae8 /src/buffer.h
parent: ea0ff31442804544d4096f1e7eaff9ecb10e479d (diff)
download: emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.tar.gz
emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.tar.bz2
emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.zip
Warning comments about subtleties of fetching characters from buffers/strings.
src/buffer.h (FETCH_CHAR, FETCH_MULTIBYTE_CHAR): src/character.h (STRING_CHAR, STRING_CHAR_AND_LENGTH): Add comments about subtle differences between FETCH_CHAR* and STRING_CHAR* macros related to unification of CJK characters. For the details, see the discussion following the message here: http://debbugs.gnu.org/cgi/bugreport.cgi?bug=11073#14.
Diffstat (limited to 'src/buffer.h')
-rw-r--r-- src/buffer.h 15
1 files changed, 13 insertions, 2 deletions
diff --git a/src/buffer.h b/src/buffer.h
index 3df4a95cf93..1635a847839 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -343,7 +343,8 @@ while (0)
- (ptr - (current_buffer)->text->beg <= GPT_BYTE - BEG_BYTE ? 0 : GAP_SIZE) \
+ BEG_BYTE)
-/* Return character at byte position POS. */
+/* Return character at byte position POS. See the caveat WARNING for
+ FETCH_MULTIBYTE_CHAR below. */
#define FETCH_CHAR(pos) \
(!NILP (BVAR (current_buffer, enable_multibyte_characters)) \
@@ -359,7 +360,17 @@ extern unsigned char *_fetch_multibyte_char_p;
/* Return character code of multi-byte form at byte position POS. If POS
doesn't point the head of valid multi-byte form, only the byte at
- POS is returned. No range checking. */
+ POS is returned. No range checking.
+
+ WARNING: The character returned by this macro could be "unified"
+ inside STRING_CHAR, if the original character in the buffer belongs
+ to one of the Private Use Areas (PUAs) of codepoints that Emacs
+ uses to support non-unified CJK characters. If that happens,
+ CHAR_BYTES will return a value that is different from the length of
+ the original multibyte sequence stored in the buffer. Therefore,
+ do _not_ use FETCH_MULTIBYTE_CHAR if you need to advance through
+ the buffer to the next character after fetching this one. Instead,
+ use either FETCH_CHAR_ADVANCE or STRING_CHAR_AND_LENGTH. */
#define FETCH_MULTIBYTE_CHAR(pos) \
(_fetch_multibyte_char_p = (((pos) >= GPT_BYTE ? GAP_SIZE : 0) \