diff options
author | Eli Zaretskii <eliz@gnu.org> | 2012-04-06 16:10:30 +0300 |
---|---|---|
committer | Eli Zaretskii <eliz@gnu.org> | 2012-04-06 16:10:30 +0300 |
commit | 2f8e16b2a3c5782a3c8266cc76fbba80d506b93d (patch) | |
tree | 208db075f932b76c4720ffd4de7d8ef732da8ae8 /src/buffer.h | |
parent | ea0ff31442804544d4096f1e7eaff9ecb10e479d (diff) | |
download | emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.tar.gz emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.tar.bz2 emacs-2f8e16b2a3c5782a3c8266cc76fbba80d506b93d.zip |
Warning comments about subtleties of fetching characters from buffers/strings.
src/buffer.h (FETCH_CHAR, FETCH_MULTIBYTE_CHAR):
src/character.h (STRING_CHAR, STRING_CHAR_AND_LENGTH): Add comments
about subtle differences between FETCH_CHAR* and STRING_CHAR*
macros related to unification of CJK characters. For the details,
see the discussion following the message here:
http://debbugs.gnu.org/cgi/bugreport.cgi?bug=11073#14.
Diffstat (limited to 'src/buffer.h')
-rw-r--r-- | src/buffer.h | 15 |
1 files changed, 13 insertions, 2 deletions
```diff
diff --git a/src/buffer.h b/src/buffer.h
index 3df4a95cf93..1635a847839 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -343,7 +343,8 @@ while (0)
   - (ptr - (current_buffer)->text->beg <= GPT_BYTE - BEG_BYTE ? 0 : GAP_SIZE) \
   + BEG_BYTE)
 
-/* Return character at byte position POS. */
+/* Return character at byte position POS. See the caveat WARNING for
+   FETCH_MULTIBYTE_CHAR below. */
 
 #define FETCH_CHAR(pos) \
   (!NILP (BVAR (current_buffer, enable_multibyte_characters)) \
@@ -359,7 +360,17 @@ extern unsigned char *_fetch_multibyte_char_p;
 
 /* Return character code of multi-byte form at byte position POS. If POS
    doesn't point the head of valid multi-byte form, only the byte at
-   POS is returned. No range checking. */
+   POS is returned. No range checking.
+
+   WARNING: The character returned by this macro could be "unified"
+   inside STRING_CHAR, if the original character in the buffer belongs
+   to one of the Private Use Areas (PUAs) of codepoints that Emacs
+   uses to support non-unified CJK characters. If that happens,
+   CHAR_BYTES will return a value that is different from the length of
+   the original multibyte sequence stored in the buffer. Therefore,
+   do _not_ use FETCH_MULTIBYTE_CHAR if you need to advance through
+   the buffer to the next character after fetching this one. Instead,
+   use either FETCH_CHAR_ADVANCE or STRING_CHAR_AND_LENGTH. */
 
 #define FETCH_MULTIBYTE_CHAR(pos) \
   (_fetch_multibyte_char_p = (((pos) >= GPT_BYTE ? GAP_SIZE : 0) \
```