summary | refs | log | tree | commit | diff
path: root/src/coding.c
diff options
context:
space:
mode:
author: Stefan Monnier <monnier@iro.umontreal.ca> 2011-02-01 12:09:25 -0500
committer: Stefan Monnier <monnier@iro.umontreal.ca> 2011-02-01 12:09:25 -0500
commit: 8f1d2ef658f95549eb33fe5265f8f11c5129bece (patch)
tree: b7cd852a1adb423384532cfe22c31547160b22bc /src/coding.c
parent: 590130fb19e1f433965c421d98fedeb2d7c33310 (diff)
parent: 1dc4075fa8809805aed5092e93e225e889725c94 (diff)
download: emacs-8f1d2ef658f95549eb33fe5265f8f11c5129bece.tar.gz
emacs-8f1d2ef658f95549eb33fe5265f8f11c5129bece.tar.bz2
emacs-8f1d2ef658f95549eb33fe5265f8f11c5129bece.zip
Merge from trunk
Diffstat (limited to 'src/coding.c')
-rw-r--r-- src/coding.c | 213
1 files changed, 69 insertions, 144 deletions
diff --git a/src/coding.c b/src/coding.c
index a7b7c7d6b23..3a3ba11ee9d 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -1,8 +1,7 @@
/* Coding system handler (conversion, detection, etc).
- Copyright (C) 2001, 2002, 2003, 2004, 2005,
- 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+ Copyright (C) 2001-2011 Free Software Foundation, Inc.
Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
- 2005, 2006, 2007, 2008, 2009, 2010
+ 2005, 2006, 2007, 2008, 2009, 2010, 2011
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H14PRO021
Copyright (C) 2003
@@ -327,16 +326,6 @@ Lisp_Object Qinterrupted, Qinsufficient_memory;
symbol as a coding system. */
static Lisp_Object Qcoding_system_define_form;
-int coding_system_require_warning;
-
-Lisp_Object Vselect_safe_coding_system_function;
-
-/* Mnemonic string for each format of end-of-line. */
-Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
-/* Mnemonic string to indicate format of end-of-line is not yet
- decided. */
-Lisp_Object eol_mnemonic_undecided;
-
/* Format of end-of-line decided by system. This is Qunix on
Unix and Mac, Qdos on DOS/Windows.
This has an effect only for external encoding (i.e. for output to
@@ -345,8 +334,6 @@ static Lisp_Object system_eol_type;
#ifdef emacs
-Lisp_Object Vcoding_system_list, Vcoding_system_alist;
-
Lisp_Object Qcoding_system_p, Qcoding_system_error;
/* Coding system emacs-mule and raw-text are for converting only
@@ -356,64 +343,17 @@ Lisp_Object Qutf_8_emacs;
/* Coding-systems are handed between Emacs Lisp programs and C internal
routines by the following three variables. */
-/* Coding-system for reading files and receiving data from process. */
-Lisp_Object Vcoding_system_for_read;
-/* Coding-system for writing files and sending data to process. */
-Lisp_Object Vcoding_system_for_write;
-/* Coding-system actually used in the latest I/O. */
-Lisp_Object Vlast_coding_system_used;
-/* Set to non-nil when an error is detected while code conversion. */
-Lisp_Object Vlast_code_conversion_error;
-/* A vector of length 256 which contains information about special
- Latin codes (especially for dealing with Microsoft codes). */
-Lisp_Object Vlatin_extra_code_table;
-
-/* Flag to inhibit code conversion of end-of-line format. */
-int inhibit_eol_conversion;
-
-/* Flag to inhibit ISO2022 escape sequence detection. */
-int inhibit_iso_escape_detection;
-
-/* Flag to inhibit detection of binary files through null bytes. */
-int inhibit_null_byte_detection;
-
-/* Flag to make buffer-file-coding-system inherit from process-coding. */
-int inherit_process_coding_system;
-
/* Coding system to be used to encode text for terminal display when
terminal coding system is nil. */
struct coding_system safe_terminal_coding;
-Lisp_Object Vfile_coding_system_alist;
-Lisp_Object Vprocess_coding_system_alist;
-Lisp_Object Vnetwork_coding_system_alist;
-
-Lisp_Object Vlocale_coding_system;
-
#endif /* emacs */
-/* Flag to tell if we look up translation table on character code
- conversion. */
-Lisp_Object Venable_character_translation;
-/* Standard translation table to look up on decoding (reading). */
-Lisp_Object Vstandard_translation_table_for_decode;
-/* Standard translation table to look up on encoding (writing). */
-Lisp_Object Vstandard_translation_table_for_encode;
-
Lisp_Object Qtranslation_table;
Lisp_Object Qtranslation_table_id;
Lisp_Object Qtranslation_table_for_decode;
Lisp_Object Qtranslation_table_for_encode;
-/* Alist of charsets vs revision number. */
-static Lisp_Object Vcharset_revision_table;
-
-/* Default coding systems used for process I/O. */
-Lisp_Object Vdefault_process_coding_system;
-
-/* Char table for translating Quail and self-inserting input. */
-Lisp_Object Vtranslation_table_for_input;
-
/* Two special coding systems. */
Lisp_Object Vsjis_coding_system;
Lisp_Object Vbig5_coding_system;
@@ -681,10 +621,6 @@ enum coding_category
| CATEGORY_MASK_UTF_8_NOSIG \
| CATEGORY_MASK_UTF_8_SIG)
-/* List of symbols `coding-category-xxx' ordered by priority. This
- variable is exposed to Emacs Lisp. */
-static Lisp_Object Vcoding_category_list;
-
/* Table of coding categories (Lisp symbols). This variable is for
internal use only. */
static Lisp_Object Vcoding_category_table;
@@ -1607,10 +1543,9 @@ static int
detect_coding_utf_16 (struct coding_system *coding,
struct coding_detection_info *detect_info)
{
- const unsigned char *src = coding->source, *src_base = src;
+ const unsigned char *src = coding->source;
const unsigned char *src_end = coding->source + coding->src_bytes;
int multibytep = coding->src_multibyte;
- int consumed_chars = 0;
int c1, c2;
detect_info->checked |= CATEGORY_MASK_UTF_16;
@@ -2277,7 +2212,6 @@ emacs_mule_char (struct coding_system *coding, const unsigned char *src,
#define DECODE_EMACS_MULE_21_COMPOSITION() \
do { \
enum composition_method method = c - 0xF2; \
- int *charbuf_base = charbuf; \
int nbytes, nchars; \
\
ONE_MORE_BYTE (c); \
@@ -2633,11 +2567,6 @@ decode_coding_emacs_mule (struct coding_system *coding)
}
continue;
- retry:
- src = src_base;
- consumed_chars = consumed_chars_base;
- continue;
-
invalid_code:
EMACS_MULE_MAYBE_FINISH_COMPOSITION ();
src = src_base;
@@ -5450,9 +5379,9 @@ detect_coding_charset (struct coding_system *coding,
attrs = CODING_ID_ATTRS (coding->id);
valids = AREF (attrs, coding_attr_charset_valids);
name = CODING_ID_NAME (coding->id);
- if (strncmp ((char *) SDATA (SYMBOL_NAME (name)),
+ if (strncmp (SSDATA (SYMBOL_NAME (name)),
"iso-8859-", sizeof ("iso-8859-") - 1) == 0
- || strncmp ((char *) SDATA (SYMBOL_NAME (name)),
+ || strncmp (SSDATA (SYMBOL_NAME (name)),
"iso-latin-", sizeof ("iso-latin-") - 1) == 0)
check_latin_extra = 1;
@@ -6260,8 +6189,9 @@ detect_eol (const unsigned char *source, EMACS_INT src_bytes,
{
/* The found type is different from what found before.
Allow for stray ^M characters in DOS EOL files. */
- if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
- || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
+ if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
+ || (eol_seen == EOL_SEEN_CRLF
+ && this_eol == EOL_SEEN_CR))
eol_seen = EOL_SEEN_CRLF;
else
{
@@ -6276,42 +6206,40 @@ detect_eol (const unsigned char *source, EMACS_INT src_bytes,
}
}
else
- {
- while (src < src_end)
- {
- c = *src++;
- if (c == '\n' || c == '\r')
- {
- int this_eol;
+ while (src < src_end)
+ {
+ c = *src++;
+ if (c == '\n' || c == '\r')
+ {
+ int this_eol;
- if (c == '\n')
- this_eol = EOL_SEEN_LF;
- else if (src >= src_end || *src != '\n')
- this_eol = EOL_SEEN_CR;
- else
- this_eol = EOL_SEEN_CRLF, src++;
+ if (c == '\n')
+ this_eol = EOL_SEEN_LF;
+ else if (src >= src_end || *src != '\n')
+ this_eol = EOL_SEEN_CR;
+ else
+ this_eol = EOL_SEEN_CRLF, src++;
- if (eol_seen == EOL_SEEN_NONE)
- /* This is the first end-of-line. */
- eol_seen = this_eol;
- else if (eol_seen != this_eol)
- {
- /* The found type is different from what found before.
- Allow for stray ^M characters in DOS EOL files. */
- if (eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF
- || eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR)
- eol_seen = EOL_SEEN_CRLF;
- else
- {
- eol_seen = EOL_SEEN_LF;
- break;
- }
- }
- if (++total == MAX_EOL_CHECK_COUNT)
- break;
- }
- }
- }
+ if (eol_seen == EOL_SEEN_NONE)
+ /* This is the first end-of-line. */
+ eol_seen = this_eol;
+ else if (eol_seen != this_eol)
+ {
+ /* The found type is different from what found before.
+ Allow for stray ^M characters in DOS EOL files. */
+ if ((eol_seen == EOL_SEEN_CR && this_eol == EOL_SEEN_CRLF)
+ || (eol_seen == EOL_SEEN_CRLF && this_eol == EOL_SEEN_CR))
+ eol_seen = EOL_SEEN_CRLF;
+ else
+ {
+ eol_seen = EOL_SEEN_LF;
+ break;
+ }
+ }
+ if (++total == MAX_EOL_CHECK_COUNT)
+ break;
+ }
+ }
return eol_seen;
}
@@ -10533,7 +10461,7 @@ syms_of_coding (void)
defsubr (&Scoding_system_eol_type);
defsubr (&Scoding_system_priority_list);
- DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
+ DEFVAR_LISP ("coding-system-list", Vcoding_system_list,
doc: /* List of coding systems.
Do not alter the value of this variable manually. This variable should be
@@ -10541,7 +10469,7 @@ updated by the functions `define-coding-system' and
`define-coding-system-alias'. */);
Vcoding_system_list = Qnil;
- DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
+ DEFVAR_LISP ("coding-system-alist", Vcoding_system_alist,
doc: /* Alist of coding system names.
Each element is one element list of coding system name.
This variable is given to `completing-read' as COLLECTION argument.
@@ -10551,7 +10479,7 @@ updated by the functions `make-coding-system' and
`define-coding-system-alias'. */);
Vcoding_system_alist = Qnil;
- DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
+ DEFVAR_LISP ("coding-category-list", Vcoding_category_list,
doc: /* List of coding-categories (symbols) ordered by priority.
On detecting a coding system, Emacs tries code detection algorithms
@@ -10570,7 +10498,7 @@ Don't modify this variable directly, but use `set-coding-system-priority'. */);
Vcoding_category_list);
}
- DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
+ DEFVAR_LISP ("coding-system-for-read", Vcoding_system_for_read,
doc: /* Specify the coding system for read operations.
It is useful to bind this variable with `let', but do not set it globally.
If the value is a coding system, it is used for decoding on read operation.
@@ -10579,7 +10507,7 @@ There are three such tables: `file-coding-system-alist',
`process-coding-system-alist', and `network-coding-system-alist'. */);
Vcoding_system_for_read = Qnil;
- DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
+ DEFVAR_LISP ("coding-system-for-write", Vcoding_system_for_write,
doc: /* Specify the coding system for write operations.
Programs bind this variable with `let', but you should not set it globally.
If the value is a coding system, it is used for encoding of output,
@@ -10593,12 +10521,12 @@ For output to files, if the above procedure does not specify a coding system,
the value of `buffer-file-coding-system' is used. */);
Vcoding_system_for_write = Qnil;
- DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
+ DEFVAR_LISP ("last-coding-system-used", Vlast_coding_system_used,
doc: /*
Coding system used in the latest file or process I/O. */);
Vlast_coding_system_used = Qnil;
- DEFVAR_LISP ("last-code-conversion-error", &Vlast_code_conversion_error,
+ DEFVAR_LISP ("last-code-conversion-error", Vlast_code_conversion_error,
doc: /*
Error status of the last code conversion.
@@ -10615,21 +10543,21 @@ explicitly set this variable to nil before performing code
conversion. */);
Vlast_code_conversion_error = Qnil;
- DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
+ DEFVAR_BOOL ("inhibit-eol-conversion", inhibit_eol_conversion,
doc: /*
*Non-nil means always inhibit code conversion of end-of-line format.
See info node `Coding Systems' and info node `Text and Binary' concerning
such conversion. */);
inhibit_eol_conversion = 0;
- DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
+ DEFVAR_BOOL ("inherit-process-coding-system", inherit_process_coding_system,
doc: /*
Non-nil means process buffer inherits coding system of process output.
Bind it to t if the process output is to be treated as if it were a file
read from some filesystem. */);
inherit_process_coding_system = 0;
- DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
+ DEFVAR_LISP ("file-coding-system-alist", Vfile_coding_system_alist,
doc: /*
Alist to decide a coding system to use for a file I/O operation.
The format is ((PATTERN . VAL) ...),
@@ -10650,7 +10578,7 @@ See also the function `find-operation-coding-system'
and the variable `auto-coding-alist'. */);
Vfile_coding_system_alist = Qnil;
- DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
+ DEFVAR_LISP ("process-coding-system-alist", Vprocess_coding_system_alist,
doc: /*
Alist to decide a coding system to use for a process I/O operation.
The format is ((PATTERN . VAL) ...),
@@ -10666,7 +10594,7 @@ or a cons of coding systems which are used as above.
See also the function `find-operation-coding-system'. */);
Vprocess_coding_system_alist = Qnil;
- DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
+ DEFVAR_LISP ("network-coding-system-alist", Vnetwork_coding_system_alist,
doc: /*
Alist to decide a coding system to use for a network I/O operation.
The format is ((PATTERN . VAL) ...),
@@ -10683,48 +10611,48 @@ or a cons of coding systems which are used as above.
See also the function `find-operation-coding-system'. */);
Vnetwork_coding_system_alist = Qnil;
- DEFVAR_LISP ("locale-coding-system", &Vlocale_coding_system,
+ DEFVAR_LISP ("locale-coding-system", Vlocale_coding_system,
doc: /* Coding system to use with system messages.
Also used for decoding keyboard input on X Window system. */);
Vlocale_coding_system = Qnil;
/* The eol mnemonics are reset in startup.el system-dependently. */
- DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
+ DEFVAR_LISP ("eol-mnemonic-unix", eol_mnemonic_unix,
doc: /*
*String displayed in mode line for UNIX-like (LF) end-of-line format. */);
eol_mnemonic_unix = make_pure_c_string (":");
- DEFVAR_LISP ("eol-mnemonic-dos", &eol_mnemonic_dos,
+ DEFVAR_LISP ("eol-mnemonic-dos", eol_mnemonic_dos,
doc: /*
*String displayed in mode line for DOS-like (CRLF) end-of-line format. */);
eol_mnemonic_dos = make_pure_c_string ("\\");
- DEFVAR_LISP ("eol-mnemonic-mac", &eol_mnemonic_mac,
+ DEFVAR_LISP ("eol-mnemonic-mac", eol_mnemonic_mac,
doc: /*
*String displayed in mode line for MAC-like (CR) end-of-line format. */);
eol_mnemonic_mac = make_pure_c_string ("/");
- DEFVAR_LISP ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
+ DEFVAR_LISP ("eol-mnemonic-undecided", eol_mnemonic_undecided,
doc: /*
*String displayed in mode line when end-of-line format is not yet determined. */);
eol_mnemonic_undecided = make_pure_c_string (":");
- DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
+ DEFVAR_LISP ("enable-character-translation", Venable_character_translation,
doc: /*
*Non-nil enables character translation while encoding and decoding. */);
Venable_character_translation = Qt;
DEFVAR_LISP ("standard-translation-table-for-decode",
- &Vstandard_translation_table_for_decode,
+ Vstandard_translation_table_for_decode,
doc: /* Table for translating characters while decoding. */);
Vstandard_translation_table_for_decode = Qnil;
DEFVAR_LISP ("standard-translation-table-for-encode",
- &Vstandard_translation_table_for_encode,
+ Vstandard_translation_table_for_encode,
doc: /* Table for translating characters while encoding. */);
Vstandard_translation_table_for_encode = Qnil;
- DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_table,
+ DEFVAR_LISP ("charset-revision-table", Vcharset_revision_table,
doc: /* Alist of charsets vs revision numbers.
While encoding, if a charset (car part of an element) is found,
designate it with the escape sequence identifying revision (cdr part
@@ -10732,13 +10660,13 @@ of the element). */);
Vcharset_revision_table = Qnil;
DEFVAR_LISP ("default-process-coding-system",
- &Vdefault_process_coding_system,
+ Vdefault_process_coding_system,
doc: /* Cons of coding systems used for process I/O by default.
The car part is used for decoding a process output,
the cdr part is used for encoding a text to be sent to a process. */);
Vdefault_process_coding_system = Qnil;
- DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
+ DEFVAR_LISP ("latin-extra-code-table", Vlatin_extra_code_table,
doc: /*
Table of extra Latin codes in the range 128..159 (inclusive).
This is a vector of length 256.
@@ -10751,7 +10679,7 @@ Only 128th through 159th elements have a meaning. */);
Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
DEFVAR_LISP ("select-safe-coding-system-function",
- &Vselect_safe_coding_system_function,
+ Vselect_safe_coding_system_function,
doc: /*
Function to call to select safe coding system for encoding a text.
@@ -10765,7 +10693,7 @@ The default value is `select-safe-coding-system' (which see). */);
Vselect_safe_coding_system_function = Qnil;
DEFVAR_BOOL ("coding-system-require-warning",
- &coding_system_require_warning,
+ coding_system_require_warning,
doc: /* Internal use only.
If non-nil, on writing a file, `select-safe-coding-system-function' is
called even if `coding-system-for-write' is non-nil. The command
@@ -10774,7 +10702,7 @@ called even if `coding-system-for-write' is non-nil. The command
DEFVAR_BOOL ("inhibit-iso-escape-detection",
- &inhibit_iso_escape_detection,
+ inhibit_iso_escape_detection,
doc: /*
If non-nil, Emacs ignores ISO-2022 escape sequences during code detection.
@@ -10802,7 +10730,7 @@ escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argumen
inhibit_iso_escape_detection = 0;
DEFVAR_BOOL ("inhibit-null-byte-detection",
- &inhibit_null_byte_detection,
+ inhibit_null_byte_detection,
doc: /* If non-nil, Emacs ignores null bytes on code detection.
By default, Emacs treats it as binary data, and does not attempt to
decode it. The effect is as if you specified `no-conversion' for
@@ -10814,7 +10742,7 @@ from GNU Find and GNU Grep. Emacs will then ignore the null bytes and
decode text as usual. */);
inhibit_null_byte_detection = 0;
- DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
+ DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,
doc: /* Char table for translating self-inserting characters.
This is applied to the result of input methods, not their input.
See also `keyboard-translate-table'.
@@ -10898,13 +10826,10 @@ emacs_strerror (int error_number)
Lisp_Object dec = code_convert_string_norecord (build_string (str),
Vlocale_coding_system,
0);
- str = (char *) SDATA (dec);
+ str = SSDATA (dec);
}
return str;
}
#endif /* emacs */
-
-/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
- (do not change this comment) */