diff options
Diffstat (limited to 'src/doprnt.c')
-rw-r--r-- | src/doprnt.c | 272 |
1 files changed, 161 insertions, 111 deletions
diff --git a/src/doprnt.c b/src/doprnt.c index 89d7e99deb4..19f58d08448 100644 --- a/src/doprnt.c +++ b/src/doprnt.c @@ -1,7 +1,7 @@ /* Output like sprintf to a buffer of specified size. -*- coding: utf-8 -*- Also takes args differently: pass one pointer to the end of the format string in addition to the format string itself. - Copyright (C) 1985, 2001-2017 Free Software Foundation, Inc. + Copyright (C) 1985, 2001-2022 Free Software Foundation, Inc. This file is part of GNU Emacs. @@ -28,6 +28,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ . For %s and %c, when field width is specified (e.g., %25s), it accounts for the display width of each character, according to char-width-table. That is, it does not assume that each character takes one column on display. + Nor does it assume that each character is a single byte. . If the size of the buffer is not enough to produce the formatted string in its entirety, it makes sure that truncation does not chop the last @@ -42,12 +43,14 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ Emacs can handle. OTOH, this function supports only a small subset of the standard C formatted - output facilities. E.g., %u and %ll are not supported, and precision is - ignored %s and %c conversions. (See below for the detailed documentation of - what is supported.) However, this is okay, as this function is supposed to - be called from `error' and similar functions, and thus does not need to - support features beyond those in `Fformat_message', which is used - by `error' on the Lisp level. */ + output facilities. E.g., %u is not supported, precision is ignored + in %s and %c conversions, and %lld does not necessarily work and + code should use something like %"pM"d with intmax_t instead. + (See below for the detailed documentation of what is supported.) + However, this is okay, as this function is supposed to be called + from 'error' and similar C functions, and thus does not need to + support all the features of 'Fformat_message', which is used by the + Lisp 'error' function. */ /* In the FORMAT argument this function supports ` and ' as directives that output left and right quotes as per ‘text-quoting style’. It @@ -61,19 +64,20 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ %e means print a `double' argument in exponential notation. %f means print a `double' argument in decimal-point notation. %g means print a `double' argument in exponential notation - or in decimal-point notation, whichever uses fewer characters. + or in decimal-point notation, depending on the value; + this is often (though not always) the shorter of the two notations. %c means print a `signed int' argument as a single character. %% means produce a literal % character. - A %-sequence may contain optional flag, width, and precision specifiers, and - a length modifier, as follows: + A %-sequence other than %% may contain optional flags, width, precision, + and length, as follows: %<flags><width><precision><length>character where flags is [+ -0], width is [0-9]+, precision is .[0-9]+, and length - is empty or l or the value of the pD or pI or pMd (sans "d") macros. - Also, %% in a format stands for a single % in the output. A % that - does not introduce a valid %-sequence causes undefined behavior. + is empty or l or the value of the pD or pI or PRIdMAX (sans "d") macros. + A % that does not introduce a valid %-sequence causes undefined behavior. + Bytes in FORMAT other than % are copied through as-is. The + flag character inserts a + before any positive number, while a space inserts a space before any positive number; these flags only affect %d, %o, @@ -88,7 +92,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ the respective argument is to be treated as `long int' or `unsigned long int'. Similarly, the value of the pD macro means to use ptrdiff_t, the value of the pI macro means to use EMACS_INT or EMACS_UINT, the - value of the pMd etc. macros means to use intmax_t or uintmax_t, + value of the PRIdMAX etc. macros means to use intmax_t or uintmax_t, and the empty length modifier means `int' or `unsigned int'. The width specifier supplies a lower limit for the length of the printed @@ -99,7 +103,9 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ For %e, %f, and %g sequences, the number after the "." in the precision specifier says how many decimal places to show; if zero, the decimal point - itself is omitted. For %s and %S, the precision specifier is ignored. */ + itself is omitted. For %d, %o, and %x sequences, the precision specifies + the minimum number of digits to appear. Precision specifiers are + not supported for other %-sequences. */ #include <config.h> #include <stdio.h> @@ -115,7 +121,50 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ another macro. */ #include "character.h" +/* Enough to handle floating point formats with large numbers. */ +enum { SIZE_BOUND_EXTRA = DBL_MAX_10_EXP + 50 }; + +/* Parse FMT as an unsigned decimal integer, putting its value into *VALUE. + Return the address of the first byte after the integer. + If FMT is not an integer, return FMT and store zero into *VALUE. */ +static char const * +parse_format_integer (char const *fmt, int *value) +{ + int n = 0; + bool overflow = false; + for (; '0' <= *fmt && *fmt <= '9'; fmt++) + { + overflow |= INT_MULTIPLY_WRAPV (n, 10, &n); + overflow |= INT_ADD_WRAPV (n, *fmt - '0', &n); + } + if (overflow || min (PTRDIFF_MAX, SIZE_MAX) - SIZE_BOUND_EXTRA < n) + error ("Format width or precision too large"); + *value = n; + return fmt; +} + +/* Like doprnt, except FORMAT_END must be non-null. + Although this function is never exercised in current Emacs, + it is retained in case some future Emacs version + contains doprnt callers that need such formats. + Having a separate function helps GCC optimize doprnt better. */ +static ptrdiff_t +doprnt_non_null_end (char *buffer, ptrdiff_t bufsize, char const *format, + char const *format_end, va_list ap) +{ + USE_SAFE_ALLOCA; + ptrdiff_t fmtlen = format_end - format; + char *fmt = SAFE_ALLOCA (fmtlen + 1); + memcpy (fmt, format, fmtlen); + fmt[fmtlen] = 0; + ptrdiff_t nbytes = doprnt (buffer, bufsize, fmt, NULL, ap); + SAFE_FREE (); + return nbytes; +} + /* Generate output from a format-spec FORMAT, + terminated at either the first NUL or (if FORMAT_END is non-null + and there are no NUL bytes between FORMAT and FORMAT_END) terminated at position FORMAT_END. (*FORMAT_END is not part of the format, but must exist and be readable.) Output goes in BUFFER, which has room for BUFSIZE chars. @@ -125,18 +174,24 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ Returns the number of bytes stored into BUFFER, excluding the terminating null byte. Output is always null-terminated. String arguments are passed as C strings. - Integers are passed as C integers. */ + Integers are passed as C integers. + + FIXME: If FORMAT_END is not at a character boundary + doprnt_non_null_end will cut the string in the middle of the + character and the returned string will have an incomplete character + sequence at the end. We may prefer to cut at a character + boundary. */ ptrdiff_t doprnt (char *buffer, ptrdiff_t bufsize, const char *format, const char *format_end, va_list ap) { + if (format_end) + return doprnt_non_null_end (buffer, bufsize, format, format_end, ap); + const char *fmt = format; /* Pointer into format string. */ char *bufptr = buffer; /* Pointer into output buffer. */ - /* Enough to handle floating point formats with large numbers. */ - enum { SIZE_BOUND_EXTRA = DBL_MAX_10_EXP + 50 }; - /* Use this for sprintf unless we need something really big. */ char tembuf[SIZE_BOUND_EXTRA + 50]; @@ -149,104 +204,92 @@ doprnt (char *buffer, ptrdiff_t bufsize, const char *format, /* Buffer we have got with malloc. */ char *big_buffer = NULL; - enum text_quoting_style quoting_style = text_quoting_style (); - ptrdiff_t tem = -1; - char *string; - char fixed_buffer[20]; /* Default buffer for small formatting. */ - char *fmtcpy; - int minlen; - char charbuf[MAX_MULTIBYTE_LENGTH + 1]; /* Used for %c. */ - USE_SAFE_ALLOCA; - - if (format_end == 0) - format_end = format + strlen (format); - - fmtcpy = (format_end - format < sizeof (fixed_buffer) - 1 - ? fixed_buffer - : SAFE_ALLOCA (format_end - format + 1)); + Lisp_Object quoting_style = Ftext_quoting_style (); bufsize--; /* Loop until end of format string or buffer full. */ - while (fmt < format_end && bufsize > 0) + while (*fmt && bufsize > 0) { char const *fmt0 = fmt; char fmtchar = *fmt++; if (fmtchar == '%') { - ptrdiff_t size_bound = 0; ptrdiff_t width; /* Columns occupied by STRING on display. */ enum { pDlen = sizeof pD - 1, pIlen = sizeof pI - 1, - pMlen = sizeof pMd - 2 + pMlen = sizeof PRIdMAX - 2, + maxmlen = max (max (1, pDlen), max (pIlen, pMlen)) }; enum { no_modifier, long_modifier, pD_modifier, pI_modifier, pM_modifier } length_modifier = no_modifier; static char const modifier_len[] = { 0, 1, pDlen, pIlen, pMlen }; - int maxmlen = max (max (1, pDlen), max (pIlen, pMlen)); int mlen; + char charbuf[MAX_MULTIBYTE_LENGTH + 1]; /* Used for %c. */ + + /* Width and precision specified by this %-sequence. */ + int wid = 0, prec = -1; - /* Copy this one %-spec into fmtcpy. */ - string = fmtcpy; + /* FMTSTAR will be a "%*.*X"-like version of this %-sequence. + Start by putting '%' into FMTSTAR. */ + char fmtstar[sizeof "%-+ 0*.*d" + maxmlen]; + char *string = fmtstar; *string++ = '%'; - while (fmt < format_end) + + /* Copy at most one instance of each flag into FMTSTAR. */ + bool minusflag = false, plusflag = false, zeroflag = false, + spaceflag = false; + for (;; fmt++) { - *string++ = *fmt; - if ('0' <= *fmt && *fmt <= '9') + *string = *fmt; + switch (*fmt) { - /* Get an idea of how much space we might need. - This might be a field width or a precision; e.g. - %1.1000f and %1000.1f both might need 1000+ bytes. - Parse the width or precision, checking for overflow. */ - int n = *fmt - '0'; - bool overflow = false; - while (fmt + 1 < format_end - && '0' <= fmt[1] && fmt[1] <= '9') - { - overflow |= INT_MULTIPLY_WRAPV (n, 10, &n); - overflow |= INT_ADD_WRAPV (n, fmt[1] - '0', &n); - *string++ = *++fmt; - } - - if (overflow - || min (PTRDIFF_MAX, SIZE_MAX) - SIZE_BOUND_EXTRA < n) - error ("Format width or precision too large"); - if (size_bound < n) - size_bound = n; + case '-': string += !minusflag; minusflag = true; continue; + case '+': string += !plusflag; plusflag = true; continue; + case ' ': string += !spaceflag; spaceflag = true; continue; + case '0': string += !zeroflag; zeroflag = true; continue; } - else if (! (*fmt == '-' || *fmt == ' ' || *fmt == '.' - || *fmt == '+')) - break; - fmt++; + break; } + /* Parse width and precision, putting "*.*" into FMTSTAR. */ + if ('1' <= *fmt && *fmt <= '9') + fmt = parse_format_integer (fmt, &wid); + if (*fmt == '.') + fmt = parse_format_integer (fmt + 1, &prec); + *string++ = '*'; + *string++ = '.'; + *string++ = '*'; + /* Check for the length modifiers in textual length order, so that longer modifiers override shorter ones. */ for (mlen = 1; mlen <= maxmlen; mlen++) { - if (format_end - fmt < mlen) - break; if (mlen == 1 && *fmt == 'l') length_modifier = long_modifier; - if (mlen == pDlen && memcmp (fmt, pD, pDlen) == 0) + if (mlen == pDlen && strncmp (fmt, pD, pDlen) == 0) length_modifier = pD_modifier; - if (mlen == pIlen && memcmp (fmt, pI, pIlen) == 0) + if (mlen == pIlen && strncmp (fmt, pI, pIlen) == 0) length_modifier = pI_modifier; - if (mlen == pMlen && memcmp (fmt, pMd, pMlen) == 0) + if (mlen == pMlen && strncmp (fmt, PRIdMAX, pMlen) == 0) length_modifier = pM_modifier; } + /* Copy optional length modifier and conversion specifier + character into FMTSTAR, and append a NUL. */ mlen = modifier_len[length_modifier]; - memcpy (string, fmt + 1, mlen); - string += mlen; + string = mempcpy (string, fmt, mlen + 1); fmt += mlen; *string = 0; - /* Make the size bound large enough to handle floating point formats + /* An idea of how much space we might need. + This might be a field width or a precision; e.g. + %1.1000f and %1000.1f both might need 1000+ bytes. + Make it large enough to handle floating point formats with large numbers. */ - size_bound += SIZE_BOUND_EXTRA; + ptrdiff_t size_bound = max (wid, prec) + SIZE_BOUND_EXTRA; /* Make sure we have that much. */ if (size_bound > size_allocated) @@ -257,48 +300,49 @@ doprnt (char *buffer, ptrdiff_t bufsize, const char *format, sprintf_buffer = big_buffer; size_allocated = size_bound; } - minlen = 0; + int minlen = 0; + ptrdiff_t tem; switch (*fmt++) { default: - error ("Invalid format operation %s", fmtcpy); + error ("Invalid format operation %s", fmt0); -/* case 'b': */ - case 'l': case 'd': switch (length_modifier) { case no_modifier: { int v = va_arg (ap, int); - tem = sprintf (sprintf_buffer, fmtcpy, v); + tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v); } break; case long_modifier: { long v = va_arg (ap, long); - tem = sprintf (sprintf_buffer, fmtcpy, v); + tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v); } break; case pD_modifier: signed_pD_modifier: { ptrdiff_t v = va_arg (ap, ptrdiff_t); - tem = sprintf (sprintf_buffer, fmtcpy, v); + tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v); } break; case pI_modifier: { EMACS_INT v = va_arg (ap, EMACS_INT); - tem = sprintf (sprintf_buffer, fmtcpy, v); + tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v); } break; case pM_modifier: { intmax_t v = va_arg (ap, intmax_t); - tem = sprintf (sprintf_buffer, fmtcpy, v); + tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v); } break; + default: + eassume (false); } /* Now copy into final output, truncating as necessary. */ string = sprintf_buffer; @@ -311,13 +355,13 @@ doprnt (char *buffer, ptrdiff_t bufsize, const char *format, case no_modifier: { unsigned v = va_arg (ap, unsigned); - tem = sprintf (sprintf_buffer, fmtcpy, v); + tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v); } break; case long_modifier: { unsigned long v = va_arg (ap, unsigned long); - tem = sprintf (sprintf_buffer, fmtcpy, v); + tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v); } break; case pD_modifier: @@ -325,15 +369,17 @@ doprnt (char *buffer, ptrdiff_t bufsize, const char *format, case pI_modifier: { EMACS_UINT v = va_arg (ap, EMACS_UINT); - tem = sprintf (sprintf_buffer, fmtcpy, v); + tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v); } break; case pM_modifier: { uintmax_t v = va_arg (ap, uintmax_t); - tem = sprintf (sprintf_buffer, fmtcpy, v); + tem = sprintf (sprintf_buffer, fmtstar, wid, prec, v); } break; + default: + eassume (false); } /* Now copy into final output, truncating as necessary. */ string = sprintf_buffer; @@ -344,21 +390,18 @@ doprnt (char *buffer, ptrdiff_t bufsize, const char *format, case 'g': { double d = va_arg (ap, double); - tem = sprintf (sprintf_buffer, fmtcpy, d); + tem = sprintf (sprintf_buffer, fmtstar, wid, prec, d); /* Now copy into final output, truncating as necessary. */ string = sprintf_buffer; goto doit; } case 'S': - string[-1] = 's'; - FALLTHROUGH; case 's': - if (fmtcpy[1] != 's') - minlen = atoi (&fmtcpy[1]); + minlen = minusflag ? -wid : wid; string = va_arg (ap, char *); - tem = strlen (string); - if (STRING_BYTES_BOUND < tem) + tem = strnlen (string, STRING_BYTES_BOUND + 1); + if (tem == STRING_BYTES_BOUND + 1) error ("String for %%s or %%S format is too long"); width = strwidth (string, tem); goto doit1; @@ -432,32 +475,41 @@ doprnt (char *buffer, ptrdiff_t bufsize, const char *format, string = charbuf; string[tem] = 0; width = strwidth (string, tem); - if (fmtcpy[1] != 'c') - minlen = atoi (&fmtcpy[1]); + minlen = minusflag ? -wid : wid; goto doit1; } case '%': /* Treat this '%' as normal. */ - fmt0 = fmt - 1; break; } } char const *src; ptrdiff_t srclen; - if (quoting_style == CURVE_QUOTING_STYLE && fmtchar == '`') + if (EQ (quoting_style, Qcurve) && fmtchar == '`') src = uLSQM, srclen = sizeof uLSQM - 1; - else if (quoting_style == CURVE_QUOTING_STYLE && fmtchar == '\'') + else if (EQ (quoting_style, Qcurve) && fmtchar == '\'') src = uRSQM, srclen = sizeof uRSQM - 1; - else if (quoting_style == STRAIGHT_QUOTING_STYLE && fmtchar == '`') - src = "'", srclen = 1; - else + else if (! LEADING_CODE_P (fmtchar)) { - while (fmt < format_end && !CHAR_HEAD_P (*fmt)) - fmt++; - src = fmt0, srclen = fmt - fmt0; + if (EQ (quoting_style, Qstraight) && fmtchar == '`') + fmtchar = '\''; + + *bufptr++ = fmtchar; + continue; } + else + { + int charlen = BYTES_BY_CHAR_HEAD (fmtchar); + src = fmt0; + + /* If the format string ends in the middle of a multibyte + character we don't want to skip over the NUL byte. */ + for (srclen = 1 ; *(src + srclen) != 0 && srclen < charlen ; srclen++); + + fmt = src + srclen; + } if (bufsize < srclen) { @@ -479,8 +531,6 @@ doprnt (char *buffer, ptrdiff_t bufsize, const char *format, xfree (big_buffer); *bufptr = 0; /* Make sure our string ends with a '\0' */ - - SAFE_FREE (); return bufptr - buffer; } @@ -503,7 +553,7 @@ esprintf (char *buf, char const *format, ...) return nbytes; } -#if HAVE_MODULES || (defined HAVE_X_WINDOWS && defined USE_X_TOOLKIT) +#if defined HAVE_X_WINDOWS && defined USE_X_TOOLKIT /* Format to buffer *BUF of positive size *BUFSIZE, reallocating *BUF and updating *BUFSIZE if the buffer is too small, and otherwise @@ -513,7 +563,7 @@ esprintf (char *buf, char const *format, ...) BUFSIZE_MAX. */ ptrdiff_t exprintf (char **buf, ptrdiff_t *bufsize, - char const *nonheapbuf, ptrdiff_t bufsize_max, + char *nonheapbuf, ptrdiff_t bufsize_max, char const *format, ...) { ptrdiff_t nbytes; @@ -529,7 +579,7 @@ exprintf (char **buf, ptrdiff_t *bufsize, /* Act like exprintf, except take a va_list. */ ptrdiff_t evxprintf (char **buf, ptrdiff_t *bufsize, - char const *nonheapbuf, ptrdiff_t bufsize_max, + char *nonheapbuf, ptrdiff_t bufsize_max, char const *format, va_list ap) { for (;;) |