summaryrefslogtreecommitdiff
path: root/src/bidi.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/bidi.c')
-rw-r--r--src/bidi.c159
1 files changed, 122 insertions, 37 deletions
diff --git a/src/bidi.c b/src/bidi.c
index fd73b548376..c4d04136e9e 100644
--- a/src/bidi.c
+++ b/src/bidi.c
@@ -1,6 +1,9 @@
/* Low-level bidirectional buffer/string-scanning functions for GNU Emacs.
- Copyright (C) 2000-2001, 2004-2005, 2009-2017 Free Software
- Foundation, Inc.
+
+Copyright (C) 2000-2001, 2004-2005, 2009-2022 Free Software Foundation,
+Inc.
+
+Author: Eli Zaretskii <eliz@gnu.org>
This file is part of GNU Emacs.
@@ -17,9 +20,7 @@ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
-/* Written by Eli Zaretskii <eliz@gnu.org>.
-
- A sequential implementation of the Unicode Bidirectional algorithm,
+/* A sequential implementation of the Unicode Bidirectional algorithm,
(UBA) as per UAX#9, a part of the Unicode Standard.
Unlike the Reference Implementation and most other implementations,
@@ -108,7 +109,7 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
-------------------
In a nutshell, fetching the next character boils down to calling
- STRING_CHAR_AND_LENGTH, passing it the address of a buffer or
+ string_char_and_length, passing it the address of a buffer or
string position. See bidi_fetch_char. However, if the next
character is "covered" by a display property of some kind,
bidi_fetch_char returns the u+FFFC "object replacement character"
@@ -238,13 +239,13 @@ along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
necessary. */
#include <config.h>
-#include <stdio.h>
#include "lisp.h"
#include "character.h"
#include "buffer.h"
#include "dispextern.h"
#include "region-cache.h"
+#include "sysstdio.h"
static bool bidi_initialized = 0;
@@ -280,7 +281,7 @@ bidi_get_type (int ch, bidi_dir_t override)
if (ch < 0 || ch > MAX_CHAR)
emacs_abort ();
- default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
+ default_type = (bidi_type_t) XFIXNUM (CHAR_TABLE_REF (bidi_type_table, ch));
/* Every valid character code, even those that are unassigned by the
UCD, have some bidi-class property, according to
DerivedBidiClass.txt file. Therefore, if we ever get UNKNOWN_BT
@@ -379,15 +380,15 @@ bidi_mirror_char (int c)
emacs_abort ();
val = CHAR_TABLE_REF (bidi_mirror_table, c);
- if (INTEGERP (val))
+ if (FIXNUMP (val))
{
int v;
/* When debugging, check before assigning to V, so that the check
isn't broken by undefined behavior due to int overflow. */
- eassert (CHAR_VALID_P (XINT (val)));
+ eassert (CHAR_VALID_P (XFIXNUM (val)));
- v = XINT (val);
+ v = XFIXNUM (val);
/* Minimal test we must do in optimized builds, to prevent weird
crashes further down the road. */
@@ -404,12 +405,12 @@ bidi_mirror_char (int c)
static bidi_bracket_type_t
bidi_paired_bracket_type (int c)
{
- if (c == BIDI_EOB)
+ if (c == BIDI_EOB || bidi_inhibit_bpa)
return BIDI_BRACKET_NONE;
if (c < 0 || c > MAX_CHAR)
emacs_abort ();
- return (bidi_bracket_type_t) XINT (CHAR_TABLE_REF (bidi_brackets_table, c));
+ return (bidi_bracket_type_t) XFIXNUM (CHAR_TABLE_REF (bidi_brackets_table, c));
}
/* Determine the start-of-sequence (sos) directional type given the two
@@ -1268,7 +1269,6 @@ bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
ptrdiff_t endpos
= (string->s || STRINGP (string->lstring)) ? string->schars : ZV;
struct text_pos pos;
- int len;
/* If we got past the last known position of display string, compute
the position of the next one. That position could be at CHARPOS. */
@@ -1277,6 +1277,12 @@ bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
SET_TEXT_POS (pos, charpos, bytepos);
*disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
disp_prop);
+ /* The factor of 100 below is a heuristic that needs to be
+ tuned. It means we consider 100 buffer positions examined by
+ the above call roughly equivalent to the display engine
+ iterating over a single buffer position. */
+ if (max_redisplay_ticks > 0 && *disp_pos > charpos)
+ update_redisplay_ticks ((*disp_pos - charpos) / 100 + 1, w);
}
/* Fetch the character at BYTEPOS. */
@@ -1340,10 +1346,10 @@ bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
normal_char:
if (string->s)
{
-
if (!string->unibyte)
{
- ch = STRING_CHAR_AND_LENGTH (string->s + bytepos, len);
+ int len;
+ ch = string_char_and_length (string->s + bytepos, &len);
*ch_len = len;
}
else
@@ -1356,8 +1362,9 @@ bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
{
if (!string->unibyte)
{
- ch = STRING_CHAR_AND_LENGTH (SDATA (string->lstring) + bytepos,
- len);
+ int len;
+ ch = string_char_and_length (SDATA (string->lstring) + bytepos,
+ &len);
*ch_len = len;
}
else
@@ -1368,9 +1375,11 @@ bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
}
else
{
- ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (bytepos), len);
+ int len;
+ ch = string_char_and_length (BYTE_POS_ADDR (bytepos), &len);
*ch_len = len;
}
+
*nchars = 1;
}
@@ -1382,6 +1391,8 @@ bidi_fetch_char (ptrdiff_t charpos, ptrdiff_t bytepos, ptrdiff_t *disp_pos,
SET_TEXT_POS (pos, charpos + *nchars, bytepos + *ch_len);
*disp_pos = compute_display_string_pos (&pos, string, w, frame_window_p,
disp_prop);
+ if (max_redisplay_ticks > 0 && *disp_pos > charpos + *nchars)
+ update_redisplay_ticks ((*disp_pos - charpos - *nchars) / 100 + 1, w);
}
return ch;
@@ -1457,6 +1468,11 @@ bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos)
else
start_re = paragraph_start_re;
+ /* Prevent quitting inside re_match_2, as redisplay_window could
+ have temporarily moved point. */
+ specpdl_ref count = SPECPDL_INDEX ();
+ specbind (Qinhibit_quit, Qt);
+
val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
if (val < 0)
{
@@ -1466,6 +1482,7 @@ bidi_at_paragraph_end (ptrdiff_t charpos, ptrdiff_t bytepos)
val = -2;
}
+ unbind_to (count, Qnil);
return val;
}
@@ -1541,6 +1558,11 @@ bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
if (cache_buffer->base_buffer)
cache_buffer = cache_buffer->base_buffer;
+ /* Prevent quitting inside re_match_2, as redisplay_window could
+ have temporarily moved point. */
+ specpdl_ref count = SPECPDL_INDEX ();
+ specbind (Qinhibit_quit, Qt);
+
while (pos_byte > BEGV_BYTE
&& n++ < MAX_PARAGRAPH_SEARCH
&& fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
@@ -1549,7 +1571,7 @@ bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
display string? And what if a display string covering some
of the text over which we scan back includes
paragraph_start_re? */
- DEC_BOTH (pos, pos_byte);
+ dec_both (&pos, &pos_byte);
if (bpc && region_cache_backward (cache_buffer, bpc, pos, &next))
{
pos = next, pos_byte = CHAR_TO_BYTE (pos);
@@ -1558,6 +1580,7 @@ bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
else
pos = find_newline_no_quit (pos, pos_byte, -1, &pos_byte);
}
+ unbind_to (count, Qnil);
if (n >= MAX_PARAGRAPH_SEARCH)
pos = BEGV, pos_byte = BEGV_BYTE;
if (bpc)
@@ -1568,6 +1591,9 @@ bidi_find_paragraph_start (ptrdiff_t pos, ptrdiff_t pos_byte)
return pos_byte;
}
+/* This tracks how far we needed to search for first strong character. */
+static ptrdiff_t nsearch_for_strong;
+
/* On a 3.4 GHz machine, searching forward for a strong directional
character in a long paragraph full of weaks or neutrals takes about
1 ms for each 20K characters. The number below limits each call to
@@ -1637,6 +1663,8 @@ find_first_strong_char (ptrdiff_t pos, ptrdiff_t bytepos, ptrdiff_t end,
pos += *nchars;
bytepos += *ch_len;
}
+
+ nsearch_for_strong += pos - pos1;
return type;
}
@@ -1666,6 +1694,9 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p)
calls to BYTE_TO_CHAR and its ilk. */
ptrdiff_t begbyte = string_p ? 0 : BEGV_BYTE;
ptrdiff_t end = string_p ? bidi_it->string.schars : ZV;
+ ptrdiff_t pos = bidi_it->charpos;
+
+ nsearch_for_strong = 0;
/* Special case for an empty buffer. */
if (bytepos == begbyte && bidi_it->charpos == end)
@@ -1687,7 +1718,7 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p)
else if (dir == NEUTRAL_DIR) /* P2 */
{
ptrdiff_t ch_len, nchars;
- ptrdiff_t pos, disp_pos = -1;
+ ptrdiff_t disp_pos = -1;
int disp_prop = 0;
bidi_type_t type;
const unsigned char *s;
@@ -1762,7 +1793,7 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p)
/* FXIME: What if p is covered by a display
string? See also a FIXME inside
bidi_find_paragraph_start. */
- DEC_BOTH (p, pbyte);
+ dec_both (&p, &pbyte);
prevpbyte = bidi_find_paragraph_start (p, pbyte);
}
pstartbyte = prevpbyte;
@@ -1785,6 +1816,14 @@ bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it, bool no_default_p)
bidi_it->level_stack[0].level = 0;
bidi_line_init (bidi_it);
+
+ /* The factor of 50 below is a heuristic that needs to be tuned. It
+ means we consider 50 buffer positions examined by this function
+ roughly equivalent to the display engine iterating over a single
+ buffer position. */
+ ptrdiff_t nexamined = bidi_it->charpos - pos + nsearch_for_strong;
+ if (max_redisplay_ticks > 0 && nexamined > 0)
+ update_redisplay_ticks (nexamined / 50, bidi_it->w);
}
@@ -1805,7 +1844,7 @@ bidi_explicit_dir_char (int ch)
eassert (ch == BIDI_EOB);
return false;
}
- ch_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
+ ch_type = (bidi_type_t) XFIXNUM (CHAR_TABLE_REF (bidi_type_table, ch));
return (ch_type == LRE || ch_type == LRO
|| ch_type == RLE || ch_type == RLO
|| ch_type == PDF);
@@ -2551,6 +2590,7 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it)
bidi_bracket_type_t btype;
bidi_type_t type = bidi_it->type;
bool retval = false;
+ ptrdiff_t n = 0;
/* When scanning backwards, we don't expect any unresolved bidi
bracket characters. */
@@ -2680,6 +2720,7 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it)
}
old_sidx = bidi_it->stack_idx;
type = bidi_resolve_weak (bidi_it);
+ n++;
/* Skip level runs excluded from this isolating run sequence. */
new_sidx = bidi_it->stack_idx;
if (bidi_it->level_stack[new_sidx].level > current_level
@@ -2703,6 +2744,7 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it)
goto give_up;
}
type = bidi_resolve_weak (bidi_it);
+ n++;
}
}
if (type == NEUTRAL_B
@@ -2743,6 +2785,7 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it)
(which requires the display engine to copy the cache back and
forth many times). */
if (maxlevel == base_level
+ && (l2r_seen || r2l_seen) /* N0d */
&& ((base_level == 0 && !r2l_seen)
|| (base_level == 1 && !l2r_seen)))
{
@@ -2778,6 +2821,12 @@ bidi_find_bracket_pairs (struct bidi_it *bidi_it)
}
give_up:
+ /* The factor of 20 below is a heuristic that needs to be tuned. It
+ means we consider 20 buffer positions examined by this function
+ roughly equivalent to the display engine iterating over a single
+ buffer position. */
+ if (max_redisplay_ticks > 0 && n > 0)
+ update_redisplay_ticks (n / 20 + 1, bidi_it->w);
return retval;
}
@@ -2905,13 +2954,17 @@ bidi_resolve_brackets (struct bidi_it *bidi_it)
int embedding_level = bidi_it->level_stack[bidi_it->stack_idx].level;
bidi_type_t embedding_type = (embedding_level & 1) ? STRONG_R : STRONG_L;
- eassert (bidi_it->prev_for_neutral.type != UNKNOWN_BT);
eassert (bidi_it->bracket_pairing_pos > bidi_it->charpos);
if (bidi_it->bracket_enclosed_type == embedding_type) /* N0b */
type = embedding_type;
- else
+ else if (bidi_it->bracket_enclosed_type == STRONG_L /* N0c, N0d */
+ || bidi_it->bracket_enclosed_type == STRONG_R)
{
- switch (bidi_it->prev_for_neutral.type)
+ bidi_type_t prev_type_for_neutral = bidi_it->prev_for_neutral.type;
+
+ if (prev_type_for_neutral == UNKNOWN_BT)
+ prev_type_for_neutral = embedding_type;
+ switch (prev_type_for_neutral)
{
case STRONG_R:
case WEAK_EN:
@@ -3343,6 +3396,7 @@ bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, bool end_flag)
else
{
int new_level;
+ ptrdiff_t pos0 = bidi_it->charpos;
/* If we are at end of level, its edges must be cached. */
if (end_flag)
@@ -3378,6 +3432,12 @@ bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, bool end_flag)
bidi_cache_iterator_state (bidi_it, 1, 1);
}
} while (new_level >= level);
+ /* The factor of 50 below is a heuristic that needs to be
+ tuned. It means we consider 50 buffer positions examined by
+ the above call roughly equivalent to the display engine
+ iterating over a single buffer position. */
+ if (max_redisplay_ticks > 0 && bidi_it->charpos > pos0)
+ update_redisplay_ticks ((bidi_it->charpos - pos0) / 50 + 1, bidi_it->w);
}
}
@@ -3549,11 +3609,21 @@ bidi_move_to_visually_next (struct bidi_it *bidi_it)
}
/* Utility function for looking for strong directional characters
- whose bidi type was overridden by a directional override. */
+ whose bidi type was overridden by directional override or embedding
+ or isolate control characters. */
ptrdiff_t
bidi_find_first_overridden (struct bidi_it *bidi_it)
{
- ptrdiff_t found_pos = ZV;
+ ptrdiff_t eob
+ = STRINGP (bidi_it->string.lstring) ? bidi_it->string.schars : ZV;
+ ptrdiff_t found_pos = eob;
+ /* Maximum bidi levels we allow for L2R and R2L characters. Note
+ that these are levels after resolving explicit embeddings,
+ overrides, and isolates, i.e. before resolving implicit levels. */
+ int max_l2r = bidi_it->paragraph_dir == L2R ? 0 : 2;
+ int max_r2l = 1;
+ /* Same for WEAK and NEUTRAL_ON types. */
+ int max_weak = bidi_it->paragraph_dir == L2R ? 1 : 2;
do
{
@@ -3561,14 +3631,31 @@ bidi_find_first_overridden (struct bidi_it *bidi_it)
because the directional overrides are applied by the
former. */
bidi_type_t type = bidi_resolve_weak (bidi_it);
+ unsigned level = bidi_it->level_stack[bidi_it->stack_idx].level;
+ bidi_category_t category = bidi_get_category (bidi_it->orig_type);
+ /* Detect strong L or R types that have been overridden by
+ explicit overrides. */
if ((type == STRONG_R && bidi_it->orig_type == STRONG_L)
|| (type == STRONG_L
&& (bidi_it->orig_type == STRONG_R
- || bidi_it->orig_type == STRONG_AL)))
+ || bidi_it->orig_type == STRONG_AL))
+ /* Detect strong L or R types or WEAK_EN types that were
+ pushed into higher embedding levels (and will thus
+ reorder) by explicit embeddings and isolates. */
+ || ((bidi_it->orig_type == STRONG_L
+ || bidi_it->orig_type == WEAK_EN)
+ && level > max_l2r)
+ || ((bidi_it->orig_type == STRONG_R
+ || bidi_it->orig_type == STRONG_AL)
+ && level > max_r2l)
+ /* Detect other weak or neutral types whose level was
+ tweaked by explicit embeddings and isolates. */
+ || ((category == WEAK || bidi_it->orig_type == NEUTRAL_ON)
+ && level > max_weak))
found_pos = bidi_it->charpos;
- } while (found_pos == ZV
- && bidi_it->charpos < ZV
+ } while (found_pos == eob
+ && bidi_it->charpos < eob
&& bidi_it->ch != BIDI_EOB
&& bidi_it->ch != '\n');
@@ -3586,7 +3673,7 @@ bidi_dump_cached_states (void)
if (bidi_cache_idx == 0)
{
- fprintf (stderr, "The cache is empty.\n");
+ fputs ("The cache is empty.\n", stderr);
return;
}
fprintf (stderr, "Total of %"pD"d state%s in cache:\n",
@@ -3597,13 +3684,11 @@ bidi_dump_cached_states (void)
fputs ("ch ", stderr);
for (i = 0; i < bidi_cache_idx; i++)
fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch);
- fputs ("\n", stderr);
- fputs ("lvl ", stderr);
+ fputs ("\nlvl ", stderr);
for (i = 0; i < bidi_cache_idx; i++)
fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level);
- fputs ("\n", stderr);
- fputs ("pos ", stderr);
+ fputs ("\npos ", stderr);
for (i = 0; i < bidi_cache_idx; i++)
fprintf (stderr, "%*"pD"d", ndigits, bidi_cache[i].charpos);
- fputs ("\n", stderr);
+ putc ('\n', stderr);
}