merge from trunk

author: Ken Raeburn <raeburn@raeburn.org> 2015-11-01 01:42:21 -0400
committer: Ken Raeburn <raeburn@raeburn.org> 2015-11-01 01:42:21 -0400
commit: 39372e1a1032521be74575bb06f95a3898fbae30 (patch)
tree: 754bd242a23d2358ea116126fcb0a629947bd9ec /src/search.c
parent: 6a3121904d76e3b2f63007341d48c5c1af55de80 (diff)
parent: e11aaee266da52937a3a031cb108fe13f68958c3 (diff)
download: emacs-39372e1a1032521be74575bb06f95a3898fbae30.tar.gz
emacs-39372e1a1032521be74575bb06f95a3898fbae30.tar.bz2
emacs-39372e1a1032521be74575bb06f95a3898fbae30.zip
1 files changed, 449 insertions, 220 deletions
diff --git a/src/search.c b/src/search.c
index 8916960cf62..64388b87af9 100644
--- a/src/search.c
+++ b/src/search.c
@@ -1,6 +1,6 @@
 /* String search routines for GNU Emacs.
 
-Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2013 Free Software
+Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2015 Free Software
 Foundation, Inc.
 
 This file is part of GNU Emacs.
@@ -22,13 +22,11 @@ along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
 #include <config.h>
 
 #include "lisp.h"
-#include "category.h"
 #include "character.h"
 #include "buffer.h"
 #include "syntax.h"
 #include "charset.h"
 #include "region-cache.h"
-#include "commands.h"
 #include "blockinput.h"
 #include "intervals.h"
 
@@ -84,12 +82,6 @@ static struct regexp_cache *searchbuf_head;
    Qnil if no searching has been done yet.  */
 /* static Lisp_Object last_thing_searched; */
 
-/* Error condition signaled when regexp compile_pattern fails.  */
-static Lisp_Object Qinvalid_regexp;
-
-/* Error condition used for failing searches.  */
-static Lisp_Object Qsearch_failed;
-
 static void set_search_regs (ptrdiff_t, ptrdiff_t);
 static void save_search_regs (void);
 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
@@ -465,17 +457,18 @@ matched by parenthesis constructs in the pattern.  */)
   return string_match_1 (regexp, string, start, 1);
 }
 
-/* Match REGEXP against STRING, searching all of STRING,
-   and return the index of the match, or negative on failure.
-   This does not clobber the match data.  */
+/* Match REGEXP against STRING using translation table TABLE,
+   searching all of STRING, and return the index of the match,
+   or negative on failure.  This does not clobber the match data.  */
 
 ptrdiff_t
-fast_string_match (Lisp_Object regexp, Lisp_Object string)
+fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
+			    Lisp_Object table)
 {
   ptrdiff_t val;
   struct re_pattern_buffer *bufp;
 
-  bufp = compile_pattern (regexp, 0, Qnil,
+  bufp = compile_pattern (regexp, 0, table,
 			  0, STRING_MULTIBYTE (string));
   immediate_quit = 1;
   re_match_object = string;
@@ -510,26 +503,6 @@ fast_c_string_match_ignore_case (Lisp_Object regexp,
   return val;
 }
 
-/* Like fast_string_match but ignore case.  */
-
-ptrdiff_t
-fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
-{
-  ptrdiff_t val;
-  struct re_pattern_buffer *bufp;
-
-  bufp = compile_pattern (regexp, 0, Vascii_canon_table,
-			  0, STRING_MULTIBYTE (string));
-  immediate_quit = 1;
-  re_match_object = string;
-
-  val = re_search (bufp, SSDATA (string),
-		   SBYTES (string), 0,
-		   SBYTES (string), 0);
-  immediate_quit = 0;
-  return val;
-}
-
 /* Match REGEXP against the characters after POS to LIMIT, and return
    the number of matched characters.  If STRING is non-nil, match
    against the characters in it.  In that case, POS and LIMIT are
@@ -602,23 +575,47 @@ fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
    Otherwise, make sure it's off.
    This is our cheezy way of associating an action with the change of
    state of a buffer-local variable.  */
-static void
+static struct region_cache *
 newline_cache_on_off (struct buffer *buf)
 {
+  struct buffer *base_buf = buf;
+  bool indirect_p = false;
+
+  if (buf->base_buffer)
+    {
+      base_buf = buf->base_buffer;
+      indirect_p = true;
+    }
+
+  /* Don't turn on or off the cache in the base buffer, if the value
+     of cache-long-scans of the base buffer is inconsistent with that.
+     This is because doing so will just make the cache pure overhead,
+     since if we turn it on via indirect buffer, it will be
+     immediately turned off by its base buffer.  */
   if (NILP (BVAR (buf, cache_long_scans)))
     {
-      /* It should be off.  */
-      if (buf->newline_cache)
-        {
-          free_region_cache (buf->newline_cache);
-          buf->newline_cache = 0;
-        }
+      if (!indirect_p
+	  || NILP (BVAR (base_buf, cache_long_scans)))
+	{
+	  /* It should be off.  */
+	  if (base_buf->newline_cache)
+	    {
+	      free_region_cache (base_buf->newline_cache);
+	      base_buf->newline_cache = 0;
+	    }
+	}
+      return NULL;
     }
   else
     {
-      /* It should be on.  */
-      if (buf->newline_cache == 0)
-        buf->newline_cache = new_region_cache ();
+      if (!indirect_p
+	  || !NILP (BVAR (base_buf, cache_long_scans)))
+	{
+	  /* It should be on.  */
+	  if (base_buf->newline_cache == 0)
+	    base_buf->newline_cache = new_region_cache ();
+	}
+      return base_buf->newline_cache;
     }
 }
 
@@ -653,6 +650,7 @@ find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
 {
   struct region_cache *newline_cache;
   int direction;
+  struct buffer *cache_buffer;
 
   if (count > 0)
     {
@@ -669,8 +667,11 @@ find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
   if (end_byte == -1)
     end_byte = CHAR_TO_BYTE (end);
 
-  newline_cache_on_off (current_buffer);
-  newline_cache = current_buffer->newline_cache;
+  newline_cache = newline_cache_on_off (current_buffer);
+  if (current_buffer->base_buffer)
+    cache_buffer = current_buffer->base_buffer;
+  else
+    cache_buffer = current_buffer;
 
   if (shortage != 0)
     *shortage = 0;
@@ -687,19 +688,68 @@ find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
            examine.  */
 	ptrdiff_t tem, ceiling_byte = end_byte - 1;
 
-        /* If we're looking for a newline, consult the newline cache
-           to see where we can avoid some scanning.  */
+        /* If we're using the newline cache, consult it to see whether
+           we can avoid some scanning.  */
         if (newline_cache)
           {
             ptrdiff_t next_change;
+	    int result = 1;
+
             immediate_quit = 0;
-            while (region_cache_forward
-                   (current_buffer, newline_cache, start, &next_change))
-              start = next_change;
+            while (start < end && result)
+	      {
+		ptrdiff_t lim1;
+
+		result = region_cache_forward (cache_buffer, newline_cache,
+					       start, &next_change);
+		if (result)
+		  {
+		    /* When the cache revalidation is deferred,
+		       next_change might point beyond ZV, which would
+		       cause an assertion violation in CHAR_TO_BYTE below.
+		       Limit next_change to ZV to avoid that.  */
+		    if (next_change > ZV)
+		      next_change = ZV;
+		    start = next_change;
+		    lim1 = next_change = end;
+		  }
+		else
+		  lim1 = min (next_change, end);
+
+		/* The cache returned zero for this region; see if
+		   this is because the region is known and includes
+		   only newlines.  While at that, count any newlines
+		   we bump into, and exit if we found enough of them.  */
+		start_byte = CHAR_TO_BYTE (start);
+		while (start < lim1
+		       && FETCH_BYTE (start_byte) == '\n')
+		  {
+		    start_byte++;
+		    start++;
+		    if (--count == 0)
+		      {
+			if (bytepos)
+			  *bytepos = start_byte;
+			return start;
+		      }
+		  }
+		/* If we found a non-newline character before hitting the
+		   position where the cache will again return non-zero
+		   (i.e. no newlines beyond that position), it means
+		   this region is not yet known to the cache, and we
+		   must resort to the "dumb loop" method.  */
+		if (start < next_change && !result)
+		  break;
+		result = 1;
+	      }
+	    if (start >= end)
+	      {
+		start = end;
+		start_byte = end_byte;
+		break;
+	      }
             immediate_quit = allow_quit;
 
-	    start_byte = CHAR_TO_BYTE (start);
-
             /* START should never be after END.  */
             if (start_byte > ceiling_byte)
               start_byte = ceiling_byte;
@@ -720,42 +770,45 @@ find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
 
         {
           /* The termination address of the dumb loop.  */
-          register unsigned char *ceiling_addr
-	    = BYTE_POS_ADDR (ceiling_byte) + 1;
-          register unsigned char *cursor
-	    = BYTE_POS_ADDR (start_byte);
-          unsigned char *base = cursor;
+	  unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
+	  ptrdiff_t lim_byte = ceiling_byte + 1;
 
-          while (cursor < ceiling_addr)
-            {
+	  /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
+	     of the base, the cursor, and the next line.  */
+	  ptrdiff_t base = start_byte - lim_byte;
+	  ptrdiff_t cursor, next;
+
+	  for (cursor = base; cursor < 0; cursor = next)
+	    {
               /* The dumb loop.  */
-	      unsigned char *nl = memchr (cursor, '\n', ceiling_addr - cursor);
+	      unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
+	      next = nl ? nl - lim_addr : 0;
 
-              /* If we're looking for newlines, cache the fact that
-                 the region from start to cursor is free of them. */
-              if (newline_cache)
+              /* If we're using the newline cache, cache the fact that
+                 the region we just traversed is free of newlines. */
+              if (newline_cache && cursor != next)
 		{
-		  unsigned char *low = cursor;
-		  unsigned char *lim = nl ? nl : ceiling_addr;
-		  know_region_cache (current_buffer, newline_cache,
-				     BYTE_TO_CHAR (low - base + start_byte),
-				     BYTE_TO_CHAR (lim - base + start_byte));
+		  know_region_cache (cache_buffer, newline_cache,
+				     BYTE_TO_CHAR (lim_byte + cursor),
+				     BYTE_TO_CHAR (lim_byte + next));
+		  /* know_region_cache can relocate buffer text.  */
+		  lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
 		}
 
               if (! nl)
 		break;
+	      next++;
 
 	      if (--count == 0)
 		{
 		  immediate_quit = 0;
 		  if (bytepos)
-		    *bytepos = nl + 1 - base + start_byte;
-		  return BYTE_TO_CHAR (nl + 1 - base + start_byte);
+		    *bytepos = lim_byte + next;
+		  return BYTE_TO_CHAR (lim_byte + next);
 		}
-	      cursor = nl + 1;
             }
 
-	  start_byte += ceiling_addr - base;
+	  start_byte = lim_byte;
 	  start = BYTE_TO_CHAR (start_byte);
         }
       }
@@ -769,14 +822,47 @@ find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
         if (newline_cache)
           {
             ptrdiff_t next_change;
+	    int result = 1;
+
             immediate_quit = 0;
-            while (region_cache_backward
-                   (current_buffer, newline_cache, start, &next_change))
-              start = next_change;
+            while (start > end && result)
+	      {
+		ptrdiff_t lim1;
+
+		result = region_cache_backward (cache_buffer, newline_cache,
+						start, &next_change);
+		if (result)
+		  {
+		    start = next_change;
+		    lim1 = next_change = end;
+		  }
+		else
+		  lim1 = max (next_change, end);
+		start_byte = CHAR_TO_BYTE (start);
+		while (start > lim1
+		       && FETCH_BYTE (start_byte - 1) == '\n')
+		  {
+		    if (++count == 0)
+		      {
+			if (bytepos)
+			  *bytepos = start_byte;
+			return start;
+		      }
+		    start_byte--;
+		    start--;
+		  }
+		if (start > next_change && !result)
+		  break;
+		result = 1;
+	      }
+	    if (start <= end)
+	      {
+		start = end;
+		start_byte = end_byte;
+		break;
+	      }
             immediate_quit = allow_quit;
 
-	    start_byte = CHAR_TO_BYTE (start);
-
             /* Start should never be at or before end.  */
             if (start_byte <= ceiling_byte)
               start_byte = ceiling_byte + 1;
@@ -794,24 +880,28 @@ find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
 
         {
           /* The termination address of the dumb loop.  */
-          register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
-          register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
-          unsigned char *base = cursor;
+	  unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
+
+	  /* Offsets (relative to CEILING_ADDR and CEILING_BYTE) of
+	     the base, the cursor, and the previous line.  These
+	     offsets are at least -1.  */
+	  ptrdiff_t base = start_byte - ceiling_byte;
+	  ptrdiff_t cursor, prev;
 
-          while (cursor >= ceiling_addr)
+	  for (cursor = base; 0 < cursor; cursor = prev)
             {
-	      unsigned char *nl = memrchr (ceiling_addr, '\n',
-					   cursor + 1 - ceiling_addr);
+	      unsigned char *nl = memrchr (ceiling_addr, '\n', cursor);
+	      prev = nl ? nl - ceiling_addr : -1;
 
               /* If we're looking for newlines, cache the fact that
-                 the region from after the cursor to start is free of them.  */
-              if (newline_cache)
+                 this line's region is free of them. */
+              if (newline_cache && cursor != prev + 1)
 		{
-		  unsigned char *low = nl ? nl : ceiling_addr - 1;
-		  unsigned char *lim = cursor;
-		  know_region_cache (current_buffer, newline_cache,
-				     BYTE_TO_CHAR (low - base + start_byte),
-				     BYTE_TO_CHAR (lim - base + start_byte));
+		  know_region_cache (cache_buffer, newline_cache,
+				     BYTE_TO_CHAR (ceiling_byte + prev + 1),
+				     BYTE_TO_CHAR (ceiling_byte + cursor));
+		  /* know_region_cache can relocate buffer text.  */
+		  ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 		}
 
               if (! nl)
@@ -821,13 +911,12 @@ find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
 		{
 		  immediate_quit = 0;
 		  if (bytepos)
-		    *bytepos = nl - base + start_byte;
-		  return BYTE_TO_CHAR (nl - base + start_byte);
+		    *bytepos = ceiling_byte + prev + 1;
+		  return BYTE_TO_CHAR (ceiling_byte + prev + 1);
 		}
-	      cursor = nl - 1;
             }
 
-	  start_byte += ceiling_addr - 1 - base;
+	  start_byte = ceiling_byte;
 	  start = BYTE_TO_CHAR (start_byte);
         }
       }
@@ -859,88 +948,38 @@ find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
    If ALLOW_QUIT, set immediate_quit.  That's good to do
    except in special cases.  */
 
-EMACS_INT
+ptrdiff_t
 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
 	      ptrdiff_t limit, ptrdiff_t limit_byte,
-	      EMACS_INT count, bool allow_quit)
+	      ptrdiff_t count, bool allow_quit)
 {
-  int direction = ((count > 0) ? 1 : -1);
-
-  unsigned char *cursor;
-  unsigned char *base;
-
-  ptrdiff_t ceiling;
-  unsigned char *ceiling_addr;
-
-  bool old_immediate_quit = immediate_quit;
+  ptrdiff_t charpos, bytepos, shortage;
 
-  if (allow_quit)
-    immediate_quit++;
-
-  if (count > 0)
-    {
-      while (start_byte < limit_byte)
-	{
-	  ceiling =  BUFFER_CEILING_OF (start_byte);
-	  ceiling = min (limit_byte - 1, ceiling);
-	  ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
-	  base = (cursor = BYTE_POS_ADDR (start_byte));
-
-	  do
-	    {
-	      unsigned char *nl = memchr (cursor, '\n', ceiling_addr - cursor);
-	      if (! nl)
-		break;
-	      if (--count == 0)
-		{
-		  immediate_quit = old_immediate_quit;
-		  start_byte += nl - base + 1;
-		  start = BYTE_TO_CHAR (start_byte);
-		  TEMP_SET_PT_BOTH (start, start_byte);
-		  return 0;
-		}
-	      cursor = nl + 1;
-	    }
-	  while (cursor < ceiling_addr);
-
-	  start_byte += ceiling_addr - base;
-	}
-    }
+  charpos = find_newline (start, start_byte, limit, limit_byte,
+			  count, &shortage, &bytepos, allow_quit);
+  if (shortage)
+    TEMP_SET_PT_BOTH (limit, limit_byte);
   else
-    {
-      while (start_byte > limit_byte)
-	{
-	  ceiling = BUFFER_FLOOR_OF (start_byte - 1);
-	  ceiling = max (limit_byte, ceiling);
-	  ceiling_addr = BYTE_POS_ADDR (ceiling);
-	  base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
-	  while (1)
-	    {
-	      unsigned char *nl = memrchr (ceiling_addr, '\n',
-					   cursor - ceiling_addr);
-	      if (! nl)
-		break;
-
-	      if (++count == 0)
-		{
-		  immediate_quit = old_immediate_quit;
-		  /* Return the position AFTER the match we found.  */
-		  start_byte += nl - base + 1;
-		  start = BYTE_TO_CHAR (start_byte);
-		  TEMP_SET_PT_BOTH (start, start_byte);
-		  return 0;
-		}
+    TEMP_SET_PT_BOTH (charpos, bytepos);
+  return shortage;
+}
 
-	      cursor = nl;
-	    }
-	  start_byte += ceiling_addr - base;
-	}
-    }
+/* Like above, but always scan from point and report the
+   resulting position in *CHARPOS and *BYTEPOS.  */
 
-  TEMP_SET_PT_BOTH (limit, limit_byte);
-  immediate_quit = old_immediate_quit;
+ptrdiff_t
+scan_newline_from_point (ptrdiff_t count, ptrdiff_t *charpos,
+			 ptrdiff_t *bytepos)
+{
+  ptrdiff_t shortage;
 
-  return count * direction;
+  if (count <= 0)
+    *charpos = find_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, count - 1,
+			     &shortage, bytepos, 1);
+  else
+    *charpos = find_newline (PT, PT_BYTE, ZV, ZV_BYTE, count,
+			     &shortage, bytepos, 1);
+  return shortage;
 }
 
 /* Like find_newline, but doesn't allow QUITting and doesn't return
@@ -1276,6 +1315,7 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
 	 translation.  Otherwise set to zero later.  */
       int char_base = -1;
       bool boyer_moore_ok = 1;
+      USE_SAFE_ALLOCA;
 
       /* MULTIBYTE says whether the text to be searched is multibyte.
 	 We must convert PATTERN to match that, or we will not really
@@ -1293,7 +1333,7 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
 	  raw_pattern_size_byte
 	    = count_size_as_multibyte (SDATA (string),
 				       raw_pattern_size);
-	  raw_pattern = alloca (raw_pattern_size_byte + 1);
+	  raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
 	  copy_text (SDATA (string), raw_pattern,
 		     SCHARS (string), 0, 1);
 	}
@@ -1307,7 +1347,7 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
 	     the chosen single-byte character set can possibly match.  */
 	  raw_pattern_size = SCHARS (string);
 	  raw_pattern_size_byte = SCHARS (string);
-	  raw_pattern = alloca (raw_pattern_size + 1);
+	  raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
 	  copy_text (SDATA (string), raw_pattern,
 		     SBYTES (string), 1, 0);
 	}
@@ -1315,7 +1355,7 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
       /* Copy and optionally translate the pattern.  */
       len = raw_pattern_size;
       len_byte = raw_pattern_size_byte;
-      patbuf = alloca (len * MAX_MULTIBYTE_LENGTH);
+      SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
       pat = patbuf;
       base_pat = raw_pattern;
       if (multibyte)
@@ -1374,7 +1414,7 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
 
 		      while (boyer_moore_ok)
 			{
-			  if (ASCII_BYTE_P (inverse))
+			  if (ASCII_CHAR_P (inverse))
 			    {
 			      if (this_char_base > 0)
 				boyer_moore_ok = 0;
@@ -1455,13 +1495,15 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
       len_byte = pat - patbuf;
       pat = base_pat = patbuf;
 
-      if (boyer_moore_ok)
-	return boyer_moore (n, pat, len_byte, trt, inverse_trt,
-			    pos_byte, lim_byte,
-			    char_base);
-      else
-	return simple_search (n, pat, raw_pattern_size, len_byte, trt,
-			      pos, pos_byte, lim, lim_byte);
+      EMACS_INT result
+	= (boyer_moore_ok
+	   ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
+			  pos_byte, lim_byte,
+			  char_base)
+	   : simple_search (n, pat, raw_pattern_size, len_byte, trt,
+			    pos, pos_byte, lim, lim_byte));
+      SAFE_FREE ();
+      return result;
     }
 }
 
@@ -1785,7 +1827,7 @@ boyer_moore (EMACS_INT n, unsigned char *base_pat,
 	     matching with CHAR_BASE are to be checked.  */
 	  int ch = -1;
 
-	  if (ASCII_BYTE_P (*ptr) || ! multibyte)
+	  if (ASCII_CHAR_P (*ptr) || ! multibyte)
 	    ch = *ptr;
 	  else if (char_base
 		   && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
@@ -2554,7 +2596,7 @@ since only regular expressions have distinguished subexpressions.  */)
 	    {
 	      FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
 	      if (!buf_multibyte)
-		c = multibyte_char_to_unibyte (c);
+		c = CHAR_TO_BYTE8 (c);
 	    }
 	  else
 	    {
@@ -2577,7 +2619,7 @@ since only regular expressions have distinguished subexpressions.  */)
 		  FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
 						      pos, pos_byte);
 		  if (!buf_multibyte && !ASCII_CHAR_P (c))
-		    c = multibyte_char_to_unibyte (c);
+		    c = CHAR_TO_BYTE8 (c);
 		}
 	      else
 		{
@@ -2637,18 +2679,8 @@ since only regular expressions have distinguished subexpressions.  */)
 	}
 
       if (really_changed)
-	{
-	  if (buf_multibyte)
-	    {
-	      ptrdiff_t nchars =
-		multibyte_chars_in_text (substed, substed_len);
-
-	      newtext = make_multibyte_string ((char *) substed, nchars,
-					       substed_len);
-	    }
-	  else
-	    newtext = make_unibyte_string ((char *) substed, substed_len);
-	}
+	newtext = make_specified_string ((const char *) substed, -1,
+					 substed_len, buf_multibyte);
       xfree (substed);
     }
 
@@ -2720,7 +2752,9 @@ SUBEXP, a number, specifies which parenthesized expression in the last
   regexp.
 Value is nil if SUBEXPth pair didn't match, or there were less than
   SUBEXP pairs.
-Zero means the entire text matched by the whole regexp or whole string.  */)
+Zero means the entire text matched by the whole regexp or whole string.
+
+Return value is undefined if the last search failed.  */)
   (Lisp_Object subexp)
 {
   return match_limit (subexp, 1);
@@ -2732,21 +2766,23 @@ SUBEXP, a number, specifies which parenthesized expression in the last
   regexp.
 Value is nil if SUBEXPth pair didn't match, or there were less than
   SUBEXP pairs.
-Zero means the entire text matched by the whole regexp or whole string.  */)
+Zero means the entire text matched by the whole regexp or whole string.
+
+Return value is undefined if the last search failed.  */)
   (Lisp_Object subexp)
 {
   return match_limit (subexp, 0);
 }
 
 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
-       doc: /* Return a list containing all info on what the last search matched.
+       doc: /* Return a list describing what the last search matched.
 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
 All the elements are markers or nil (nil if the Nth pair didn't match)
 if the last match was on a buffer; integers or nil if a string was matched.
 Use `set-match-data' to reinstate the data in this list.
 
 If INTEGERS (the optional first argument) is non-nil, always use
-integers \(rather than markers) to represent buffer positions.  In
+integers (rather than markers) to represent buffer positions.  In
 this case, and if the last match was in a buffer, the buffer will get
 stored as one additional element at the end of the list.
 
@@ -2777,7 +2813,8 @@ Return value is undefined if the last search failed.  */)
 
   prev = Qnil;
 
-  data = alloca ((2 * search_regs.num_regs + 1) * sizeof *data);
+  USE_SAFE_ALLOCA;
+  SAFE_NALLOCA (data, 1, 2 * search_regs.num_regs + 1);
 
   len = 0;
   for (i = 0; i < search_regs.num_regs; i++)
@@ -2820,25 +2857,28 @@ Return value is undefined if the last search failed.  */)
 
   /* If REUSE is not usable, cons up the values and return them.  */
   if (! CONSP (reuse))
-    return Flist (len, data);
-
-  /* If REUSE is a list, store as many value elements as will fit
-     into the elements of REUSE.  */
-  for (i = 0, tail = reuse; CONSP (tail);
-       i++, tail = XCDR (tail))
+    reuse = Flist (len, data);
+  else
     {
+      /* If REUSE is a list, store as many value elements as will fit
+	 into the elements of REUSE.  */
+      for (i = 0, tail = reuse; CONSP (tail);
+	   i++, tail = XCDR (tail))
+	{
+	  if (i < len)
+	    XSETCAR (tail, data[i]);
+	  else
+	    XSETCAR (tail, Qnil);
+	  prev = tail;
+	}
+
+      /* If we couldn't fit all value elements into REUSE,
+	 cons up the rest of them and add them to the end of REUSE.  */
       if (i < len)
-	XSETCAR (tail, data[i]);
-      else
-	XSETCAR (tail, Qnil);
-      prev = tail;
+	XSETCDR (prev, Flist (len - i, data + i));
     }
 
-  /* If we couldn't fit all value elements into REUSE,
-     cons up the rest of them and add them to the end of REUSE.  */
-  if (i < len)
-    XSETCDR (prev, Flist (len - i, data + i));
-
+  SAFE_FREE ();
   return reuse;
 }
 
@@ -3043,7 +3083,8 @@ DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
 
   CHECK_STRING (string);
 
-  temp = alloca (SBYTES (string) * 2);
+  USE_SAFE_ALLOCA;
+  SAFE_NALLOCA (temp, 2, SBYTES (string));
 
   /* Now copy the data into the new string, inserting escapes. */
 
@@ -3061,10 +3102,194 @@ DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
       *out++ = *in;
     }
 
-  return make_specified_string (temp,
-				SCHARS (string) + backslashes_added,
-				out - temp,
-				STRING_MULTIBYTE (string));
+  Lisp_Object result
+    = make_specified_string (temp,
+			     SCHARS (string) + backslashes_added,
+			     out - temp,
+			     STRING_MULTIBYTE (string));
+  SAFE_FREE ();
+  return result;
+}
+
+/* Like find_newline, but doesn't use the cache, and only searches forward.  */
+static ptrdiff_t
+find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
+	       ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
+	       ptrdiff_t *bytepos, bool allow_quit)
+{
+  if (count > 0)
+    {
+      if (!end)
+	end = ZV, end_byte = ZV_BYTE;
+    }
+  else
+    {
+      if (!end)
+	end = BEGV, end_byte = BEGV_BYTE;
+    }
+  if (end_byte == -1)
+    end_byte = CHAR_TO_BYTE (end);
+
+  if (shortage != 0)
+    *shortage = 0;
+
+  immediate_quit = allow_quit;
+
+  if (count > 0)
+    while (start != end)
+      {
+        /* Our innermost scanning loop is very simple; it doesn't know
+           about gaps, buffer ends, or the newline cache.  ceiling is
+           the position of the last character before the next such
+           obstacle --- the last character the dumb search loop should
+           examine.  */
+	ptrdiff_t tem, ceiling_byte = end_byte - 1;
+
+	if (start_byte == -1)
+	  start_byte = CHAR_TO_BYTE (start);
+
+        /* The dumb loop can only scan text stored in contiguous
+           bytes. BUFFER_CEILING_OF returns the last character
+           position that is contiguous, so the ceiling is the
+           position after that.  */
+	tem = BUFFER_CEILING_OF (start_byte);
+	ceiling_byte = min (tem, ceiling_byte);
+
+        {
+          /* The termination address of the dumb loop.  */
+	  unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
+	  ptrdiff_t lim_byte = ceiling_byte + 1;
+
+	  /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
+	     of the base, the cursor, and the next line.  */
+	  ptrdiff_t base = start_byte - lim_byte;
+	  ptrdiff_t cursor, next;
+
+	  for (cursor = base; cursor < 0; cursor = next)
+	    {
+              /* The dumb loop.  */
+	      unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
+	      next = nl ? nl - lim_addr : 0;
+
+              if (! nl)
+		break;
+	      next++;
+
+	      if (--count == 0)
+		{
+		  immediate_quit = 0;
+		  if (bytepos)
+		    *bytepos = lim_byte + next;
+		  return BYTE_TO_CHAR (lim_byte + next);
+		}
+            }
+
+	  start_byte = lim_byte;
+	  start = BYTE_TO_CHAR (start_byte);
+        }
+      }
+
+  immediate_quit = 0;
+  if (shortage)
+    *shortage = count;
+  if (bytepos)
+    {
+      *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
+      eassert (*bytepos == CHAR_TO_BYTE (start));
+    }
+  return start;
+}
+
+DEFUN ("newline-cache-check", Fnewline_cache_check, Snewline_cache_check,
+       0, 1, 0,
+       doc: /* Check the newline cache of BUFFER against buffer contents.
+
+BUFFER defaults to the current buffer.
+
+Value is an array of 2 sub-arrays of buffer positions for newlines,
+the first based on the cache, the second based on actually scanning
+the buffer.  If the buffer doesn't have a cache, the value is nil.  */)
+  (Lisp_Object buffer)
+{
+  struct buffer *buf, *old = NULL;
+  ptrdiff_t shortage, nl_count_cache, nl_count_buf;
+  Lisp_Object cache_newlines, buf_newlines, val;
+  ptrdiff_t from, found, i;
+
+  if (NILP (buffer))
+    buf = current_buffer;
+  else
+    {
+      CHECK_BUFFER (buffer);
+      buf = XBUFFER (buffer);
+      old = current_buffer;
+    }
+  if (buf->base_buffer)
+    buf = buf->base_buffer;
+
+  /* If the buffer doesn't have a newline cache, return nil.  */
+  if (NILP (BVAR (buf, cache_long_scans))
+      || buf->newline_cache == NULL)
+    return Qnil;
+
+  /* find_newline can only work on the current buffer.  */
+  if (old != NULL)
+    set_buffer_internal_1 (buf);
+
+  /* How many newlines are there according to the cache?  */
+  find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
+		TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
+  nl_count_cache = TYPE_MAXIMUM (ptrdiff_t) - shortage;
+
+  /* Create vector and populate it.  */
+  cache_newlines = make_uninit_vector (nl_count_cache);
+
+  if (nl_count_cache)
+    {
+      for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
+	{
+	  ptrdiff_t from_byte = CHAR_TO_BYTE (from);
+
+	  found = find_newline (from, from_byte, 0, -1, 1, &shortage,
+				NULL, true);
+	  if (shortage != 0 || i >= nl_count_cache)
+	    break;
+	  ASET (cache_newlines, i, make_number (found - 1));
+	}
+      /* Fill the rest of slots with an invalid position.  */
+      for ( ; i < nl_count_cache; i++)
+	ASET (cache_newlines, i, make_number (-1));
+    }
+
+  /* Now do the same, but without using the cache.  */
+  find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
+		 TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
+  nl_count_buf = TYPE_MAXIMUM (ptrdiff_t) - shortage;
+  buf_newlines = make_uninit_vector (nl_count_buf);
+  if (nl_count_buf)
+    {
+      for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
+	{
+	  ptrdiff_t from_byte = CHAR_TO_BYTE (from);
+
+	  found = find_newline1 (from, from_byte, 0, -1, 1, &shortage,
+				 NULL, true);
+	  if (shortage != 0 || i >= nl_count_buf)
+	    break;
+	  ASET (buf_newlines, i, make_number (found - 1));
+	}
+      for ( ; i < nl_count_buf; i++)
+	ASET (buf_newlines, i, make_number (-1));
+    }
+
+  /* Construct the value and return it.  */
+  val = make_uninit_vector (2);
+  ASET (val, 0, cache_newlines);
+  ASET (val, 1, buf_newlines);
+
+  if (old != NULL)
+    set_buffer_internal_1 (old);
+  return val;
 }
 
 void
@@ -3087,7 +3312,10 @@ syms_of_search (void)
     }
   searchbuf_head = &searchbufs[0];
 
+  /* Error condition used for failing searches.  */
   DEFSYM (Qsearch_failed, "search-failed");
+
+  /* Error condition signaled when regexp compile_pattern fails.  */
   DEFSYM (Qinvalid_regexp, "invalid-regexp");
 
   Fput (Qsearch_failed, Qerror_conditions,
@@ -3138,4 +3366,5 @@ is to bind it with `let' around a small expression.  */);
   defsubr (&Smatch_data);
   defsubr (&Sset_match_data);
   defsubr (&Sregexp_quote);
+  defsubr (&Snewline_cache_check);
 }
author	Ken Raeburn <raeburn@raeburn.org>	2015-11-01 01:42:21 -0400
committer	Ken Raeburn <raeburn@raeburn.org>	2015-11-01 01:42:21 -0400
commit	39372e1a1032521be74575bb06f95a3898fbae30 (patch)
tree	754bd242a23d2358ea116126fcb0a629947bd9ec /src/search.c
parent	6a3121904d76e3b2f63007341d48c5c1af55de80 (diff)
parent	e11aaee266da52937a3a031cb108fe13f68958c3 (diff)
download	emacs-39372e1a1032521be74575bb06f95a3898fbae30.tar.gz emacs-39372e1a1032521be74575bb06f95a3898fbae30.tar.bz2 emacs-39372e1a1032521be74575bb06f95a3898fbae30.zip