diff options
author | Andrew G Cohen <cohen@andy.bu.edu> | 2022-03-10 09:30:00 +0800 |
---|---|---|
committer | Andrew G Cohen <cohen@andy.bu.edu> | 2022-04-04 07:43:11 +0800 |
commit | 9ff2f0be32be621a0a1953cac2d552afebafe226 (patch) | |
tree | 0be9c62c2e143e8dd00b14a090d73a4badd46eb2 /src/sort.c | |
parent | e091bee8db9926716a3e7778275901696896cbdf (diff) | |
download | emacs-9ff2f0be32be621a0a1953cac2d552afebafe226.tar.gz emacs-9ff2f0be32be621a0a1953cac2d552afebafe226.tar.bz2 emacs-9ff2f0be32be621a0a1953cac2d552afebafe226.zip |
Replace list and vector sorting with TIMSORT algorithm
* src/Makefile.in (base_obj): Add sort.o.
* src/deps.mk (fns.o): Add sort.c.
* src/lisp.h: Add prototypes for inorder, tim_sort.
* src/sort.c: New file providing tim_sort.
* src/fns.c: Remove prototypes for removed routines.
(merge_vectors, sort_vector_inplace, sort_vector_copy): Remove.
(sort_list, sort_vector): Use tim_sort.
* test/src/fns-tests.el (fns-tests-sort): New sorting unit tests.
Diffstat (limited to 'src/sort.c')
-rw-r--r-- | src/sort.c | 974 |
1 files changed, 974 insertions, 0 deletions
diff --git a/src/sort.c b/src/sort.c new file mode 100644 index 00000000000..c7ccfc23055 --- /dev/null +++ b/src/sort.c @@ -0,0 +1,974 @@ +/* Timsort for sequences. + +Copyright (C) 2022 Free Software Foundation, Inc. + +This file is part of GNU Emacs. + +GNU Emacs is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or (at +your option) any later version. + +GNU Emacs is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */ + +/* This is a version of the cpython code implementing the TIMSORT + sorting algorithm described in + https://github.com/python/cpython/blob/main/Objects/listsort.txt. + This algorithm identifies and pushes naturally ordered sublists of + the original list, or "runs", onto a stack, and merges them + periodically according to a merge strategy called "powersort". + State is maintained during the sort in a merge_state structure, + which is passed around as an argument to all the subroutines. A + "stretch" structure includes a pointer to the run BASE of length + LEN along with its POWER (a computed integer used by the powersort + merge strategy that depends on this run and the succeeding run.) */ + + +#include <config.h> +#include "lisp.h" + + +/* MAX_MERGE_PENDING is the maximum number of entries in merge_state's + pending-stretch stack. For a list with n elements, this needs at most + floor(log2(n)) + 1 entries even if we didn't force runs to a + minimal length. So the number of bits in a ptrdiff_t is plenty large + enough for all cases. */ + +#define MAX_MERGE_PENDING (sizeof (ptrdiff_t) * 8) + +/* Once we get into galloping mode, we stay there as long as both runs + win at least GALLOP_WIN_MIN consecutive times. */ + +#define GALLOP_WIN_MIN 7 + +/* A small temp array of size MERGESTATE_TEMP_SIZE is used to avoid + malloc when merging small lists. */ + +#define MERGESTATE_TEMP_SIZE 256 + +struct stretch +{ + Lisp_Object *base; + ptrdiff_t len; + int power; +}; + +struct reloc +{ + Lisp_Object **src; + Lisp_Object **dst; + ptrdiff_t *size; + int order; /* -1 while in merge_lo; +1 while in merg_hi; 0 otherwise. */ +}; + + +typedef struct +{ + Lisp_Object *listbase; + ptrdiff_t listlen; + + /* PENDING is a stack of N pending stretches yet to be merged. + Stretch #i starts at address base[i] and extends for len[i] + elements. */ + + int n; + struct stretch pending[MAX_MERGE_PENDING]; + + /* The variable MIN_GALLOP, initialized to GALLOP_WIN_MIN, controls + when we get *into* galloping mode. merge_lo and merge_hi tend to + nudge it higher for random data, and lower for highly structured + data. */ + + ptrdiff_t min_gallop; + + /* 'A' is temporary storage, able to hold ALLOCED elements, to help + with merges. 'A' initially points to TEMPARRAY, and subsequently + to newly allocated memory if needed. */ + + Lisp_Object *a; + ptrdiff_t alloced; + specpdl_ref count; + Lisp_Object temparray[MERGESTATE_TEMP_SIZE]; + + /* If an exception is thrown while merging we might have to relocate + some list elements from temporary storage back into the list. + RELOC keeps track of the information needed to do this. */ + + struct reloc reloc; + + /* PREDICATE is the lisp comparison predicate for the sort. */ + + Lisp_Object predicate; +} merge_state; + + +/* Return true iff (PREDICATE A B) is non-nil. */ + +static inline bool +inorder (const Lisp_Object predicate, const Lisp_Object a, const Lisp_Object b) +{ + return !NILP (call2 (predicate, a, b)); +} + + +/* Sort the list starting at LO and ending at HI using a stable binary + insertion sort algorithm. On entry the sublist [LO, START) (with + START between LO and HIGH) is known to be sorted (pass START == LO + if you are unsure). Even in case of error, the output will be some + permutation of the input (nothing is lost or duplicated). */ + +static void +binarysort (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi, + Lisp_Object *start) +{ + Lisp_Object pred = ms->predicate; + + eassume (lo <= start && start <= hi); + if (lo == start) + ++start; + for (; start < hi; ++start) + { + Lisp_Object *l = lo; + Lisp_Object *r = start; + Lisp_Object pivot = *r; + + eassume (l < r); + do { + Lisp_Object *p = l + ((r - l) >> 1); + if (inorder (pred, pivot, *p)) + r = p; + else + l = p + 1; + } while (l < r); + eassume (l == r); + for (Lisp_Object *p = start; p > l; --p) + p[0] = p[-1]; + *l = pivot; + } +} + + +/* Find and return the length of the "run" (the longest + non-decreasing sequence or the longest strictly decreasing + sequence, with the Boolean *DESCENDING set to 0 in the former + case, or to 1 in the latter) beginning at LO, in the slice [LO, + HI) with LO < HI. The strictness of the definition of + "descending" ensures there are no equal elements to get out of + order so the caller can safely reverse a descending sequence + without violating stability. */ + +static ptrdiff_t +count_run (merge_state *ms, Lisp_Object *lo, const Lisp_Object *hi, + bool *descending) +{ + Lisp_Object pred = ms->predicate; + + eassume (lo < hi); + *descending = 0; + ++lo; + ptrdiff_t n = 1; + if (lo == hi) + return n; + + n = 2; + if (inorder (pred, lo[0], lo[-1])) + { + *descending = 1; + for (lo = lo + 1; lo < hi; ++lo, ++n) + { + if (!inorder (pred, lo[0], lo[-1])) + break; + } + } + else + { + for (lo = lo + 1; lo < hi; ++lo, ++n) + { + if (inorder (pred, lo[0], lo[-1])) + break; + } + } + + return n; +} + + +/* Locate and return the proper insertion position of KEY in a sorted + vector: if the vector contains an element equal to KEY, return the + position immediately to the left of the leftmost equal element. + [GALLOP_RIGHT does the same except it returns the position to the + right of the rightmost equal element (if any).] + + 'A' is a sorted vector of N elements. N must be > 0. + + Elements preceding HINT, a non-negative index less than N, are + skipped. The closer HINT is to the final result, the faster this + runs. + + The return value is the int k in [0, N] such that + + A[k-1] < KEY <= a[k] + + pretending that *(A-1) precedes all values and *(A+N) succeeds all + values. In other words, the first k elements of A should precede + KEY, and the last N-k should follow KEY. */ + +static ptrdiff_t +gallop_left (merge_state *ms, const Lisp_Object key, Lisp_Object *a, + const ptrdiff_t n, const ptrdiff_t hint) +{ + Lisp_Object pred = ms->predicate; + + eassume (a && n > 0 && hint >= 0 && hint < n); + + a += hint; + ptrdiff_t lastofs = 0; + ptrdiff_t ofs = 1; + if (inorder (pred, *a, key)) + { + /* When a[hint] < key, gallop right until + a[hint + lastofs] < key <= a[hint + ofs]. */ + const ptrdiff_t maxofs = n - hint; /* This is one after the end of a. */ + while (ofs < maxofs) + { + if (inorder (pred, a[ofs], key)) + { + lastofs = ofs; + eassume (ofs <= (PTRDIFF_MAX - 1) / 2); + ofs = (ofs << 1) + 1; + } + else + break; /* Here key <= a[hint+ofs]. */ + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to offsets relative to &a[0]. */ + lastofs += hint; + ofs += hint; + } + else + { + /* When key <= a[hint], gallop left, until + a[hint - ofs] < key <= a[hint - lastofs]. */ + const ptrdiff_t maxofs = hint + 1; /* Here &a[0] is lowest. */ + while (ofs < maxofs) + { + if (inorder (pred, a[-ofs], key)) + break; + /* Here key <= a[hint - ofs]. */ + lastofs = ofs; + eassume (ofs <= (PTRDIFF_MAX - 1) / 2); + ofs = (ofs << 1) + 1; + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to use positive offsets relative to &a[0]. */ + ptrdiff_t k = lastofs; + lastofs = hint - ofs; + ofs = hint - k; + } + a -= hint; + + eassume (-1 <= lastofs && lastofs < ofs && ofs <= n); + /* Now a[lastofs] < key <= a[ofs], so key belongs somewhere to the + right of lastofs but no farther right than ofs. Do a binary + search, with invariant a[lastofs-1] < key <= a[ofs]. */ + ++lastofs; + while (lastofs < ofs) + { + ptrdiff_t m = lastofs + ((ofs - lastofs) >> 1); + + if (inorder (pred, a[m], key)) + lastofs = m + 1; /* Here a[m] < key. */ + else + ofs = m; /* Here key <= a[m]. */ + } + eassume (lastofs == ofs); /* Then a[ofs-1] < key <= a[ofs]. */ + return ofs; +} + + +/* Locate and return the proper position of KEY in a sorted vector + exactly like GALLOP_LEFT, except that if KEY already exists in + A[0:N] find the position immediately to the right of the rightmost + equal value. + + The return value is the int k in [0, N] such that + + A[k-1] <= KEY < A[k]. */ + +static ptrdiff_t +gallop_right (merge_state *ms, const Lisp_Object key, Lisp_Object *a, + const ptrdiff_t n, const ptrdiff_t hint) +{ + Lisp_Object pred = ms->predicate; + + eassume (a && n > 0 && hint >= 0 && hint < n); + + a += hint; + ptrdiff_t lastofs = 0; + ptrdiff_t ofs = 1; + if (inorder (pred, key, *a)) + { + /* When key < a[hint], gallop left until + a[hint - ofs] <= key < a[hint - lastofs]. */ + const ptrdiff_t maxofs = hint + 1; /* Here &a[0] is lowest. */ + while (ofs < maxofs) + { + if (inorder (pred, key, a[-ofs])) + { + lastofs = ofs; + eassume (ofs <= (PTRDIFF_MAX - 1) / 2); + ofs = (ofs << 1) + 1; + } + else /* Here a[hint - ofs] <= key. */ + break; + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to use positive offsets relative to &a[0]. */ + ptrdiff_t k = lastofs; + lastofs = hint - ofs; + ofs = hint - k; + } + else + { + /* When a[hint] <= key, gallop right, until + a[hint + lastofs] <= key < a[hint + ofs]. */ + const ptrdiff_t maxofs = n - hint; /* Here &a[n-1] is highest. */ + while (ofs < maxofs) + { + if (inorder (pred, key, a[ofs])) + break; + /* Here a[hint + ofs] <= key. */ + lastofs = ofs; + eassume (ofs <= (PTRDIFF_MAX - 1) / 2); + ofs = (ofs << 1) + 1; + } + if (ofs > maxofs) + ofs = maxofs; + /* Translate back to use offsets relative to &a[0]. */ + lastofs += hint; + ofs += hint; + } + a -= hint; + + eassume (-1 <= lastofs && lastofs < ofs && ofs <= n); + /* Now a[lastofs] <= key < a[ofs], so key belongs somewhere to the + right of lastofs but no farther right than ofs. Do a binary + search, with invariant a[lastofs-1] <= key < a[ofs]. */ + ++lastofs; + while (lastofs < ofs) + { + ptrdiff_t m = lastofs + ((ofs - lastofs) >> 1); + + if (inorder (pred, key, a[m])) + ofs = m; /* Here key < a[m]. */ + else + lastofs = m + 1; /* Here a[m] <= key. */ + } + eassume (lastofs == ofs); /* Now a[ofs-1] <= key < a[ofs]. */ + return ofs; +} + + +static void +merge_init (merge_state *ms, const ptrdiff_t list_size, Lisp_Object *lo, + const Lisp_Object predicate) +{ + eassume (ms != NULL); + + ms->a = ms->temparray; + ms->alloced = MERGESTATE_TEMP_SIZE; + + ms->n = 0; + ms->min_gallop = GALLOP_WIN_MIN; + ms->listlen = list_size; + ms->listbase = lo; + ms->predicate = predicate; + ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; +} + + +/* The dynamically allocated memory may hold lisp objects during + merging. MERGE_MARKMEM marks them so they aren't reaped during + GC. */ + +static void +merge_markmem (void *arg) +{ + merge_state *ms = arg; + eassume (ms != NULL); + + if (ms->reloc.size != NULL && *ms->reloc.size > 0) + { + eassume (ms->reloc.src != NULL); + mark_objects (*ms->reloc.src, *ms->reloc.size); + } +} + + +/* Free all temp storage. If an exception occurs while merging, + relocate any lisp elements in temp storage back to the original + array. */ + +static void +cleanup_mem (void *arg) +{ + merge_state *ms = arg; + eassume (ms != NULL); + + /* If we have an exception while merging, some of the list elements + might only live in temp storage; we copy everything remaining in + the temp storage back into the original list. This ensures that + the original list has all of the original elements, although + their order is unpredictable. */ + + if (ms->reloc.order != 0 && *ms->reloc.size > 0) + { + eassume (*ms->reloc.src != NULL && *ms->reloc.dst != NULL); + ptrdiff_t n = *ms->reloc.size; + ptrdiff_t shift = ms->reloc.order == -1 ? 0 : n - 1; + memcpy (*ms->reloc.dst - shift, *ms->reloc.src, n * word_size); + } + + /* Free any remaining temp storage. */ + xfree (ms->a); +} + + +/* Allocate enough temp memory for NEED array slots. Any previously + allocated memory is first freed, and a cleanup routine is + registered to free memory at the very end of the sort, or on + exception. */ + +static void +merge_getmem (merge_state *ms, const ptrdiff_t need) +{ + eassume (ms != NULL); + + if (ms->a == ms->temparray) + { + /* We only get here if alloc is needed and this is the first + time, so we set up the unwind protection. */ + specpdl_ref count = SPECPDL_INDEX (); + record_unwind_protect_ptr_mark (cleanup_mem, ms, merge_markmem); + ms->count = count; + } + else + { + /* We have previously alloced storage. Since we don't care + what's in the block we don't use realloc which would waste + cycles copying the old data. We just free and alloc + again. */ + xfree (ms->a); + } + ms->a = xmalloc (need * word_size); + ms->alloced = need; +} + + +static inline void +needmem (merge_state *ms, ptrdiff_t na) +{ + if (na > ms->alloced) + merge_getmem (ms, na); +} + + +/* Stably merge (in-place) the NA elements starting at SSA with the NB + elements starting at SSB = SSA + NA. NA and NB must be positive. + Require that SSA[NA-1] belongs at the end of the merge, and NA <= + NB. */ + +static void +merge_lo (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, Lisp_Object *ssb, + ptrdiff_t nb) +{ + Lisp_Object pred = ms->predicate; + + eassume (ms && ssa && ssb && na > 0 && nb > 0); + eassume (ssa + na == ssb); + needmem (ms, na); + memcpy (ms->a, ssa, na * word_size); + Lisp_Object *dest = ssa; + ssa = ms->a; + + ms->reloc = (struct reloc){&ssa, &dest, &na, -1}; + + *dest++ = *ssb++; + --nb; + if (nb == 0) + goto Succeed; + if (na == 1) + goto CopyB; + + ptrdiff_t min_gallop = ms->min_gallop; + for (;;) + { + ptrdiff_t acount = 0; /* The # of consecutive times A won. */ + + ptrdiff_t bcount = 0; /* The # of consecutive times B won. */ + + for (;;) + { + eassume (na > 1 && nb > 0); + if (inorder (pred, *ssb, *ssa)) + { + *dest++ = *ssb++ ; + ++bcount; + acount = 0; + --nb; + if (nb == 0) + goto Succeed; + if (bcount >= min_gallop) + break; + } + else + { + *dest++ = *ssa++; + ++acount; + bcount = 0; + --na; + if (na == 1) + goto CopyB; + if (acount >= min_gallop) + break; + } + } + + /* One run is winning so consistently that galloping may be a + huge speedup. We try that, and continue galloping until (if + ever) neither run appears to be winning consistently + anymore. */ + ++min_gallop; + do { + eassume (na > 1 && nb > 0); + min_gallop -= min_gallop > 1; + ms->min_gallop = min_gallop; + ptrdiff_t k = gallop_right (ms, ssb[0], ssa, na, 0); + acount = k; + if (k) + { + memcpy (dest, ssa, k * word_size); + dest += k; + ssa += k; + na -= k; + if (na == 1) + goto CopyB; + /* While na==0 is impossible for a consistent comparison + function, we shouldn't assume that it is. */ + if (na == 0) + goto Succeed; + } + *dest++ = *ssb++ ; + --nb; + if (nb == 0) + goto Succeed; + + k = gallop_left (ms, ssa[0], ssb, nb, 0); + bcount = k; + if (k) + { + memmove (dest, ssb, k * word_size); + dest += k; + ssb += k; + nb -= k; + if (nb == 0) + goto Succeed; + } + *dest++ = *ssa++; + --na; + if (na == 1) + goto CopyB; + } while (acount >= GALLOP_WIN_MIN || bcount >= GALLOP_WIN_MIN); + ++min_gallop; /* Apply a penalty for leaving galloping mode. */ + ms->min_gallop = min_gallop; + } + Succeed: + ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; + + if (na) + memcpy (dest, ssa, na * word_size); + return; + CopyB: + eassume (na == 1 && nb > 0); + ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; + + /* The last element of ssa belongs at the end of the merge. */ + memmove (dest, ssb, nb * word_size); + dest[nb] = ssa[0]; +} + + +/* Stably merge (in-place) the NA elements starting at SSA with the NB + elements starting at SSB = SSA + NA. NA and NB must be positive. + Require that SSA[NA-1] belongs at the end of the merge, and NA >= + NB. */ + +static void +merge_hi (merge_state *ms, Lisp_Object *ssa, ptrdiff_t na, + Lisp_Object *ssb, ptrdiff_t nb) +{ + Lisp_Object pred = ms->predicate; + + eassume (ms && ssa && ssb && na > 0 && nb > 0); + eassume (ssa + na == ssb); + needmem (ms, nb); + Lisp_Object *dest = ssb; + dest += nb - 1; + memcpy(ms->a, ssb, nb * word_size); + Lisp_Object *basea = ssa; + Lisp_Object *baseb = ms->a; + ssb = ms->a + nb - 1; + ssa += na - 1; + + ms->reloc = (struct reloc){&baseb, &dest, &nb, 1}; + + *dest-- = *ssa--; + --na; + if (na == 0) + goto Succeed; + if (nb == 1) + goto CopyA; + + ptrdiff_t min_gallop = ms->min_gallop; + for (;;) { + ptrdiff_t acount = 0; /* The # of consecutive times A won. */ + ptrdiff_t bcount = 0; /* The # of consecutive times B won. */ + + for (;;) { + eassume (na > 0 && nb > 1); + if (inorder (pred, *ssb, *ssa)) + { + *dest-- = *ssa--; + ++acount; + bcount = 0; + --na; + if (na == 0) + goto Succeed; + if (acount >= min_gallop) + break; + } + else + { + *dest-- = *ssb--; + ++bcount; + acount = 0; + --nb; + if (nb == 1) + goto CopyA; + if (bcount >= min_gallop) + break; + } + } + + /* One run is winning so consistently that galloping may be a huge + speedup. Try that, and continue galloping until (if ever) + neither run appears to be winning consistently anymore. */ + ++min_gallop; + do { + eassume (na > 0 && nb > 1); + min_gallop -= min_gallop > 1; + ms->min_gallop = min_gallop; + ptrdiff_t k = gallop_right (ms, ssb[0], basea, na, na - 1); + k = na - k; + acount = k; + if (k) + { + dest += -k; + ssa += -k; + memmove(dest + 1, ssa + 1, k * word_size); + na -= k; + if (na == 0) + goto Succeed; + } + *dest-- = *ssb--; + --nb; + if (nb == 1) + goto CopyA; + + k = gallop_left (ms, ssa[0], baseb, nb, nb - 1); + k = nb - k; + bcount = k; + if (k) + { + dest += -k; + ssb += -k; + memcpy(dest + 1, ssb + 1, k * word_size); + nb -= k; + if (nb == 1) + goto CopyA; + /* While nb==0 is impossible for a consistent comparison + function we shouldn't assume that it is. */ + if (nb == 0) + goto Succeed; + } + *dest-- = *ssa--; + --na; + if (na == 0) + goto Succeed; + } while (acount >= GALLOP_WIN_MIN || bcount >= GALLOP_WIN_MIN); + ++min_gallop; /* Apply a penalty for leaving galloping mode. */ + ms->min_gallop = min_gallop; + } + Succeed: + ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; + if (nb) + memcpy (dest - nb + 1, baseb, nb * word_size); + return; + CopyA: + eassume (nb == 1 && na > 0); + ms->reloc = (struct reloc){NULL, NULL, NULL, 0}; + /* The first element of ssb belongs at the front of the merge. */ + memmove (dest + 1 - na, ssa + 1 - na, na * word_size); + dest += -na; + ssa += -na; + dest[0] = ssb[0]; +} + + +/* Merge the two runs at stack indices I and I+1. */ + +static void +merge_at (merge_state *ms, const ptrdiff_t i) +{ + eassume (ms != NULL); + eassume (ms->n >= 2); + eassume (i >= 0); + eassume (i == ms->n - 2 || i == ms->n - 3); + + Lisp_Object *ssa = ms->pending[i].base; + ptrdiff_t na = ms->pending[i].len; + Lisp_Object *ssb = ms->pending[i + 1].base; + ptrdiff_t nb = ms->pending[i + 1].len; + eassume (na > 0 && nb > 0); + eassume (ssa + na == ssb); + + /* Record the length of the combined runs. The current run i+1 goes + away after the merge. If i is the 3rd-last run now, slide the + last run (which isn't involved in this merge) over to i+1. */ + ms->pending[i].len = na + nb; + if (i == ms->n - 3) + ms->pending[i + 1] = ms->pending[i + 2]; + --ms->n; + + /* Where does b start in a? Elements in a before that can be + ignored (they are already in place). */ + ptrdiff_t k = gallop_right (ms, *ssb, ssa, na, 0); + eassume (k >= 0); + ssa += k; + na -= k; + if (na == 0) + return; + + /* Where does a end in b? Elements in b after that can be ignored + (they are already in place). */ + nb = gallop_left (ms, ssa[na - 1], ssb, nb, nb - 1); + if (nb == 0) + return; + eassume (nb > 0); + /* Merge what remains of the runs using a temp array with size + min(na, nb) elements. */ + if (na <= nb) + merge_lo (ms, ssa, na, ssb, nb); + else + merge_hi (ms, ssa, na, ssb, nb); +} + + +/* Compute the "power" of the first of two adjacent runs begining at + index S1, with the first having length N1 and the second (starting + at index S1+N1) having length N2. The run has total length N. */ + +static int +powerloop (const ptrdiff_t s1, const ptrdiff_t n1, const ptrdiff_t n2, + const ptrdiff_t n) +{ + eassume (s1 >= 0); + eassume (n1 > 0 && n2 > 0); + eassume (s1 + n1 + n2 <= n); + /* The midpoints a and b are + a = s1 + n1/2 + b = s1 + n1 + n2/2 = a + (n1 + n2)/2 + + These may not be integers because of the "/2", so we work with + 2*a and 2*b instead. It makes no difference to the outcome, + since the bits in the expansion of (2*i)/n are merely shifted one + position from those of i/n. */ + ptrdiff_t a = 2 * s1 + n1; + ptrdiff_t b = a + n1 + n2; + int result = 0; + /* Emulate a/n and b/n one bit a time, until their bits differ. */ + for (;;) + { + ++result; + if (a >= n) + { /* Both quotient bits are now 1. */ + eassume (b >= a); + a -= n; + b -= n; + } + else if (b >= n) + { /* a/n bit is 0 and b/n bit is 1. */ + break; + } /* Otherwise both quotient bits are 0. */ + eassume (a < b && b < n); + a <<= 1; + b <<= 1; + } + return result; +} + + +/* Update the state upon identifying a run of length N2. If there's + already a stretch on the stack, apply the "powersort" merge + strategy: compute the topmost stretch's "power" (depth in a + conceptual binary merge tree) and merge adjacent runs on the stack + with greater power. */ + +static void +found_new_run (merge_state *ms, const ptrdiff_t n2) +{ + eassume (ms != NULL); + if (ms->n) + { + eassume (ms->n > 0); + struct stretch *p = ms->pending; + ptrdiff_t s1 = p[ms->n - 1].base - ms->listbase; + ptrdiff_t n1 = p[ms->n - 1].len; + int power = powerloop (s1, n1, n2, ms->listlen); + while (ms->n > 1 && p[ms->n - 2].power > power) + { + merge_at (ms, ms->n - 2); + } + eassume (ms->n < 2 || p[ms->n - 2].power < power); + p[ms->n - 1].power = power; + } +} + + +/* Unconditionally merge all stretches on the stack until only one + remains. */ + +static void +merge_force_collapse (merge_state *ms) +{ + struct stretch *p = ms->pending; + + eassume (ms != NULL); + while (ms->n > 1) + { + ptrdiff_t n = ms->n - 2; + if (n > 0 && p[n - 1].len < p[n + 1].len) + --n; + merge_at (ms, n); + } +} + + +/* Compute a good value for the minimum run length; natural runs + shorter than this are boosted artificially via binary insertion. + + If N < 64, return N (it's too small to bother with fancy stuff). + Otherwise if N is an exact power of 2, return 32. Finally, return + an int k, 32 <= k <= 64, such that N/k is close to, but strictly + less than, an exact power of 2. */ + +static ptrdiff_t +merge_compute_minrun (ptrdiff_t n) +{ + ptrdiff_t r = 0; /* r will become 1 if any non-zero bits are + shifted off. */ + + eassume (n >= 0); + while (n >= 64) + { + r |= n & 1; + n >>= 1; + } + return n + r; +} + + +static void +reverse_vector (Lisp_Object *s, const ptrdiff_t n) +{ + for (ptrdiff_t i = 0; i < n >> 1; i++) + { + Lisp_Object tem = s[i]; + s[i] = s[n - i - 1]; + s[n - i - 1] = tem; + } +} + +/* Sort the array SEQ with LENGTH elements in the order determined by + PREDICATE. */ + +void +tim_sort (Lisp_Object predicate, Lisp_Object *seq, const ptrdiff_t length) +{ + if (SYMBOLP (predicate)) + { + /* Attempt to resolve the function as far as possible ahead of time, + to avoid having to do it for each call. */ + Lisp_Object fun = XSYMBOL (predicate)->u.s.function; + if (SYMBOLP (fun)) + /* Function was an alias; use slow-path resolution. */ + fun = indirect_function (fun); + /* Don't resolve to an autoload spec; that would be very slow. */ + if (!NILP (fun) && !(CONSP (fun) && EQ (XCAR (fun), Qautoload))) + predicate = fun; + } + + merge_state ms; + Lisp_Object *lo = seq; + + merge_init (&ms, length, lo, predicate); + + /* March over the array once, left to right, finding natural runs, + and extending short natural runs to minrun elements. */ + const ptrdiff_t minrun = merge_compute_minrun (length); + ptrdiff_t nremaining = length; + do { + bool descending; + + /* Identify the next run. */ + ptrdiff_t n = count_run (&ms, lo, lo + nremaining, &descending); + if (descending) + reverse_vector (lo, n); + /* If the run is short, extend it to min(minrun, nremaining). */ + if (n < minrun) + { + const ptrdiff_t force = nremaining <= minrun ? + nremaining : minrun; + binarysort (&ms, lo, lo + force, lo + n); + n = force; + } + eassume (ms.n == 0 || ms.pending[ms.n - 1].base + + ms.pending[ms.n - 1].len == lo); + found_new_run (&ms, n); + /* Push the new run on to the stack. */ + eassume (ms.n < MAX_MERGE_PENDING); + ms.pending[ms.n].base = lo; + ms.pending[ms.n].len = n; + ++ms.n; + /* Advance to find the next run. */ + lo += n; + nremaining -= n; + } while (nremaining); + + merge_force_collapse (&ms); + eassume (ms.n == 1); + eassume (ms.pending[0].len == length); + lo = ms.pending[0].base; + + if (ms.a != ms.temparray) + unbind_to (ms.count, Qnil); +} |