summaryrefslogtreecommitdiff
path: root/lisp/emacs-lisp/radix-tree.el
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/emacs-lisp/radix-tree.el')
-rw-r--r--lisp/emacs-lisp/radix-tree.el188
1 files changed, 188 insertions, 0 deletions
diff --git a/lisp/emacs-lisp/radix-tree.el b/lisp/emacs-lisp/radix-tree.el
new file mode 100644
index 00000000000..a6984b8034c
--- /dev/null
+++ b/lisp/emacs-lisp/radix-tree.el
@@ -0,0 +1,188 @@
+;;; radix-tree.el --- A simple library of radix trees -*- lexical-binding: t; -*-
+
+;; Copyright (C) 2016 Free Software Foundation, Inc.
+
+;; Author: Stefan Monnier <monnier@iro.umontreal.ca>
+;; Keywords:
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; There are many different options for how to represent radix trees
+;; in Elisp. Here I chose a very simple one. A radix-tree can be either:
+;; - a node, of the form ((PREFIX . PTREE) . RTREE) where PREFIX is a string
+;; meaning that everything that starts with PREFIX is in PTREE,
+;; and everything else in RTREE. It also has the property that
+;; everything that starts with the first letter of PREFIX but not with
+;; that whole PREFIX is not in RTREE (i.e. is not in the tree at all).
+;; - anything else is taken as the value to associate with the empty string.
+;; So every node is basically an (improper) alist where each mapping applies
+;; to a different leading letter.
+;;
+;; The main downside of this representation is that the lookup operation
+;; is slower because each level of the tree is an alist rather than some kind
+;; of array, so every level's lookup is O(N) rather than O(1). We could easily
+;; solve this by using char-tables instead of alists, but that would make every
+;; level take up a lot more memory, and it would make the resulting
+;; datastructure harder to read (by a human) when printed out.
+
+;;; Code:
+
+(defun radix-tree--insert (tree key val i)
+ (pcase tree
+ (`((,prefix . ,ptree) . ,rtree)
+ (let* ((ni (+ i (length prefix)))
+ (cmp (compare-strings prefix nil nil key i ni)))
+ (if (eq t cmp)
+ (let ((nptree (radix-tree--insert ptree key val ni)))
+ `((,prefix . ,nptree) . ,rtree))
+ (let ((n (if (< cmp 0) (- -1 cmp) (- cmp 1))))
+ (if (zerop n)
+ (let ((nrtree (radix-tree--insert rtree key val i)))
+ `((,prefix . ,ptree) . ,nrtree))
+ (let* ((nprefix (substring prefix 0 n))
+ (kprefix (substring key (+ i n)))
+ (pprefix (substring prefix n))
+ (ktree (if (equal kprefix "") val
+ `((,kprefix . ,val)))))
+ `((,nprefix
+ . ((,pprefix . ,ptree) . ,ktree))
+ . ,rtree)))))))
+ (_
+ (if (= (length key) i) val
+ (let ((prefix (substring key i)))
+ `((,prefix . ,val) . ,tree))))))
+
+(defun radix-tree--remove (tree key i)
+ (pcase tree
+ (`((,prefix . ,ptree) . ,rtree)
+ (let* ((ni (+ i (length prefix)))
+ (cmp (compare-strings prefix nil nil key i ni)))
+ (if (eq t cmp)
+ (pcase (radix-tree--remove ptree key ni)
+ (`nil rtree)
+ (`((,pprefix . ,pptree))
+ `((,(concat prefix pprefix) . ,pptree) . ,rtree))
+ (nptree `((,prefix . ,nptree) . ,rtree)))
+ (let ((n (if (< cmp 0) (- -1 cmp) (- cmp 1))))
+ (if (zerop n)
+ (let ((nrtree (radix-tree--remove rtree key i)))
+ `((,prefix . ,ptree) . ,nrtree))
+ tree)))))
+ (_
+ (if (= (length key) i) nil tree))))
+
+
+(defun radix-tree--lookup (tree string i)
+ (pcase tree
+ (`((,prefix . ,ptree) . ,rtree)
+ (let* ((ni (+ i (length prefix)))
+ (cmp (compare-strings prefix nil nil string i ni)))
+ (if (eq t cmp)
+ (radix-tree--lookup ptree string ni)
+ (let ((n (if (< cmp 0) (- -1 cmp) (- cmp 1))))
+ (if (zerop n)
+ (radix-tree--lookup rtree string i)
+ (+ i n))))))
+ (val
+ (if (and val (equal (length string) i))
+ (if (integerp val) `(t . ,val) val)
+ i))))
+
+(defun radix-tree--subtree (tree string i)
+ (if (equal (length string) i) tree
+ (pcase tree
+ (`((,prefix . ,ptree) . ,rtree)
+ (let* ((ni (+ i (length prefix)))
+ (cmp (compare-strings prefix nil nil string i ni)))
+ (if (eq t cmp)
+ (radix-tree--subtree ptree string ni)
+ (let ((n (if (< cmp 0) (- -1 cmp) (- cmp 1))))
+ (cond
+ ((zerop n) (radix-tree--subtree rtree string i))
+ ((equal (+ n i) (length string))
+ (let ((nprefix (substring prefix n)))
+ `((,nprefix . ,ptree))))
+ (t nil))))))
+ (_ nil))))
+
+;;; Entry points
+
+(defconst radix-tree-empty nil
+ "The empty radix-tree.")
+
+(defun radix-tree-insert (tree key val)
+ "Insert a mapping from KEY to VAL in radix TREE."
+ (when (consp val) (setq val `(t . ,val)))
+ (if val (radix-tree--insert tree key val 0)
+ (radix-tree--remove tree key 0)))
+
+(defun radix-tree-lookup (tree key)
+ "Return the value associated to KEY in radix TREE.
+If not found, return nil."
+ (pcase (radix-tree--lookup tree key 0)
+ (`(t . ,val) val)
+ ((pred numberp) nil)
+ (val val)))
+
+(defun radix-tree-subtree (tree string)
+ "Return the subtree of TREE rooted at the prefix STRING."
+ (radix-tree--subtree tree string 0))
+
+(eval-and-compile
+ (pcase-defmacro radix-tree-leaf (vpat)
+ ;; FIXME: We'd like to use a negative pattern (not consp), but pcase
+ ;; doesn't support it. Using `atom' works but generates sub-optimal code.
+ `(or `(t . ,,vpat) (and (pred atom) ,vpat))))
+
+(defun radix-tree-iter-subtrees (tree fun)
+ "Apply FUN to every immediate subtree of radix TREE.
+FUN is called with two arguments: PREFIX and SUBTREE.
+You can test if SUBTREE is a leaf (and extract its value) with the
+pcase pattern (radix-tree-leaf PAT)."
+ (while tree
+ (pcase tree
+ (`((,prefix . ,ptree) . ,rtree)
+ (funcall fun prefix ptree)
+ (setq tree rtree))
+ (_ (funcall fun "" tree)
+ (setq tree nil)))))
+
+(defun radix-tree-iter-mappings (tree fun &optional prefix)
+ "Apply FUN to every mapping in TREE.
+FUN is called with two arguments: KEY and VAL.
+PREFIX is only used internally."
+ (radix-tree-iter-subtrees
+ tree
+ (lambda (p s)
+ (let ((nprefix (concat prefix p)))
+ (pcase s
+ ((radix-tree-leaf v) (funcall fun nprefix v))
+ (_ (radix-tree-iter-mappings s fun nprefix)))))))
+
+;; (defun radix-tree->alist (tree)
+;; (let ((al nil))
+;; (radix-tree-iter-mappings tree (lambda (p v) (push (cons p v) al)))
+;; al))
+
+(defun radix-tree-count (tree)
+ (let ((i 0))
+ (radix-tree-iter-mappings tree (lambda (_ _) (setq i (1+ i))))
+ i))
+
+(provide 'radix-tree)
+;;; radix-tree.el ends here