summaryrefslogtreecommitdiff
path: root/lisp/emacs-lisp/bindat.el
diff options
context:
space:
mode:
Diffstat (limited to 'lisp/emacs-lisp/bindat.el')
-rw-r--r--lisp/emacs-lisp/bindat.el567
1 files changed, 416 insertions, 151 deletions
diff --git a/lisp/emacs-lisp/bindat.el b/lisp/emacs-lisp/bindat.el
index 830e61f8516..adf2d672849 100644
--- a/lisp/emacs-lisp/bindat.el
+++ b/lisp/emacs-lisp/bindat.el
@@ -62,39 +62,40 @@
;; struct data item[/* items */];
;; };
;;
-;; The corresponding Lisp bindat specification looks like this:
+;; The corresponding Lisp bindat specification could look like this:
+;;
+;; (bindat-defmacro ip () '(vec 4 byte))
;;
;; (setq header-bindat-spec
-;; (bindat-spec
+;; (bindat-type
;; (dest-ip ip)
;; (src-ip ip)
-;; (dest-port u16)
-;; (src-port u16)))
+;; (dest-port uint 16)
+;; (src-port uint 16)))
;;
;; (setq data-bindat-spec
-;; (bindat-spec
+;; (bindat-type
;; (type u8)
;; (opcode u8)
-;; (length u32r) ;; little endian order
+;; (length uintr 32) ;; little endian order
;; (id strz 8)
-;; (data vec (length))
-;; (align 4)))
+;; (data vec length)
+;; (_ align 4)))
;;
;; (setq packet-bindat-spec
-;; (bindat-spec
-;; (header struct header-bindat-spec)
-;; (items u8)
-;; (fill 3)
-;; (item repeat (items)
-;; (struct data-bindat-spec))))
-;;
+;; (bindat-type
+;; (header type header-bindat-spec)
+;; (nitems u8)
+;; (_ fill 3)
+;; (items repeat nitems type data-bindat-spec)))
;;
;; A binary data representation may look like
;; [ 192 168 1 100 192 168 1 101 01 28 21 32 2 0 0 0
;; 2 3 5 0 ?A ?B ?C ?D ?E ?F 0 0 1 2 3 4 5 0 0 0
;; 1 4 7 0 ?B ?C ?D ?E ?F ?G 0 0 6 7 8 9 10 11 12 0 ]
;;
-;; The corresponding decoded structure looks like
+;; The corresponding decoded structure returned by `bindat-unpack' (or taken
+;; by `bindat-pack') looks like:
;;
;; ((header
;; (dest-ip . [192 168 1 100])
@@ -114,90 +115,24 @@
;; (type . 1))))
;;
;; To access a specific value in this structure, use the function
-;; bindat-get-field with the structure as first arg followed by a list
+;; `bindat-get-field' with the structure as first arg followed by a list
;; of field names and array indexes, e.g. using the data above,
;; (bindat-get-field decoded-structure 'item 1 'id)
;; returns "BCDEFG".
-;; Binary Data Structure Specification Format
-;; ------------------------------------------
-
-;; We recommend using names that end in `-bindat-spec'; such names
-;; are recognized automatically as "risky" variables.
-
-;; The data specification is formatted as follows:
-
-;; SPEC ::= ( ITEM... )
-
-;; ITEM ::= ( FIELD TYPE )
-;; | ( [FIELD] eval FORM ) -- eval FORM for side-effect only
-;; | ( [FIELD] fill LEN ) -- skip LEN bytes
-;; | ( [FIELD] align LEN ) -- skip to next multiple of LEN bytes
-;; | ( [FIELD] struct SPEC_NAME )
-;; | ( [FIELD] union TAG_VAL (TAG SPEC)... [(t SPEC)] )
-;; | ( FIELD repeat ARG ITEM... )
-
-;; -- In (eval EXPR), the value of the last field is available in
-;; the dynamically bound variable `last' and all the previous
-;; ones in the variable `struct'.
-
-;; TYPE ::= ( eval EXPR ) -- interpret result as TYPE
-;; | u8 | byte -- length 1
-;; | u16 | word | short -- length 2, network byte order
-;; | u24 -- 3-byte value
-;; | u32 | dword | long -- length 4, network byte order
-;; | u64 -- length 8, network byte order
-;; | u16r | u24r | u32r | u64r - little endian byte order.
-;; | str LEN -- LEN byte string
-;; | strz LEN -- LEN byte (zero-terminated) string
-;; | vec LEN [TYPE] -- vector of LEN items of TYPE (default: u8)
-;; | ip -- 4 byte vector
-;; | bits LEN -- bit vector using LEN bytes.
-;;
-;; -- Example: `bits 2' will unpack 0x28 0x1c to (2 3 4 11 13)
-;; and 0x1c 0x28 to (3 5 10 11 12).
-
-;; FIELD ::= ( eval EXPR ) -- use result as NAME
-;; | NAME
-
-;; LEN ::= ARG
-;; | <omitted> | nil -- LEN = 1
-
-
-;; TAG_VAL ::= ARG
-
-;; TAG ::= LISP_CONSTANT
-;; | ( eval EXPR ) -- return non-nil if tag match;
-;; current TAG_VAL in `tag'.
-
-;; ARG ::= ( eval EXPR ) -- interpret result as ARG
-;; | INTEGER_CONSTANT
-;; | DEREF
-
-;; DEREF ::= ( [NAME | INTEGER]... ) -- Field NAME or Array index relative
-;; to current structure spec.
-;; -- see bindat-get-field
-
-;; A `union' specification
-;; ([FIELD] union TAG_VAL (TAG SPEC) ... [(t SPEC)])
-;; is interpreted by evalling TAG_VAL and then comparing that to
-;; each TAG using equal; if a match is found, the corresponding SPEC
-;; is used.
-;; If TAG is a form (eval EXPR), EXPR is eval'ed with `tag' bound to the
-;; value of TAG_VAL; the corresponding SPEC is used if the result is non-nil.
-;; Finally, if TAG is t, the corresponding SPEC is used unconditionally.
-;;
-;; An `eval' specification
-;; ([FIELD] eval FORM)
-;; is interpreted by evalling FORM for its side effects only.
-;; If FIELD is specified, the value is bound to that field.
-;; The FORM may access and update `bindat-raw' and `bindat-idx' (see `bindat-unpack').
-
;;; Code:
;; Helper functions for structure unpacking.
;; Relies on dynamic binding of `bindat-raw' and `bindat-idx'.
+(eval-when-compile (require 'cl-lib))
+(eval-when-compile (require 'subr-x)) ;For `named-let'.
+
+(cl-defstruct (bindat--type
+ (:predicate nil)
+ (:constructor bindat--make))
+ le ue pe)
+
(defvar bindat-raw)
(defvar bindat-idx)
@@ -215,9 +150,6 @@
(defun bindat--unpack-u32 ()
(logior (ash (bindat--unpack-u16) 16) (bindat--unpack-u16)))
-(defun bindat--unpack-u64 ()
- (logior (ash (bindat--unpack-u32) 32) (bindat--unpack-u32)))
-
(defun bindat--unpack-u16r ()
(logior (bindat--unpack-u8) (ash (bindat--unpack-u8) 8)))
@@ -227,9 +159,6 @@
(defun bindat--unpack-u32r ()
(logior (bindat--unpack-u16r) (ash (bindat--unpack-u16r) 16)))
-(defun bindat--unpack-u64r ()
- (logior (bindat--unpack-u32r) (ash (bindat--unpack-u32r) 32)))
-
(defun bindat--unpack-str (len)
(let ((s (substring bindat-raw bindat-idx (+ bindat-idx len))))
(setq bindat-idx (+ bindat-idx len))
@@ -266,11 +195,9 @@
((or 'u16 'word 'short) (bindat--unpack-u16))
('u24 (bindat--unpack-u24))
((or 'u32 'dword 'long) (bindat--unpack-u32))
- ('u64 (bindat--unpack-u64))
('u16r (bindat--unpack-u16r))
('u24r (bindat--unpack-u24r))
('u32r (bindat--unpack-u32r))
- ('u64r (bindat--unpack-u64r))
('bits (bindat--unpack-bits len))
('str (bindat--unpack-str len))
('strz (bindat--unpack-strz len))
@@ -290,6 +217,11 @@
(* len (/ (+ n (1- len)) len))) ;Isn't there a simpler way?
(defun bindat--unpack-group (spec)
+ ;; FIXME: Introduce a new primitive so we can mark `bindat-unpack'
+ ;; as obsolete (maybe that primitive should be a macro which takes
+ ;; a bindat type *expression* as argument).
+ (if (cl-typep spec 'bindat--type)
+ (funcall (bindat--type-ue spec))
(with-suppressed-warnings ((lexical struct last))
(defvar struct) (defvar last))
(let (struct last)
@@ -350,7 +282,7 @@
(setq struct (if field
(cons (cons field data) struct)
(append data struct))))))
- struct))
+ struct)))
(defun bindat-unpack (spec raw &optional idx)
"Return structured data according to SPEC for binary data in RAW.
@@ -383,10 +315,11 @@ e.g. corresponding to STRUCT.FIELD1[INDEX2].FIELD3..."
(u16 . 2) (u16r . 2) (word . 2) (short . 2)
(u24 . 3) (u24r . 3)
(u32 . 4) (u32r . 4) (dword . 4) (long . 4)
- (u64 . 8) (u64r . 8)
(ip . 4)))
(defun bindat--length-group (struct spec)
+ (if (cl-typep spec 'bindat--type)
+ (funcall (bindat--type-le spec) struct)
(with-suppressed-warnings ((lexical struct last))
(defvar struct) (defvar last))
(let ((struct struct) last)
@@ -452,7 +385,7 @@ e.g. corresponding to STRUCT.FIELD1[INDEX2].FIELD3..."
(setq len (* len (cdr type))))
(if field
(setq last (bindat-get-field struct field)))
- (setq bindat-idx (+ bindat-idx len))))))))
+ (setq bindat-idx (+ bindat-idx len)))))))))
(defun bindat-length (spec struct)
"Calculate `bindat-raw' length for STRUCT according to bindat SPEC."
@@ -529,11 +462,9 @@ e.g. corresponding to STRUCT.FIELD1[INDEX2].FIELD3..."
((or 'u16 'word 'short) (bindat--pack-u16 v))
('u24 (bindat--pack-u24 v))
((or 'u32 'dword 'long) (bindat--pack-u32 v))
- ('u64 (bindat--pack-u64 v))
('u16r (bindat--pack-u16r v))
('u24r (bindat--pack-u24r v))
('u32r (bindat--pack-u32r v))
- ('u64r (bindat--pack-u64r v))
('bits (bindat--pack-bits len v))
((or 'str 'strz) (bindat--pack-str len v))
('vec
@@ -550,6 +481,8 @@ e.g. corresponding to STRUCT.FIELD1[INDEX2].FIELD3..."
(setq bindat-idx (+ bindat-idx len)))))
(defun bindat--pack-group (struct spec)
+ (if (cl-typep spec 'bindat--type)
+ (funcall (bindat--type-pe spec) struct)
(with-suppressed-warnings ((lexical struct last))
(defvar struct) (defvar last))
(let ((struct struct) last)
@@ -607,7 +540,7 @@ e.g. corresponding to STRUCT.FIELD1[INDEX2].FIELD3..."
(_
(setq last (bindat-get-field struct field))
(bindat--pack-item last type len vectype)
- ))))))
+ )))))))
(defun bindat-pack (spec struct &optional raw idx)
"Return binary data packed according to SPEC for structured data STRUCT.
@@ -623,52 +556,6 @@ Optional fourth arg IDX is the starting offset into RAW."
(bindat--pack-group struct spec)
(if raw nil bindat-raw)))
-;;;; Debugging support
-
-(def-edebug-elem-spec 'bindat-spec '(&rest bindat-item))
-
-
-(def-edebug-elem-spec 'bindat--item-aux
- ;; Field types which can come without a field label.
- '(&or ["eval" form]
- ["fill" bindat-len]
- ["align" bindat-len]
- ["struct" form] ;A reference to another bindat-spec.
- ["union" bindat-tag-val &rest (bindat-tag bindat-spec)]))
-
-(def-edebug-elem-spec 'bindat-item
- '((&or bindat--item-aux ;Without label..
- [bindat-field ;..or with label
- &or bindat--item-aux
- ["repeat" bindat-arg bindat-spec]
- bindat-type])))
-
-(def-edebug-elem-spec 'bindat-type
- '(&or ("eval" form)
- ["str" bindat-len]
- ["strz" bindat-len]
- ["vec" bindat-len &optional bindat-type]
- ["bits" bindat-len]
- symbolp))
-
-(def-edebug-elem-spec 'bindat-field
- '(&or ("eval" form) symbolp))
-
-(def-edebug-elem-spec 'bindat-len '(&or [] "nil" bindat-arg))
-
-(def-edebug-elem-spec 'bindat-tag-val '(bindat-arg))
-
-(def-edebug-elem-spec 'bindat-tag '(&or ("eval" form) atom))
-
-(def-edebug-elem-spec 'bindat-arg
- '(&or ("eval" form) integerp (&rest symbolp integerp)))
-
-(defmacro bindat-spec (&rest fields)
- "Build the bindat spec described by FIELDS."
- (declare (indent 0) (debug (bindat-spec)))
- ;; FIXME: We should really "compile" this to a triplet of functions!
- `',fields)
-
;;;; Misc. format conversions
(defun bindat-format-vector (vect fmt sep &optional len)
@@ -697,6 +584,384 @@ The port (if any) is omitted. IP can be a string, as well."
(format "%d.%d.%d.%d"
(aref ip 0) (aref ip 1) (aref ip 2) (aref ip 3))))
+;;;; New approach based on macro-expansion
+
+;; Further improvements suggested by reading websocket.el:
+;; - Support for bit-sized fields?
+;;
+;; - Add some way to verify redundant/checksum fields's contents without
+;; having to provide a complete `:unpack-val' expression.
+;; The `:pack-val' thingy can work nicely to compute checksum fields
+;; based on previous fields's contents (without impacting or being impacted
+;; by the unpacked representation), but if we want to verify
+;; those checksums when unpacking, we have to use the :unpack-val
+;; and build the whole object by hand instead of being able to focus
+;; just on the checksum field.
+;; Maybe this could be related to `unit' type fields where we might like
+;; to make sure that the "value" we write into it is the same as the
+;; value it holds (tho those checks don't happen at the same time (pack
+;; vs unpack).
+;;
+;; - Support for packing/unpacking to/from something else than
+;; a unibyte string, e.g. from a buffer. Problems to do that are:
+;; - the `str' and `strz' types which use `substring' rather than reading
+;; one byte at a time.
+;; - the `align' and `fill' which just want to skip without reading/writing
+;; - the `pack-uint' case, which would prefer writing the LSB first.
+;; - the `align' case needs to now the current position in order to know
+;; how far to advance
+;;
+;; - Don't write triple code when the type is only ever used at a single place
+;; (e.g. to unpack).
+
+(defun bindat--unpack-uint (bitlen)
+ (let ((v 0) (bitsdone 0))
+ (while (< bitsdone bitlen)
+ (setq v (logior (ash v 8) (bindat--unpack-u8)))
+ (setq bitsdone (+ bitsdone 8)))
+ v))
+
+(defun bindat--unpack-uintr (bitlen)
+ (let ((v 0) (bitsdone 0))
+ (while (< bitsdone bitlen)
+ (setq v (logior v (ash (bindat--unpack-u8) bitsdone)))
+ (setq bitsdone (+ bitsdone 8)))
+ v))
+
+(defun bindat--pack-uint (bitlen v)
+ (let* ((len (/ bitlen 8))
+ (shift (- (* 8 (1- len)))))
+ (dotimes (_ len)
+ (bindat--pack-u8 (logand 255 (ash v shift)))
+ (setq shift (+ 8 shift)))))
+
+(defun bindat--pack-uintr (bitlen v)
+ (let* ((len (/ bitlen 8)))
+ (dotimes (_ len)
+ (bindat--pack-u8 (logand v 255))
+ (setq v (ash v -8)))))
+
+(defmacro bindat--pcase (&rest args)
+ "Like `pcase' but optimize the code under the assumption that it's exhaustive."
+ (declare (indent 1) (debug pcase))
+ `(pcase ,@args (pcase--dontcare nil)))
+
+(cl-defgeneric bindat--type (op head &rest args)
+ "Return the code for the operation OP of the Bindat type (HEAD . ARGS).
+OP can be one of: unpack', (pack VAL), or (length VAL) where VAL
+is the name of a variable that will hold the value we need to pack.")
+
+(cl-defmethod bindat--type (op (_ (eql byte)))
+ (bindat--pcase op
+ ('unpack `(bindat--unpack-u8))
+ (`(length . ,_) `(cl-incf bindat-idx 1))
+ (`(pack . ,args) `(bindat--pack-u8 . ,args))))
+
+(cl-defmethod bindat--type (op (_ (eql uint)) n)
+ (if (eq n 8) (bindat--type op 'byte)
+ (bindat--pcase op
+ ('unpack `(bindat--unpack-uint ,n))
+ (`(length . ,_) `(cl-incf bindat-idx (/ ,n 8)))
+ (`(pack . ,args) `(bindat--pack-uint ,n . ,args)))))
+
+(cl-defmethod bindat--type (op (_ (eql uintr)) n)
+ (if (eq n 8) (bindat--type op 'byte)
+ (bindat--pcase op
+ ('unpack `(bindat--unpack-uintr ,n))
+ (`(length . ,_) `(cl-incf bindat-idx (/ ,n 8)))
+ (`(pack . ,args) `(bindat--pack-uintr ,n . ,args)))))
+
+(cl-defmethod bindat--type (op (_ (eql str)) len)
+ (bindat--pcase op
+ ('unpack `(bindat--unpack-str ,len))
+ (`(length . ,_) `(cl-incf bindat-idx ,len))
+ (`(pack . ,args) `(bindat--pack-str ,len . ,args))))
+
+(cl-defmethod bindat--type (op (_ (eql strz)) len)
+ (bindat--pcase op
+ ('unpack `(bindat--unpack-strz ,len))
+ (`(length . ,_) `(cl-incf bindat-idx ,len))
+ ;; Here we don't add the terminating zero because we rely
+ ;; on the fact that `bindat-raw' was presumably initialized with
+ ;; all-zeroes before we started.
+ (`(pack . ,args) `(bindat--pack-str ,len . ,args))))
+
+(cl-defmethod bindat--type (op (_ (eql bits)) len)
+ (bindat--pcase op
+ ('unpack `(bindat--unpack-bits ,len))
+ (`(length . ,_) `(cl-incf bindat-idx ,len))
+ (`(pack . ,args) `(bindat--pack-bits ,len . ,args))))
+
+(cl-defmethod bindat--type (_op (_ (eql fill)) len)
+ `(progn (cl-incf bindat-idx ,len) nil))
+
+(cl-defmethod bindat--type (_op (_ (eql align)) len)
+ `(progn (cl-callf bindat--align bindat-idx ,len) nil))
+
+(cl-defmethod bindat--type (op (_ (eql type)) exp)
+ (bindat--pcase op
+ ('unpack `(funcall (bindat--type-ue ,exp)))
+ (`(length . ,args) `(funcall (bindat--type-le ,exp) . ,args))
+ (`(pack . ,args) `(funcall (bindat--type-pe ,exp) . ,args))))
+
+(cl-defmethod bindat--type (op (_ (eql vec)) count &rest type)
+ (unless type (setq type '(byte)))
+ (let ((fun (macroexpand-all (bindat--fun type) macroexpand-all-environment)))
+ (bindat--pcase op
+ ('unpack
+ `(let* ((bindat--len ,count)
+ (bindat--v (make-vector bindat--len 0)))
+ (dotimes (bindat--i bindat--len)
+ (aset bindat--v bindat--i (funcall ,fun)))
+ bindat--v))
+ ((and `(length . ,_)
+ ;; FIXME: Improve the pattern match to recognize more complex
+ ;; "constant" functions?
+ (let `#'(lambda (,val) (setq bindat-idx (+ bindat-idx ,len))) fun)
+ (guard (not (macroexp--fgrep `((,val)) len))))
+ ;; Optimize the case where the size of each element is constant.
+ `(cl-incf bindat-idx (* ,count ,len)))
+ ;; FIXME: It's tempting to use `(mapc (lambda (,val) ,exp) ,val)'
+ ;; which would be more efficient when `val' is a list,
+ ;; but that's only right if length of `val' is indeed `count'.
+ (`(,_ ,val)
+ `(dotimes (bindat--i ,count)
+ (funcall ,fun (elt ,val bindat--i)))))))
+
+(cl-defmethod bindat--type (op (_ (eql unit)) val)
+ (pcase op ('unpack val) (_ nil)))
+
+(cl-defmethod bindat--type (op (_ (eql struct)) &rest args)
+ (apply #'bindat--type op args))
+
+(cl-defmethod bindat--type (op (_ (eql :pack-var)) var &rest fields)
+ (unless (consp (cdr fields))
+ (error "`:pack-var VAR' needs to be followed by fields"))
+ (bindat--pcase op
+ ((or 'unpack (guard (null var)))
+ (apply #'bindat--type op fields))
+ (`(,_ ,val)
+ `(let ((,var ,val)) ,(apply #'bindat--type op fields)))))
+
+(cl-defmethod bindat--type (op (field cons) &rest fields)
+ (named-let loop
+ ((fields (cons field fields))
+ (labels ()))
+ (bindat--pcase fields
+ ('nil
+ (bindat--pcase op
+ ('unpack
+ (let ((exp ()))
+ (pcase-dolist (`(,label . ,labelvar) labels)
+ (setq exp
+ (if (eq label '_)
+ (if exp `(nconc ,labelvar ,exp) labelvar)
+ `(cons (cons ',label ,labelvar) ,exp))))
+ exp))
+ (_ nil)))
+ (`(:unpack-val ,exp)
+ ;; Make it so `:kwd nil' is the same as the absence of the keyword arg.
+ (if exp (pcase op ('unpack exp)) (loop nil labels)))
+
+ (`((,label . ,type) . ,fields)
+ (let* ((get-field-val
+ (let ((tail (memq :pack-val type)))
+ ;; FIXME: This `TYPE.. :pack EXP' syntax doesn't work well
+ ;; when TYPE is a struct (a list of fields) or with extensions
+ ;; such as allowing TYPE to be `if ...'.
+ (if tail
+ (prog1 (cadr tail)
+ (setq type (butlast type (length tail)))))))
+ (fieldvar (make-symbol (format "field%d" (length fields))))
+ (labelvar
+ (cond
+ ((eq label '_) fieldvar)
+ ((keywordp label)
+ (intern (substring (symbol-name label) 1)))
+ (t label)))
+ (field-fun (bindat--fun type))
+ (rest-exp (loop fields `((,label . ,labelvar) . ,labels))))
+ (bindat--pcase op
+ ('unpack
+ (let ((code
+ `(let ((,labelvar (funcall ,field-fun)))
+ ,rest-exp)))
+ (if (or (eq label '_) (not (assq label labels)))
+ code
+ (macroexp-warn-and-return
+ (format "Duplicate label: %S" label)
+ code))))
+ (`(,_ ,val)
+ ;; `cdr-safe' is easier to optimize (can't signal an error).
+ `(let ((,fieldvar ,(or get-field-val
+ (if (eq label '_) val
+ `(cdr-safe (assq ',label ,val))))))
+ (funcall ,field-fun ,fieldvar)
+ ,@(when rest-exp
+ `((let ,(unless (eq labelvar fieldvar)
+ `((,labelvar ,fieldvar)))
+ (ignore ,labelvar)
+ ,rest-exp))))))))
+ (_ (error "Unrecognized format in bindat fields: %S" fields)))))
+
+(def-edebug-elem-spec 'bindat-struct
+ [[&rest (symbolp bindat-type &optional ":pack-val" def-form)]
+ &optional ":unpack-val" def-form])
+
+(def-edebug-elem-spec 'bindat-type
+ '(&or ["uint" def-form]
+ ["uintr" def-form]
+ ["str" def-form]
+ ["strz" def-form]
+ ["bits" def-form]
+ ["fill" def-form]
+ ["align" def-form]
+ ["vec" def-form bindat-type]
+ ["repeat" def-form bindat-type]
+ ["type" def-form]
+ ["struct" bindat-struct]
+ ["unit" def-form]
+ [":pack-var" symbolp bindat-type]
+ symbolp ;; u8, u16, etc...
+ bindat-struct))
+
+(defmacro bindat-type (&rest type)
+ "Return the Bindat type value to pack&unpack TYPE.
+TYPE is a Bindat type expression. It can take the following forms:
+
+ uint BITLEN - Big-endian unsigned integer
+ uintr BITLEN - Little-endian unsigned integer
+ str LEN - Byte string
+ strz LEN - Zero-terminated byte-string
+ bits LEN - Bit vector (LEN is counted in bytes)
+ fill LEN - Just a filler
+ align LEN - Fill up to the next multiple of LEN bytes
+ vec COUNT TYPE - COUNT repetitions of TYPE
+ type EXP - Indirection; EXP should return a Bindat type value
+ unit EXP - 0-width type holding the value returned by EXP
+ struct FIELDS... - A composite type
+
+When the context makes it clear, the symbol `struct' can be omitted.
+A composite type is a list of FIELDS where each FIELD is of the form
+
+ (LABEL TYPE)
+
+where LABEL can be `_' if the field should not deserve a name.
+
+Composite types get normally packed/unpacked to/from alists, but this can be
+controlled in the following way:
+- If the list of fields ends with `:unpack-val EXP', then unpacking will
+ return the value of EXP (which has the previous fields in its scope).
+- If a field's TYPE is followed by `:pack-val EXP', then the value placed
+ into this field will be that returned by EXP instead of looking up the alist.
+- If the list of fields is preceded with `:pack-var VAR' then the object to
+ be packed is bound to VAR when evaluating the EXPs of `:pack-val'.
+
+All the above BITLEN, LEN, COUNT, and EXP are ELisp expressions evaluated
+in the current lexical context extended with the previous fields.
+
+TYPE can additionally be one of the Bindat type macros defined with
+`bindat-defmacro' (and listed below) or an ELisp expression which returns
+a bindat type expression."
+ (declare (indent 0) (debug (bindat-type)))
+ `(progn
+ (defvar bindat-idx)
+ (bindat--make :ue ,(bindat--toplevel 'unpack type)
+ :le ,(bindat--toplevel 'length type)
+ :pe ,(bindat--toplevel 'pack type))))
+
+(eval-and-compile
+ (defconst bindat--primitives '(byte uint uintr str strz bits fill align
+ struct type vec unit)))
+
+(eval-and-compile
+ (defvar bindat--macroenv
+ (mapcar (lambda (s) (cons s (lambda (&rest args)
+ (bindat--makefun (cons s args)))))
+ bindat--primitives)))
+
+(defmacro bindat-defmacro (name args &rest body)
+ "Define a new Bindat type as a macro."
+ (declare (indent 2) (doc-string 3) (debug (&define name sexp def-body)))
+ (let ((leaders ()))
+ (while (and (cdr body)
+ (or (stringp (car body))
+ (memq (car-safe (car body)) '(:documentation declare))))
+ (push (pop body) leaders))
+ ;; FIXME: Add support for Edebug decls to those macros.
+ `(eval-and-compile ;; Yuck! But needed to define types where you use them!
+ (setf (alist-get ',name bindat--macroenv)
+ (lambda ,args ,@(nreverse leaders)
+ (bindat--fun ,(macroexp-progn body)))))))
+
+(put 'bindat-type 'function-documentation '(bindat--make-docstring))
+(defun bindat--make-docstring ()
+ ;; Largely inspired from `pcase--make-docstring'.
+ (let* ((main (documentation (symbol-function 'bindat-type) 'raw))
+ (ud (help-split-fundoc main 'bindat-type)))
+ (require 'help-fns)
+ (declare-function help-fns--signature "help-fns")
+ (with-temp-buffer
+ (insert (or (cdr ud) main))
+ (pcase-dolist (`(,name . ,me) (reverse bindat--macroenv))
+ (unless (memq name bindat--primitives)
+ (let ((doc (documentation me 'raw)))
+ (insert "\n\n-- ")
+ (setq doc (help-fns--signature name doc me
+ (indirect-function me)
+ nil))
+ (insert "\n" (or doc "Not documented.")))))
+ (let ((combined-doc (buffer-string)))
+ (if ud (help-add-fundoc-usage combined-doc (car ud)) combined-doc)))))
+
+(bindat-defmacro u8 () "Unsigned 8bit integer." '(byte))
+(bindat-defmacro sint (bitlen r)
+ "Signed integer of size BITLEN.
+Bigendian if R is nil and little endian if not."
+ (let ((bl (make-symbol "bitlen"))
+ (max (make-symbol "max"))
+ (wrap (make-symbol "wrap")))
+ `(let* ((,bl ,bitlen)
+ (,max (ash 1 (1- ,bl)))
+ (,wrap (+ ,max ,max)))
+ (struct :pack-var v
+ (n if ,r (uintr ,bl) (uint ,bl)
+ :pack-val (if (< v 0) (+ v ,wrap) v))
+ :unpack-val (if (>= n ,max) (- n ,wrap) n)))))
+
+(bindat-defmacro repeat (count &rest type)
+ "Like `vec', but unpacks to a list rather than a vector."
+ `(:pack-var v
+ (v vec ,count ,@type :pack-val v)
+ :unpack-val (append v nil)))
+
+(defvar bindat--op nil
+ "The operation we're currently building.
+This is a simple symbol and can be one of: `unpack', `pack', or `length'.
+This is used during macroexpansion of `bindat-type' so that the
+macros know which code to generate.
+FIXME: this is closely related and very similar to the `op' argument passed
+to `bindat--type', yet it's annoyingly different.")
+
+(defun bindat--fun (type)
+ (if (or (keywordp (car type)) (consp (car type))) (cons 'struct type)
+ type))
+
+(defun bindat--makefun (type)
+ (let* ((v (make-symbol "v"))
+ (args (pcase bindat--op ('unpack ()) (_ (list v)))))
+ (pcase (apply #'bindat--type
+ (pcase bindat--op ('unpack 'unpack) (op `(,op . ,args)))
+ type)
+ (`(funcall ,f . ,(pred (equal args))) f) ;η-reduce.
+ (exp `(lambda ,args ,exp)))))
+
+(defun bindat--toplevel (op type)
+ (let* ((bindat--op op)
+ (env `(,@bindat--macroenv
+ ,@macroexpand-all-environment)))
+ (macroexpand-all (bindat--fun type) env)))
+
(provide 'bindat)
;;; bindat.el ends here