diff options
author | Mattias Engdegård <mattiase@acm.org> | 2024-03-31 15:00:00 +0200 |
---|---|---|
committer | Mattias Engdegård <mattiase@acm.org> | 2024-04-01 10:41:46 +0200 |
commit | 734bd005aa0fa955cf1a46d3a60a4d6ef5e7e3d1 (patch) | |
tree | 28e48c30e5916247065e8e66d9a80d9991ec1c2b /test | |
parent | f178a6d8006f1e8afe06bb71d0a413622d73f131 (diff) | |
download | emacs-734bd005aa0fa955cf1a46d3a60a4d6ef5e7e3d1.tar.gz emacs-734bd005aa0fa955cf1a46d3a60a4d6ef5e7e3d1.tar.bz2 emacs-734bd005aa0fa955cf1a46d3a60a4d6ef5e7e3d1.zip |
Faster JSON parsing
Speed up JSON parsing substantially by only UTF-8-parsing string
literals and only exactly once. Previously, json-parse-string always
first parsed the entire input and copied it to a new string, and then
validated each string literal twice.
We no longer create an extra new string when interning an alist key,
nor do we garble plist keys with Unicode characters.
* src/lread.c (intern_c_multibyte): New.
* src/json.c (json_encode): Remove.
(utf8_error): New.
(json_parse_string): Faster and more careful UTF-8 decoding.
Create and return a new multibyte string or symbol without extra
decoding. All callers adapted.
(Fjson_parse_string): Skip expensive input pre-decoding.
* test/src/json-tests.el (json-parse-string/object-unicode-keys)
(json-parse-string/short): New.
(json-parse-string/string, json-parse-string/invalid-unicode):
Adapt tests.
* etc/NEWS: Mentioned change in errors.
Diffstat (limited to 'test')
-rw-r--r-- | test/src/json-tests.el | 69 |
1 files changed, 50 insertions, 19 deletions
diff --git a/test/src/json-tests.el b/test/src/json-tests.el index fb2384d4a8d..a1bafadaa87 100644 --- a/test/src/json-tests.el +++ b/test/src/json-tests.el @@ -25,6 +25,7 @@ (require 'cl-lib) (require 'map) +(require 'subr-x) (declare-function json-serialize "json.c" (object &rest args)) (declare-function json-insert "json.c" (object &rest args)) @@ -155,6 +156,9 @@ ) (ert-deftest json-parse-string/object () + :expected-result :failed + ;; FIXME: This currently fails. Should the parser deduplicate keys? + ;; Never, always, or for alist and plist only? (let ((input "{ \"abc\" : [1, 2, true], \"def\" : null, \"abc\" : [9, false] }\n")) (let ((actual (json-parse-string input))) @@ -167,6 +171,15 @@ (should (equal (json-parse-string input :object-type 'plist) '(:abc [9 :false] :def :null))))) +(ert-deftest json-parse-string/object-unicode-keys () + (let ((input "{\"é\":1,\"☃\":2,\"𐌐\":3}")) + (let ((actual (json-parse-string input))) + (should (equal (sort (hash-table-keys actual)) '("é" "☃" "𐌐")))) + (should (equal (json-parse-string input :object-type 'alist) + '((é . 1) (☃ . 2) (𐌐 . 3)))) + (should (equal (json-parse-string input :object-type 'plist) + '(:é 1 :☃ 2 :𐌐 3))))) + (ert-deftest json-parse-string/array () (let ((input "[\"a\", 1, [\"b\", 2]]")) (should (equal (json-parse-string input) @@ -182,8 +195,8 @@ ["\nasdфывfgh\t"])) (should (equal (json-parse-string "[\"\\uD834\\uDD1E\"]") ["\U0001D11E"])) (should-error (json-parse-string "foo") :type 'json-parse-error) - ;; FIXME: Is this the right behavior? - (should (equal (json-parse-string "[\"\u00C4\xC3\x84\"]") ["\u00C4\u00C4"]))) + (should-error (json-parse-string "[\"\u00C4\xC3\x84\"]") + :type 'json-utf8-decode-error)) (ert-deftest json-serialize/string () (should (equal (json-serialize ["foo"]) "[\"foo\"]")) @@ -201,9 +214,23 @@ (should-error (json-serialize ["u\xCCv"]) :type 'wrong-type-argument) (should-error (json-serialize ["u\u00C4\xCCv"]) :type 'wrong-type-argument)) +(ert-deftest json-parse-string/short () + :expected-result :failed + (should-error (json-parse-string "") :type 'json-end-of-file) + (should-error (json-parse-string " ") :type 'json-end-of-file) + ;; BUG: currently results in `json-end-of-file' for short non-empty inputs. + (dolist (s '("a" "ab" "abc" "abcd" + "t" "tr" "tru" "truE" "truee" + "n" "nu" "nul" "nulL" "nulll" + "f" "fa" "fal" "fals" "falsE" "falsee")) + (condition-case err + (json-parse-string s) + (error + (should (eq (car err) 'json-parse-error))) + (:success (error "parsing %S should fail" s))))) + (ert-deftest json-parse-string/null () - (should-error (json-parse-string "\x00") :type 'wrong-type-argument) - (should (json-parse-string "[\"a\\u0000b\"]")) + (should (equal (json-parse-string "[\"a\\u0000b\"]") ["a\0b"])) (let* ((string "{\"foo\":\"this is a string including a literal \\u0000\"}") (data (json-parse-string string))) (should (hash-table-p data)) @@ -214,30 +241,34 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt. Test with both unibyte and multibyte strings." ;; Invalid UTF-8 code unit sequences. - (should-error (json-parse-string "[\"\x80\"]") :type 'json-parse-error) - (should-error (json-parse-string "[\"\u00C4\x80\"]") :type 'json-parse-error) - (should-error (json-parse-string "[\"\xBF\"]") :type 'json-parse-error) - (should-error (json-parse-string "[\"\u00C4\xBF\"]") :type 'json-parse-error) - (should-error (json-parse-string "[\"\xFE\"]") :type 'json-parse-error) - (should-error (json-parse-string "[\"\u00C4\xFE\"]") :type 'json-parse-error) - (should-error (json-parse-string "[\"\xC0\xAF\"]") :type 'json-parse-error) + (should-error (json-parse-string "[\"\x80\"]") :type 'json-utf8-decode-error) + (should-error (json-parse-string "[\"\u00C4\x80\"]") + :type 'json-utf8-decode-error) + (should-error (json-parse-string "[\"\xBF\"]") :type 'json-utf8-decode-error) + (should-error (json-parse-string "[\"\u00C4\xBF\"]") + :type 'json-utf8-decode-error) + (should-error (json-parse-string "[\"\xFE\"]") :type 'json-utf8-decode-error) + (should-error (json-parse-string "[\"\u00C4\xFE\"]") + :type 'json-utf8-decode-error) + (should-error (json-parse-string "[\"\xC0\xAF\"]") + :type 'json-utf8-decode-error) (should-error (json-parse-string "[\"\u00C4\xC0\xAF\"]") - :type 'json-parse-error) + :type 'json-utf8-decode-error) (should-error (json-parse-string "[\"\u00C4\xC0\x80\"]") - :type 'json-parse-error) + :type 'json-utf8-decode-error) ;; Surrogates. (should-error (json-parse-string "[\"\uDB7F\"]") - :type 'json-parse-error) + :type 'json-utf8-decode-error) (should-error (json-parse-string "[\"\xED\xAD\xBF\"]") - :type 'json-parse-error) + :type 'json-utf8-decode-error) (should-error (json-parse-string "[\"\u00C4\xED\xAD\xBF\"]") - :type 'json-parse-error) + :type 'json-utf8-decode-error) (should-error (json-parse-string "[\"\uDB7F\uDFFF\"]") - :type 'json-parse-error) + :type 'json-utf8-decode-error) (should-error (json-parse-string "[\"\xED\xAD\xBF\xED\xBF\xBF\"]") - :type 'json-parse-error) + :type 'json-utf8-decode-error) (should-error (json-parse-string "[\"\u00C4\xED\xAD\xBF\xED\xBF\xBF\"]") - :type 'json-parse-error)) + :type 'json-utf8-decode-error)) (ert-deftest json-parse-string/incomplete () (should-error (json-parse-string "[123") :type 'json-end-of-file)) |