From 62a544551af64be153824a718d873dd1f34b4cb0 Mon Sep 17 00:00:00 2001
From: Alexis Hildebrandt <afh@surryhill.net>
Date: Tue, 5 Jul 2022 15:01:51 +0200
Subject: Fix use of deprecated Python legacy Unicode API

replacing it with APIs introduced in Python 3.3 to ensure ledger's
Python bindings continue to function when the legacy Unicode API is
removed in Python 3.12.

For details see https://docs.python.org/3.10/c-api/unicode.html
---
 src/py_utils.cc | 70 ++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 52 insertions(+), 18 deletions(-)

(limited to 'src/py_utils.cc')
diff --git a/src/py_utils.cc b/src/py_utils.cc
index fee1d659..704237d6 100644
--- a/src/py_utils.cc
+++ b/src/py_utils.cc
@@ -110,31 +110,65 @@ struct string_from_python
                         (data)->storage.bytes;
       new (storage) string(value);
       data->convertible = storage;
-    } else {
+      return;
+    }
 #endif
-      VERIFY(PyUnicode_Check(obj_ptr));
-
-      Py_ssize_t size = PyUnicode_GET_SIZE(obj_ptr);
-      const Py_UNICODE* value = PyUnicode_AS_UNICODE(obj_ptr);
+    VERIFY(PyUnicode_Check(obj_ptr));
 
-      string str;
+    string str;
+#if PY_MAJOR_VERSION < 3
+    Py_ssize_t size = PyUnicode_GET_SIZE(obj_ptr);
+    const Py_UNICODE* value = PyUnicode_AS_UNICODE(obj_ptr);
 #if Py_UNICODE_SIZE == 2 // UTF-16
-        utf8::unchecked::utf16to8(value, value + size, std::back_inserter(str));
+    utf8::unchecked::utf16to8(value, value + size, std::back_inserter(str));
 #elif Py_UNICODE_SIZE == 4 // UTF-32
-        utf8::unchecked::utf32to8(value, value + size, std::back_inserter(str));
+    utf8::unchecked::utf32to8(value, value + size, std::back_inserter(str));
+#else
+    assert("Py_UNICODE has an unexpected size" == NULL);
+#endif // Py_UNICODE_SIZE
+#else // PY_MAJOR_VERSION >= 3
+    Py_ssize_t size =
+#if PY_MINOR_VERSION >= 3
+      PyUnicode_GET_LENGTH(obj_ptr);
 #else
-        assert("Py_UNICODE has an unexpected size" == NULL);
+      PyUnicode_GET_SIZE(obj_ptr);
 #endif
-
-      if (value == 0) throw_error_already_set();
-      void* storage =
-        reinterpret_cast<converter::rvalue_from_python_storage<string> *>
-                        (data)->storage.bytes;
-      new (storage) string(str);
-      data->convertible = storage;
-#if PY_MAJOR_VERSION < 3
-    }
+#if PY_MINOR_VERSION < 12
+    PyUnicode_READY(obj_ptr);
+#endif
+    const char* value;
+    switch (PyUnicode_KIND(obj_ptr)) {
+      case PyUnicode_1BYTE_KIND:
+        value = (const char*)PyUnicode_1BYTE_DATA(obj_ptr);
+        // FIXME: It seems wrong to use `utf16to8` on 1BYTE_DATA, yet without this call
+        // the tests fail with: libc++abi: terminating with uncaught exception of type int
+        utf8::unchecked::utf16to8(value, value + size, std::back_inserter(str));
+        break;
+#if PY_MINOR_VERSION < 12 && Py_UNICODE_SIZE == 2
+      case PyUnicode_WCHAR_KIND:
+#endif
+      case PyUnicode_2BYTE_KIND:
+        value = (const char*)PyUnicode_2BYTE_DATA(obj_ptr);
+        utf8::unchecked::utf16to8(value, value + size, std::back_inserter(str));
+        break;
+#if PY_MINOR_VERSION < 12 && Py_UNICODE_SIZE == 4
+      case PyUnicode_WCHAR_KIND:
 #endif
+      case PyUnicode_4BYTE_KIND:
+        value = (const char*)PyUnicode_4BYTE_DATA(obj_ptr);
+        utf8::unchecked::utf32to8(value, value + size, std::back_inserter(str));
+        break;
+      default:
+        assert("PyUnicode_KIND returned an unexpected kind" == NULL);
+    }
+#endif // PY_MAJOR_VERSION
+
+    if (value == 0) throw_error_already_set();
+    void* storage =
+      reinterpret_cast<converter::rvalue_from_python_storage<string> *>
+                      (data)->storage.bytes;
+    new (storage) string(str);
+    data->convertible = storage;
   }
 };
 
-- 
cgit v1.2.3