From 4be0fc082c7d3a03f2b66fa4ffb9dbfb3d606478 Mon Sep 17 00:00:00 2001 From: Alexis Hildebrandt Date: Sat, 1 Oct 2022 20:25:36 +0200 Subject: Fix handling of UCS-1 encoded strings Add test to python/demo.py --- src/py_utils.cc | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'src/py_utils.cc') diff --git a/src/py_utils.cc b/src/py_utils.cc index cf416e08..d1413fec 100644 --- a/src/py_utils.cc +++ b/src/py_utils.cc @@ -136,32 +136,33 @@ struct string_from_python #if PY_MINOR_VERSION < 12 PyUnicode_READY(obj_ptr); #endif - const char* value; switch (PyUnicode_KIND(obj_ptr)) { - case PyUnicode_1BYTE_KIND: - value = (const char*)PyUnicode_1BYTE_DATA(obj_ptr); + case PyUnicode_1BYTE_KIND: { + Py_UCS1* value = PyUnicode_1BYTE_DATA(obj_ptr); + if (value == 0) throw_error_already_set(); str = std::string(value); - break; + } break; #if PY_MINOR_VERSION < 12 && Py_UNICODE_SIZE == 2 case PyUnicode_WCHAR_KIND: #endif - case PyUnicode_2BYTE_KIND: - value = (const char*)PyUnicode_2BYTE_DATA(obj_ptr); + case PyUnicode_2BYTE_KIND: { + Py_UCS2* value = PyUnicode_2BYTE_DATA(obj_ptr); + if (value == 0) throw_error_already_set(); utf8::unchecked::utf16to8(value, value + size, std::back_inserter(str)); - break; + } break; #if PY_MINOR_VERSION < 12 && Py_UNICODE_SIZE == 4 case PyUnicode_WCHAR_KIND: #endif - case PyUnicode_4BYTE_KIND: - value = (const char*)PyUnicode_4BYTE_DATA(obj_ptr); + case PyUnicode_4BYTE_KIND: { + Py_UCS4* value = PyUnicode_4BYTE_DATA(obj_ptr); + if (value == 0) throw_error_already_set(); utf8::unchecked::utf32to8(value, value + size, std::back_inserter(str)); - break; + } break; default: assert("PyUnicode_KIND returned an unexpected kind" == NULL); } #endif // PY_MAJOR_VERSION - if (value == 0) throw_error_already_set(); void* storage = reinterpret_cast<converter::rvalue_from_python_storage<string> *> (data)->storage.bytes; -- cgit v1.2.3