summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Alexis Hildebrandt <afh@surryhill.net> 2022-10-01 20:25:36 +0200
committer: Martin Michlmayr <tbm@cyrius.com> 2022-10-02 10:19:52 +0800
commit: 4be0fc082c7d3a03f2b66fa4ffb9dbfb3d606478 (patch)
tree: 1aafabcf6096a92701f22b1150859869632e66fa
parent: cccb827886773066665e99df4a4ecec4ab5bda14 (diff)
download: fork-ledger-4be0fc082c7d3a03f2b66fa4ffb9dbfb3d606478.tar.gz
fork-ledger-4be0fc082c7d3a03f2b66fa4ffb9dbfb3d606478.tar.bz2
fork-ledger-4be0fc082c7d3a03f2b66fa4ffb9dbfb3d606478.zip
Fix handling of UCS-1 encoded strings
Add test to python/demo.py
-rwxr-xr-x python/demo.py 5
-rw-r--r-- src/py_utils.cc 23
2 files changed, 16 insertions, 12 deletions
diff --git a/python/demo.py b/python/demo.py
index e57af05a..1230e005 100755
--- a/python/demo.py
+++ b/python/demo.py
@@ -58,6 +58,9 @@ usd = comms.find_or_create('$')
eur = comms.find_or_create('EUR')
xcd = comms.find_or_create('XCD')
+# Tests currency symbols encoded with UCS-1. For details see #2132.
+xxx = comms.find_or_create('¤')
+
assert not comms.find('CAD')
assert not comms.has_key('CAD')
assert not 'CAD' in comms
@@ -79,7 +82,7 @@ comms.european_by_default = True
# don't need to worry about them, but they'll show up if you examine all the
# keys in the commodities dict.
-assertEqual([u'', u'$', u'%', u'EUR', u'XCD', u'h', u'm', u's'],
+assertEqual([u'', u'$', u'%', u'EUR', u'XCD', u'h', u'm', u's', u'¤'],
sorted(comms.keys()))
# All the styles of dict iteration are supported:
diff --git a/src/py_utils.cc b/src/py_utils.cc
index cf416e08..d1413fec 100644
--- a/src/py_utils.cc
+++ b/src/py_utils.cc
@@ -136,32 +136,33 @@ struct string_from_python
#if PY_MINOR_VERSION < 12
PyUnicode_READY(obj_ptr);
#endif
- const char* value;
switch (PyUnicode_KIND(obj_ptr)) {
- case PyUnicode_1BYTE_KIND:
- value = (const char*)PyUnicode_1BYTE_DATA(obj_ptr);
+ case PyUnicode_1BYTE_KIND: {
+ Py_UCS1* value = PyUnicode_1BYTE_DATA(obj_ptr);
+ if (value == 0) throw_error_already_set();
str = std::string(value);
- break;
+ } break;
#if PY_MINOR_VERSION < 12 && Py_UNICODE_SIZE == 2
case PyUnicode_WCHAR_KIND:
#endif
- case PyUnicode_2BYTE_KIND:
- value = (const char*)PyUnicode_2BYTE_DATA(obj_ptr);
+ case PyUnicode_2BYTE_KIND: {
+ Py_UCS2* value = PyUnicode_2BYTE_DATA(obj_ptr);
+ if (value == 0) throw_error_already_set();
utf8::unchecked::utf16to8(value, value + size, std::back_inserter(str));
- break;
+ } break;
#if PY_MINOR_VERSION < 12 && Py_UNICODE_SIZE == 4
case PyUnicode_WCHAR_KIND:
#endif
- case PyUnicode_4BYTE_KIND:
- value = (const char*)PyUnicode_4BYTE_DATA(obj_ptr);
+ case PyUnicode_4BYTE_KIND: {
+ Py_UCS4* value = PyUnicode_4BYTE_DATA(obj_ptr);
+ if (value == 0) throw_error_already_set();
utf8::unchecked::utf32to8(value, value + size, std::back_inserter(str));
- break;
+ } break;
default:
assert("PyUnicode_KIND returned an unexpected kind" == NULL);
}
#endif // PY_MAJOR_VERSION
- if (value == 0) throw_error_already_set();
void* storage =
reinterpret_cast<converter::rvalue_from_python_storage<string> *>
(data)->storage.bytes;