summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKuang-che Wu <kcwu@google.com>2020-06-06 12:43:46 +0800
committerMartin Michlmayr <tbm@cyrius.com>2021-01-17 10:31:00 +0800
commit2dae3bbedcdf55983d23fc90bb36111c7eb68fc7 (patch)
tree446f93d9c625f40828188f37b970e5f01fc2d368
parent0e19e3fe51c2c056a91af48e4733aa13a840fdaa (diff)
downloadfork-ledger-2dae3bbedcdf55983d23fc90bb36111c7eb68fc7.tar.gz
fork-ledger-2dae3bbedcdf55983d23fc90bb36111c7eb68fc7.tar.bz2
fork-ledger-2dae3bbedcdf55983d23fc90bb36111c7eb68fc7.zip
format_t::truncate support wide characters
also add unit tests
-rw-r--r--src/format.cc20
-rw-r--r--src/unistring.h48
-rw-r--r--test/unit/CMakeLists.txt2
-rw-r--r--test/unit/t_format.cc122
4 files changed, 181 insertions, 11 deletions
diff --git a/src/format.cc b/src/format.cc
index 5b9baa21..87168ea2 100644
--- a/src/format.cc
+++ b/src/format.cc
@@ -478,7 +478,7 @@ string format_t::truncate(const unistring& ustr,
{
assert(width < 4095);
- const std::size_t len = ustr.length();
+ const std::size_t len = ustr.width();
if (width == 0 || len <= width)
return ustr.extract();
@@ -491,14 +491,14 @@ string format_t::truncate(const unistring& ustr,
switch (style) {
case TRUNCATE_LEADING:
// This method truncates at the beginning.
- buf << ".." << ustr.extract(len - (width - 2), width - 2);
+ buf << ".." << ustr.extract_by_width(len - (width - 2), width - 2);
break;
case TRUNCATE_MIDDLE:
// This method truncates in the middle.
- buf << ustr.extract(0, (width - 2) / 2)
+ buf << ustr.extract_by_width(0, (width - 2) / 2)
<< ".."
- << ustr.extract(len - ((width - 2) / 2 + (width - 2) % 2),
+ << ustr.extract_by_width(len - ((width - 2) / 2 + (width - 2) % 2),
(width - 2) / 2 + (width - 2) % 2);
break;
@@ -545,7 +545,7 @@ string format_t::truncate(const unistring& ustr,
for (std::list<string>::iterator i = parts.begin();
i != parts.end();
i++) {
- std::size_t l = unistring(*i).length();
+ std::size_t l = unistring(*i).width();
DEBUG("format.abbrev",
"Segment " << ++index << " is " << l << " chars wide");
lens.push_back(l);
@@ -667,8 +667,8 @@ string format_t::truncate(const unistring& ustr,
}
unistring temp(*i);
- if (temp.length() > *l)
- result << temp.extract(0, *l) << ":";
+ if (temp.width() > *l)
+ result << temp.extract_by_width(0, *l) << ":";
else
result << *i << ":";
}
@@ -677,8 +677,8 @@ string format_t::truncate(const unistring& ustr,
// Even abbreviated its too big to show the last account, so
// abbreviate all but the last and truncate at the beginning.
unistring temp(result.str());
- assert(temp.length() > width - 2);
- buf << ".." << temp.extract(temp.length() - (width - 2), width - 2);
+ assert(temp.width() > width - 2);
+ buf << ".." << temp.extract_by_width(temp.width() - (width - 2), width - 2);
} else {
buf << result.str();
}
@@ -688,7 +688,7 @@ string format_t::truncate(const unistring& ustr,
case TRUNCATE_TRAILING:
// This method truncates at the end (the default).
- buf << ustr.extract(0, width - 2) << "..";
+ buf << ustr.extract_by_width(0, width - 2) << "..";
break;
}
diff --git a/src/unistring.h b/src/unistring.h
index 8cc4a9cd..87b2f904 100644
--- a/src/unistring.h
+++ b/src/unistring.h
@@ -111,6 +111,54 @@ public:
return utf8result;
}
+ std::string extract_by_width(std::string::size_type begin, // begin: offset of the first wanted column, in the units returned by mk_wcwidth
+ std::size_t len) const // len: number of columns wanted; clamped below so that begin + len <= width()
+ { // Returns UTF-8 text for columns [begin, begin + len); a character only partly inside that range is replaced by one '.' per covered column.
+ std::string utf8result;
+ std::size_t this_width = width(); // total width of the string (width() is defined outside this view)
+ std::string::size_type this_len = length(); // upper bound for the utf32chars index in the scan below
+ // Clamp the request to what the string can supply; begin itself must not lie past the end.
+ assert(begin <= this_width);
+ if (begin + len > this_width)
+ len = this_width - begin;
+ // pos = column where the current character starts; [begin_idx, end_idx) = characters copied whole; head/tail = '.' counts for characters cut by the left/right edge.
+ std::size_t pos = 0;
+ std::size_t begin_idx = 0, end_idx = 0;
+ std::size_t head = 0, tail = 0;
+ for (std::size_t idx = 0; idx < this_len; ++idx) {
+ std::size_t w = mk_wcwidth(utf32chars[idx]); // NOTE(review): if mk_wcwidth can return a negative int (e.g. for control characters), this conversion wraps -- confirm.
+ // Characters are classified by where they start: left of the range, inside it, or at/after its end (ignored).
+ if (pos < begin) {
+ if (pos + w >= begin) { // this character reaches or crosses the left edge
+ head = std::min(pos + w, begin + len) - begin; // columns of it inside the range, capped at len (0 when it ends exactly at begin)
+ begin_idx = idx + 1; // whole characters start with the next one
+ }
+ } else if (pos < begin + len) {
+ if (pos + w > begin + len) { // character straddles the right edge
+ tail = begin + len - pos; // columns of it that fall inside the range
+ end_idx = idx; // the copy stops before this character
+ }
+ if (pos + w == begin + len) { // character ends exactly on the right edge
+ tail = 0;
+ end_idx = idx + 1; // include it in the copy
+ }
+ }
+ pos += w;
+ }
+ // Assemble the result: head dots, the wholly contained characters re-encoded as UTF-8, then tail dots.
+ utf8result += std::string(head, '.');
+
+ if (begin_idx < end_idx) // nothing is copied when no character lies wholly inside the range
+ utf8::unchecked::utf32to8
+ (utf32chars.begin() + static_cast<std::string::difference_type>(begin_idx),
+ utf32chars.begin() + static_cast<std::string::difference_type>(end_idx),
+ std::back_inserter(utf8result));
+
+ utf8result += std::string(tail, '.');
+
+ return utf8result;
+ }
+
std::size_t find(const boost::uint32_t __s, std::size_t __pos = 0) const {
std::size_t idx = 0;
foreach (const boost::uint32_t& ch, utf32chars) {
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 1bd5e4b3..5570d5e1 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -8,7 +8,7 @@ endmacro(add_ledger_test _name)
include_directories(${PROJECT_SOURCE_DIR}/src)
if (BUILD_LIBRARY)
- add_executable(UtilTests t_times.cc)
+ add_executable(UtilTests t_times.cc t_format.cc)
if (HAVE_BOOST_PYTHON)
target_link_libraries(UtilTests ${Python_LIBRARIES})
endif()
diff --git a/test/unit/t_format.cc b/test/unit/t_format.cc
new file mode 100644
index 00000000..142db92c
--- /dev/null
+++ b/test/unit/t_format.cc
@@ -0,0 +1,122 @@
+#define BOOST_TEST_DYN_LINK
+
+#include <boost/test/unit_test.hpp>
+
+#include <system.hh>
+
+#include "format.h"
+
+using namespace ledger;
+
+struct format_fixture { // Fixture passed to BOOST_FIXTURE_TEST_SUITE below; puts format_t's static truncation settings into a known state
+ format_fixture() { // setup: start from trailing truncation with the "changed" flag cleared
+ format_t::default_style = format_t::TRUNCATE_TRAILING;
+ format_t::default_style_changed = false;
+ }
+ ~format_fixture() { // teardown: undo whatever style the test case assigned
+ format_t::default_style = format_t::TRUNCATE_TRAILING;
+ format_t::default_style_changed = false;
+ }
+};
+
+BOOST_FIXTURE_TEST_SUITE(format, format_fixture)
+
+BOOST_AUTO_TEST_CASE(testTruncateTrailing) // TRUNCATE_TRAILING: keep the first width - 2 columns and end the result with ".."
+{
+ format_t::default_style = format_t::TRUNCATE_TRAILING;
+ unistring str("abcd:1234:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 0, 0), "abcd:1234:ABCD"); // width 0: returned unchanged
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 99, 0), "abcd:1234:ABCD"); // limit wider than the string: unchanged
+ // ASCII input, 14 columns: each column taken off the limit drops one more character before the "..".
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 14, 0), "abcd:1234:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 13, 0), "abcd:1234:A..");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 12, 0), "abcd:1234:..");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 11, 0), "abcd:1234..");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 10, 0), "abcd:123..");
+ // Same limits with CJK text: the expectations count each of these characters as two columns, and one that only half fits shows up as an extra '.'.
+ unistring ustr("中文:中文:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 14, 0), "中文:中文:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 13, 0), "中文:中文:...");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 12, 0), "中文:中文:..");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 11, 0), "中文:中文..");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 10, 0), "中文:中...");
+}
+
+BOOST_AUTO_TEST_CASE(testTruncateMiddle) // TRUNCATE_MIDDLE: ".." replaces the middle; when width - 2 is odd the extra column goes to the trailing part
+{
+ format_t::default_style = format_t::TRUNCATE_MIDDLE;
+ unistring str("abcd:1234:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 0, 0), "abcd:1234:ABCD"); // width 0: returned unchanged
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 99, 0), "abcd:1234:ABCD"); // limit wider than the string: unchanged
+ // ASCII input, 14 columns.
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 14, 0), "abcd:1234:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 13, 0), "abcd:..4:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 12, 0), "abcd:..:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 11, 0), "abcd..:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 10, 0), "abcd..ABCD");
+ // CJK input: a two-column character cut by the trailing part's left edge contributes one extra '.' (see the width 13 case).
+ unistring ustr("中文:中文:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 14, 0), "中文:中文:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 13, 0), "中文:...:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 12, 0), "中文:..:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 11, 0), "中文..:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 10, 0), "中文..中文");
+}
+
+BOOST_AUTO_TEST_CASE(testTruncateLeading) // TRUNCATE_LEADING: ".." followed by the last width - 2 columns of the string
+{
+ format_t::default_style = format_t::TRUNCATE_LEADING;
+ unistring str("abcd:1234:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 0, 0), "abcd:1234:ABCD"); // width 0: returned unchanged
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 99, 0), "abcd:1234:ABCD"); // limit wider than the string: unchanged
+ // ASCII input, 14 columns: each column taken off the limit drops one more character after the "..".
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 14, 0), "abcd:1234:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 13, 0), "..d:1234:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 12, 0), "..:1234:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 11, 0), "..1234:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 10, 0), "..234:ABCD");
+ // CJK input: when the cut lands inside a two-column character, its visible half is rendered as a third '.'.
+ unistring ustr("中文:中文:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 14, 0), "中文:中文:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 13, 0), "...:中文:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 12, 0), "..:中文:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 11, 0), "..中文:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 10, 0), "...文:中文");
+}
+
+BOOST_AUTO_TEST_CASE(testTruncateAbbreviate) // ABBREVIATE: shorten the ':'-separated segments before the last one; third argument is presumably the abbreviation length -- confirm against format_t::truncate
+{
+ format_t::default_style = format_t::ABBREVIATE;
+ unistring str("abcd:1234:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 0, 2), "abcd:1234:ABCD"); // width 0: returned unchanged
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 99, 2), "abcd:1234:ABCD"); // limit wider than the string: unchanged
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 14, 2), "abcd:1234:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 13, 2), "abc:1234:ABCD"); // from here the leading segments shrink, first one first
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 12, 2), "ab:1234:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 11, 2), "ab:123:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 10, 2), "ab:12:ABCD"); // both leading segments are down to 2 characters
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 9, 2), "..12:ABCD"); // abbreviation alone is not enough: ".." plus the last width - 2 columns
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 8, 2), "..2:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 7, 2), "..:ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 6, 2), "..ABCD");
+ BOOST_CHECK_EQUAL(format_t::truncate(str, 5, 2), "..BCD");
+ // CJK input, third argument 2: a segment cut to an odd number of columns ends in '.' in place of the half character.
+ unistring ustr("中文:中文:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 14, 2), "中文:中文:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 13, 2), "中.:中文:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 12, 2), "中:中文:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 11, 2), "中:中.:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 10, 2), "中:中:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 9, 2), "..中:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 8, 2), "...:中文");
+ // CJK input, third argument 1: a leading segment may shrink to one column, which is too narrow for the character and so becomes '.'.
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 14, 1), "中文:中文:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 13, 1), "中.:中文:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 12, 1), "中:中文:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 11, 1), ".:中文:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 10, 1), ".:中.:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 9, 1), ".:中:中文");
+ BOOST_CHECK_EQUAL(format_t::truncate(ustr, 8, 1), ".:.:中文");
+}
+
+BOOST_AUTO_TEST_SUITE_END()