format_t::truncate support wide characters

also add unit tests
author: Kuang-che Wu <kcwu@google.com> 2020-06-06 12:43:46 +0800
committer: Martin Michlmayr <tbm@cyrius.com> 2021-01-17 10:31:00 +0800
commit: 2dae3bbedcdf55983d23fc90bb36111c7eb68fc7 (patch)
tree: 446f93d9c625f40828188f37b970e5f01fc2d368 /src
parent: 0e19e3fe51c2c056a91af48e4733aa13a840fdaa (diff)
download: fork-ledger-2dae3bbedcdf55983d23fc90bb36111c7eb68fc7.tar.gz
fork-ledger-2dae3bbedcdf55983d23fc90bb36111c7eb68fc7.tar.bz2
fork-ledger-2dae3bbedcdf55983d23fc90bb36111c7eb68fc7.zip
2 files changed, 58 insertions, 10 deletions
diff --git a/src/format.cc b/src/format.cc
index 5b9baa21..87168ea2 100644
--- a/src/format.cc
+++ b/src/format.cc
@@ -478,7 +478,7 @@ string format_t::truncate(const unistring&  ustr,
 {
   assert(width < 4095);
 
-  const std::size_t len = ustr.length();
+  const std::size_t len = ustr.width();
   if (width == 0 || len <= width)
     return ustr.extract();
 
@@ -491,14 +491,14 @@ string format_t::truncate(const unistring&  ustr,
   switch (style) {
   case TRUNCATE_LEADING:
     // This method truncates at the beginning.
-    buf << ".." << ustr.extract(len - (width - 2), width - 2);
+    buf << ".." << ustr.extract_by_width(len - (width - 2), width - 2);
     break;
 
   case TRUNCATE_MIDDLE:
     // This method truncates in the middle.
-    buf << ustr.extract(0, (width - 2) / 2)
+    buf << ustr.extract_by_width(0, (width - 2) / 2)
         << ".."
-        << ustr.extract(len - ((width - 2) / 2 + (width - 2) % 2),
+        << ustr.extract_by_width(len - ((width - 2) / 2 + (width - 2) % 2),
                         (width - 2) / 2 + (width - 2) % 2);
     break;
 
@@ -545,7 +545,7 @@ string format_t::truncate(const unistring&  ustr,
       for (std::list<string>::iterator i = parts.begin();
            i != parts.end();
            i++) {
-        std::size_t l = unistring(*i).length();
+        std::size_t l = unistring(*i).width();
         DEBUG("format.abbrev",
               "Segment " << ++index << " is " << l << " chars wide");
         lens.push_back(l);
@@ -667,8 +667,8 @@ string format_t::truncate(const unistring&  ustr,
         }
 
         unistring temp(*i);
-        if (temp.length() > *l)
-          result << temp.extract(0, *l) << ":";
+        if (temp.width() > *l)
+          result << temp.extract_by_width(0, *l) << ":";
         else
           result << *i << ":";
       }
@@ -677,8 +677,8 @@ string format_t::truncate(const unistring&  ustr,
         // Even abbreviated its too big to show the last account, so
         // abbreviate all but the last and truncate at the beginning.
         unistring temp(result.str());
-        assert(temp.length() > width - 2);
-        buf << ".." << temp.extract(temp.length() - (width - 2), width - 2);
+        assert(temp.width() > width - 2);
+        buf << ".." << temp.extract_by_width(temp.width() - (width - 2), width - 2);
       } else {
         buf << result.str();
       }
@@ -688,7 +688,7 @@ string format_t::truncate(const unistring&  ustr,
 
   case TRUNCATE_TRAILING:
     // This method truncates at the end (the default).
-    buf << ustr.extract(0, width - 2) << "..";
+    buf << ustr.extract_by_width(0, width - 2) << "..";
     break;
   }
 
diff --git a/src/unistring.h b/src/unistring.h
index 8cc4a9cd..87b2f904 100644
--- a/src/unistring.h
+++ b/src/unistring.h
@@ -111,6 +111,54 @@ public:
     return utf8result;
   }
 
+  std::string extract_by_width(std::string::size_type begin,  // start offset, in the same units as width()
+                               std::size_t            len) const  // requested extent, same units; clamped below
+  {  // Returns UTF-8 for the [begin, begin+len) width window; characters cut by a window edge become '.' padding.
+    std::string utf8result;
+    std::size_t this_width = width();  // total width of this string (width() is defined outside this hunk)
+    std::string::size_type this_len = length();  // loop bound used below when indexing utf32chars
+
+    assert(begin <= this_width);  // a window starting past the end is treated as a caller bug
+    if (begin + len > this_width)
+	len = this_width - begin;  // clamp so the window never extends past the end of the string
+
+    std::size_t pos = 0;  // accumulated width of all characters before idx
+    std::size_t begin_idx = 0, end_idx = 0;  // [begin_idx, end_idx): utf32chars range that lies wholly inside the window
+    std::size_t head = 0, tail = 0;  // counts of '.' emitted before / after the converted range
+    for (std::size_t idx = 0; idx < this_len; ++idx) {
+      std::size_t w = mk_wcwidth(utf32chars[idx]);  // NOTE(review): presumably display columns; if mk_wcwidth can return a negative int it wraps in size_t -- confirm
+
+      if (pos < begin) {  // this character starts before the window
+        if (pos + w >= begin) {  // ...and ends at or beyond the window start
+          head = std::min(pos + w, begin + len) - begin;  // width of its overlap with the window (0 when it ends exactly at begin)
+          begin_idx = idx + 1;  // whole characters start with the next index
+        }
+      } else if (pos < begin + len) {  // this character starts inside the window
+        if (pos + w > begin + len) {  // ...but extends past the window end
+          tail = begin + len - pos;  // width left over, to be filled with '.'
+          end_idx = idx;  // exclude the cut character from the converted range
+        }
+        if (pos + w == begin + len) {  // character ends exactly on the window end
+          tail = 0;
+          end_idx = idx + 1;  // include it in the converted range
+        }
+      }
+      pos += w;
+    }
+
+    utf8result += std::string(head, '.');  // stand-in for the character cut at the leading edge
+
+    if (begin_idx < end_idx)  // skip the conversion when no whole character lies inside the window
+      utf8::unchecked::utf32to8  // append the UTF-8 encoding of utf32chars[begin_idx, end_idx) to utf8result
+        (utf32chars.begin() + static_cast<std::string::difference_type>(begin_idx),
+         utf32chars.begin() + static_cast<std::string::difference_type>(end_idx),
+         std::back_inserter(utf8result));
+
+    utf8result += std::string(tail, '.');  // stand-in for the character cut at the trailing edge
+
+    return utf8result;
+  }
+
   std::size_t find(const boost::uint32_t __s, std::size_t __pos = 0) const {
     std::size_t idx = 0;
     foreach (const boost::uint32_t& ch, utf32chars) {
author	Kuang-che Wu <kcwu@google.com>	2020-06-06 12:43:46 +0800
committer	Martin Michlmayr <tbm@cyrius.com>	2021-01-17 10:31:00 +0800
commit	2dae3bbedcdf55983d23fc90bb36111c7eb68fc7 (patch)
tree	446f93d9c625f40828188f37b970e5f01fc2d368 /src
parent	0e19e3fe51c2c056a91af48e4733aa13a840fdaa (diff)
download	fork-ledger-2dae3bbedcdf55983d23fc90bb36111c7eb68fc7.tar.gz fork-ledger-2dae3bbedcdf55983d23fc90bb36111c7eb68fc7.tar.bz2 fork-ledger-2dae3bbedcdf55983d23fc90bb36111c7eb68fc7.zip