summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/unistring.h95
1 files changed, 95 insertions, 0 deletions
diff --git a/src/unistring.h b/src/unistring.h
new file mode 100644
index 00000000..b23be7c2
--- /dev/null
+++ b/src/unistring.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2003-2009, John Wiegley. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of New Artisans LLC nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * @addtogroup utils
+ */
+
+/**
+ * @file unistring.h
+ * @author John Wiegley
+ *
+ * @ingroup utils
+ *
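+ * @brief Declares unistring, which decodes UTF-8 strings into UTF-32
+ *
+ * Lets callers measure and slice a string by character rather than by byte.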
+ */
+#ifndef _UNISTRING_H
+#define _UNISTRING_H
+
+#include "utils.h"
+
+/**
+ * @class unistring
+ *
+ * @brief Abstract working with UTF-32 encoded Unicode strings
+ *
+ * The input is a UTF-8 encoded std::string. Once decoded, its true length
+ * (in characters, not bytes) can be queried and runs of characters extracted.
+ */
+class unistring
+{
+  // UTF-32 elements produced by decoding the constructor's input.
+  std::vector<boost::uint32_t> utf32chars;
+
+public:
+  /**
+   * Decode @p input from UTF-8 and keep the result as UTF-32.
+   *
+   * The up-front utf8::is_valid() assertion is compiled out (#if 0), so
+   * this class hands the bytes to utf8::utf8to32 without checking them.
+   * NOTE(review): presumably utf8to32 reports malformed input itself --
+   * confirm against the utf8 library in use.
+   *
+   * @param input  UTF-8 encoded text.
+   */
+  unistring(const std::string& input)
+  {
+    TRACE_CTOR(unistring, "");
+
+    const char * p = input.c_str();
+    std::size_t len = input.length();
+
+#if 0
+    assert(utf8::is_valid(p, p + len));
+#endif
+    utf8::utf8to32(p, p + len, std::back_inserter(utf32chars));
+  }
+  ~unistring() {
+    TRACE_DTOR(unistring);
+  }
+
+  /**
+   * @return The number of decoded UTF-32 elements held, i.e. the size of
+   *         utf32chars rather than the byte length of the original input.
+   */
+  std::size_t length() const {
+    return utf32chars.size();
+  }
+
+  /**
+   * Re-encode part of the string as UTF-8.
+   *
+   * The requested range is clamped to the stored elements, so a @p begin
+   * or @p len that reaches past the end yields a shorter (possibly empty)
+   * result instead of advancing an iterator beyond utf32chars.end().
+   *
+   * @param begin  Index of the first element to extract.
+   * @param len    Number of elements to extract; 0 selects everything
+   *               from @p begin through the end.
+   * @return The selected elements encoded as UTF-8.
+   */
+  std::string extract(const std::size_t begin = 0,
+                      const std::size_t len = 0) const
+  {
+    std::string utf8result;
+
+    const std::size_t total = length();
+    // Clamp the start first so the subtraction below cannot wrap around.
+    const std::size_t first = begin < total ? begin : total;
+    const std::size_t avail = total - first;
+    // len == 0 keeps its original meaning of "no limit".
+    const std::size_t count = (len != 0 && len < avail) ? len : avail;
+
+    utf8::utf32to8(utf32chars.begin() + first,
+                   utf32chars.begin() + first + count,
+                   std::back_inserter(utf8result));
+    return utf8result;
+  }
+};
+
+#endif // _UNISTRING_H