summaryrefslogtreecommitdiff
path: root/third_party/llvm-project/include/llvm/Support/ConvertUTF.h
diff options
context:
space:
mode:
author Alon Zakai <azakai@google.com> 2023-09-18 14:20:15 -0700
committer GitHub <noreply@github.com> 2023-09-18 14:20:15 -0700
commit 08528e7947c141d09a69c30cb558c9ec3698e771 (patch)
tree 5e7a1a6e92f714341900cf807d7a997f75aca7bc /third_party/llvm-project/include/llvm/Support/ConvertUTF.h
parent 939a45a17b0d0e940bfe981036f8e358376a7315 (diff)
download binaryen-08528e7947c141d09a69c30cb558c9ec3698e771.tar.gz
binaryen-08528e7947c141d09a69c30cb558c9ec3698e771.tar.bz2
binaryen-08528e7947c141d09a69c30cb558c9ec3698e771.zip
Update two files from upstream LLVM, ConvertUTF.h,cpp (#5954)
Almost no actual change in the files except for a license update. The new license is a proper FOSS one, it turns out; see #5947. Fixes #5947.
Diffstat (limited to 'third_party/llvm-project/include/llvm/Support/ConvertUTF.h')
-rw-r--r--third_party/llvm-project/include/llvm/Support/ConvertUTF.h74
1 files changed, 58 insertions, 16 deletions
diff --git a/third_party/llvm-project/include/llvm/Support/ConvertUTF.h b/third_party/llvm-project/include/llvm/Support/ConvertUTF.h
index 1add18533..c892bb3c0 100644
--- a/third_party/llvm-project/include/llvm/Support/ConvertUTF.h
+++ b/third_party/llvm-project/include/llvm/Support/ConvertUTF.h
@@ -6,25 +6,41 @@
*
*==------------------------------------------------------------------------==*/
/*
- * Copyright 2001-2004 Unicode, Inc.
+ * Copyright © 1991-2015 Unicode, Inc. All rights reserved.
+ * Distributed under the Terms of Use in
+ * http://www.unicode.org/copyright.html.
*
- * Disclaimer
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of the Unicode data files and any associated documentation
+ * (the "Data Files") or Unicode software and any associated documentation
+ * (the "Software") to deal in the Data Files or Software
+ * without restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, and/or sell copies of
+ * the Data Files or Software, and to permit persons to whom the Data Files
+ * or Software are furnished to do so, provided that
+ * (a) this copyright and permission notice appear with all copies
+ * of the Data Files or Software,
+ * (b) this copyright and permission notice appear in associated
+ * documentation, and
+ * (c) there is clear notice in each modified Data File or in the Software
+ * as well as in the documentation associated with the Data File(s) or
+ * Software that the data or software has been modified.
*
- * This source code is provided as is by Unicode, Inc. No claims are
- * made as to fitness for any particular purpose. No warranties of any
- * kind are expressed or implied. The recipient agrees to determine
- * applicability of information provided. If this file has been
- * purchased on magnetic or optical media from Unicode, Inc., the
- * sole remedy for any claim will be exchange of defective media
- * within 90 days of receipt.
+ * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
+ * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT OF THIRD PARTY RIGHTS.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
+ * NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
+ * DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+ * PERFORMANCE OF THE DATA FILES OR SOFTWARE.
*
- * Limitations on Rights to Redistribute This Code
- *
- * Unicode, Inc. hereby grants the right to freely use the information
- * supplied in this file in the creation of products supporting the
- * Unicode Standard, and to make copies of this file in any form
- * for internal or external distribution as long as this notice
- * remains attached.
+ * Except as contained in this notice, the name of a copyright holder
+ * shall not be used in advertising or otherwise to promote the sale,
+ * use or other dealings in these Data Files or Software without prior
+ * written authorization of the copyright holder.
*/
/* ---------------------------------------------------------------------
@@ -91,7 +107,10 @@
#include <cstddef>
#include <string>
+
+#if defined(_WIN32)
#include <system_error>
+#endif
// Wrap everything in namespace llvm so that programs can link with llvm and
// their own version of the unicode libraries.
@@ -123,6 +142,9 @@ typedef unsigned char Boolean; /* 0 or 1 */
#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE 0xFEFF
#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE
+#define UNI_UTF32_BYTE_ORDER_MARK_NATIVE 0x0000FEFF
+#define UNI_UTF32_BYTE_ORDER_MARK_SWAPPED 0xFFFE0000
+
typedef enum {
conversionOK, /* conversion successful */
sourceExhausted, /* partial character in source, but hit end */
@@ -175,6 +197,8 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd);
+unsigned getUTF8SequenceSize(const UTF8 *source, const UTF8 *sourceEnd);
+
unsigned getNumBytesForUTF8(UTF8 firstByte);
/*************************************************************************/
@@ -279,6 +303,24 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out);
bool convertUTF16ToUTF8String(ArrayRef<UTF16> Src, std::string &Out);
/**
+ * Converts a stream of raw bytes assumed to be UTF32 into a UTF8 std::string.
+ *
+ * \param [in] SrcBytes A buffer of what is assumed to be UTF-32 encoded text.
+ * \param [out] Out Converted UTF-8 is stored here on success.
+ * \returns true on success
+ */
+bool convertUTF32ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out);
+
+/**
+ * Converts a UTF32 string into a UTF8 std::string.
+ *
+ * \param [in] Src A buffer of UTF-32 encoded text.
+ * \param [out] Out Converted UTF-8 is stored here on success.
+ * \returns true on success
+ */
+bool convertUTF32ToUTF8String(ArrayRef<UTF32> Src, std::string &Out);
+
+/**
* Converts a UTF-8 string into a UTF-16 string with native endianness.
*
* \returns true on success