diff options
author | Alon Zakai <azakai@google.com> | 2023-09-18 14:20:15 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-09-18 14:20:15 -0700 |
commit | 08528e7947c141d09a69c30cb558c9ec3698e771 (patch) | |
tree | 5e7a1a6e92f714341900cf807d7a997f75aca7bc /third_party/llvm-project/include/llvm/Support/ConvertUTF.h | |
parent | 939a45a17b0d0e940bfe981036f8e358376a7315 (diff) | |
download | binaryen-08528e7947c141d09a69c30cb558c9ec3698e771.tar.gz binaryen-08528e7947c141d09a69c30cb558c9ec3698e771.tar.bz2 binaryen-08528e7947c141d09a69c30cb558c9ec3698e771.zip |
Update two files from upstream LLVM: ConvertUTF.h and ConvertUTF.cpp (#5954)
Almost no actual change in the files except for a license update. It turns out
that the new license is a proper FOSS one; see #5947.
Fixes #5947
Diffstat (limited to 'third_party/llvm-project/include/llvm/Support/ConvertUTF.h')
-rw-r--r-- | third_party/llvm-project/include/llvm/Support/ConvertUTF.h | 74 |
1 files changed, 58 insertions, 16 deletions
diff --git a/third_party/llvm-project/include/llvm/Support/ConvertUTF.h b/third_party/llvm-project/include/llvm/Support/ConvertUTF.h index 1add18533..c892bb3c0 100644 --- a/third_party/llvm-project/include/llvm/Support/ConvertUTF.h +++ b/third_party/llvm-project/include/llvm/Support/ConvertUTF.h @@ -6,25 +6,41 @@ * *==------------------------------------------------------------------------==*/ /* - * Copyright 2001-2004 Unicode, Inc. + * Copyright © 1991-2015 Unicode, Inc. All rights reserved. + * Distributed under the Terms of Use in + * http://www.unicode.org/copyright.html. * - * Disclaimer + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of the Unicode data files and any associated documentation + * (the "Data Files") or Unicode software and any associated documentation + * (the "Software") to deal in the Data Files or Software + * without restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, and/or sell copies of + * the Data Files or Software, and to permit persons to whom the Data Files + * or Software are furnished to do so, provided that + * (a) this copyright and permission notice appear with all copies + * of the Data Files or Software, + * (b) this copyright and permission notice appear in associated + * documentation, and + * (c) there is clear notice in each modified Data File or in the Software + * as well as in the documentation associated with the Data File(s) or + * Software that the data or software has been modified. * - * This source code is provided as is by Unicode, Inc. No claims are - * made as to fitness for any particular purpose. No warranties of any - * kind are expressed or implied. The recipient agrees to determine - * applicability of information provided. If this file has been - * purchased on magnetic or optical media from Unicode, Inc., the - * sole remedy for any claim will be exchange of defective media - * within 90 days of receipt. 
+ * THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF + * ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE + * WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT OF THIRD PARTY RIGHTS. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS + * NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL + * DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR + * PERFORMANCE OF THE DATA FILES OR SOFTWARE. * - * Limitations on Rights to Redistribute This Code - * - * Unicode, Inc. hereby grants the right to freely use the information - * supplied in this file in the creation of products supporting the - * Unicode Standard, and to make copies of this file in any form - * for internal or external distribution as long as this notice - * remains attached. + * Except as contained in this notice, the name of a copyright holder + * shall not be used in advertising or otherwise to promote the sale, + * use or other dealings in these Data Files or Software without prior + * written authorization of the copyright holder. */ /* --------------------------------------------------------------------- @@ -91,7 +107,10 @@ #include <cstddef> #include <string> + +#if defined(_WIN32) #include <system_error> +#endif // Wrap everything in namespace llvm so that programs can link with llvm and // their own version of the unicode libraries. 
@@ -123,6 +142,9 @@ typedef unsigned char Boolean; /* 0 or 1 */ #define UNI_UTF16_BYTE_ORDER_MARK_NATIVE 0xFEFF #define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE +#define UNI_UTF32_BYTE_ORDER_MARK_NATIVE 0x0000FEFF +#define UNI_UTF32_BYTE_ORDER_MARK_SWAPPED 0xFFFE0000 + typedef enum { conversionOK, /* conversion successful */ sourceExhausted, /* partial character in source, but hit end */ @@ -175,6 +197,8 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd); Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd); +unsigned getUTF8SequenceSize(const UTF8 *source, const UTF8 *sourceEnd); + unsigned getNumBytesForUTF8(UTF8 firstByte); /*************************************************************************/ @@ -279,6 +303,24 @@ bool convertUTF16ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out); bool convertUTF16ToUTF8String(ArrayRef<UTF16> Src, std::string &Out); /** + * Converts a stream of raw bytes assumed to be UTF32 into a UTF8 std::string. + * + * \param [in] SrcBytes A buffer of what is assumed to be UTF-32 encoded text. + * \param [out] Out Converted UTF-8 is stored here on success. + * \returns true on success + */ +bool convertUTF32ToUTF8String(ArrayRef<char> SrcBytes, std::string &Out); + +/** + * Converts a UTF32 string into a UTF8 std::string. + * + * \param [in] Src A buffer of UTF-32 encoded text. + * \param [out] Out Converted UTF-8 is stored here on success. + * \returns true on success + */ +bool convertUTF32ToUTF8String(ArrayRef<UTF32> Src, std::string &Out); + +/** * Converts a UTF-8 string into a UTF-16 string with native endianness. * * \returns true on success |