#pragma once
#include <cstdint>
#include <cstring>
#include <string>
#include <string_view>
uint32_t u8_nextchar(const char *s, int *i, size_t size);
uint32_t u8_nextchar_unsafe(const char *s, int *i);
int u8_wc_toutf8(char *dest, uint32_t ch);
void u8_inc(const char *s, int *i);
void u8_dec(const char *s, int *i);
inline bool CodepointIsProbablyEmoji(uint32_t c) {
return c > 0xFFFF;
}
bool AnyEmojiInString(std::string_view str, size_t byteCount);
class UTF8 {
public:
static const uint32_t INVALID = (uint32_t)-1;
explicit UTF8(const char *c) : c_(c), size_((int)strlen(c)), index_(0) {}
explicit UTF8(std::string_view view) : c_(view.data()), size_((int)view.size()), index_(0) {}
explicit UTF8(std::string_view view, int index) : c_(view.data()), size_((int)view.size()), index_(index) {}
bool end() const { return index_ == size_; }
bool invalid() const {
unsigned char c = (unsigned char)c_[index_];
return (c >= 0x80 && c <= 0xC1) || c >= 0xF5;
}
uint32_t next() {
return u8_nextchar(c_, &index_, size_);
}
uint32_t next_unsafe() {
return u8_nextchar_unsafe(c_, &index_);
}
uint32_t peek() const {
int tempIndex = index_;
return u8_nextchar(c_, &tempIndex, size_);
}
void fwd() {
u8_inc(c_, &index_);
}
void bwd() {
u8_dec(c_, &index_);
}
int length() const {
return size_;
}
int byteIndex() const {
return index_;
}
static int encode(char *dest, uint32_t ch) {
return u8_wc_toutf8(dest, ch);
}
static int encodeUnits(uint32_t ch) {
if (ch < 0x80) {
return 1;
} else if (ch < 0x800) {
return 2;
} else if (ch < 0x10000) {
return 3;
} else if (ch < 0x110000) {
return 4;
}
return 0;
}
private:
const char *c_;
int index_;
int size_;
};
int UTF8StringNonASCIICount(std::string_view utf8string);
bool UTF8StringHasNonASCII(std::string_view utf8string);
std::string SanitizeUTF8(std::string_view utf8string);
std::string CodepointToUTF8(uint32_t codePoint);
#ifdef _WIN32
std::string ConvertWStringToUTF8(const std::wstring &wstr);
std::string ConvertWStringToUTF8(const wchar_t *wstr);
void ConvertUTF8ToWString(wchar_t *dest, size_t destSize, std::string_view source);
std::wstring ConvertUTF8ToWString(std::string_view source);
#else
std::wstring ConvertUTF8ToWString(std::string_view source);
std::string ConvertWStringToUTF8(const std::wstring &wstr);
#endif
std::string ConvertUCS2ToUTF8(const std::u16string &wstr);
std::u16string ConvertUTF8ToUCS2(std::string_view source);
void ConvertUTF8ToJavaModifiedUTF8(std::string *output, std::string_view input);