17 #ifndef COM_BORA_SOFTWARE__BALAU_TYPE__CHARACTER
18 #define COM_BORA_SOFTWARE__BALAU_TYPE__CHARACTER
22 #include <unicode/uchar.h>
23 #include <unicode/utf8.h>
37 return u_islower((UChar32) c);
44 return u_isupper((UChar32) c);
51 return u_isdigit((UChar32) c);
58 return c <= 0x7f && u_isxdigit((UChar32) c);
65 return c >= U'0' && c <= U'9';
72 return c == U'0' || c == U'1';
79 return u_isalpha((UChar32) c);
86 return u_isalnum((UChar32) c);
93 return u_iscntrl((UChar32) c);
100 return u_isJavaSpaceChar((UChar32) c);
107 return u_isWhitespace((UChar32) c);
114 return u_isblank((UChar32) c);
121 return u_isprint((UChar32) c);
128 return u_ispunct((UChar32) c);
135 return u_isIDStart((UChar32) c);
142 return u_isIDPart((UChar32) c);
149 return (u_isWhitespace((UChar32) c) || c == U'-');
163 return U8_LENGTH((UChar32) c);
179 static char32_t getNextUtf8(const std::string_view & text, int & offset) {
181 U8_NEXT_UNSAFE(text.data(), offset, newUChar);
182 return (char32_t) newUChar;
200 U8_NEXT(text.data(), offset, (int32_t) text.length(), newUChar);
201 return (char32_t) newUChar;
217 U8_PREV_UNSAFE(text.data(), offset, newUChar);
218 return (char32_t) newUChar;
236 U8_PREV(text.data(), 0, offset, newUChar);
237 return (char32_t) newUChar;
248 static void advanceUtf8(const std::string_view & text, int & offset) {
249 U8_FWD_1_UNSAFE(text.data(), offset);
263 U8_FWD_1(text.data(), offset, ((int32_t) text.length()));
274 static void retreatUtf8(const std::string_view & text, int & offset) {
275 U8_BACK_1_UNSAFE(text.data(), offset);
289 U8_BACK_1(((uint8_t *) text.data()), ((int32_t) 0), offset);
294 static bool isValid(char32_t c) {
304 return (char32_t) u_toupper((UChar32) c);
311 return (char32_t) u_tolower((UChar32) c);
330 auto newUChar = (UChar32) c;
331 U8_APPEND_UNSAFE(&destination[0], offset, newUChar);
337 #endif // COM_BORA_SOFTWARE__BALAU_TYPE__CHARACTER
static char32_t toUpper(char32_t c)
Convert the supplied code point to uppercase.
Definition: Character.hpp:303
static bool isUpper(char32_t c)
Does the specified code point have the general category "Lu" (uppercase letter).
Definition: Character.hpp:43
static bool isPunctuation(char32_t c)
Does the specified code point have the general category "P" (punctuation).
Definition: Character.hpp:127
static void advanceUtf8Safe(const std::string_view &text, int &offset)
Advance the supplied offset from one code point boundary to the next one (validating version)...
Definition: Character.hpp:262
static char32_t toLower(char32_t c)
Convert the supplied code point to lowercase.
Definition: Character.hpp:310
static bool isOctalDigit(char32_t c)
Is the specified code point one of the ASCII characters 0-7.
Definition: Character.hpp:64
static bool isLower(char32_t c)
Does the specified code point have the general category "Ll" (lowercase letter).
Definition: Character.hpp:36
static bool isInclusiveBreakableCharacter(char32_t c)
Is the specified code point a breakable character for line endings that should be printed...
Definition: Character.hpp:155
static char32_t getNextUtf8(const std::string_view &text, int &offset)
Get the next code point from the UTF-8 string view.
Definition: Character.hpp:179
The root Balau namespace.
Definition: ApplicationConfiguration.hpp:23
static void retreatUtf8Safe(const std::string_view &text, int &offset)
Retreat the supplied offset from one code point boundary to the previous one (validating version)...
Definition: Character.hpp:288
static bool isControlCharacter(char32_t c)
Is the specified code point a control character.
Definition: Character.hpp:92
static bool isBlank(char32_t c)
Is the specified code point a character that visibly separates words on a line.
Definition: Character.hpp:113
static void advanceUtf8(const std::string_view &text, int &offset)
Advance the supplied offset from one code point boundary to the next one.
Definition: Character.hpp:248
static bool isHexDigit(char32_t c)
Does the specified code point have the general category "Nd" (decimal digit numbers) or is one of the...
Definition: Character.hpp:57
static bool isBinaryDigit(char32_t c)
Is the specified code point one of the ASCII characters 0-1.
Definition: Character.hpp:71
static char32_t getNextUtf8Safe(const std::string_view &text, int &offset)
Get the next code point from the UTF-8 string view (validating version).
Definition: Character.hpp:198
static bool isWhitespace(char32_t c)
Is the specified code point a whitespace character.
Definition: Character.hpp:106
static char32_t getPreviousUtf8Safe(const std::string_view &text, int &offset)
Get the previous code point from the UTF-8 string view (validating version).
Definition: Character.hpp:234
Core includes, typedefs and functions.
static void retreatUtf8(const std::string_view &text, int &offset)
Retreat the supplied offset from one code point boundary to the previous one.
Definition: Character.hpp:274
static bool isAlpha(char32_t c)
Does the specified code point have the general category "L" (letters).
Definition: Character.hpp:78
static bool isDigit(char32_t c)
Does the specified code point have the general category "Nd" (decimal digit numbers).
Definition: Character.hpp:50
static char32_t getPreviousUtf8(const std::string_view &text, int &offset)
Get the previous code point from the UTF-8 string view.
Definition: Character.hpp:215
static size_t utf8ByteCount(char32_t c)
Returns the number of bytes that the character occupies when UTF-8 encoded.
Definition: Character.hpp:162
Utilities for unicode characters and code points.
Definition: Character.hpp:30
static bool isSpace(char32_t c)
Is the specified code point a space character (excluding CR / LF).
Definition: Character.hpp:99
static bool isIdStart(char32_t c)
Does the specified code point have the general category "L" (letters) or "Nl" (letter numbers)...
Definition: Character.hpp:134
static bool isPrintable(char32_t c)
Is the specified code point a printable character.
Definition: Character.hpp:120
static void setUtf8AndAdvanceOffset(std::string &destination, int &offset, char32_t c)
Write a code point into the supplied UTF-8 string.
Definition: Character.hpp:329
static bool isBreakableCharacter(char32_t c)
Is the specified code point a breakable character for line endings.
Definition: Character.hpp:148
static bool isIdPart(char32_t c)
Is the specified code point valid as part of an Id.
Definition: Character.hpp:141
static bool isAlphaOrDecimal(char32_t c)
Does the specified code point have the general category "L" (letters) or "Nd" (decimal digit numbers)...
Definition: Character.hpp:85