Character.hpp
Go to the documentation of this file.
1 // @formatter:off
2 //
3 // Balau core C++ library
4 //
5 // Copyright (C) 2008 Bora Software (contact@borasoftware.com)
6 //
7 // Licensed under the Boost Software License - Version 1.0 - August 17th, 2003.
8 // See the LICENSE file for the full license text.
9 //
10 
16 
17 #ifndef COM_BORA_SOFTWARE__BALAU_TYPE__CHARACTER
18 #define COM_BORA_SOFTWARE__BALAU_TYPE__CHARACTER
19 
20 #include <Balau/Type/StdTypes.hpp>
21 
22 #include <unicode/uchar.h>
23 #include <unicode/utf8.h>
24 
25 namespace Balau {
26 
30 struct Character {
32 
36  static bool isLower(char32_t c) {
37  return u_islower((UChar32) c);
38  }
39 
43  static bool isUpper(char32_t c) {
44  return u_isupper((UChar32) c);
45  }
46 
50  static bool isDigit(char32_t c) {
51  return u_isdigit((UChar32) c);
52  }
53 
57  static bool isHexDigit(char32_t c) {
58  return c <= 0x7f && u_isxdigit((UChar32) c);
59  }
60 
64  static bool isOctalDigit(char32_t c) {
65  return c >= U'0' && c <= U'9';
66  }
67 
71  static bool isBinaryDigit(char32_t c) {
72  return c == U'0' || c == U'1';
73  }
74 
78  static bool isAlpha(char32_t c) {
79  return u_isalpha((UChar32) c);
80  }
81 
85  static bool isAlphaOrDecimal(char32_t c) {
86  return u_isalnum((UChar32) c);
87  }
88 
92  static bool isControlCharacter(char32_t c) {
93  return u_iscntrl((UChar32) c);
94  }
95 
99  static bool isSpace(char32_t c) {
100  return u_isJavaSpaceChar((UChar32) c);
101  }
102 
106  static bool isWhitespace(char32_t c) {
107  return u_isWhitespace((UChar32) c);
108  }
109 
113  static bool isBlank(char32_t c) {
114  return u_isblank((UChar32) c);
115  }
116 
120  static bool isPrintable(char32_t c) {
121  return u_isprint((UChar32) c);
122  }
123 
127  static bool isPunctuation(char32_t c) {
128  return u_ispunct((UChar32) c);
129  }
130 
134  static bool isIdStart(char32_t c) {
135  return u_isIDStart((UChar32) c);
136  }
137 
141  static bool isIdPart(char32_t c) {
142  return u_isIDPart((UChar32) c);
143  }
144 
148  static bool isBreakableCharacter(char32_t c) {
149  return (u_isWhitespace((UChar32) c) || c == U'-');
150  }
151 
155  static bool isInclusiveBreakableCharacter(char32_t c) {
156  return c == U'-';
157  }
158 
162  static size_t utf8ByteCount(char32_t c) {
163  return U8_LENGTH((UChar32) c);
164  }
165 
167 
179  static char32_t getNextUtf8(const std::string_view & text, int & offset) {
180  UChar32 newUChar;
181  U8_NEXT_UNSAFE(text.data(), offset, newUChar); // NOLINT
182  return (char32_t) newUChar;
183  }
184 
198  static char32_t getNextUtf8Safe(const std::string_view & text, int & offset) {
199  UChar32 newUChar;
200  U8_NEXT(text.data(), offset, (int32_t) text.length(), newUChar); // NOLINT
201  return (char32_t) newUChar;
202  }
203 
215  static char32_t getPreviousUtf8(const std::string_view & text, int & offset) {
216  UChar32 newUChar;
217  U8_PREV_UNSAFE(text.data(), offset, newUChar); // NOLINT
218  return (char32_t) newUChar;
219  }
220 
234  static char32_t getPreviousUtf8Safe(const std::string_view & text, int & offset) {
235  UChar32 newUChar;
236  U8_PREV(text.data(), 0, offset, newUChar); // NOLINT
237  return (char32_t) newUChar;
238  }
239 
248  static void advanceUtf8(const std::string_view & text, int & offset) {
249  U8_FWD_1_UNSAFE(text.data(), offset); // NOLINT
250  }
251 
262  static void advanceUtf8Safe(const std::string_view & text, int & offset) {
263  U8_FWD_1(text.data(), offset, ((int32_t) text.length())); // NOLINT
264  }
265 
274  static void retreatUtf8(const std::string_view & text, int & offset) {
275  U8_BACK_1_UNSAFE(text.data(), offset);
276  }
277 
288  static void retreatUtf8Safe(const std::string_view & text, int & offset) {
289  U8_BACK_1(((uint8_t *) text.data()), ((int32_t) 0), offset);
290  }
291 
293 
294  static bool isValid(char32_t c) {
295  return c < 0x10FFFF; // Maximum Unicode code point value.
296  }
297 
299 
303  static char32_t toUpper(char32_t c) {
304  return (char32_t) u_toupper((UChar32) c);
305  }
306 
310  static char32_t toLower(char32_t c) {
311  return (char32_t) u_tolower((UChar32) c);
312  }
313 
329  static void setUtf8AndAdvanceOffset(std::string & destination, int & offset, char32_t c) {
330  auto newUChar = (UChar32) c;
331  U8_APPEND_UNSAFE(&destination[0], offset, newUChar); // NOLINT
332  }
333 };
334 
335 } // namespace Balau
336 
337 #endif // COM_BORA_SOFTWARE__BALAU_TYPE__CHARACTER
static char32_t toUpper(char32_t c)
Convert the supplied code point to uppercase.
Definition: Character.hpp:303
static bool isUpper(char32_t c)
Does the specified code point have the general category "Lu" (uppercase letter).
Definition: Character.hpp:43
static bool isPunctuation(char32_t c)
Does the specified code point have the general category "P" (punctuation).
Definition: Character.hpp:127
static void advanceUtf8Safe(const std::string_view &text, int &offset)
Advance the supplied offset from one code point boundary to the next one (validating version)...
Definition: Character.hpp:262
static char32_t toLower(char32_t c)
Convert the supplied code point to lowercase.
Definition: Character.hpp:310
static bool isOctalDigit(char32_t c)
Is the specified code point one of the ASCII characters 0-7.
Definition: Character.hpp:64
static bool isLower(char32_t c)
Does the specified code point have the general category "Ll" (lowercase letter).
Definition: Character.hpp:36
static bool isInclusiveBreakableCharacter(char32_t c)
Is the specified code point a breakable character for line endings that should be printed...
Definition: Character.hpp:155
static char32_t getNextUtf8(const std::string_view &text, int &offset)
Get the next code point from the UTF-8 string view.
Definition: Character.hpp:179
The root Balau namespace.
Definition: ApplicationConfiguration.hpp:23
static void retreatUtf8Safe(const std::string_view &text, int &offset)
Retreat the supplied offset from one code point boundary to the previous one (validating version)...
Definition: Character.hpp:288
static bool isControlCharacter(char32_t c)
Is the specified code point a control character.
Definition: Character.hpp:92
static bool isBlank(char32_t c)
Is the specified code point a character that visibly separates words on a line.
Definition: Character.hpp:113
static void advanceUtf8(const std::string_view &text, int &offset)
Advance the supplied offset from one code point boundary to the next one.
Definition: Character.hpp:248
static bool isHexDigit(char32_t c)
Is the specified code point one of the ASCII hexadecimal digit characters 0-9, a-f, or A-F.
Definition: Character.hpp:57
static bool isBinaryDigit(char32_t c)
Is the specified code point one of the ASCII characters 0-1.
Definition: Character.hpp:71
static char32_t getNextUtf8Safe(const std::string_view &text, int &offset)
Get the next code point from the UTF-8 string view (validating version).
Definition: Character.hpp:198
static bool isWhitespace(char32_t c)
Is the specified code point a whitespace character.
Definition: Character.hpp:106
static char32_t getPreviousUtf8Safe(const std::string_view &text, int &offset)
Get the previous code point from the UTF-8 string view (validating version).
Definition: Character.hpp:234
Core includes, typedefs and functions.
static void retreatUtf8(const std::string_view &text, int &offset)
Retreat the supplied offset from one code point boundary to the previous one.
Definition: Character.hpp:274
static bool isAlpha(char32_t c)
Does the specified code point have the general category "L" (letters).
Definition: Character.hpp:78
static bool isDigit(char32_t c)
Does the specified code point have the general category "Nd" (decimal digit numbers).
Definition: Character.hpp:50
static char32_t getPreviousUtf8(const std::string_view &text, int &offset)
Get the previous code point from the UTF-8 string view.
Definition: Character.hpp:215
static size_t utf8ByteCount(char32_t c)
Returns the number of bytes that the character occupies when UTF-8 encoded.
Definition: Character.hpp:162
Utilities for unicode characters and code points.
Definition: Character.hpp:30
static bool isSpace(char32_t c)
Is the specified code point a space character (excluding CR / LF).
Definition: Character.hpp:99
static bool isIdStart(char32_t c)
Does the specified code point have the general category "L" (letters) or "Nl" (letter numbers)...
Definition: Character.hpp:134
static bool isPrintable(char32_t c)
Is the specified code point a printable character.
Definition: Character.hpp:120
static void setUtf8AndAdvanceOffset(std::string &destination, int &offset, char32_t c)
Write a code point into the supplied UTF-8 string.
Definition: Character.hpp:329
static bool isBreakableCharacter(char32_t c)
Is the specified code point a breakable character for line endings.
Definition: Character.hpp:148
static bool isIdPart(char32_t c)
Is the specified code point valid as part of an Id.
Definition: Character.hpp:141
static bool isAlphaOrDecimal(char32_t c)
Does the specified code point have the general category "L" (letters) or "Nd" (decimal digit numbers)...
Definition: Character.hpp:85