17 #ifndef COM_BORA_SOFTWARE__BALAU_LANG__ABSTRACT_SCANNER 18 #define COM_BORA_SOFTWARE__BALAU_LANG__ABSTRACT_SCANNER 55 currentStartOffset = 0U;
// NOTE(review): the include guard and the first statement of the scan routine are fused on the
// line above by the listing extraction; the enclosing function header is not visible here.
// Both offsets are reset to zero before scanning starts.
56 currentEndOffset = 0U;
// Fetch the first token up front so the loop condition can test it.
57 TokenT token = getNextToken();
// Record each token together with the offset at which it starts, until EndOfFile is produced.
59 while (token != TokenT::EndOfFile) {
60 tokens.emplace_back(token);
61 startOffsets.emplace_back(currentStartOffset);
// The next token starts where the previous one ended.
62 currentStartOffset = currentEndOffset;
63 token = getNextToken();
// The two statements below repeat the recording step for `token`. Presumably they follow the
// loop and append the EndOfFile token; the closing brace is not visible in this listing — confirm.
67 tokens.emplace_back(token);
68 startOffsets.emplace_back(currentStartOffset);
// Constructs a scanner for the resource identified by uri_.
// The visible initialisers move uri_ into `uri`, fill `text` from the stream returned by
// uri->byteReadResource()->readStream(), and set currentEndOffset to zero.
// The first entry of the initialiser list is not visible in this listing.
// NOTE(review): the `text` initialiser dereferences the member `uri`, not the moved-from
// parameter, so it relies on `uri` being initialised before `text`. The member declarations
// list `uri` ahead of `text`, which gives that order; keep it that way if members are reordered.
75 protected:
explicit AbstractScanner(std::shared_ptr<Resource::Uri> uri_)
77 , uri(std::move(uri_))
78 , text(::
toString(uri->byteReadResource()->readStream()))
79 , currentEndOffset(0) {}
// Constructs a scanner from a URI together with an input stream.
// Only the `uri` and `currentEndOffset` initialisers are visible in this listing; how
// inputStream_ is consumed (and how `text` is filled) is not shown here — TODO confirm.
81 protected:
AbstractScanner(std::shared_ptr<Resource::Uri> uri_, std::istream & inputStream_)
83 , uri(std::move(uri_))
85 , currentEndOffset(0) {}
// Reads the next character into currentChar.
// When currentEndOffset has reached the end of `text`, currentChar is set to the
// char_traits<char32_t> end-of-file value. The decoding step for the normal case is not
// visible in this listing.
87 protected:
void readNextChar() {
88 if (currentEndOffset == (
int) text.length()) {
89 currentChar = std::char_traits<char32_t>::eof();
// NOTE(review): currentChar is declared as char32_t, which is an unsigned type, so this
// comparison looks like it can never be true and the invalid UTF-8 report below would be
// unreachable — confirm how the decoder signals invalid input and compare against that value.
93 if (currentChar < 0) {
// The message below and the current code span are arguments of an error-reporting call whose
// opening is not visible in this listing.
96 ,
"Invalid UTF-8 text found during parsing of string literal", calculateCurrentCodeSpan()
// Returns a view of `text` covering the range [currentStartOffset, currentEndOffset).
// The view points into the `text` member and does not copy it.
103 protected: std::string_view getCurrentString() {
104 return std::string_view(text.data() + currentStartOffset, currentEndOffset - currentStartOffset);
// Variant of readNextChar() that reports problems into a container.
// errorReport builds a ReportT from the current code span, and the result is appended to
// `container` with push_back. The meaning of the bool return value is not visible in this
// listing — TODO confirm.
107 protected:
template <
typename ContainerT,
typename ReportT>
108 bool readNextChar(ContainerT & container,
const std::function<ReportT (
const CodeSpan &)> & errorReport) {
// At the end of `text`, currentChar becomes the char_traits<char32_t> end-of-file value.
109 if (currentEndOffset == (
int) text.length()) {
110 currentChar = std::char_traits<char32_t>::eof();
// NOTE(review): currentChar is char32_t (unsigned), so `currentChar < 0` looks like it can
// never be true and the report below would never be pushed — confirm the intended check.
114 if (currentChar < 0) {
115 container.push_back(errorReport(calculateCurrentCodeSpan()));
// Puts the current character back so that it can be read again.
// The bodies of the branches are not visible in this listing; only the conditions are documented.
123 protected:
void putBackCurrentChar() {
// Guard: there is nothing to put back at end of file, for empty text, or at offset zero
// (presumably an early return — body not visible).
124 if (currentChar == std::char_traits<char32_t>::eof() || text.length() == 0 || currentEndOffset == 0) {
// Offsets greater than one and an offset of exactly one are handled by separate branches.
128 if (currentEndOffset > 1) {
132 }
else if (currentEndOffset == 1) {
// Scans a double-quoted string constant, dispatching on currentChar.
// Only the end-of-file case is visible in this listing; the other cases and the surrounding
// loop are not shown.
140 protected:
void extractStringConstantDoubleQuotes() {
145 switch (currentChar) {
// Reaching end of file inside the literal is reported with the message below and the current
// code span (the opening of the reporting call is not visible here).
155 case std::char_traits<char32_t>::eof(): {
158 ,
"End of file found during parsing of string literal", calculateCurrentCodeSpan()
// Variant of extractStringConstantDoubleQuotes() that reports problems into a container.
// errorReport builds a ReportT from the current code span. The meaning of the bool return
// value is not visible in this listing — TODO confirm.
169 protected:
template <
typename ContainerT,
typename ReportT>
170 bool extractStringConstantDoubleQuotes(ContainerT & container,
const std::function<ReportT (
const CodeSpan &)> & errorReport) {
175 switch (currentChar) {
// End of file inside the literal: append a report for the current code span to `container`.
185 case std::char_traits<char32_t>::eof(): {
186 container.push_back(errorReport(calculateCurrentCodeSpan()));
// Produces a whitespace token by delegating to createBlankToken() or createLineBreakToken().
// The condition that selects between the two is not visible in this listing.
197 protected: TokenT createWhitespaceToken() {
201 return createBlankToken();
203 return createLineBreakToken();
// Produces a TokenT::Blank token.
// The current character is put back before returning, presumably because it is the first
// character after the run of blanks (the consuming loop is not visible here — confirm).
207 protected: TokenT createBlankToken() {
214 putBackCurrentChar();
215 return TokenT::Blank;
// Produces a TokenT::LineBreak token.
// The reads of the following character are not visible in this listing; only the visible
// comparisons are described.
218 protected: TokenT createLineBreakToken() {
// Branch for a line break that starts with '\n'.
221 if (currentChar ==
'\n') {
// A following character other than '\r' is not part of this line break, so it is put back.
224 if (currentChar !=
'\r') {
225 putBackCurrentChar();
// Other branch (presumably a break that starts with '\r' — confirm): a following character
// other than '\n' is put back.
230 if (currentChar !=
'\n') {
231 putBackCurrentChar();
235 return TokenT::LineBreak;
// Computes the code span for the most recently recorded token, using the text, the tokens
// and the start offsets gathered so far. The call these arguments belong to is not visible
// in this listing.
238 protected:
CodeSpan calculateCurrentCodeSpan() {
// NOTE(review): the cast applies to tokens.size() before the subtraction, so with an empty
// `tokens` vector the index wraps around to the largest unsigned value — confirm that this
// is never called before a token has been recorded.
240 text, tokens, startOffsets, (
unsigned int) tokens.size() - 1
// Pure virtual hook implemented by concrete scanners: returns the next token.
// The scan loop calls it repeatedly until it returns TokenT::EndOfFile.
250 private:
virtual TokenT getNextToken() = 0;
// The character most recently read; set to the char_traits<char32_t> end-of-file value at the
// end of the text.
252 protected: char32_t currentChar;
// The URI passed to the constructor.
254 private: std::shared_ptr<Resource::Uri> uri;
// The full source text being scanned.
255 private: std::string text;
// NOTE(review): no use of inputStream is visible in this listing — confirm its role.
256 private: std::istringstream inputStream;
// NOTE(review): no use of putBackBuffer is visible in this listing — confirm its role.
257 private: std::stack<char32_t> putBackBuffer;
// Offset into `text` where the current token starts.
258 private:
int currentStartOffset;
// Offset into `text` just past the characters read so far.
259 private:
int currentEndOffset;
// Tokens recorded so far.
260 private: std::vector<TokenT> tokens;
// Start offset of each recorded token, parallel to `tokens`.
261 private: std::vector<unsigned int> startOffsets;
266 #endif // COM_BORA_SOFTWARE__BALAU_LANG__ABSTRACT_SCANNER
Thrown when a parser encounters invalid syntax.
Definition: ParsingExceptions.hpp:28
Base class for scanners.
Definition: AbstractScanner.hpp:50
ScannedTokens< TokenT > scan()
Scan the input and return a ScannedTokens data structure.
Definition: AbstractScanner.hpp:54
#define ThrowBalauException(ExceptionClass,...)
Throw a Balau style exception, with implicit file and line number, and optional stacktrace.
Definition: BalauException.hpp:45
Encapsulation of a set of tokens, text, and offsets plus various API wrappers.
Information on the span of some source code text.
Definition: CodeSpan.hpp:91
static char32_t getNextUtf8(const std::string_view &text, int &offset)
Get the next code point from the UTF-8 string view.
Definition: Character.hpp:179
static bool isBlank(char32_t c)
Is the specified code point a character that visibly separates words on a line?
Definition: Character.hpp:113
Encapsulation of a set of language tokens, source text, and start offsets.
Definition: ScannedTokens.hpp:47
Parsing tools and parser implementations.
Definition: AbstractScanner.hpp:27
Balau::U8String< AllocatorT > toString(const CodeSpan &codeSpan)
Print the supplied code span as a UTF-8 string.
Definition: CodeSpan.hpp:235
A position in a piece of multi-line text.
static CodeSpan determineCodeSpan(const ScannedTokens< TokenT > &scannedTokens, unsigned int index)
Utility to determine the overall code span for the specified index.
Definition: ScannedTokens.hpp:199
static void retreatUtf8(const std::string_view &text, int &offset)
Retreat the supplied offset from one code point boundary to the previous one.
Definition: Character.hpp:274
A read-only resource which is read as bytes.