ScannedTokens.hpp
Go to the documentation of this file.
1 // @formatter:off
2 //
3 // Balau core C++ library
4 //
5 // Copyright (C) 2008 Bora Software (contact@borasoftware.com)
6 //
7 // Licensed under the Boost Software License - Version 1.0 - August 17th, 2003.
8 // See the LICENSE file for the full license text.
9 //
10 
16 
17 #ifndef COM_BORA_SOFTWARE__BALAU_LANG__SCANNED_TOKENS
18 #define COM_BORA_SOFTWARE__BALAU_LANG__SCANNED_TOKENS
19 
23 #include <Balau/Dev/Assert.hpp>
24 #include <Balau/Util/Strings.hpp>
25 
26 #include <stack>
27 
28 #pragma clang diagnostic push
29 #pragma ide diagnostic ignored "OCUnusedGlobalDeclarationInspection"
30 
31 namespace Balau::Lang {
32 
///
/// Policy that selects which tokens the scanner API adaptor skips automatically.
///
/// The enumerator order is significant only in that it fixes the underlying values.
///
enum class WhitespaceMode {
	/// Skip Blank, LineBreak, and CommentLine tokens.
	ConsumeWhitespaceAndComments,

	/// Skip Blank and LineBreak tokens.
	ConsumeWhitespace,

	/// Skip Blank and CommentLine tokens.
	ConsumeBlanksAndComments,

	/// Skip Blank tokens.
	ConsumeBlanks,

	/// Skip LineBreak and CommentLine tokens.
	ConsumeLineBreaksAndComments,

	/// Skip LineBreak tokens.
	ConsumeLineBreaks,

	/// Skip CommentLine tokens.
	ConsumeComments,

	/// Skip nothing.
	DoNotConsume
};
46 
47 template <typename TokenT> class ScannedTokens;
48 template <typename TokenT> class RandomAccessScannedTokens;
49 template <typename TokenT> class ScannerApiScannedTokens;
50 template <typename TokenT> class IterativeScannedTokens;
51 
91 template <typename TokenT> class ScannedTokens final {
92  public: ScannedTokens(std::shared_ptr<const Resource::Uri> uri_,
93  std::string && text_,
94  std::vector<TokenT> && tokens_,
95  std::vector<unsigned int> && startOffsets_)
96  : uri(std::move(uri_))
97  , text(std::move(text_))
98  , tokens(tokens_)
99  , startOffsets(startOffsets_) {
100  Assert::assertion(tokens.back() == TokenT::EndOfFile, "ScannedTokens must end with an EndOfFile token.");
101  }
102 
106  public: std::string_view getText(unsigned int index) {
107  if (index >= tokens.size()) {
108  return std::string_view(text.data() + text.length(), 0);
109  } else if (index == tokens.size() - 1) {
110  return std::string_view(
111  text.data() + startOffsets[index], text.length() - startOffsets[index]
112  );
113  } else {
114  return std::string_view(
115  text.data() + startOffsets[index]
116  , startOffsets[index + 1] - startOffsets[index]
117  );
118  }
119  }
120 
128  public: std::string && moveTextOut() {
129  return std::move(text);
130  }
131 
137  public: std::shared_ptr<const Resource::Uri> getUri() {
138  return uri;
139  }
140 
142 
143  private: std::shared_ptr<const Resource::Uri> uri;
144  private: std::string text;
145  private: std::vector<TokenT> tokens;
146  private: std::vector<unsigned int> startOffsets;
147 
148  friend class ScannerApiScannedTokens<TokenT>;
149  friend class IterativeScannedTokens<TokenT>;
150  friend class RandomAccessScannedTokens<TokenT>;
151 };
152 
154 
164 template <typename TokenT> class RandomAccessScannedTokens final {
170  public: explicit RandomAccessScannedTokens(ScannedTokens<TokenT> && scannedTokens_)
171  : scannedTokens(std::move(scannedTokens_))
172  , codeSpans(buildCodeSpans(scannedTokens)) {}
173 
179  public: const ScannedTokens<TokenT> & getScannedTokens() const {
180  return scannedTokens;
181  }
182 
188  public: const std::vector<CodeSpan> & getCodeSpans() const {
189  return codeSpans;
190  }
191 
199  public: static CodeSpan determineCodeSpan(const ScannedTokens<TokenT> & scannedTokens,
200  unsigned int index) {
201  return determineCodeSpan(scannedTokens.text, scannedTokens.tokens, scannedTokens.startOffsets, index);
202  }
203 
211  public: static CodeSpan determineCodeSpan(const std::string & text,
212  const std::vector<TokenT> & tokens,
213  const std::vector<unsigned int> & startOffsets,
214  unsigned int index) {
215  if (tokens.size() <= index) {
218  , std::string("scanned token range is 0 to ") + ::toString(tokens.size()), index
219  );
220  }
221 
222  std::vector<unsigned int> lineLengthCache;
223  CodePosition startPosition(1, 1);
224  CodePosition endPosition(1, 1);
225 
226  // tokens.size() > 0
227 
228  for (size_t thisIndex = 0; thisIndex < tokens.size() - 1; thisIndex++) {
229  determineNewEnd(
230  tokens[thisIndex]
231  , text
232  , startOffsets[thisIndex]
233  , startOffsets[thisIndex + 1]
234  , endPosition
235  , lineLengthCache
236  );
237 
238  startPosition = endPosition;
239 
240  if (index == thisIndex) {
241  return CodeSpan(startPosition, endPosition);
242  }
243  }
244 
245  determineNewEnd(
246  tokens[tokens.size() - 1]
247  , text
248  , startOffsets[tokens.size() - 1]
249  , (unsigned int) text.length()
250  , endPosition
251  , lineLengthCache
252  );
253 
254  return CodeSpan(startPosition, endPosition);
255  }
256 
264  public: std::string && moveTextOut() {
265  return std::move(scannedTokens.moveTextOut());
266  }
267 
269 
270  // Builds all the code spans from the supplied scanned tokens.
271  private: static std::vector<CodeSpan> buildCodeSpans(const ScannedTokens<TokenT> & scannedTokens) {
272  const std::vector<TokenT> & tokens = scannedTokens.tokens;
273  const std::vector<unsigned int> & startOffsets = scannedTokens.startOffsets;
274 
275  std::vector<CodeSpan> builtCodeSpans;
276 
277  if (tokens.size() == 0) {
278  return builtCodeSpans;
279  }
280 
281  std::vector<unsigned int> lineLengthCache;
282  CodePosition startPosition(1, 1);
283  CodePosition endPosition(1, 1);
284 
285  // tokens.size() > 0
286 
287  for (size_t index = 0; index < tokens.size() - 1; index++) {
288  determineNewEnd(
289  tokens[index]
290  , scannedTokens.text
291  , startOffsets[index]
292  , startOffsets[index + 1]
293  , endPosition
294  , lineLengthCache
295  );
296 
297  builtCodeSpans.emplace_back(CodeSpan(startPosition, endPosition));
298 
299  startPosition = endPosition;
300  }
301 
302  determineNewEnd(
303  tokens[tokens.size() - 1]
304  , scannedTokens.text
305  , startOffsets[tokens.size() - 1]
306  , (unsigned int) scannedTokens.text.length()
307  , endPosition
308  , lineLengthCache
309  );
310 
311  builtCodeSpans.emplace_back(CodeSpan(startPosition, endPosition));
312 
313  return builtCodeSpans;
314  }
315 
316  // Updates in place the supplied end position for advancing.
317  // Also updates the line length cache when a line break is encountered.
318  private: static void determineNewEnd(TokenT token,
319  const std::string & fullText,
320  unsigned int thisOffset,
321  unsigned int nextOffset,
322  CodePosition & endPosition,
323  std::vector<unsigned int> & lineLengthCache) {
324  static const std::regex endLineBreakRegex = std::regex("(\n\r|\r\n|\n|\r)$");
325 
326  if (token == TokenT::LineBreak || token == TokenT::CommentLine || token == TokenT::CommentBlock) {
327  const std::string thisText = fullText.substr(thisOffset, nextOffset - thisOffset);
328  const std::vector<size_t> lineLengths = Util::Strings::lineLengths(thisText);
329  const bool hasCurrentLine = !std::regex_search(thisText, endLineBreakRegex);
330 
332 
334  !lineLengths.empty()
335  , [&] () { return std::string(
336  "A line break token was supplied in the determineNewEnd function"
337  "call but no line end was found in the supplied text. "
338  "Start offset = "
339  ) + toString(thisOffset) + ", end offset = " +
340  toString(nextOffset) + ", token text = " + thisText; }
341  );
342 
343  // The first line length is added to the current line length, then
344  // the intermediate line lengths are added, then the current line
345  // length becomes the current line length.
346 
347  // First.
348  endPosition.column += lineLengths[0];
349  ++endPosition.line;
350  lineLengthCache.push_back(endPosition.column - 1); // one indexed column position
351 
352  if (hasCurrentLine) {
353  // Intermediate.
354  for (size_t m = 1; m < lineLengths.size() - 1; m++) {
355  lineLengthCache.push_back((unsigned int) lineLengths[m]);
356  ++endPosition.line;
357  }
358 
359  // Current.
360  endPosition.column = 1 + (unsigned int) lineLengths.back();
361  } else {
362  // Intermediate.
363  for (size_t m = 1; m < lineLengths.size(); m++) {
364  lineLengthCache.push_back((unsigned int) lineLengths[m]);
365  ++endPosition.line;
366  }
367 
368  // Current.
369  endPosition.column = 1;
370  }
371  } else {
372  endPosition.column += nextOffset - thisOffset;
373  }
374  }
375 
376  // Updates in place the supplied start position when retreating.
377  // Also updates the line length cache when a line break is encountered.
378  private: static void determineNewStart(TokenT token,
379  const std::string & fullText,
380  unsigned int thisOffset,
381  unsigned int nextOffset,
382  CodePosition & startPosition,
383  std::vector<unsigned int> & lineLengthCache) {
384  static const std::regex lineBreakRegex = std::regex("\n\r|\r\n|\n|\r");
385 
386  if (token == TokenT::LineBreak || token == TokenT::CommentLine || token == TokenT::CommentBlock) {
387  const unsigned int lineBreaks = (unsigned int) Util::Strings::occurrences(fullText.substr(thisOffset, nextOffset - thisOffset), lineBreakRegex);
388 
390 
392  lineBreaks != 0
393  , [&] () {
394  return toString(
395  "A line break token was supplied in the determineNewStart function"
396  , "call but no line end was found in the supplied text. "
397  , "Start offset = ", thisOffset
398  , ", end offset = ", nextOffset
399  , ", token text = ", fullText.substr(thisOffset, nextOffset - thisOffset)
400  );
401  }
402  );
403 
404  for (size_t m = 0; m < lineBreaks; m++) {
405  startPosition.column = lineLengthCache.back() + 1; // one indexed column position
406  lineLengthCache.pop_back();
407  }
408 
409  startPosition.line -= lineBreaks;
410  } else {
411  startPosition.column -= nextOffset - thisOffset;
412  }
413  }
414 
415  friend class ScannerApiScannedTokens<TokenT>;
416 
417  private: ScannedTokens<TokenT> scannedTokens;
418  private: std::vector<CodeSpan> codeSpans;
419 };
420 
422 
434 template <typename TokenT> class ScannedToken {
438  public: TokenT token;
439 
444 
448  public: std::string_view text;
449 
451 
452  private: ScannedToken(TokenT token_, CodeSpan codeSpan_, std::string_view text_)
453  : token(token_)
454  , codeSpan(std::move(codeSpan_))
455  , text(text_) {}
456 
457  friend class ScannerApiScannedTokens<TokenT>;
458 };
459 
463 template <typename TokenT> class ScannerApiScannedTokens final {
467  public: explicit ScannerApiScannedTokens(ScannedTokens<TokenT> && scannedTokens_) noexcept
468  : scannedTokens(std::move(scannedTokens_))
469  , currentIndex(0)
470  , currentCodeSpan(1, 1, 1, 1) {
471  whitespaceModeStack.push(WhitespaceMode::DoNotConsume);
472  if (scannedTokens.tokens.size() > 1) {
474  scannedTokens.tokens[0]
475  , scannedTokens.text
476  , scannedTokens.startOffsets[0]
477  , scannedTokens.startOffsets[1]
478  , currentCodeSpan.end
479  , lineLengthCache
480  );
481  }
482  }
483 
487  public: void reset() {
488  while (!whitespaceModeStack.empty()) {
489  whitespaceModeStack.pop();
490  }
491 
492  whitespaceModeStack.push(WhitespaceMode::DoNotConsume);
493  currentIndex = 0;
494  currentCodeSpan = CodeSpan(1, 1, 1, 1);
495  lineLengthCache.clear();
496  }
497 
502  public: ScannedToken<TokenT> get() {
503  const std::vector<TokenT> & tokens = scannedTokens.tokens;
504 
505  switch (whitespaceModeStack.top()) {
506  case WhitespaceMode::ConsumeWhitespaceAndComments: {
507  while (tokens[currentIndex] == TokenT::Blank
508  || tokens[currentIndex] == TokenT::LineBreak
509  || tokens[currentIndex] == TokenT::CommentLine) {
510  advanceCurrentIndex();
511  }
512 
513  break;
514  }
515 
516  case WhitespaceMode::ConsumeWhitespace: {
517  while (tokens[currentIndex] == TokenT::Blank || tokens[currentIndex] == TokenT::LineBreak) {
518  advanceCurrentIndex();
519  }
520 
521  break;
522  }
523 
524  case WhitespaceMode::ConsumeBlanksAndComments: {
525  while (tokens[currentIndex] == TokenT::Blank || tokens[currentIndex] == TokenT::CommentLine) {
526  advanceCurrentIndex();
527  }
528 
529  break;
530  }
531 
532  case WhitespaceMode::ConsumeBlanks: {
533  while (tokens[currentIndex] == TokenT::Blank) {
534  advanceCurrentIndex();
535  }
536 
537  break;
538  }
539 
540  case WhitespaceMode::ConsumeLineBreaksAndComments: {
541  while (tokens[currentIndex] == TokenT::LineBreak || tokens[currentIndex] == TokenT::CommentLine) {
542  advanceCurrentIndex();
543  }
544 
545  break;
546  }
547 
548  case WhitespaceMode::ConsumeLineBreaks: {
549  while (tokens[currentIndex] == TokenT::LineBreak) {
550  advanceCurrentIndex();
551  }
552 
553  break;
554  }
555 
556  case WhitespaceMode::ConsumeComments: {
557  while (tokens[currentIndex] == TokenT::CommentLine) {
558  advanceCurrentIndex();
559  }
560 
561  break;
562  }
563 
564  default: { // WhitespaceMode::DoNotConsume
565  break;
566  }
567  }
568 
569  return ScannedToken<TokenT>(
570  tokens[currentIndex], currentCodeSpan, scannedTokens.getText(currentIndex)
571  );
572  }
573 
579  public: void consume() {
580  advanceCurrentIndex();
581  }
582 
593  public: void expect(const TokenT token, std::string_view errorMessage) {
594  get();
595 
596  if (scannedTokens.tokens[currentIndex] == token) {
597  if (currentIndex < scannedTokens.tokens.size() - 1) {
598  advanceCurrentIndex();
599  }
600  } else {
601  ThrowBalauException(Exception::SyntaxErrorException, std::string(errorMessage), getCurrentCodeSpan());
602  }
603  }
604 
621  public: template <template <typename ...> class ContainerT, typename ... ArgT, typename ReportT>
622  bool expect(const TokenT token,
623  ContainerT<ReportT, ArgT ...> & container,
624  const size_t maxErrorCount,
625  const std::function<ReportT (const TokenT &, const CodeSpan &, size_t)> & errorReport) {
626  get();
627 
628  if (scannedTokens.tokens[currentIndex] == token) {
629  if (currentIndex < scannedTokens.tokens.size() - 1) {
630  advanceCurrentIndex();
631  }
632 
633  return true;
634  } else {
635  auto error = errorReport(token, getCurrentCodeSpan(), container.size());
636 
637  if (container.size() < maxErrorCount) {
638  container.push_back(errorReport(token, getCurrentCodeSpan(), container.size()));
639  }
640 
641  return false;
642  }
643  }
644 
653  public: void expect(const std::vector<TokenT> & tokens, std::string_view errorMessage) {
654  get();
655 
656  if (std::find(tokens.begin(), tokens.end(), scannedTokens.tokens[currentIndex]) != tokens.end()) {
657  advanceCurrentIndex();
658  } else {
659  ThrowBalauException(Exception::SyntaxErrorException, std::string(errorMessage), getCurrentCodeSpan());
660  }
661  }
662 
679  public: template <template <typename ...> class TokenContainerT, template <typename ...> class ContainerT, typename ... TokenArgT, typename ... ArgT, typename ReportT>
680  bool expect(const TokenContainerT<TokenT, TokenArgT ...> & tokens,
681  ContainerT<ReportT, ArgT ...> & container,
682  const size_t maxErrorCount,
683  const std::function<ReportT (const TokenContainerT<TokenT, TokenArgT ...> &, const CodeSpan &, size_t)> & errorReport) {
684  get();
685 
686  if (std::find(tokens.begin(), tokens.end(), scannedTokens.tokens[currentIndex]) != tokens.end()) {
687  advanceCurrentIndex();
688  return true;
689  } else {
690  if (container.size() < maxErrorCount) {
691  container.push_back(errorReport(tokens, getCurrentCodeSpan(), container.size()));
692  }
693 
694  return false;
695  }
696  }
697 
701  public: class Marker {
702  public: Marker(const Marker & rhs) noexcept : index(rhs.index) {}
703 
704  public: Marker(Marker && rhs) noexcept : index(rhs.index) {}
705 
706  public: Marker & operator = (const Marker & rhs) noexcept {
707  index = rhs.index;
708  return *this;
709  }
710 
711  public: Marker & operator = (Marker && rhs) noexcept {
712  index = rhs.index;
713  return *this;
714  }
715 
716  private: explicit Marker(size_t index_) : index(index_) {}
717 
718  private: size_t index;
719 
720  friend class ScannerApiScannedTokens;
721  };
722 
723  public: Marker mark() const {
724  return Marker(currentIndex);
725  }
726 
727  public: void putBack(const Marker & marker) {
728  if (marker.index > currentIndex) {
731  , ::toString("Marker index (", marker.index, ") is greater than current index (", currentIndex, ")")
732  , marker.index
733  );
734  }
735 
736  const auto steps = currentIndex - marker.index;
737 
738  for (size_t m = 0; m < steps; m++) {
739  putBack();
740  }
741  }
742 
749  public: void putBack() {
750  const std::vector<TokenT> & tokens = scannedTokens.tokens;
751 
752  retreatCurrentIndex();
753 
754  switch (whitespaceModeStack.top()) {
755  case WhitespaceMode::ConsumeWhitespaceAndComments: {
756  while (currentIndex > 0 && (tokens[currentIndex - 1] == TokenT::Blank || tokens[currentIndex - 1] == TokenT::LineBreak || tokens[currentIndex - 1] == TokenT::CommentLine)) {
757  retreatCurrentIndex();
758  }
759 
760  break;
761  }
762 
763  case WhitespaceMode::ConsumeWhitespace: {
764  while (currentIndex > 0 && (tokens[currentIndex - 1] == TokenT::Blank || tokens[currentIndex - 1] == TokenT::LineBreak)) {
765  retreatCurrentIndex();
766  }
767 
768  break;
769  }
770 
771  case WhitespaceMode::ConsumeBlanksAndComments: {
772  while (currentIndex > 0 && (tokens[currentIndex - 1] == TokenT::Blank || tokens[currentIndex - 1] == TokenT::CommentLine)) {
773  retreatCurrentIndex();
774  }
775 
776  break;
777  }
778 
779  case WhitespaceMode::ConsumeBlanks: {
780  while (currentIndex > 0 && tokens[currentIndex - 1] == TokenT::Blank) {
781  retreatCurrentIndex();
782  }
783 
784  break;
785  }
786 
787  case WhitespaceMode::ConsumeLineBreaksAndComments: {
788  while (currentIndex > 0 && (tokens[currentIndex - 1] == TokenT::LineBreak || tokens[currentIndex - 1] == TokenT::CommentLine)) {
789  retreatCurrentIndex();
790  }
791 
792  break;
793  }
794 
795  case WhitespaceMode::ConsumeLineBreaks: {
796  while (currentIndex > 0 && (tokens[currentIndex - 1] == TokenT::LineBreak)) {
797  retreatCurrentIndex();
798  }
799 
800  break;
801  }
802 
803  case WhitespaceMode::ConsumeComments: {
804  while (currentIndex > 0 && (tokens[currentIndex - 1] == TokenT::CommentLine)) {
805  retreatCurrentIndex();
806  }
807 
808  break;
809  }
810 
811  default: { // WhitespaceMode::DoNotConsume
812  break;
813  }
814  }
815  }
816 
820  public: CodeSpan getCurrentCodeSpan() const {
821  return currentCodeSpan;
822  }
823 
827  public: bool currentIsBlank() const {
828  return currentIndex < scannedTokens.tokens.size() && scannedTokens.tokens[currentIndex] == TokenT::Blank;
829  }
830 
834  public: bool currentIsLineBreak() const {
835  return currentIndex < scannedTokens.tokens.size() && scannedTokens.tokens[currentIndex] == TokenT::LineBreak;
836  }
837 
841  public: bool currentIsWhitespace() const {
842  if (currentIndex >= scannedTokens.tokens.size()) {
843  return false;
844  }
845 
846  const TokenT token = scannedTokens.tokens[currentIndex];
847  return token == TokenT::Blank || token == TokenT::LineBreak;
848  }
849 
853  public: bool isBlank(const Marker & marker) const {
854  return scannedTokens.tokens[marker.index] == TokenT::Blank;
855  }
856 
860  public: bool isLineBreak(const Marker & marker) const {
861  return scannedTokens.tokens[marker.index] == TokenT::LineBreak;
862  }
863 
867  public: bool isWhitespace(const Marker & marker) const {
868  const TokenT token = scannedTokens.tokens[marker.index];
869  return token == TokenT::Blank || token == TokenT::LineBreak;
870  }
871 
875  public: void pushWhitespaceMode(WhitespaceMode newMode) {
876  whitespaceModeStack.push(newMode);
877  }
878 
882  public: void popWhitespaceMode() {
883  whitespaceModeStack.pop();
884  }
885 
889  public: size_t size() const {
890  return scannedTokens.tokens.size();
891  }
892 
900  public: std::string && moveTextOut() {
901  return std::move(scannedTokens.moveTextOut());
902  }
903 
909  public: std::shared_ptr<const Resource::Uri> getUri() {
910  return scannedTokens.getUri();
911  }
912 
914 
915  private: void advanceCurrentIndex() {
916  if (currentIndex == scannedTokens.tokens.size() - 1) {
917  return;
918  }
919 
920  currentCodeSpan.start = currentCodeSpan.end;
921  ++currentIndex;
922 
923  if (currentIndex < scannedTokens.tokens.size() - 1) {
924  // Calculate the new end point.
926  scannedTokens.tokens[currentIndex]
927  , scannedTokens.text
928  , scannedTokens.startOffsets[currentIndex]
929  , scannedTokens.startOffsets[currentIndex + 1]
930  , currentCodeSpan.end
931  , lineLengthCache
932  );
933  }
934  }
935 
936  private: void retreatCurrentIndex() {
937  if (currentIndex == 0) {
938  return;
939  }
940 
941  currentCodeSpan.end = currentCodeSpan.start;
942 
944  scannedTokens.tokens[currentIndex - 1]
945  , scannedTokens.text
946  , scannedTokens.startOffsets[currentIndex - 1]
947  , scannedTokens.startOffsets[currentIndex]
948  , currentCodeSpan.start
949  , lineLengthCache
950  );
951 
952  --currentIndex;
953  }
954 
955  private: ScannedTokens<TokenT> scannedTokens;
956  private: unsigned int currentIndex;
957  private: CodeSpan currentCodeSpan;
958  private: std::stack<WhitespaceMode> whitespaceModeStack;
959  private: std::vector<unsigned int> lineLengthCache;
960 };
961 
963 
967 template <typename TokenT> class IterativeScannedTokens final {
971  public: class ConstElement {
972  private: const IterativeScannedTokens<TokenT> * parent;
973  private: const size_t index;
974 
975  public: ConstElement(const ScannedTokens<TokenT> * parent_, size_t index_)
976  : parent(parent_)
977  , index(index_) {}
978 
979  public: TokenT token() const {
980  return parent->getToken(index);
981  }
982 
983  public: unsigned int startOffset() const {
984  return parent->getStartOffset(index);
985  }
986  };
987 
991  public: class const_iterator {
992  friend class IterativeScannedTokens<TokenT>;
993 
994  private: const IterativeScannedTokens<TokenT> * parent;
995  private: size_t index;
996 
997  private: explicit const_iterator(const ScannedTokens<TokenT> * parent_, size_t index_)
998  : parent(parent_)
999  , index(index_) {}
1000 
1001  public: const_iterator(const const_iterator & copy)
1002  : parent(copy.parent)
1003  , index(copy.index) {}
1004 
1005  public: const_iterator & operator = (const const_iterator & copy) {
1006  parent = copy.parent;
1007  index = copy.index;
1008  return *this;
1009  }
1010 
1011  public: const_iterator & operator ++ () {
1012  ++index;
1013  return *this;
1014  }
1015 
1016  public: ConstElement operator * () const {
1017  return ConstElement(parent->scannedTokens.tokens[index], parent->scannedTokens.startOffsets[index]);
1018  }
1019 
1020  public: ConstElement operator -> () const {
1021  return ConstElement(parent->scannedTokens.tokens[index], parent->scannedTokens.startOffsets[index]);
1022  }
1023 
1024  public: bool operator == (const const_iterator & rhs) const {
1025  return parent == rhs.parent && index == rhs.index;
1026  }
1027 
1028  public: bool operator != (const const_iterator & rhs) const {
1029  return ! operator == (rhs);
1030  }
1031  };
1032 
1036  public: class Element {
1037  private: IterativeScannedTokens<TokenT> * parent;
1038  private: const size_t index;
1039 
1040  public: Element(ScannedTokens<TokenT> * parent_, size_t index_)
1041  : parent(parent_)
1042  , index(index_) {}
1043 
1044  public: TokenT token() const {
1045  return parent->getToken(index);
1046  }
1047 
1048  public: TokenT & token() {
1049  return parent->getToken(index);
1050  }
1051 
1052  public: unsigned int startOffset() const {
1053  return parent->getStartOffset(index);
1054  }
1055 
1056  public: unsigned int & startOffset() {
1057  return parent->getStartOffset(index);
1058  }
1059  };
1060 
1064  public: class iterator {
1065  friend class IterativeScannedTokens<TokenT>;
1066 
1067  private: IterativeScannedTokens<TokenT> * parent;
1068  private: size_t index;
1069 
1070  private: explicit iterator(ScannedTokens<TokenT> * parent_, size_t index_)
1071  : parent(parent_)
1072  , index(index_) {}
1073 
1074  public: iterator(const iterator & copy)
1075  : parent(copy.parent)
1076  , index(copy.index) {}
1077 
1078  public: iterator & operator = (const iterator & copy) {
1079  parent = copy.parent;
1080  index = copy.index;
1081  return *this;
1082  }
1083 
1084  public: iterator & operator ++ () {
1085  ++index;
1086  return *this;
1087  }
1088 
1089  public: ConstElement operator * () const {
1090  return ConstElement(parent->scannedTokens.tokens[index], parent->scannedTokens.startOffsets[index]);
1091  }
1092 
1093  public: Element operator * () {
1094  return Element(parent->scannedTokens.tokens[index], parent->scannedTokens.startOffsets[index]);
1095  }
1096 
1097  public: ConstElement operator -> () const {
1098  return ConstElement(parent->scannedTokens.tokens[index], parent->scannedTokens.startOffsets[index]);
1099  }
1100 
1101  public: Element operator -> () {
1102  return Element(parent->scannedTokens.tokens[index], parent->scannedTokens.startOffsets[index]);
1103  }
1104 
1105  public: bool operator == (const iterator & rhs) const {
1106  return parent == rhs.parent && index == rhs.index;
1107  }
1108 
1109  public: bool operator != (const iterator & rhs) const {
1110  return ! operator == (rhs);
1111  }
1112  };
1113 
1114  friend class const_iterator;
1115  friend class iterator;
1116 
1122  public: explicit IterativeScannedTokens(ScannedTokens<TokenT> & scannedTokens_)
1123  : scannedTokens(scannedTokens_) {}
1124 
1128  public: void addToken(TokenT token, unsigned int startOffset) {
1129  scannedTokens.tokens.emplace_back(token);
1130  scannedTokens.startOffsets.emplace_back(startOffset);
1131  }
1132 
1136  public: size_t size() const {
1137  return scannedTokens.tokens.size();
1138  }
1139 
1143  public: TokenT getToken(unsigned int index) const {
1144  return scannedTokens.tokens[index];
1145  }
1146 
1150  public: TokenT & getToken(unsigned int index) {
1151  return scannedTokens.tokens[index];
1152  }
1153 
1154 
1158  public: unsigned int getStartOffset(unsigned int index) const {
1159  return scannedTokens.startOffsets[index];
1160  }
1161 
1165  public: unsigned int & getStartOffset(unsigned int index) {
1166  return scannedTokens.startOffsets[index];
1167  }
1168 
1172  public: const std::vector<TokenT> & getTokens() const {
1173  return scannedTokens.tokens;
1174  }
1175 
1179  public: const std::vector<unsigned int> & getStartOffsets() const {
1180  return scannedTokens.startOffsets;
1181  }
1182 
1186  public: const_iterator begin() const {
1187  return const_iterator(this, 0);
1188  }
1189 
1193  public: iterator begin() {
1194  return iterator(this, 0);
1195  }
1196 
1200  public: const_iterator end() const {
1201  return const_iterator(this, scannedTokens.tokens.size());
1202  }
1203 
1207  public: iterator end() {
1208  return iterator(this, scannedTokens.tokens.size());
1209  }
1210 
1218  public: std::string && moveTextOut() {
1219  return std::move(scannedTokens.moveTextOut());
1220  }
1221 
1223 
1224  private: ScannedTokens<TokenT> scannedTokens;
1225 };
1226 
1227 } // namespace Balau::Lang
1228 
1229 #pragma clang diagnostic pop
1230 
1231 #endif // COM_BORA_SOFTWARE__BALAU_LANG__SCANNED_TOKENS
Thrown when a parser incurs invalid syntax.
Definition: ParsingExceptions.hpp:28
bool operator==(const BalauException &lhs, const BalauException &rhs)
Base class comparison function for Balau exceptions.
Definition: BalauException.hpp:112
bool isWhitespace(const Marker &marker) const
Is the marked token whitespace?
Definition: ScannedTokens.hpp:867
TokenT token
The token type for this scanned token.
Definition: ScannedTokens.hpp:438
bool currentIsLineBreak() const
Is the next token a line break?
Definition: ScannedTokens.hpp:834
const std::vector< CodeSpan > & getCodeSpans() const
Get the pre-calculated code spans of each scanned token.
Definition: ScannedTokens.hpp:188
std::shared_ptr< const Resource::Uri > getUri()
Get the input uri.
Definition: ScannedTokens.hpp:137
unsigned int column
The column of the code position (1-indexed).
Definition: CodeSpan.hpp:39
A ScannedTokens adaptor that provides an iteration API.
Definition: ScannedTokens.hpp:50
TokenT getToken(unsigned int index) const
Get the token with the specified index.
Definition: ScannedTokens.hpp:1143
void reset()
Reset the scanner API state to the beginning of the scanned tokens.
Definition: ScannedTokens.hpp:487
void pushWhitespaceMode(WhitespaceMode newMode)
Push the supplied whitespace mode onto the whitespace mode stack.
Definition: ScannedTokens.hpp:875
static CodeSpan determineCodeSpan(const std::string &text, const std::vector< TokenT > &tokens, const std::vector< unsigned int > &startOffsets, unsigned int index)
Utility to determine the overall code span for the specified index.
Definition: ScannedTokens.hpp:211
#define ThrowBalauException(ExceptionClass,...)
Throw a Balau style exception, with implicit file and line number, and optional stacktrace.
Definition: BalauException.hpp:45
A position in a piece of multi-line text.
Definition: CodeSpan.hpp:30
static std::vector< size_t > lineLengths(const StringT< char, T ... > &text, const std::regex &lineBreakRegex, bool includeExtraTextAsLine=true)
Determine the lengths of the lines in bytes, given the line break regular expression.
Definition: Strings.hpp:298
std::shared_ptr< const Resource::Uri > getUri()
Get the input uri.
Definition: ScannedTokens.hpp:909
IterativeScannedTokens(ScannedTokens< TokenT > &scannedTokens_)
Create an iterative adaptor from the supplied scanned tokens.
Definition: ScannedTokens.hpp:1122
void expect(const TokenT token, std::string_view errorMessage)
Expect the supplied token.
Definition: ScannedTokens.hpp:593
Scanned tokens const iterator.
Definition: ScannedTokens.hpp:991
bool currentIsBlank() const
Is the next token a blank?
Definition: ScannedTokens.hpp:827
unsigned int line
The line of the code position (1-indexed).
Definition: CodeSpan.hpp:34
Information on the span of some source code text.
Definition: CodeSpan.hpp:91
Balau exceptions for containers.
Balau::U8String< AllocatorT > toString(const BalauException &e)
Base class toString<AllocatorT> function for Balau exceptions.
Definition: BalauException.hpp:122
STL namespace.
TokenT & getToken(unsigned int index)
Get a reference to the token with the specified index.
Definition: ScannedTokens.hpp:1150
size_t size() const
Get the number of tokens.
Definition: ScannedTokens.hpp:889
CodeSpan getCurrentCodeSpan() const
Get the code span of the current token.
Definition: ScannedTokens.hpp:820
void consume()
Consume the current token.
Definition: ScannedTokens.hpp:579
Encapsulation of a set of language tokens, source text, and start offsets.
Definition: ScannedTokens.hpp:47
std::string_view getText(unsigned int index)
Get the text of the token with the specified index.
Definition: ScannedTokens.hpp:106
std::string && moveTextOut()
Move the input text string to its final destination.
Definition: ScannedTokens.hpp:128
bool isLineBreak(const Marker &marker) const
Is the marked token a line break?
Definition: ScannedTokens.hpp:860
Parsing tools and parser implementations.
Definition: AbstractScanner.hpp:27
void expect(const std::vector< TokenT > &tokens, std::string_view errorMessage)
Expect one of the supplied tokens.
Definition: ScannedTokens.hpp:653
const_iterator end() const
Get a const iterator positioned at the end of the data.
Definition: ScannedTokens.hpp:1200
A position marker that can be obtained at any point during parsing, in order to put back multiple tok...
Definition: ScannedTokens.hpp:701
Immutable, random access wrapper over a ScannedTokens data structure.
Definition: ScannedTokens.hpp:48
Balau::U8String< AllocatorT > toString(const CodeSpan &codeSpan)
Print the supplied code span as a UTF-8 string.
Definition: CodeSpan.hpp:235
A position in a piece of multi-line text.
static CodeSpan determineCodeSpan(const ScannedTokens< TokenT > &scannedTokens, unsigned int index)
Utility to determine the overall code span for the specified index.
Definition: ScannedTokens.hpp:199
Scanned tokens non-const iterator.
Definition: ScannedTokens.hpp:1064
Utilities for strings.
unsigned int getStartOffset(unsigned int index) const
Get the start offset of the token with the specified index.
Definition: ScannedTokens.hpp:1158
Returned by const iterators when accessing elements.
Definition: ScannedTokens.hpp:971
RandomAccessScannedTokens(ScannedTokens< TokenT > &&scannedTokens_)
Instantiate a random access scanned tokens data structure.
Definition: ScannedTokens.hpp:170
std::string && moveTextOut()
Move the input text string to its final destination.
Definition: ScannedTokens.hpp:1218
bool currentIsWhitespace() const
Is the current token whitespace?
Definition: ScannedTokens.hpp:841
A ScannedTokens adaptor that provides a traditional scanner API.
Definition: ScannedTokens.hpp:49
Thrown when a specified index is not in the valid range.
Definition: ContainerExceptions.hpp:27
bool expect(const TokenContainerT< TokenT, TokenArgT ... > &tokens, ContainerT< ReportT, ArgT ... > &container, const size_t maxErrorCount, const std::function< ReportT(const TokenContainerT< TokenT, TokenArgT ... > &, const CodeSpan &, size_t)> &errorReport)
Expect one of the supplied tokens.
Definition: ScannedTokens.hpp:680
const std::vector< TokenT > & getTokens() const
Get the token vector.
Definition: ScannedTokens.hpp:1172
Represents a single scanned token in the data structure.
Definition: ScannedTokens.hpp:434
CodeSpan codeSpan
The start and end positions of the token's text within the multi-line source file.
Definition: ScannedTokens.hpp:443
iterator begin()
Get a non-const iterator positioned at the beginning of the data.
Definition: ScannedTokens.hpp:1193
const std::vector< unsigned int > & getStartOffsets() const
Get the start offsets vector.
Definition: ScannedTokens.hpp:1179
size_t size() const
Get the number of tokens.
Definition: ScannedTokens.hpp:1136
Assertion utilities for development purposes.
void putBack()
Put back the current token.
Definition: ScannedTokens.hpp:749
std::string && moveTextOut()
Move the input text string to its final destination.
Definition: ScannedTokens.hpp:264
WhitespaceMode
Determines the whitespace scanning mode used in the scanning API.
Definition: ScannedTokens.hpp:36
unsigned int & getStartOffset(unsigned int index)
Get a reference to the start offset of the token with the specified index.
Definition: ScannedTokens.hpp:1165
Returned by non-const iterators when accessing elements.
Definition: ScannedTokens.hpp:1036
std::string_view text
UTF-8 formatted string of the token.
Definition: ScannedTokens.hpp:448
void addToken(TokenT token, unsigned int startOffset)
Add a token to the data structure, specifying the token and the start offset into the text...
Definition: ScannedTokens.hpp:1128
const_iterator begin() const
Get a const iterator positioned at the beginning of the data.
Definition: ScannedTokens.hpp:1186
bool isBlank(const Marker &marker) const
Is the marked token a blank?
Definition: ScannedTokens.hpp:853
ScannerApiScannedTokens(ScannedTokens< TokenT > &&scannedTokens_) noexcept
Create a scanner API scanned tokens instance.
Definition: ScannedTokens.hpp:467
std::string && moveTextOut()
Move the input text string to its final destination.
Definition: ScannedTokens.hpp:900
static size_t occurrences(const CharT *str, const SubstrT &substring)
How many non-overlapping occurrences of the second string are found in the first string?
Definition: Strings.hpp:208
Balau exceptions for the parser tools.
const ScannedTokens< TokenT > & getScannedTokens() const
Get the scanned tokens structure that this random access wrapper contains.
Definition: ScannedTokens.hpp:179
static void assertion(bool test, StringFunctionT function)
If the bug test assertion fails, abort after logging the message supplied by the function.
Definition: Assert.hpp:49
bool expect(const TokenT token, ContainerT< ReportT, ArgT ... > &container, const size_t maxErrorCount, const std::function< ReportT(const TokenT &, const CodeSpan &, size_t)> &errorReport)
Expect the supplied token.
Definition: ScannedTokens.hpp:622
void popWhitespaceMode()
Pop the top of the whitespace mode stack.
Definition: ScannedTokens.hpp:882
iterator end()
Get a non-const iterator positioned at the end of the data.
Definition: ScannedTokens.hpp:1207