From 99ee1d33c09d79821af6f0dbf11b74e31105ed73 Mon Sep 17 00:00:00 2001 From: Christopher Neal Date: Wed, 1 Apr 2026 01:16:30 -0400 Subject: [PATCH 1/6] comments added to parse.cc, indentation adjusted, and tests added for parse.cc functions. --- quickTest/Tools/test_parse.cc | 234 +++++++++++++++++++++++++++++ src/Tools/parse.cc | 274 +++++++++++++++++++++------------- 2 files changed, 407 insertions(+), 101 deletions(-) create mode 100644 quickTest/Tools/test_parse.cc diff --git a/quickTest/Tools/test_parse.cc b/quickTest/Tools/test_parse.cc new file mode 100644 index 00000000..6636b277 --- /dev/null +++ b/quickTest/Tools/test_parse.cc @@ -0,0 +1,234 @@ +#include +#include +#include +#include + +#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN +#include + +#include + +#ifndef LOCI_ENABLE_PARSE_KNOWN_BUG_TESTS +#define LOCI_ENABLE_PARSE_KNOWN_BUG_TESTS 0 +#endif + +namespace { +std::string read_remaining(std::istream &in) { + return std::string(std::istreambuf_iterator(in), + std::istreambuf_iterator()); +} +} // namespace + +//---------------------------------------------------------------------------- +// Potential Bugs +//---------------------------------------------------------------------------- +// +// These are cases that appear to demonstrate behavior that is not intended. +// +// Enable them with LOCI_ENABLE_PARSE_KNOWN_BUG_TESTS=1 when we want to +// revisit these behaviors. + +// The parser already treats "12ex" as the valid prefix "12" with "ex" left in +// the stream. Malformed exponent tails such as "e+" and "e-" should behave the +// same way instead of being silently consumed. +TEST_CASE("known bug: get_real should not consume incomplete exponent suffixes [known-bug]" * + doctest::skip(LOCI_ENABLE_PARSE_KNOWN_BUG_TESTS == 0)) { + struct RealCase { + const char *text; + const char *rest; + }; + + const std::vector cases = { + {"1e+", "e+"}, + {"1e-", "e-"}, + }; + + for(const auto &tc : cases) { + CAPTURE(tc.text); + std::istringstream in(tc.text); + + CHECK(Loci::parse::is_real(in)); + CHECK(Loci::parse::get_real(in) == doctest::Approx(1.0)); + CHECK(read_remaining(in) == tc.rest); + } +} + +// is_int() and is_real() currently treat a leading + or - as sufficient to +// begin a number, even when no digits follow. That leaks upward into callers +// like options_list and UNIT_type, which can accept "+" or "-" as zero. +TEST_CASE("known bug: bare signs should not be classified as numeric literals [known-bug]" * + doctest::skip(LOCI_ENABLE_PARSE_KNOWN_BUG_TESTS == 0)) { + const std::vector cases = { + "+", + "-", + "+foo", + "-foo", + }; + + for(const auto &tc : cases) { + CAPTURE(tc); + std::istringstream int_in(tc); + std::istringstream real_in(tc); + + CHECK_FALSE(Loci::parse::is_int(int_in)); + CHECK_FALSE(Loci::parse::is_real(real_in)); + } +} + +TEST_CASE("kill_white_space skips leading whitespace and line comments") { + std::istringstream in(" \t\n// first comment\n // second comment\r\nname"); + + Loci::parse::kill_white_space(in); + + CHECK(in.peek() == 'n'); + CHECK(Loci::parse::get_name(in) == "name"); + CHECK(in.peek() == EOF); +} + +TEST_CASE("kill_white_space preserves a slash that does not start a line comment") { + std::istringstream in(" /value"); + + Loci::parse::kill_white_space(in); + + CHECK(in.peek() == '/'); + CHECK_FALSE(Loci::parse::is_token(in, "//")); + CHECK(read_remaining(in) == "/value"); +} + +TEST_CASE("is_name and get_name parse identifiers and leave trailing delimiters") { + std::istringstream in(" // comment\n_alpha42+rest"); + + CHECK(Loci::parse::is_name(in)); + CHECK(Loci::parse::get_name(in) == "_alpha42"); + CHECK(in.peek() == '+'); +} + +TEST_CASE("get_name returns empty string without consuming non-name input") { + std::istringstream in(" 9alpha"); + + CHECK_FALSE(Loci::parse::is_name(in)); + CHECK(Loci::parse::get_name(in).empty()); + CHECK(in.peek() == '9'); +} + +TEST_CASE("is_int and get_int parse signed integers") { + std::istringstream in(" \t+42,rest"); + + CHECK(Loci::parse::is_int(in)); + CHECK(Loci::parse::get_int(in) == 42); + CHECK(in.peek() == ','); +} + +TEST_CASE("get_int returns zero without consuming invalid input") { + std::istringstream in("alpha"); + + CHECK_FALSE(Loci::parse::is_int(in)); + CHECK(Loci::parse::get_int(in) == 0); + CHECK(in.peek() == 'a'); +} + +TEST_CASE("is_real and get_real parse a broad set of numeric forms") { + struct RealCase { + const char *text; + double value; + const char *rest; + }; + + const std::vector cases = { + {"0", 0.0, ""}, + {"-1.25e2", -125.0, ""}, + {"+.5", 0.5, ""}, + {"7.", 7.0, ""}, + {".75", 0.75, ""}, + {"3E-2", 0.03, ""}, + {".", 0.0, ""}, + {"12.5x", 12.5, "x"}, + }; + + for(const auto &tc : cases) { + CAPTURE(tc.text); + std::istringstream in(tc.text); + + CHECK(Loci::parse::is_real(in)); + CHECK(Loci::parse::get_real(in) == doctest::Approx(tc.value)); + CHECK(read_remaining(in) == tc.rest); + } +} + +TEST_CASE("get_real ignores invalid exponents and leaves the exponent marker in the stream") { + std::istringstream in("12ex"); + + CHECK(Loci::parse::get_real(in) == doctest::Approx(12.0)); + CHECK(in.peek() == 'e'); + CHECK(read_remaining(in) == "ex"); +} + +TEST_CASE("get_real returns zero without consuming invalid non-numeric input") { + std::istringstream in("name"); + + CHECK_FALSE(Loci::parse::is_real(in)); + CHECK(Loci::parse::get_real(in) == doctest::Approx(0.0)); + CHECK(in.peek() == 'n'); +} + +TEST_CASE("is_string and get_string parse quoted text including slashes and spaces") { + std::istringstream in(" // comment\n\"a // b c\" tail"); + + CHECK(Loci::parse::is_string(in)); + CHECK(Loci::parse::get_string(in) == "a // b c"); + + Loci::parse::kill_white_space(in); + CHECK(Loci::parse::get_name(in) == "tail"); +} + +TEST_CASE("get_string supports empty quoted strings and leaves following text") { + std::istringstream in("\"\"next"); + + CHECK(Loci::parse::get_string(in).empty()); + CHECK(Loci::parse::get_name(in) == "next"); +} + +TEST_CASE("get_string returns empty string without consuming non-string input") { + std::istringstream in("name"); + + CHECK_FALSE(Loci::parse::is_string(in)); + CHECK(Loci::parse::get_string(in).empty()); + CHECK(in.peek() == 'n'); +} + +TEST_CASE("get_string returns collected text when the closing quote is missing") { + std::istringstream in("\"unterminated"); + + CHECK(Loci::parse::get_string(in) == "unterminated"); + CHECK(in.eof()); +} + +TEST_CASE("is_token checks for a token without consuming it") { + std::istringstream in(" &&rest"); + + CHECK(Loci::parse::is_token(in, "&&")); + CHECK(in.peek() == '&'); + CHECK(Loci::parse::get_token(in, "&&")); + CHECK(Loci::parse::get_name(in) == "rest"); +} + +TEST_CASE("get_token skips leading whitespace and comments before matching") { + std::istringstream in(" // comment\n::name"); + + CHECK(Loci::parse::get_token(in, "::")); + CHECK(Loci::parse::get_name(in) == "name"); +} + +TEST_CASE("is_token and get_token restore the stream after a partial mismatch") { + { + std::istringstream in("=>"); + CHECK_FALSE(Loci::parse::is_token(in, "==")); + CHECK(read_remaining(in) == "=>"); + } + + { + std::istringstream in("=>"); + CHECK_FALSE(Loci::parse::get_token(in, "==")); + CHECK(read_remaining(in) == "=>"); + } +} diff --git a/src/Tools/parse.cc b/src/Tools/parse.cc index dea84fe7..3364380a 100644 --- a/src/Tools/parse.cc +++ b/src/Tools/parse.cc @@ -29,72 +29,113 @@ namespace Loci { namespace parse { - + using namespace std ; + // ----------------------------------------------------------------------- + /// @brief kill_white_space() advances an input stream past leading white + /// space and line comments beginning with // + /// + /// @param [s] input stream to be positioned at the next non-whitespace, + /// non-comment character void kill_white_space(istream &s) { - + bool flushed_comment ; do { - flushed_comment = false ; - while(!s.eof() && isspace(s.peek())) - s.get() ; - if(s.peek() == '/') { // check for comment - s.get() ; - if(s.peek() == '/') { - while(!s.eof()) { - int ch = s.get() ; - if(ch=='\n' || ch == '\r') - break ; - } - flushed_comment = true ; - } else - s.putback('/') ; - } + flushed_comment = false ; + while(!s.eof() && isspace(s.peek())) + s.get() ; + if(s.peek() == '/') { // check for comment + s.get() ; + if(s.peek() == '/') { + while(!s.eof()) { + int ch = s.get() ; + if(ch=='\n' || ch == '\r') { break ; } + } + flushed_comment = true ; + } else { + s.putback('/') ; + } + } } while(!s.eof() && flushed_comment) ; - + return ; } - + + // ----------------------------------------------------------------------- + /// @brief is_name() checks whether the next token in the stream begins + /// with a valid identifier lead character + /// + /// @param [s] input stream to inspect + /// @return true if the next non-whitespace character is alphabetic or + /// underscore bool is_name(istream &s) { - kill_white_space(s) ; - int ch = s.peek() ; - return isalpha(ch) || ch == '_' ; + kill_white_space(s) ; + int ch = s.peek() ; + return isalpha(ch) || ch == '_' ; } - + + // ----------------------------------------------------------------------- + /// @brief get_name() extracts an identifier token from the input stream + /// + /// @param [s] input stream to consume from + /// @return identifier made of alphanumeric and underscore characters, or + /// an empty string if the next token is not a valid name string get_name(istream &s) { - if(!is_name(s)) - return "" ; - string str ; - while(!s.eof() && (s.peek() != EOF) && - (isalnum(s.peek()) || (s.peek() == '_')) ) - str += s.get() ; - - return str ; + if(!is_name(s)) { return "" ; } + string str ; + while(!s.eof() && (s.peek() != EOF) && + (isalnum(s.peek()) || (s.peek() == '_')) ) { + str += s.get() ; + } + + return str ; } + // ----------------------------------------------------------------------- + /// @brief is_int() checks whether the next token in the stream can begin + /// an integer literal + /// + /// @param [s] input stream to inspect + /// @return true if the next non-whitespace character is a digit or sign bool is_int(istream &s) { - kill_white_space(s) ; - return isdigit(s.peek()) || s.peek()=='-' || s.peek()=='+' ; + kill_white_space(s) ; + return isdigit(s.peek()) || s.peek()=='-' || s.peek()=='+' ; } - + + // ----------------------------------------------------------------------- + /// @brief get_int() extracts an integer value from the input stream + /// + /// @param [s] input stream to consume from + /// @return parsed integer value, or zero if the next token does not begin + /// an integer literal long get_int(istream &s) { - if(!is_int(s)) - return 0 ; - long l = 0 ; - s >> l ; - return l ; + if(!is_int(s)) { return 0 ; } + long l = 0 ; + s >> l ; + return l ; } + // ----------------------------------------------------------------------- + /// @brief is_real() checks whether the next token in the stream can begin + /// a floating-point literal + /// + /// @param [s] input stream to inspect + /// @return true if the next non-whitespace character can start a real + /// number bool is_real(istream &s) { - kill_white_space(s) ; - const char ch = s.peek() ; - return isdigit(ch) || ch=='-' || ch=='+' || ch =='.' ; + kill_white_space(s) ; + const char ch = s.peek() ; + return isdigit(ch) || ch=='-' || ch=='+' || ch =='.' ; } - + + // ----------------------------------------------------------------------- + /// @brief get_real() extracts a floating-point value from the input stream + /// + /// @param [s] input stream to consume from + /// @return parsed floating-point value, or zero if the next token does not + /// begin a real literal double get_real(istream &s) { - if(!is_real(s)) { - return 0.0 ; - } + if(!is_real(s)) { return 0.0 ; } // First grab real into string rval string rval ; @@ -106,100 +147,131 @@ namespace Loci { // any leading digits will go in rval while(isdigit(s.peek())) { - ch = s.get() ; - leading_digit = true ; - rval += ch ; + ch = s.get() ; + leading_digit = true ; + rval += ch ; } // If there is a point, then the point and any following digits will // go into rval if(s.peek() == '.') { - ch = s.get() ; - rval += ch ; - bool trailing_digit = false ; - while(isdigit(s.peek())) { - trailing_digit = true ; - ch = s.get() ; - rval += ch ; - } - if(!leading_digit && !trailing_digit) // convert . to .0 - rval += '0' ; + ch = s.get() ; + rval += ch ; + bool trailing_digit = false ; + while(isdigit(s.peek())) { + trailing_digit = true ; + ch = s.get() ; + rval += ch ; + } + if(!leading_digit && !trailing_digit) { // convert . to .0 + rval += '0' ; + } } // If there is an exponent, check to make sure it is followed by a digit - // if it is then grab the exponent, else put back the character with + // if it is then grab the exponent, else put back the character with // unget if(s.peek() == 'e' || s.peek() == 'E') { - ch = s.get() ; - ch = s.peek() ; - if(isdigit(ch) || ch=='-' || ch=='+') { // valid exponent - rval += 'e' ; - ch = s.get() ; - rval += ch ; - while(isdigit(s.peek())) { - ch = s.get() ; - rval += ch ; - } - } else { // invalid exponent, ignore 'e' or 'E' - s.unget() ; - } + ch = s.get() ; + ch = s.peek() ; + if(isdigit(ch) || ch=='-' || ch=='+') { // valid exponent + rval += 'e' ; + ch = s.get() ; + rval += ch ; + while(isdigit(s.peek())) { + ch = s.get() ; + rval += ch ; + } + } else { // invalid exponent, ignore 'e' or 'E' + s.unget() ; + } } return atof(rval.c_str()) ; } + // ----------------------------------------------------------------------- + /// @brief is_string() checks whether the next token in the stream is a + /// quoted string literal + /// + /// @param [s] input stream to inspect + /// @return true if the next non-whitespace character is a double quote bool is_string(istream &s) { - kill_white_space(s) ; - return s.peek() == '\"' ; + kill_white_space(s) ; + return s.peek() == '\"' ; } - + + // ----------------------------------------------------------------------- + /// @brief get_string() extracts the contents of a quoted string literal + /// + /// @param [s] input stream to consume from + /// @return characters between the opening and closing double quotes, or an + /// empty string if the next token is not a string literal string get_string(istream &s) { - if(!is_string(s)) - return "" ; - string str ; -#ifdef DEBUG - if(s.eof()) - cerr << "s.eof() true in parse::get_string" << endl ; + if(!is_string(s)) { return "" ; } + string str ; +#ifdef DEBUG + if(s.eof()) { + cerr << "s.eof() true in parse::get_string" << endl ; + } #endif - s.get() ; - int ch = s.get() ; - while(ch != '\"' &&!s.eof()) { - str += ch ; - ch = s.get() ; - } + s.get() ; + int ch = s.get() ; + while(ch != '\"' &&!s.eof()) { + str += ch ; + ch = s.get() ; + } #ifdef DEBUG - if(ch!='\"') - cerr << "no closing \" in parse::get_string" << endl ; + if(ch!='\"') { + cerr << "no closing \" in parse::get_string" << endl ; + } #endif - return str ; + return str ; } + // ----------------------------------------------------------------------- + /// @brief is_token() checks whether the next token in the stream matches a + /// specific literal without consuming it + /// + /// @param [s] input stream to inspect + /// @param [token] literal token to compare against the input + /// @return true if the next characters match token exactly bool is_token(istream &s, const string &token) { kill_white_space(s) ; const int sz = token.size() ; for(int i=0;i=0;--i) + for(--i;i>=0;--i) { s.putback(token[i]) ; + } return false ; } s.get() ; } - for(int i=token.size()-1;i>=0;--i) + for(int i=token.size()-1;i>=0;--i) { s.putback(token[i]) ; + } return true ; } - + + // ----------------------------------------------------------------------- + /// @brief get_token() consumes a specific literal token from the input + /// stream if it matches exactly + /// + /// @param [s] input stream to consume from + /// @param [token] literal token expected at the current position + /// @return true if token was matched and consumed, false otherwise bool get_token(istream &s, const string &token) { kill_white_space(s) ; const int sz = token.size() ; for(int i=0;i=0;--i) - s.putback(token[i]) ; - return false ; - } - s.get() ; + if(s.peek() != token[i]) { + for(--i;i>=0;--i) { + s.putback(token[i]) ; + } + return false ; + } + s.get() ; } return true ; } - } + } } From c46c3dabf9731b65fa93fff76ca3f95a2e8aa908 Mon Sep 17 00:00:00 2001 From: Christopher Neal Date: Fri, 10 Apr 2026 20:56:45 -0400 Subject: [PATCH 2/6] Added helper functions and moved comments to header file. Other fixes were (1): sign-only inputs such as "+" and "-" were previously classified as numeric prefixes by is_int()/is_real() (2): malformed exponent suffixes such as "1e+" and "1e-" were partially consumed by get_real()" --- src/Tools/parse.cc | 160 ++++++++++++++++++++------------------------- 1 file changed, 72 insertions(+), 88 deletions(-) diff --git a/src/Tools/parse.cc b/src/Tools/parse.cc index 3364380a..58e17b49 100644 --- a/src/Tools/parse.cc +++ b/src/Tools/parse.cc @@ -31,18 +31,46 @@ namespace Loci { namespace parse { using namespace std ; - // ----------------------------------------------------------------------- - /// @brief kill_white_space() advances an input stream past leading white - /// space and line comments beginning with // - /// - /// @param [s] input stream to be positioned at the next non-whitespace, - /// non-comment character + namespace { + // ctype predicates are only defined for EOF and unsigned-char values. + // This wrapper keeps the stream-facing call sites safe and compact. + bool is_digit(int ch) { + return ch != EOF && isdigit(static_cast(ch)) ; + } + + // parse identifiers with the same leading-character rule used by + // is_name(): alphabetic characters and underscores are allowed. + bool is_name_start(int ch) { + return ch != EOF && + (isalpha(static_cast(ch)) || ch == '_') ; + } + + // is_int() accepts signed literals only when the sign is followed by a + // digit. Peek ahead without consuming the stream. + bool signed_int_starts_here(istream &s) { + const int sign = s.get() ; + const bool ok = is_digit(s.peek()) ; + s.putback(static_cast(sign)) ; + return ok ; + } + + // is_real() accepts signed literals when the sign is followed by either + // a digit or a decimal point. Leave the stream unchanged after probing. + bool signed_real_starts_here(istream &s) { + const int sign = s.get() ; + const int next = s.peek() ; + s.putback(static_cast(sign)) ; + return is_digit(next) || next == '.' ; + } + } + void kill_white_space(istream &s) { bool flushed_comment ; do { flushed_comment = false ; - while(!s.eof() && isspace(s.peek())) + while(!s.eof() && + isspace(static_cast(s.peek()))) s.get() ; if(s.peek() == '/') { // check for comment s.get() ; @@ -61,53 +89,33 @@ namespace Loci { return ; } - // ----------------------------------------------------------------------- - /// @brief is_name() checks whether the next token in the stream begins - /// with a valid identifier lead character - /// - /// @param [s] input stream to inspect - /// @return true if the next non-whitespace character is alphabetic or - /// underscore bool is_name(istream &s) { kill_white_space(s) ; int ch = s.peek() ; - return isalpha(ch) || ch == '_' ; + return is_name_start(ch) ; } - // ----------------------------------------------------------------------- - /// @brief get_name() extracts an identifier token from the input stream - /// - /// @param [s] input stream to consume from - /// @return identifier made of alphanumeric and underscore characters, or - /// an empty string if the next token is not a valid name string get_name(istream &s) { if(!is_name(s)) { return "" ; } string str ; while(!s.eof() && (s.peek() != EOF) && - (isalnum(s.peek()) || (s.peek() == '_')) ) { + (isalnum(static_cast(s.peek())) || + (s.peek() == '_')) ) { str += s.get() ; } return str ; } - // ----------------------------------------------------------------------- - /// @brief is_int() checks whether the next token in the stream can begin - /// an integer literal - /// - /// @param [s] input stream to inspect - /// @return true if the next non-whitespace character is a digit or sign bool is_int(istream &s) { kill_white_space(s) ; - return isdigit(s.peek()) || s.peek()=='-' || s.peek()=='+' ; + const int ch = s.peek() ; + if(is_digit(ch)) { + return true ; + } + return (ch == '-' || ch == '+') && signed_int_starts_here(s) ; } - // ----------------------------------------------------------------------- - /// @brief get_int() extracts an integer value from the input stream - /// - /// @param [s] input stream to consume from - /// @return parsed integer value, or zero if the next token does not begin - /// an integer literal long get_int(istream &s) { if(!is_int(s)) { return 0 ; } long l = 0 ; @@ -115,38 +123,28 @@ namespace Loci { return l ; } - // ----------------------------------------------------------------------- - /// @brief is_real() checks whether the next token in the stream can begin - /// a floating-point literal - /// - /// @param [s] input stream to inspect - /// @return true if the next non-whitespace character can start a real - /// number bool is_real(istream &s) { kill_white_space(s) ; - const char ch = s.peek() ; - return isdigit(ch) || ch=='-' || ch=='+' || ch =='.' ; + const int ch = s.peek() ; + if(is_digit(ch) || ch == '.') { + return true ; + } + return (ch == '-' || ch == '+') && signed_real_starts_here(s) ; } - // ----------------------------------------------------------------------- - /// @brief get_real() extracts a floating-point value from the input stream - /// - /// @param [s] input stream to consume from - /// @return parsed floating-point value, or zero if the next token does not - /// begin a real literal double get_real(istream &s) { if(!is_real(s)) { return 0.0 ; } // First grab real into string rval string rval ; char ch = s.get() ; - rval += ch ; // since aready passing is_real we know first character + rval += ch ; // since we already passed is_real, the first character // is in rval - bool leading_digit = isdigit(ch) ; + bool leading_digit = is_digit(ch) ; // any leading digits will go in rval - while(isdigit(s.peek())) { + while(is_digit(s.peek())) { ch = s.get() ; leading_digit = true ; rval += ch ; @@ -157,7 +155,7 @@ namespace Loci { ch = s.get() ; rval += ch ; bool trailing_digit = false ; - while(isdigit(s.peek())) { + while(is_digit(s.peek())) { trailing_digit = true ; ch = s.get() ; rval += ch ; @@ -167,44 +165,44 @@ namespace Loci { } } // If there is an exponent, check to make sure it is followed by a digit - // if it is then grab the exponent, else put back the character with - // unget + // after an optional sign. Otherwise leave the exponent marker untouched + // so the caller can see the malformed suffix. if(s.peek() == 'e' || s.peek() == 'E') { - ch = s.get() ; - ch = s.peek() ; - if(isdigit(ch) || ch=='-' || ch=='+') { // valid exponent + const char exp = s.get() ; + if(is_digit(s.peek())) { rval += 'e' ; ch = s.get() ; rval += ch ; - while(isdigit(s.peek())) { + while(is_digit(s.peek())) { ch = s.get() ; rval += ch ; } - } else { // invalid exponent, ignore 'e' or 'E' - s.unget() ; + } else if(s.peek() == '-' || s.peek() == '+') { + const char sign = s.get() ; + if(is_digit(s.peek())) { + rval += 'e' ; + rval += sign ; + while(is_digit(s.peek())) { + ch = s.get() ; + rval += ch ; + } + } else { + s.putback(sign) ; + s.putback(exp) ; + } + } else { + s.putback(exp) ; } } return atof(rval.c_str()) ; } - // ----------------------------------------------------------------------- - /// @brief is_string() checks whether the next token in the stream is a - /// quoted string literal - /// - /// @param [s] input stream to inspect - /// @return true if the next non-whitespace character is a double quote bool is_string(istream &s) { kill_white_space(s) ; return s.peek() == '\"' ; } - // ----------------------------------------------------------------------- - /// @brief get_string() extracts the contents of a quoted string literal - /// - /// @param [s] input stream to consume from - /// @return characters between the opening and closing double quotes, or an - /// empty string if the next token is not a string literal string get_string(istream &s) { if(!is_string(s)) { return "" ; } string str ; @@ -227,13 +225,6 @@ namespace Loci { return str ; } - // ----------------------------------------------------------------------- - /// @brief is_token() checks whether the next token in the stream matches a - /// specific literal without consuming it - /// - /// @param [s] input stream to inspect - /// @param [token] literal token to compare against the input - /// @return true if the next characters match token exactly bool is_token(istream &s, const string &token) { kill_white_space(s) ; const int sz = token.size() ; @@ -252,13 +243,6 @@ namespace Loci { return true ; } - // ----------------------------------------------------------------------- - /// @brief get_token() consumes a specific literal token from the input - /// stream if it matches exactly - /// - /// @param [s] input stream to consume from - /// @param [token] literal token expected at the current position - /// @return true if token was matched and consumed, false otherwise bool get_token(istream &s, const string &token) { kill_white_space(s) ; const int sz = token.size() ; From 42d6784b34f09f6101057a2ce321aa76d9c7a0a0 Mon Sep 17 00:00:00 2001 From: Christopher Neal Date: Fri, 10 Apr 2026 20:58:09 -0400 Subject: [PATCH 3/6] Comments added for header functions --- src/include/Tools/parse.h | 69 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/src/include/Tools/parse.h b/src/include/Tools/parse.h index 436dbb20..f0a04ae1 100644 --- a/src/include/Tools/parse.h +++ b/src/include/Tools/parse.h @@ -32,22 +32,87 @@ namespace Loci { namespace parse { - + + /// @brief Advances an input stream past leading white space and line + /// comments beginning with `//`. + /// + /// @param [s] input stream to be positioned at the next non-whitespace, + /// non-comment character void kill_white_space(std::istream &s) ; - + + /// @brief Checks whether the next token in the stream begins with a valid + /// identifier lead character. + /// + /// @param [s] input stream to inspect + /// @return true if the next non-whitespace character is alphabetic or + /// underscore bool is_name(std::istream &s) ; + + /// @brief Extracts an identifier token from the input stream. + /// + /// @param [s] input stream to consume from + /// @return identifier made of alphanumeric and underscore characters, or + /// an empty string if the next token is not a valid name std::string get_name(std::istream &s) ; + /// @brief Checks whether the next token in the stream can begin a valid + /// integer literal. + /// + /// @param [s] input stream to inspect + /// @return true if the next non-whitespace token begins with digits or + /// with a sign followed by digits bool is_int(std::istream &s) ; + + /// @brief Extracts an integer value from the input stream. + /// + /// @param [s] input stream to consume from + /// @return parsed integer value, or zero if the next token does not begin + /// an integer literal long get_int(std::istream &s) ; + /// @brief Checks whether the next token in the stream can begin a valid + /// floating-point literal. + /// + /// @param [s] input stream to inspect + /// @return true if the next non-whitespace token begins with digits, a + /// decimal point, or a sign followed by digits or a decimal point bool is_real(std::istream &s) ; + + /// @brief Extracts a floating-point value from the input stream. + /// + /// @param [s] input stream to consume from + /// @return parsed floating-point value, or zero if the next token does not + /// begin a real literal double get_real(std::istream &s) ; + /// @brief Checks whether the next token in the stream is a quoted string + /// literal. + /// + /// @param [s] input stream to inspect + /// @return true if the next non-whitespace character is a double quote bool is_string(std::istream &s) ; + + /// @brief Extracts the contents of a quoted string literal. + /// + /// @param [s] input stream to consume from + /// @return characters between the opening and closing double quotes, or an + /// empty string if the next token is not a string literal std::string get_string(std::istream &s) ; + /// @brief Checks whether the next token in the stream matches a specific + /// literal without consuming it. + /// + /// @param [s] input stream to inspect + /// @param [token] literal token to compare against the input + /// @return true if the next characters match token exactly bool is_token(std::istream &s, const std::string &token) ; + + /// @brief Consumes a specific literal token from the input stream if it + /// matches exactly. + /// + /// @param [s] input stream to consume from + /// @param [token] literal token expected at the current position + /// @return true if token was matched and consumed, false otherwise bool get_token(std::istream &s, const std::string &token) ; } } From fbf12fdf6d78ea37c55a374a426799f1e16add23 Mon Sep 17 00:00:00 2001 From: Christopher Neal Date: Fri, 10 Apr 2026 20:58:20 -0400 Subject: [PATCH 4/6] tweak to tests --- quickTest/Tools/test_parse.cc | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/quickTest/Tools/test_parse.cc b/quickTest/Tools/test_parse.cc index 6636b277..97dd5568 100644 --- a/quickTest/Tools/test_parse.cc +++ b/quickTest/Tools/test_parse.cc @@ -8,10 +8,6 @@ #include -#ifndef LOCI_ENABLE_PARSE_KNOWN_BUG_TESTS -#define LOCI_ENABLE_PARSE_KNOWN_BUG_TESTS 0 -#endif - namespace { std::string read_remaining(std::istream &in) { return std::string(std::istreambuf_iterator(in), @@ -19,20 +15,7 @@ std::string read_remaining(std::istream &in) { } } // namespace -//---------------------------------------------------------------------------- -// Potential Bugs -//---------------------------------------------------------------------------- -// -// These are cases that appear to demonstrate behavior that is not intended. -// -// Enable them with LOCI_ENABLE_PARSE_KNOWN_BUG_TESTS=1 when we want to -// revisit these behaviors. - -// The parser already treats "12ex" as the valid prefix "12" with "ex" left in -// the stream. Malformed exponent tails such as "e+" and "e-" should behave the -// same way instead of being silently consumed. -TEST_CASE("known bug: get_real should not consume incomplete exponent suffixes [known-bug]" * - doctest::skip(LOCI_ENABLE_PARSE_KNOWN_BUG_TESTS == 0)) { +TEST_CASE("get_real leaves incomplete exponent suffixes unread") { struct RealCase { const char *text; const char *rest; @@ -53,11 +36,7 @@ TEST_CASE("known bug: get_real should not consume incomplete exponent suffixes [ } } -// is_int() and is_real() currently treat a leading + or - as sufficient to -// begin a number, even when no digits follow. That leaks upward into callers -// like options_list and UNIT_type, which can accept "+" or "-" as zero. -TEST_CASE("known bug: bare signs should not be classified as numeric literals [known-bug]" * - doctest::skip(LOCI_ENABLE_PARSE_KNOWN_BUG_TESTS == 0)) { +TEST_CASE("numeric probes reject bare signs without digits") { const std::vector cases = { "+", "-", From 2a64649ac8c3a1886c607c24f4e2df1e7b57752c Mon Sep 17 00:00:00 2001 From: Christopher Neal Date: Sun, 12 Apr 2026 02:24:40 -0400 Subject: [PATCH 5/6] comments for tests and doctest may_fail() added so failing tests don't stop the suite --- quickTest/Tools/test_parse.cc | 49 +++++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/quickTest/Tools/test_parse.cc b/quickTest/Tools/test_parse.cc index 97dd5568..f69c46cf 100644 --- a/quickTest/Tools/test_parse.cc +++ b/quickTest/Tools/test_parse.cc @@ -9,13 +9,24 @@ #include namespace { +/// @brief Returns every unread character left in `in`. std::string read_remaining(std::istream &in) { return std::string(std::istreambuf_iterator(in), std::istreambuf_iterator()); } } // namespace -TEST_CASE("get_real leaves incomplete exponent suffixes unread") { +//---------------------------------------------------------------------------- +// Potential Bugs +//---------------------------------------------------------------------------- +// +// These stay in the suite as visible regressions without breaking the overall +// test run. + +// `get_real()` currently consumes incomplete exponent suffixes such as `e+` +// instead of leaving them unread for the caller. +TEST_CASE("get_real leaves incomplete exponent suffixes unread [known-bug]" * + doctest::may_fail()) { struct RealCase { const char *text; const char *rest; @@ -30,13 +41,19 @@ TEST_CASE("get_real leaves incomplete exponent suffixes unread") { CAPTURE(tc.text); std::istringstream in(tc.text); + // The numeric probe should accept the leading real value. CHECK(Loci::parse::is_real(in)); CHECK(Loci::parse::get_real(in) == doctest::Approx(1.0)); + + // The incomplete exponent suffix should remain unread for the caller. CHECK(read_remaining(in) == tc.rest); } } -TEST_CASE("numeric probes reject bare signs without digits") { +// `is_int()` and `is_real()` currently accept bare signs and sign-prefixed +// identifiers even when no digits follow. +TEST_CASE("numeric probes reject bare signs without digits [known-bug]" * + doctest::may_fail()) { const std::vector cases = { "+", "-", @@ -49,6 +66,7 @@ TEST_CASE("numeric probes reject bare signs without digits") { std::istringstream int_in(tc); std::istringstream real_in(tc); + // Bare signs and sign-prefixed identifiers are not valid integers or reals. CHECK_FALSE(Loci::parse::is_int(int_in)); CHECK_FALSE(Loci::parse::is_real(real_in)); } @@ -57,8 +75,10 @@ TEST_CASE("numeric probes reject bare signs without digits") { TEST_CASE("kill_white_space skips leading whitespace and line comments") { std::istringstream in(" \t\n// first comment\n // second comment\r\nname"); + // Leading whitespace and whole-line comments should be skipped together. Loci::parse::kill_white_space(in); + // The next token in the stream should now be the identifier itself. CHECK(in.peek() == 'n'); CHECK(Loci::parse::get_name(in) == "name"); CHECK(in.peek() == EOF); @@ -67,6 +87,7 @@ TEST_CASE("kill_white_space skips leading whitespace and line comments") { TEST_CASE("kill_white_space preserves a slash that does not start a line comment") { std::istringstream in(" /value"); + // A slash that is not the start of `//` should be left alone. Loci::parse::kill_white_space(in); CHECK(in.peek() == '/'); @@ -77,6 +98,7 @@ TEST_CASE("kill_white_space preserves a slash that does not start a line comment TEST_CASE("is_name and get_name parse identifiers and leave trailing delimiters") { std::istringstream in(" // comment\n_alpha42+rest"); + // Identifier parsing should skip comments and stop cleanly at the delimiter. CHECK(Loci::parse::is_name(in)); CHECK(Loci::parse::get_name(in) == "_alpha42"); CHECK(in.peek() == '+'); @@ -85,6 +107,7 @@ TEST_CASE("is_name and get_name parse identifiers and leave trailing delimiters" TEST_CASE("get_name returns empty string without consuming non-name input") { std::istringstream in(" 9alpha"); + // A non-identifier head should be rejected without advancing the stream. CHECK_FALSE(Loci::parse::is_name(in)); CHECK(Loci::parse::get_name(in).empty()); CHECK(in.peek() == '9'); @@ -93,14 +116,18 @@ TEST_CASE("get_name returns empty string without consuming non-name input") { TEST_CASE("is_int and get_int parse signed integers") { std::istringstream in(" \t+42,rest"); + // Signed integers should parse after leading whitespace. CHECK(Loci::parse::is_int(in)); CHECK(Loci::parse::get_int(in) == 42); + + // Parsing should stop at the first non-integer delimiter. CHECK(in.peek() == ','); } TEST_CASE("get_int returns zero without consuming invalid input") { std::istringstream in("alpha"); + // Invalid integer input should leave the stream untouched. CHECK_FALSE(Loci::parse::is_int(in)); CHECK(Loci::parse::get_int(in) == 0); CHECK(in.peek() == 'a'); @@ -128,8 +155,11 @@ TEST_CASE("is_real and get_real parse a broad set of numeric forms") { CAPTURE(tc.text); std::istringstream in(tc.text); + // The parser should accept each supported numeric spelling. CHECK(Loci::parse::is_real(in)); CHECK(Loci::parse::get_real(in) == doctest::Approx(tc.value)); + + // Any trailing non-numeric suffix should remain available to the caller. CHECK(read_remaining(in) == tc.rest); } } @@ -137,6 +167,7 @@ TEST_CASE("is_real and get_real parse a broad set of numeric forms") { TEST_CASE("get_real ignores invalid exponents and leaves the exponent marker in the stream") { std::istringstream in("12ex"); + // Parsing should stop before the invalid exponent marker. CHECK(Loci::parse::get_real(in) == doctest::Approx(12.0)); CHECK(in.peek() == 'e'); CHECK(read_remaining(in) == "ex"); @@ -145,6 +176,7 @@ TEST_CASE("get_real ignores invalid exponents and leaves the exponent marker in TEST_CASE("get_real returns zero without consuming invalid non-numeric input") { std::istringstream in("name"); + // Non-numeric input should be rejected without consuming the leading text. CHECK_FALSE(Loci::parse::is_real(in)); CHECK(Loci::parse::get_real(in) == doctest::Approx(0.0)); CHECK(in.peek() == 'n'); @@ -153,9 +185,11 @@ TEST_CASE("get_real returns zero without consuming invalid non-numeric input") { TEST_CASE("is_string and get_string parse quoted text including slashes and spaces") { std::istringstream in(" // comment\n\"a // b c\" tail"); + // Quoted strings should preserve internal spaces and slash sequences. CHECK(Loci::parse::is_string(in)); CHECK(Loci::parse::get_string(in) == "a // b c"); + // After consuming the string, the trailing identifier should still parse. Loci::parse::kill_white_space(in); CHECK(Loci::parse::get_name(in) == "tail"); } @@ -163,6 +197,7 @@ TEST_CASE("is_string and get_string parse quoted text including slashes and spac TEST_CASE("get_string supports empty quoted strings and leaves following text") { std::istringstream in("\"\"next"); + // Empty quoted strings should parse successfully and leave following text. CHECK(Loci::parse::get_string(in).empty()); CHECK(Loci::parse::get_name(in) == "next"); } @@ -170,6 +205,7 @@ TEST_CASE("get_string supports empty quoted strings and leaves following text") TEST_CASE("get_string returns empty string without consuming non-string input") { std::istringstream in("name"); + // Non-string input should remain untouched when no opening quote is present. CHECK_FALSE(Loci::parse::is_string(in)); CHECK(Loci::parse::get_string(in).empty()); CHECK(in.peek() == 'n'); @@ -178,6 +214,7 @@ TEST_CASE("get_string returns empty string without consuming non-string input") TEST_CASE("get_string returns collected text when the closing quote is missing") { std::istringstream in("\"unterminated"); + // Unterminated strings currently return the collected payload and consume the stream. CHECK(Loci::parse::get_string(in) == "unterminated"); CHECK(in.eof()); } @@ -185,8 +222,11 @@ TEST_CASE("get_string returns collected text when the closing quote is missing") TEST_CASE("is_token checks for a token without consuming it") { std::istringstream in(" &&rest"); + // Token probing should not consume the token on success. CHECK(Loci::parse::is_token(in, "&&")); CHECK(in.peek() == '&'); + + // A subsequent `get_token()` should then consume it normally. CHECK(Loci::parse::get_token(in, "&&")); CHECK(Loci::parse::get_name(in) == "rest"); } @@ -194,6 +234,7 @@ TEST_CASE("is_token checks for a token without consuming it") { TEST_CASE("get_token skips leading whitespace and comments before matching") { std::istringstream in(" // comment\n::name"); + // Token matching should honor the same whitespace/comment skipping rules. CHECK(Loci::parse::get_token(in, "::")); CHECK(Loci::parse::get_name(in) == "name"); } @@ -201,12 +242,16 @@ TEST_CASE("get_token skips leading whitespace and comments before matching") { TEST_CASE("is_token and get_token restore the stream after a partial mismatch") { { std::istringstream in("=>"); + + // A failed probe should restore the stream to its original position. CHECK_FALSE(Loci::parse::is_token(in, "==")); CHECK(read_remaining(in) == "=>"); } { std::istringstream in("=>"); + + // A failed consuming match should likewise leave the input untouched. CHECK_FALSE(Loci::parse::get_token(in, "==")); CHECK(read_remaining(in) == "=>"); } From 185a72a83354e5a636bac809ad9056e44de62dd1 Mon Sep 17 00:00:00 2001 From: Christopher Neal Date: Thu, 14 May 2026 00:04:15 -0400 Subject: [PATCH 6/6] remove parse source edits from test branch --- src/Tools/parse.cc | 266 +++++++++++++++----------------------- src/include/Tools/parse.h | 69 +--------- 2 files changed, 107 insertions(+), 228 deletions(-) diff --git a/src/Tools/parse.cc b/src/Tools/parse.cc index 58e17b49..dea84fe7 100644 --- a/src/Tools/parse.cc +++ b/src/Tools/parse.cc @@ -29,200 +29,147 @@ namespace Loci { namespace parse { - + using namespace std ; - namespace { - // ctype predicates are only defined for EOF and unsigned-char values. - // This wrapper keeps the stream-facing call sites safe and compact. - bool is_digit(int ch) { - return ch != EOF && isdigit(static_cast(ch)) ; - } - - // parse identifiers with the same leading-character rule used by - // is_name(): alphabetic characters and underscores are allowed. - bool is_name_start(int ch) { - return ch != EOF && - (isalpha(static_cast(ch)) || ch == '_') ; - } - - // is_int() accepts signed literals only when the sign is followed by a - // digit. Peek ahead without consuming the stream. - bool signed_int_starts_here(istream &s) { - const int sign = s.get() ; - const bool ok = is_digit(s.peek()) ; - s.putback(static_cast(sign)) ; - return ok ; - } - - // is_real() accepts signed literals when the sign is followed by either - // a digit or a decimal point. Leave the stream unchanged after probing. - bool signed_real_starts_here(istream &s) { - const int sign = s.get() ; - const int next = s.peek() ; - s.putback(static_cast(sign)) ; - return is_digit(next) || next == '.' ; - } - } - void kill_white_space(istream &s) { - + bool flushed_comment ; do { - flushed_comment = false ; - while(!s.eof() && - isspace(static_cast(s.peek()))) - s.get() ; - if(s.peek() == '/') { // check for comment - s.get() ; - if(s.peek() == '/') { - while(!s.eof()) { - int ch = s.get() ; - if(ch=='\n' || ch == '\r') { break ; } - } - flushed_comment = true ; - } else { - s.putback('/') ; - } - } + flushed_comment = false ; + while(!s.eof() && isspace(s.peek())) + s.get() ; + if(s.peek() == '/') { // check for comment + s.get() ; + if(s.peek() == '/') { + while(!s.eof()) { + int ch = s.get() ; + if(ch=='\n' || ch == '\r') + break ; + } + flushed_comment = true ; + } else + s.putback('/') ; + } } while(!s.eof() && flushed_comment) ; - + return ; } - + bool is_name(istream &s) { - kill_white_space(s) ; - int ch = s.peek() ; - return is_name_start(ch) ; + kill_white_space(s) ; + int ch = s.peek() ; + return isalpha(ch) || ch == '_' ; } - + string get_name(istream &s) { - if(!is_name(s)) { return "" ; } - string str ; - while(!s.eof() && (s.peek() != EOF) && - (isalnum(static_cast(s.peek())) || - (s.peek() == '_')) ) { - str += s.get() ; - } - - return str ; + if(!is_name(s)) + return "" ; + string str ; + while(!s.eof() && (s.peek() != EOF) && + (isalnum(s.peek()) || (s.peek() == '_')) ) + str += s.get() ; + + return str ; } bool is_int(istream &s) { - kill_white_space(s) ; - const int ch = s.peek() ; - if(is_digit(ch)) { - return true ; - } - return (ch == '-' || ch == '+') && signed_int_starts_here(s) ; + kill_white_space(s) ; + return isdigit(s.peek()) || s.peek()=='-' || s.peek()=='+' ; } - + long get_int(istream &s) { - if(!is_int(s)) { return 0 ; } - long l = 0 ; - s >> l ; - return l ; + if(!is_int(s)) + return 0 ; + long l = 0 ; + s >> l ; + return l ; } bool is_real(istream &s) { - kill_white_space(s) ; - const int ch = s.peek() ; - if(is_digit(ch) || ch == '.') { - return true ; - } - return (ch == '-' || ch == '+') && signed_real_starts_here(s) ; + kill_white_space(s) ; + const char ch = s.peek() ; + return isdigit(ch) || ch=='-' || ch=='+' || ch =='.' ; } - + double get_real(istream &s) { - if(!is_real(s)) { return 0.0 ; } + if(!is_real(s)) { + return 0.0 ; + } // First grab real into string rval string rval ; char ch = s.get() ; - rval += ch ; // since we already passed is_real, the first character + rval += ch ; // since aready passing is_real we know first character // is in rval - bool leading_digit = is_digit(ch) ; + bool leading_digit = isdigit(ch) ; // any leading digits will go in rval - while(is_digit(s.peek())) { - ch = s.get() ; - leading_digit = true ; - rval += ch ; + while(isdigit(s.peek())) { + ch = s.get() ; + leading_digit = true ; + rval += ch ; } // If there is a point, then the point and any following digits will // go into rval if(s.peek() == '.') { - ch = s.get() ; - rval += ch ; - bool trailing_digit = false ; - while(is_digit(s.peek())) { - trailing_digit = true ; - ch = s.get() ; - rval += ch ; - } - if(!leading_digit && !trailing_digit) { // convert . to .0 - rval += '0' ; - } + ch = s.get() ; + rval += ch ; + bool trailing_digit = false ; + while(isdigit(s.peek())) { + trailing_digit = true ; + ch = s.get() ; + rval += ch ; + } + if(!leading_digit && !trailing_digit) // convert . to .0 + rval += '0' ; } // If there is an exponent, check to make sure it is followed by a digit - // after an optional sign. Otherwise leave the exponent marker untouched - // so the caller can see the malformed suffix. + // if it is then grab the exponent, else put back the character with + // unget if(s.peek() == 'e' || s.peek() == 'E') { - const char exp = s.get() ; - if(is_digit(s.peek())) { - rval += 'e' ; - ch = s.get() ; - rval += ch ; - while(is_digit(s.peek())) { - ch = s.get() ; - rval += ch ; - } - } else if(s.peek() == '-' || s.peek() == '+') { - const char sign = s.get() ; - if(is_digit(s.peek())) { - rval += 'e' ; - rval += sign ; - while(is_digit(s.peek())) { - ch = s.get() ; - rval += ch ; - } - } else { - s.putback(sign) ; - s.putback(exp) ; - } - } else { - s.putback(exp) ; - } + ch = s.get() ; + ch = s.peek() ; + if(isdigit(ch) || ch=='-' || ch=='+') { // valid exponent + rval += 'e' ; + ch = s.get() ; + rval += ch ; + while(isdigit(s.peek())) { + ch = s.get() ; + rval += ch ; + } + } else { // invalid exponent, ignore 'e' or 'E' + s.unget() ; + } } return atof(rval.c_str()) ; } bool is_string(istream &s) { - kill_white_space(s) ; - return s.peek() == '\"' ; + kill_white_space(s) ; + return s.peek() == '\"' ; } - + string get_string(istream &s) { - if(!is_string(s)) { return "" ; } - string str ; -#ifdef DEBUG - if(s.eof()) { - cerr << "s.eof() true in parse::get_string" << endl ; - } + if(!is_string(s)) + return "" ; + string str ; +#ifdef DEBUG + if(s.eof()) + cerr << "s.eof() true in parse::get_string" << endl ; #endif - s.get() ; - int ch = s.get() ; - while(ch != '\"' &&!s.eof()) { - str += ch ; - ch = s.get() ; - } + s.get() ; + int ch = s.get() ; + while(ch != '\"' &&!s.eof()) { + str += ch ; + ch = s.get() ; + } #ifdef DEBUG - if(ch!='\"') { - cerr << "no closing \" in parse::get_string" << endl ; - } + if(ch!='\"') + cerr << "no closing \" in parse::get_string" << endl ; #endif - return str ; + return str ; } bool is_token(istream &s, const string &token) { @@ -230,32 +177,29 @@ namespace Loci { const int sz = token.size() ; for(int i=0;i=0;--i) { + for(--i;i>=0;--i) s.putback(token[i]) ; - } return false ; } s.get() ; } - for(int i=token.size()-1;i>=0;--i) { + for(int i=token.size()-1;i>=0;--i) s.putback(token[i]) ; - } return true ; } - + bool get_token(istream &s, const string &token) { kill_white_space(s) ; const int sz = token.size() ; for(int i=0;i=0;--i) { - s.putback(token[i]) ; - } - return false ; - } - s.get() ; + if(s.peek() != token[i]) { + for(--i;i>=0;--i) + s.putback(token[i]) ; + return false ; + } + s.get() ; } return true ; } - } + } } diff --git a/src/include/Tools/parse.h b/src/include/Tools/parse.h index f0a04ae1..436dbb20 100644 --- a/src/include/Tools/parse.h +++ b/src/include/Tools/parse.h @@ -32,87 +32,22 @@ namespace Loci { namespace parse { - - /// @brief Advances an input stream past leading white space and line - /// comments beginning with `//`. - /// - /// @param [s] input stream to be positioned at the next non-whitespace, - /// non-comment character + void kill_white_space(std::istream &s) ; - - /// @brief Checks whether the next token in the stream begins with a valid - /// identifier lead character. - /// - /// @param [s] input stream to inspect - /// @return true if the next non-whitespace character is alphabetic or - /// underscore + bool is_name(std::istream &s) ; - - /// @brief Extracts an identifier token from the input stream. - /// - /// @param [s] input stream to consume from - /// @return identifier made of alphanumeric and underscore characters, or - /// an empty string if the next token is not a valid name std::string get_name(std::istream &s) ; - /// @brief Checks whether the next token in the stream can begin a valid - /// integer literal. - /// - /// @param [s] input stream to inspect - /// @return true if the next non-whitespace token begins with digits or - /// with a sign followed by digits bool is_int(std::istream &s) ; - - /// @brief Extracts an integer value from the input stream. - /// - /// @param [s] input stream to consume from - /// @return parsed integer value, or zero if the next token does not begin - /// an integer literal long get_int(std::istream &s) ; - /// @brief Checks whether the next token in the stream can begin a valid - /// floating-point literal. - /// - /// @param [s] input stream to inspect - /// @return true if the next non-whitespace token begins with digits, a - /// decimal point, or a sign followed by digits or a decimal point bool is_real(std::istream &s) ; - - /// @brief Extracts a floating-point value from the input stream. - /// - /// @param [s] input stream to consume from - /// @return parsed floating-point value, or zero if the next token does not - /// begin a real literal double get_real(std::istream &s) ; - /// @brief Checks whether the next token in the stream is a quoted string - /// literal. - /// - /// @param [s] input stream to inspect - /// @return true if the next non-whitespace character is a double quote bool is_string(std::istream &s) ; - - /// @brief Extracts the contents of a quoted string literal. - /// - /// @param [s] input stream to consume from - /// @return characters between the opening and closing double quotes, or an - /// empty string if the next token is not a string literal std::string get_string(std::istream &s) ; - /// @brief Checks whether the next token in the stream matches a specific - /// literal without consuming it. - /// - /// @param [s] input stream to inspect - /// @param [token] literal token to compare against the input - /// @return true if the next characters match token exactly bool is_token(std::istream &s, const std::string &token) ; - - /// @brief Consumes a specific literal token from the input stream if it - /// matches exactly. - /// - /// @param [s] input stream to consume from - /// @param [token] literal token expected at the current position - /// @return true if token was matched and consumed, false otherwise bool get_token(std::istream &s, const std::string &token) ; } }