OPENDAP · jgallagher59701 · Dec 21, 2025 · Dec 21, 2025 · Dec 22, 2025 · Dec 22, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -20,7 +20,7 @@ exclude: |
 repos:
   # --- Baseline sanity checks (fast, safe) ---
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v6.0.0
     hooks:
       - id: end-of-file-fixer
         # This is lame, and there's supposed to be a better way, but I cannot
@@ -66,6 +66,6 @@ repos:
       - id: prettier
         args: ["--no-cache"] # Problems with caching on OSX. jhrg 10/31/25
         # Choose what Prettier formats:
-        types_or: [json, yaml, markdown]
+        types_or: [json, yaml]
         additional_dependencies:
           - prettier@3.6.0
diff --git a/.prettierignore b/.prettierignore
@@ -11,5 +11,7 @@ unit-tests/diff-match-patch
 tests/*-testsuite/**
 tests/dmrset/**
 
+retired/**
+
 # Ignore other non-source text
 build/**
diff --git a/README.cmake.md b/README.cmake.md
@@ -1,6 +1,6 @@
 # Using cmake to build libdap4
 
-## how to build outside the source tree
+## How to build outside the source tree
 
 Make a build directory (or build/libdap4 if libdap4 is a git submodule).
 Then, in that directory, run cmake with the source directory as a command line
@@ -70,3 +70,70 @@ Time to run the unit tests:
 Total Test time (real) =   2.35 sec
 make test -j20  2.20s user 0.14s system 98% cpu 2.381 total
 ```
+
+## Using Cmake Presets
+
+There are several presets defined, each combining several cmake switches
+in one setting. For example:
+
+```bash
+cmake --preset developer
+```
+
+uses the following options to configure the build:
+
+```
+Preset CMake variables:
+
+  BIG_ARRAY_TEST="OFF"
+  BUILD_DEVELOPER="ON"
+  BUILD_TESTING="ON"
+  CMAKE_BUILD_TYPE="Debug"
+  CMAKE_CXX_STANDARD="14"
+  CMAKE_CXX_STANDARD_REQUIRED="ON"
+  CMAKE_INSTALL_PREFIX="$prefix"
+  USE_ASAN="OFF"
+  USE_CPP_11_REGEX="ON"
+```
+
+Note that using the preset makes the ```build``` directory and will configure
+the build to use a directory named for the preset under the ```build``` directory.
+For the _developer_ preset, that will be ```build/developer```:
+
+```commandline
+hyrax_git/libdap4 % ls build/developer
+cmake_install.cmake		d4_ce/				http_dap/			Testing/
+CMakeCache.txt			d4_function/		libdap.pc			tests/
+CMakeFiles/			    dap-config			libdap4Config.cmake	unit-tests/
+config.h			    DartConfiguration.tcl libdap4ConfigVersion.cmake	xdr-datatypes.h
+CTestTestfile.cmake		dods-datatypes.h	Makefile
+```
+
+You can run the build using cmake from the top-level of the repo like this:
+
+```bash
+cmake --build . --preset developer --parallel
+```
+
+and run the tests like this:
+
+```bash
+ctest --preset developer
+```
+
+The ```ctest``` program does accept ```--parallel```, but don't use that with libdap
+until the test tools are made thread safe. If you do use ```--parallel``` by mistake,
+then clean the dirs and re-run the tests. Until the tests are more solid, it might
+be best to just ```rm -rf build``` and start over. There is a special target to
+clean out the temp files made by the tests; that target is called _clean-tests_.
+
+To use ```cmake``` to run a custom target, use this syntax:
+
+```bash
+cmake --build . --preset developer --parallel --target clean-tests
+```
+
+## You can still use _make_
+
+To use _make_, just cd to ```build/developer``` and run the usual commands to
+build and install the code. Use the target _test_ to run the tests.
diff --git a/d4_ce/D4CEScanner.h b/d4_ce/D4CEScanner.h
@@ -50,7 +50,7 @@ namespace libdap {
 
 class D4CEScanner : public d4_ceFlexLexer {
 public:
-    D4CEScanner(std::istream &in) : d4_ceFlexLexer(&in), yylval(0), loc(0) {};
+    explicit D4CEScanner(std::istream &in) : d4_ceFlexLexer(&in), yylval(nullptr), loc(nullptr) {};
 
     int yylex(libdap::D4CEParser::semantic_type *lval, libdap::location *l) {
         loc = l;
@@ -60,7 +60,7 @@ class D4CEScanner : public d4_ceFlexLexer {
 
 private:
     /* hide this one from public view */
-    int yylex();
+    int yylex() override;
 
     /* yyval ptr */
     libdap::D4CEParser::semantic_type *yylval;

diff --git a/d4_ce/D4ConstraintEvaluator.h b/d4_ce/D4ConstraintEvaluator.h
@@ -61,9 +61,9 @@ class D4ConstraintEvaluator {
         bool empty = false;
         // When a slice is applied to an Array with Maps, we need to know the name of
         // each dimension. These names are then used to apply the slice to each of the
-        // Maps (Maps may have fewer dimensions than the Array, but the idea that a
+        // Maps. Maps may have fewer dimensions than the Array, but the idea that a
         // Map is a simple vector doesn't hold for DAP4, so the mapping between a slice's
-        // indexes and the set of Maps can be complex - use the names to make sure
+        // indexes and the set of Maps can be complex. Use the names to make sure
         // all cases are covered. The value of this field may be empty.
         std::string dim_name;
 
@@ -83,6 +83,16 @@ class D4ConstraintEvaluator {
     static index make_index(const std::string &i, const std::string &s);
     static index make_index(const std::string &i, int64_t s);
 
+    // Placeholder: for now, if a value-based subset is given, return the entire dimension. jhrg 12/23/25
+    // Two-value form (start and end). Both arguments are currently ignored and a fixed index is returned.
+    static index make_value_based_index(const std::string &, const std::string &) {
+        return index(0, 1, 0, false, false, "");
+    }
+    // Three-value form (start, stride, end). All three arguments are currently ignored; same fixed index as above.
+    static index make_value_based_index(const std::string &, const std::string &, const std::string &) {
+        return index(0, 1, 0, false, false, "");
+    }
+
     bool d_trace_scanning = false;
     bool d_trace_parsing = false;
     bool d_result = false;

diff --git a/d4_ce/d4_ce_parser.yy b/d4_ce/d4_ce_parser.yy
@@ -99,6 +99,7 @@ namespace libdap {
 
 // The strings used in the token definitions are used for error messages
 %token <std::string> WORD "word"
+%token <std::string> VALUE "value"
 %token <std::string> STRING "string"
 
 // %type is used to set the return type of non-terminals; %token sets the
@@ -313,6 +314,19 @@ index   : "[" "]" { $$ = driver.make_index(); }
 | "[" WORD ":" WORD ":" WORD "]" { $$ = driver.make_index($2, $4, $6); }
 | "[" WORD ":" "]" { $$ = driver.make_index($2, 1); }
 | "[" WORD ":" WORD ":" "]" { $$ = driver.make_index($2, $4); }
+
+// Value-based subsetting rules. The stride must be an integer.
+// I don't think this makes much sense. "[" VALUE "]"
+| "[" VALUE ":" VALUE "]" { $$ = driver.make_value_based_index($2, $4); }
+| "[" VALUE ":" WORD ":" VALUE "]" { $$ = driver.make_value_based_index($2, $4, $6); }
+
+// Value-based subsetting starting with a value and going to the end.
+// Add these once the initial two are working. jhrg 12/23/25
+// | "[" VALUE ":" "]" { $$ = driver.make_value_based_index($2, 1); }
+// | "[" VALUE ":" WORD ":" "]" { $$ = driver.make_value_based_index($2, $4); }
+// Value-based subsetting starting at the beginning and going to a value.
+// | "[" ":" VALUE "]" { $$ = driver.make_index($2, 1); }
+// | "[" ":" VALUE ":" WORD "]" { $$ = driver.make_index($2, $4); }
 ;
 
 fields : "{" clauses "}" { $$ = $2; }
@@ -409,7 +423,7 @@ path : name
 
 // Because some formats/datasets allow 'any' name for a variable, it's possible
 // that a variable name will be a number, etc. The grammar also allows STRING
-// to support "name"."name with spaces and dots (.)".x
+// to support "name"."name with spaces and dots (.)".
 //
 // I added calls here to remove the double quotes because they were breaking
 // the parse for STRINGs and also added www2id() for WORDs (so that %20, etc.

diff --git a/d4_ce/d4_ce_scanner.ll b/d4_ce/d4_ce_scanner.ll
@@ -83,13 +83,17 @@ typedef libdap::D4CEParser::token token;
 
 %option batch
 
+/* quote is a special state for quoted strings. value is a special state
+   for values that are used for value-based-subsetting. Values for subsetting
+   cannot contain escaped characters. jhrg 12/22/25 */
 %x quote
+%x value
 
 /* This pattern just ensures that a word does not start with '#' which
    is the DAP2 comment character.
 
    Having the characters !, ~, and @ in the second set of the chars allowed
-   in a WORD token meant that 'var!=' parsed as WORD == 'var!' and '=' ane not
+   in a WORD token meant that 'var!=' parsed as WORD == 'var!' and '=' and not
    'var' and '!='. I see that in DAP2 I did not include these in the definition
    of a WORD.
    jhrg 4/29/16 */
@@ -166,6 +170,22 @@ loc->step();
                   YY_FATAL_ERROR("Unterminated quote");
                 }
 
+[(]    { BEGIN(value); yymore(); }
+
+<value>[^)]*  yymore(); /* Anything that's not a right paren */
+
+<value>[)]  {
+                /* A right paren in the 'value' state indicates the end of the subset value */
+                BEGIN(INITIAL);
+                yylval->build<std::string>(yytext);
+                return token::VALUE;
+            }
+
+<value><<EOF>>	{
+                  BEGIN(INITIAL);   /* resetting the state is needed for reentrant parsers */
+                  YY_FATAL_ERROR("Unterminated subset value");
+                }
+
 .   {
         BEGIN(INITIAL);
         if (yytext) {

diff --git a/d4_ce/unit-tests/D4ConstraintEvaluatorTest.cc b/d4_ce/unit-tests/D4ConstraintEvaluatorTest.cc
@@ -125,7 +125,7 @@ class D4ConstraintEvaluatorTest : public CppUnit::TestFixture {
             D4ConstraintEvaluator::throw_not_array("id", "ident");
             CPPUNIT_FAIL("Expected throw_not_array to throw an exception");
         } catch (const Error &e) {
-            // Verify specific error message and details (if applicable)
+            // Verify a specific error message and details (if applicable)
             CPPUNIT_ASSERT_EQUAL(e.get_error_code(), no_such_variable);
             // Additional assertions for message details based on implementation
         }

diff --git a/d4_ce/D4ConstraintEvaluator.cc.nat_axes → ...d/d4_ce/D4ConstraintEvaluator.cc.nat_axes b/d4_ce/D4ConstraintEvaluator.cc.nat_axes → ...d/d4_ce/D4ConstraintEvaluator.cc.nat_axes
diff --git a/d4_ce/D4ConstraintEvaluator.h.nat_axes → ...ed/d4_ce/D4ConstraintEvaluator.h.nat_axes b/d4_ce/D4ConstraintEvaluator.h.nat_axes → ...ed/d4_ce/D4ConstraintEvaluator.h.nat_axes
diff --git a/d4_ce/d4_ce_parser.yy.nat_axes → retired/d4_ce/d4_ce_parser.yy.nat_axes b/d4_ce/d4_ce_parser.yy.nat_axes → retired/d4_ce/d4_ce_parser.yy.nat_axes
diff --git a/d4_ce/d4_ce_scanner.ll.nat_axes → retired/d4_ce/d4_ce_scanner.ll.nat_axes b/d4_ce/d4_ce_scanner.ll.nat_axes → retired/d4_ce/d4_ce_scanner.ll.nat_axes
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
@@ -69,11 +69,15 @@ if(TIRPC_FOUND)
 	target_link_libraries(expr-test PRIVATE ${TIRPC_LIBRARIES})
 endif()
 
+# There are headers that are generated and put in the CURRENT_BINARY_DIR.
+# When the scanner tester was added to dmr-test, the <build>/d4_ce directory
+# had to be added to the target_include_directories.
 add_executable(dmr-test dmr-test.cc D4ResponseBuilder.cc)
 target_include_directories(dmr-test
 		PRIVATE
 		${CMAKE_CURRENT_SOURCE_DIR} ${LIBXML2_INCLUDE_DIR}
-		${CMAKE_SOURCE_DIR}/d4_ce  ${CMAKE_SOURCE_DIR}/d4_function)
+		${CMAKE_SOURCE_DIR}/d4_ce  ${CMAKE_SOURCE_DIR}/d4_function
+		${CMAKE_BINARY_DIR}/d4_ce)
 target_link_libraries(dmr-test PRIVATE test-types dapserver dap)
 if(TIRPC_FOUND)
 	target_include_directories(dmr-test PRIVATE ${TIRPC_INCLUDE_DIRS})

diff --git a/tests/DMRTest.at b/tests/DMRTest.at
@@ -715,3 +715,24 @@ DMR_TRANS_CE([vol_1_ce_13.xml], [/inst2/inst3], [vol_1_ce_13.xml.2.trans_base],
 
 DMR_TRANS_CE([vol_1_ce_13.xml], [/attr_only_global], [vol_1_ce_13.xml.3.trans_base], [pass])
 DMR_TRANS_CE([vol_1_ce_13.xml], [/inst2/attr_only], [vol_1_ce_13.xml.4.trans_base], [pass])
+
+# Test the value-based subsetting syntax addition. NB: vbs (value-based subsetting). jhrg 12/29/25
+# This is the baseline version to start with - the temporary hack for the parsing uses an 'index'
+# of 0:1:0. Thus, _for now_, all these CEs return the same values. That will change once the work
+# moves forward.
+
+# NB: test_array_4.xml uses shared dimensions, so the indicial syntax parses, but it does not
+# have Maps, so this kind of subsetting will actually fail. Make sure that these six following
+# tests _do_ fail once the VBS logic is coded. jhrg 12/29/25
+DMR_TRANS_CE_NO_CRC([test_array_4.xml], [/row=[[0]];/col=[[0]];a[[]][[]] ], [test_array_4.xml.vbs.1a.trans_base], [pass])
+DMR_TRANS_CE_NO_CRC([test_array_4.xml], [/row=[[(0):(1)]];/col=[[(0):(1)]];a[[]][[]] ], [test_array_4.xml.vbs.1a.trans_base], [pass])
+DMR_TRANS_CE_NO_CRC([test_array_4.xml], [/row=[[(10):(11.11)]];/col=[[(0.4):(-10.7)]];a[[]][[]] ], [test_array_4.xml.vbs.1a.trans_base], [pass])
+
+DMR_TRANS_CE_NO_CRC([test_array_4.xml], [/row=[[0]];/col=[[0]];x[[]][[]] ], [test_array_4.xml.vbs.1x.trans_base], [pass])
+DMR_TRANS_CE_NO_CRC([test_array_4.xml], [/row=[[(0):(1)]];/col=[[(0):(1)]];x[[]][[]] ], [test_array_4.xml.vbs.1x.trans_base], [pass])
+DMR_TRANS_CE_NO_CRC([test_array_4.xml], [/row=[[(10):(11.11)]];/col=[[(0.4):(-10.7)]];x[[]][[]] ], [test_array_4.xml.vbs.1x.trans_base], [pass])
+
+# NB: vol_1_ce_7.xml has Maps and this should not only parse but also subset by value. jhrg 12/29/25
+DMR_TRANS_CE_NO_CRC([vol_1_ce_7.xml], [nlon=[[0:2]];nlat=[[0]];temp[[]][[]] ], [vol_1_ce_7.xml.vbs.1.trans_base], [pass])
+DMR_TRANS_CE_NO_CRC([vol_1_ce_7.xml], [nlon=[[0:2]];nlat=[[(10.0):(-17.9)]];temp[[]][[]] ], [vol_1_ce_7.xml.vbs.1.trans_base], [pass])
+DMR_TRANS_CE_NO_CRC([vol_1_ce_7.xml], [nlon=[[0:2]];nlat=[[(0.09):(-17.9)]];temp[[]][[]] ], [vol_1_ce_7.xml.vbs.1.trans_base], [pass])
diff --git a/tests/cmake/dmr-tests.cmake b/tests/cmake/dmr-tests.cmake
@@ -458,3 +458,37 @@ dmr_series_test(177 names_with_spaces2.dmr "/inst2/\"Point%20Break\".x" names_wi
 
 dmr_series_test(178 names_with_spaces3.dmr "/inst2/\"New Group\"/x" names_with_spaces3.dmr.1.trans_base "xfail")
 dmr_series_test(179 names_with_spaces3.dmr "/inst2/New%20Group/x" names_with_spaces3.dmr.1.trans_base "xfail")
+
+# Test the value-based subsetting syntax addition. NB: vbs (value-based subsetting). jhrg 12/29/25
+# This is the baseline version to start with - the temporary hack for the parsing uses an 'index'
+# of 0:1:0. Thus, _for now_, all these CEs return the same values. That will change once the work
+# moves forward.
+
+# NB: test_array_4.xml uses shared dimensions, so the indicial syntax parses, but it does not
+# have Maps, so this kind of subsetting will actually fail. Make sure that these six following
+# tests _do_ fail once the VBS logic is coded. jhrg 12/29/25
+dmr_trans_test(180 test_array_4.xml "/row=[0];/col=[0];a[][]" "" test_array_4.xml.vbs.1a.trans_base "universal")
+dmr_trans_test(181 test_array_4.xml "/row=[(0):(1)];/col=[(0):(1)];a[][]" "" test_array_4.xml.vbs.1a.trans_base "universal")
+dmr_trans_test(182 test_array_4.xml "/row=[(10):(11.11)];/col=[(0.4):(-10.7)];a[][]" "" test_array_4.xml.vbs.1a.trans_base "universal")
+
+# The above tests were derived from, and use the same baselines as, these three below. jhrg 12/29/25
+# DMR_TRANS_CE_NO_CRC([test_array_4.xml], [/row=[0];/col=[0];a[][] ], [test_array_4.xml.vbs.1a.trans_base], [pass])
+# DMR_TRANS_CE_NO_CRC([test_array_4.xml], [/row=[(0):(1)];/col=[(0):(1)];a[][] ], [test_array_4.xml.vbs.1a.trans_base], [pass])
+# DMR_TRANS_CE_NO_CRC([test_array_4.xml], [/row=[(10):(11.11)];/col=[(0.4):(-10.7)];a[][] ], [test_array_4.xml.vbs.1a.trans_base], [pass])
+
+dmr_trans_test(183 test_array_4.xml "/row=[0];/col=[0];x[][]" "" test_array_4.xml.vbs.1x.trans_base "universal")
+dmr_trans_test(184 test_array_4.xml "/row=[(0):(1)];/col=[(0):(1)];x[][]" "" test_array_4.xml.vbs.1x.trans_base "universal")
+dmr_trans_test(185 test_array_4.xml "/row=[(10):(11.11)];/col=[(0.4):(-10.7)];x[][]" "" test_array_4.xml.vbs.1x.trans_base "universal")
+
+# DMR_TRANS_CE_NO_CRC([test_array_4.xml], [/row=[0];/col=[0];x[][] ], [test_array_4.xml.vbs.1x.trans_base], [pass])
+# DMR_TRANS_CE_NO_CRC([test_array_4.xml], [/row=[(0):(1)];/col=[(0):(1)];x[][] ], [test_array_4.xml.vbs.1x.trans_base], [pass])
+# DMR_TRANS_CE_NO_CRC([test_array_4.xml], [/row=[(10):(11.11)];/col=[(0.4):(-10.7)];x[][] ], [test_array_4.xml.vbs.1x.trans_base], [pass])
+
+# NB: vol_1_ce_7.xml has Maps and this should not only parse but also subset by value. jhrg 12/29/25
+dmr_trans_test(186 vol_1_ce_7.xml "nlon=[0:2];nlat=[0];temp[][]" "" vol_1_ce_7.xml.vbs.1.trans_base "universal")
+dmr_trans_test(187 vol_1_ce_7.xml "nlon=[0:2];nlat=[(10.0):(-17.9)];temp[][]" "" vol_1_ce_7.xml.vbs.1.trans_base "universal")
+dmr_trans_test(188 vol_1_ce_7.xml "nlon=[0:2];nlat=[(0.09):(-17.9)];temp[][]" "" vol_1_ce_7.xml.vbs.1.trans_base "universal")
+
+# DMR_TRANS_CE_NO_CRC([vol_1_ce_7.xml], [nlon=[0:2];nlat=[0];temp[][] ], [vol_1_ce_7.xml.vbs.1.trans_base], [pass])
+# DMR_TRANS_CE_NO_CRC([vol_1_ce_7.xml], [nlon=[0:2];nlat=[(10.0):(-17.9)];temp[][] ], [vol_1_ce_7.xml.vbs.1.trans_base], [pass])
+# DMR_TRANS_CE_NO_CRC([vol_1_ce_7.xml], [nlon=[0:2];nlat=[(0.09):(-17.9)];temp[][] ], [vol_1_ce_7.xml.vbs.1.trans_base], [pass])