alfasoftware · RadikalJin · Jun 10, 2026 · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026
diff --git a/astra-core/src/main/java/org/alfasoftware/astra/core/refactoring/UseCase.java b/astra-core/src/main/java/org/alfasoftware/astra/core/refactoring/UseCase.java
@@ -87,4 +87,17 @@ default String[] getSources() {
   default int getParallelism() {
     return Runtime.getRuntime().availableProcessors();
   }
+
+  /**
+   * @return The number of source files to include in each batch when parsing with a shared
+   *         compilation environment. A larger batch size amortises the cost of classpath
+   *         initialisation over more files but increases the peak heap required during the
+   *         parse phase; a smaller value reduces peak heap at the cost of more initialisations.
+   *         The default of {@code 500} suits a 512 MB heap and typical-sized Java source
+   *         files. Reduce this value if you encounter {@link OutOfMemoryError} on a large
+   *         codebase, or increase it if you have a generous heap budget.
+   */
+  default int getBatchSize() {
+    return 500;
+  }
 }
diff --git a/...ware/astra/core/refactoring/operations/interfaces/RemovePublicModifierFromInterfaces.java b/...ware/astra/core/refactoring/operations/interfaces/RemovePublicModifierFromInterfaces.java
@@ -8,6 +8,7 @@
 import org.alfasoftware.astra.core.utils.ClassVisitor;
 import org.eclipse.jdt.core.dom.ASTNode;
 import org.eclipse.jdt.core.dom.CompilationUnit;
+import org.eclipse.jdt.core.dom.IExtendedModifier;
 import org.eclipse.jdt.core.dom.MethodDeclaration;
 import org.eclipse.jdt.core.dom.Modifier;
 import org.eclipse.jdt.core.dom.rewrite.ASTRewrite;
@@ -31,7 +32,9 @@ public void run(final CompilationUnit compilationUnit, final ASTNode node, final
       .stream()
       // find any public modifiers
       .map(MethodDeclaration::modifiers)
-      .flatMap(List<Modifier>::stream)
+      .flatMap(List<IExtendedModifier>::stream)
+      .filter(Modifier.class::isInstance)
+      .map(Modifier.class::cast)
       .filter(Modifier::isPublic)
       // remove them
       .forEach(m -> rewriter.remove(m, null));

diff --git a/astra-core/src/main/java/org/alfasoftware/astra/core/utils/AstraCore.java b/astra-core/src/main/java/org/alfasoftware/astra/core/utils/AstraCore.java
@@ -12,8 +12,11 @@
 import java.time.Instant;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.HashSet;
+import java.util.LinkedHashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
@@ -26,12 +29,14 @@
 
 import org.alfasoftware.astra.core.refactoring.UseCase;
 import org.alfasoftware.astra.core.refactoring.operations.imports.UnusedImportRefactor;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.eclipse.jdt.core.dom.ASTParser;
 import org.eclipse.jdt.core.dom.CompilationUnit;
+import org.eclipse.jdt.core.dom.FileASTRequestor;
 import org.eclipse.jdt.core.dom.rewrite.ASTRewrite;
 import org.eclipse.jface.text.BadLocationException;
 import org.eclipse.text.edits.MalformedTreeException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  *  AstraCore operates on source files in an input directory, building an AST for each file, using any additional classpaths supplied. 
@@ -61,7 +66,7 @@ public static void run(String targetDirectoryPath, UseCase useCase) {
     validateSourceAndClasspath(sources, classPath);
     try {
       AstraCore main = new AstraCore();
-      main.runOperations(targetDirectoryPath, useCase, sources, classPath);
+      main.runOperations(targetDirectoryPath, useCase, sources, AstraUtils.filterClassPath(classPath));
     } catch (IOException e) {
       throw new RuntimeException("Astra run failed for directory [" + targetDirectoryPath + "]: " + e.getMessage(), e);
     }
@@ -83,20 +88,23 @@ protected void runOperations(String directoryPath, UseCase useCase, String[] sou
     AtomicLong currentPercentage = new AtomicLong();
     Instant startTime = Instant.now();
 
-    // The same file-selection filter (.java extension + the UseCase path prefiltering predicate)
-    // is used for both the count walk and the processing walk, so the progress denominator can
-    // never drift from the set of files actually processed.
+    // Build the single file-selection filter used for path scanning, progress tracking, and
+    // chunk partitioning. Using one shared Predicate instance ensures the progress denominator
+    // can never drift from the set of files actually processed.
     Path sourcePath = Paths.get(directoryPath);
     Predicate<Path> fileFilter = buildFileFilter(useCase);
 
-    // First walk: a cheap count pass that reads only directory metadata (not file contents) to
-    // determine the total number of files to process, which feeds the progress percentage below.
-    log.info("Counting files (this may take a few seconds)");
-    long totalFiles;
+    // Single walk: collect all matching paths so we can (a) derive the progress denominator
+    // without a second walk and (b) partition the list into fixed-size chunks for
+    // bounded-memory batch parsing. Only Path objects are materialised here — file contents
+    // are read lazily, one chunk at a time.
+    log.info("Scanning for files (this may take a few seconds)");
+    List<Path> allPaths = new ArrayList<>();
     try (Stream<Path> walk = Files.walk(sourcePath)) {
-      totalFiles = walk.filter(fileFilter).count();
+      walk.filter(fileFilter).forEach(allPaths::add);
     }
-    log.info(totalFiles + " files to process after prefiltering");
+    long totalFiles = allPaths.size();
+    log.info(totalFiles + " files to process after path based prefiltering");
 
     if (totalFiles == 0) {
       log.info(getPrintableDuration(Duration.between(startTime, Instant.now())));
@@ -105,36 +113,107 @@ protected void runOperations(String directoryPath, UseCase useCase, String[] sou
 
     Set<? extends ASTOperation> operations = useCase.getOperations();
     int parallelism = useCase.getParallelism();
+    int batchSize = useCase.getBatchSize();
     Predicate<String> contentPrefilteringPredicate = useCase.getContentPrefilteringPredicate();
-    log.info("Processing [" + totalFiles + "] files with [" + parallelism + "] thread(s)");
+    log.info("Processing [" + totalFiles + "] files with [" + parallelism + "] thread(s), batch size [" + batchSize + "]");
 
+    // Process files in fixed-size chunks to keep peak heap bounded. For each chunk we read
+    // content, apply the content-prefiltering predicate, and batch-parse only the files that
+    // pass. The per-chunk Maps and CompilationUnit objects are eligible for GC as soon as the
+    // chunk's futures have been waited on, so peak heap scales with batchSize — not totalFiles.
     List<Throwable> fileErrors = new ArrayList<>();
     ExecutorService executor = Executors.newFixedThreadPool(parallelism);
+    int numberOfChunks = (int) Math.ceil((double) allPaths.size() / batchSize);
+    if (numberOfChunks > 1) {
+      log.info("Processing in [" + numberOfChunks + "] chunk(s) of up to [" + batchSize + "] file(s) each");
+    }
+
     try {
-      // Second walk: stream paths directly into the executor as they are encountered, without
-      // materialising every path into a List first. This avoids holding all paths in memory at
-      // once on very large source trees.
-      List<Future<?>> futures = new ArrayList<>();
-      try (Stream<Path> walk = Files.walk(sourcePath)) {
-        walk.filter(fileFilter)
-            .forEach(f -> futures.add(executor.submit(() -> applyOperationsAndSave(f, operations, sources, classPath, contentPrefilteringPredicate))));
-      }
+      for (int chunkIndex = 0; chunkIndex < numberOfChunks; chunkIndex++) {
+        int chunkStart = chunkIndex * batchSize;
+        int chunkEnd = Math.min(chunkStart + batchSize, allPaths.size());
+        List<Path> chunk = allPaths.subList(chunkStart, chunkEnd);
+
+        // Read and content-prefilter only this chunk's files. Limiting content reads to one
+        // chunk at a time keeps peak heap proportional to batchSize rather than totalFiles.
+        List<Path> chunkToParse = new ArrayList<>();
+        Map<String, String> chunkContent = new LinkedHashMap<>();
+        List<Path> chunkContentFiltered = new ArrayList<>();
+        Map<Path, RuntimeException> chunkReadFailures = new LinkedHashMap<>();
+
+        for (Path path : chunk) {
+          try {
+            String content = new String(Files.readAllBytes(path.toAbsolutePath()));
+            if (contentPrefilteringPredicate.test(content)) {
+              chunkToParse.add(path);
+              chunkContent.put(path.toAbsolutePath().normalize().toString(), content);
+            } else {
+              log.debug("Skipping [{}] — excluded by content pre-filtering predicate", path);
+              chunkContentFiltered.add(path);
+            }
+          } catch (IOException e) {
+            chunkReadFailures.put(path, new RuntimeException(
+                "Failed to read file [" + path + "]: " + e.getMessage(), e));
+          }
+        }
 
-      for (Future<?> future : futures) {
-        try {
-          future.get();
-        } catch (ExecutionException e) {
-          Throwable cause = e.getCause();
-          log.error("Failed to process file: " + cause.getMessage(), cause);
-          fileErrors.add(cause);
-        } catch (InterruptedException e) {
-          Thread.currentThread().interrupt();
-          throw new IOException("File processing was interrupted", e);
+        if (numberOfChunks > 1) {
+          log.info("Batch parsing chunk [" + (chunkIndex + 1) + "/" + numberOfChunks + "] — " + chunkToParse.size() + " file(s)");
+        } else {
+          log.info("Batch parsing [" + chunkToParse.size() + "] file(s) with shared compilation environment");
         }
-        long idx = currentFileIndex.incrementAndGet();
-        long newPct = idx * 100 / totalFiles;
-        if (currentPercentage.getAndSet(newPct) != newPct) {
-          logProgress(idx, newPct, startTime, totalFiles);
+
+        Map<String, CompilationUnit> parsedUnits = batchParseFiles(chunkToParse, sources, classPath);
+
+        List<Future<?>> chunkFutures = new ArrayList<>();
+
+        // Submit work futures for files that were batch-parsed.
+        for (Path path : chunkToParse) {
+          String key = path.toAbsolutePath().normalize().toString();
+          CompilationUnit cu = parsedUnits.get(key);
+          String content = chunkContent.get(key);
+          if (cu != null && content != null) {
+            chunkFutures.add(executor.submit(() ->
+                applyOperationsAndSaveWithPreParsedCompilationUnit(path, content, cu, operations, sources, classPath)));
+          } else {
+            // Defensive fallback: batch parse did not return a CU (should not happen with JDT).
+            log.warn("Batch parse produced no CompilationUnit for [{}]; falling back to per-file parse", path);
+            chunkFutures.add(executor.submit(() ->
+                applyOperationsAndSave(path, operations, sources, classPath, s -> true)));
+          }
+        }
+
+        // Submit failure futures for files that could not be read (surfaces them via future.get()).
+        for (Map.Entry<Path, RuntimeException> entry : chunkReadFailures.entrySet()) {
+          RuntimeException ex = entry.getValue();
+          chunkFutures.add(executor.submit((Runnable) () -> { throw ex; }));
+        }
+
+        // Submit no-op futures for content-filtered files so that they count toward the progress
+        // denominator, preserving the same progress behaviour as the previous per-file code path.
+        for (int i = 0; i < chunkContentFiltered.size(); i++) {
+          chunkFutures.add(executor.submit(() -> {}));
+        }
+
+        // Wait for all futures in this chunk before parsing the next chunk. This ensures that
+        // the CompilationUnit objects captured by the submitted tasks can be garbage-collected
+        // before the next chunk is loaded, keeping peak heap proportional to batchSize.
+        for (Future<?> future : chunkFutures) {
+          try {
+            future.get();
+          } catch (ExecutionException e) {
+            Throwable cause = e.getCause();
+            log.error("Failed to process file: " + cause.getMessage(), cause);
+            fileErrors.add(cause);
+          } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+            throw new IOException("File processing was interrupted", e);
+          }
+          long idx = currentFileIndex.incrementAndGet();
+          long newPct = idx * 100 / totalFiles;
+          if (currentPercentage.getAndSet(newPct) != newPct) {
+            logProgress(idx, newPct, startTime, totalFiles);
+          }
         }
       }
     } finally {
@@ -155,6 +234,105 @@ protected void runOperations(String directoryPath, UseCase useCase, String[] sou
   }
 
 
+  /**
+   * Batch-parses one chunk of source files using a single shared JDT compilation environment.
+   * Called once per chunk during a run; the number of paths is bounded by
+   * {@link UseCase#getBatchSize()}.
+   *
+   * <p>A single {@link ASTParser} is configured with the supplied classpath and source paths,
+   * and {@link ASTParser#createASTs} is called with the chunk's file paths in one shot.  JDT
+   * initialises its internal {@code LookupEnvironment} — which involves scanning every JAR
+   * and source root on the classpath — exactly once for the chunk rather than once per
+   * file, which is the primary cost saving over the per-file {@code createAST()} API.
+   *
+   * <p><strong>Thread safety:</strong> {@code createASTs()} processes files sequentially on
+   * the calling thread, calling back into {@code acceptAST()} for each one.  Bindings are
+   * resolved eagerly during parsing; by the time this method returns the returned
+   * {@link CompilationUnit} objects are fully resolved and can be read safely from multiple
+   * worker threads in the subsequent parallel operation-application phase — provided those
+   * threads do not themselves trigger new binding lookups that write to the shared
+   * {@code LookupEnvironment}.  In practice, all Astra operations only <em>read</em>
+   * already-resolved bindings, so concurrent operation application is safe.
+   *
+   * @return a map from normalised absolute path string to {@link CompilationUnit}; the path
+   *         strings are exactly those returned by
+   *         {@link Path#toAbsolutePath()}{@code .normalize().toString()} for each input path.
+   */
+  private static Map<String, CompilationUnit> batchParseFiles(
+      List<Path> paths, String[] sources, String[] classPath) {
+
+    Map<String, CompilationUnit> result = new HashMap<>(paths.size() * 2);
+
+    if (paths.isEmpty()) {
+      return result;
+    }
+
+    ASTParser parser = AstraUtils.createBatchParser(sources, classPath);
+
+    String[] absolutePaths = paths.stream()
+        .map(p -> p.toAbsolutePath().normalize().toString())
+        .toArray(String[]::new);
+    String[] fileEncodings = new String[absolutePaths.length];
+    Arrays.fill(fileEncodings, "UTF-8");
+
+    parser.createASTs(absolutePaths, fileEncodings, new String[0],
+        new FileASTRequestor() {
+          @Override
+          public void acceptAST(String sourceFilePath, CompilationUnit ast) {
+            // sourceFilePath is exactly what we passed in (absolute + normalised).
+            ast.setProperty(CompilationUnitProperty.ABSOLUTE_PATH,
+                Paths.get(sourceFilePath).toAbsolutePath());
+            ast.recordModifications();
+            result.put(sourceFilePath, ast);
+          }
+        }, null);
+
+    return result;
+  }
+
+
+  /**
+   * Applies {@code operations} to a file whose {@link CompilationUnit} was already produced by
+   * the batch parse, then runs import cleanup and writes the file back if content changed.
+   *
+   * <p>This mirrors the logic in {@link #applyOperationsAndSave} but skips the per-file
+   * {@link ASTParser} / {@link AstraUtils#readAsCompilationUnit} call that would otherwise
+   * re-initialise the JDT classpath environment for this individual file.
+   *
+   * <p>This method is designed to be called from multiple worker threads in parallel; each
+   * invocation operates exclusively on its own {@link CompilationUnit} and {@link ASTRewrite},
+   * so there is no shared mutable state between threads.
+   */
+  private void applyOperationsAndSaveWithPreParsedCompilationUnit(
+      Path javaFile,
+      String fileContentBefore,
+      CompilationUnit preParseUnit,
+      Set<? extends ASTOperation> operations,
+      String[] sources,
+      String[] classpath) {
+    try {
+      ASTRewrite rewriter = runOperations(operations, preParseUnit);
+      String fileContentAfter = makeChangesFromAST(fileContentBefore, rewriter);
+
+      if (fileContentAfter.equals(fileContentBefore)) {
+        return;
+      }
+
+      // File was changed: run import cleanup and write back.
+      fileContentAfter = applyOperationsToSource(
+          new HashSet<>(Arrays.asList(new UnusedImportRefactor())),
+          sources, classpath, javaFile, fileContentAfter);
+
+      if (!fileContentAfter.equals(fileContentBefore)) {
+        Files.write(javaFile.toAbsolutePath(), fileContentAfter.getBytes(),
+            StandardOpenOption.TRUNCATE_EXISTING);
+      }
+    } catch (IOException | BadLocationException | IllegalArgumentException e) {
+      throw new RuntimeException("Failed to process file [" + javaFile + "]: " + e.getMessage(), e);
+    }
+  }
+
+
   /**
    * Builds the single, shared file-selection filter applied to every {@link Path} encountered when
    * walking the source directory. This combines the {@code .java} file check with the path-level