diff --git a/astra-core/src/main/java/org/alfasoftware/astra/core/refactoring/UseCase.java b/astra-core/src/main/java/org/alfasoftware/astra/core/refactoring/UseCase.java index a85dfb5d..314b9c75 100644 --- a/astra-core/src/main/java/org/alfasoftware/astra/core/refactoring/UseCase.java +++ b/astra-core/src/main/java/org/alfasoftware/astra/core/refactoring/UseCase.java @@ -87,4 +87,17 @@ default String[] getSources() { default int getParallelism() { return Runtime.getRuntime().availableProcessors(); } + + /** + * @return The number of source files to include in each batch when parsing with a shared + * compilation environment. A larger batch size amortises the cost of classpath + * initialisation over more files but increases the peak heap required during the + * parse phase; a smaller value reduces peak heap at the cost of more initialisations. + * The default of {@code 500} suits a 512 MB heap and typical-sized Java source + * files. Reduce this value if you encounter {@link OutOfMemoryError} on a large + * codebase, or increase it if you have a generous heap budget. 
+ */ + default int getBatchSize() { + return 500; + } } diff --git a/astra-core/src/main/java/org/alfasoftware/astra/core/refactoring/operations/interfaces/RemovePublicModifierFromInterfaces.java b/astra-core/src/main/java/org/alfasoftware/astra/core/refactoring/operations/interfaces/RemovePublicModifierFromInterfaces.java index 1a4d2ebf..375792e5 100644 --- a/astra-core/src/main/java/org/alfasoftware/astra/core/refactoring/operations/interfaces/RemovePublicModifierFromInterfaces.java +++ b/astra-core/src/main/java/org/alfasoftware/astra/core/refactoring/operations/interfaces/RemovePublicModifierFromInterfaces.java @@ -8,6 +8,7 @@ import org.alfasoftware.astra.core.utils.ClassVisitor; import org.eclipse.jdt.core.dom.ASTNode; import org.eclipse.jdt.core.dom.CompilationUnit; +import org.eclipse.jdt.core.dom.IExtendedModifier; import org.eclipse.jdt.core.dom.MethodDeclaration; import org.eclipse.jdt.core.dom.Modifier; import org.eclipse.jdt.core.dom.rewrite.ASTRewrite; @@ -31,7 +32,9 @@ public void run(final CompilationUnit compilationUnit, final ASTNode node, final .stream() // find any public modifiers .map(MethodDeclaration::modifiers) - .flatMap(List::stream) + .flatMap(List::stream) + .filter(Modifier.class::isInstance) + .map(Modifier.class::cast) .filter(Modifier::isPublic) // remove them .forEach(m -> rewriter.remove(m, null)); diff --git a/astra-core/src/main/java/org/alfasoftware/astra/core/utils/AstraCore.java b/astra-core/src/main/java/org/alfasoftware/astra/core/utils/AstraCore.java index ea74065e..524db9f5 100644 --- a/astra-core/src/main/java/org/alfasoftware/astra/core/utils/AstraCore.java +++ b/astra-core/src/main/java/org/alfasoftware/astra/core/utils/AstraCore.java @@ -12,8 +12,11 @@ import java.time.Instant; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Set; import 
java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; @@ -26,12 +29,14 @@ import org.alfasoftware.astra.core.refactoring.UseCase; import org.alfasoftware.astra.core.refactoring.operations.imports.UnusedImportRefactor; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +import org.eclipse.jdt.core.dom.ASTParser; import org.eclipse.jdt.core.dom.CompilationUnit; +import org.eclipse.jdt.core.dom.FileASTRequestor; import org.eclipse.jdt.core.dom.rewrite.ASTRewrite; import org.eclipse.jface.text.BadLocationException; import org.eclipse.text.edits.MalformedTreeException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * AstraCore operates on source files in an input directory, building an AST for each file, using any additional classpaths supplied.  @@ -61,7 +66,7 @@ public static void run(String targetDirectoryPath, UseCase useCase) { validateSourceAndClasspath(sources, classPath); try { AstraCore main = new AstraCore(); - main.runOperations(targetDirectoryPath, useCase, sources, classPath); + main.runOperations(targetDirectoryPath, useCase, sources, AstraUtils.filterClassPath(classPath)); } catch (IOException e) { throw new RuntimeException("Astra run failed for directory [" + targetDirectoryPath + "]: " + e.getMessage(), e); } @@ -83,20 +88,23 @@ protected void runOperations(String directoryPath, UseCase useCase, String[] sou AtomicLong currentPercentage = new AtomicLong(); Instant startTime = Instant.now(); - // The same file-selection filter (.java extension + the UseCase path prefiltering predicate) - // is used for both the count walk and the processing walk, so the progress denominator can - // never drift from the set of files actually processed. + // Build the single file-selection filter used for path scanning, progress tracking, and + // chunk partitioning. Using one shared Predicate instance ensures the progress denominator + // can never drift from the set of files actually processed. 
Path sourcePath = Paths.get(directoryPath); Predicate fileFilter = buildFileFilter(useCase); - // First walk: a cheap count pass that reads only directory metadata (not file contents) to - // determine the total number of files to process, which feeds the progress percentage below. - log.info("Counting files (this may take a few seconds)"); - long totalFiles; + // Single walk: collect all matching paths so we can (a) derive the progress denominator + // without a second walk and (b) partition the list into fixed-size chunks for + // bounded-memory batch parsing. Only Path objects are materialised here — file contents + // are read lazily, one chunk at a time. + log.info("Scanning for files (this may take a few seconds)"); + List allPaths = new ArrayList<>(); try (Stream walk = Files.walk(sourcePath)) { - totalFiles = walk.filter(fileFilter).count(); + walk.filter(fileFilter).forEach(allPaths::add); } - log.info(totalFiles + " files to process after prefiltering"); + long totalFiles = allPaths.size(); + log.info(totalFiles + " files to process after path based prefiltering"); if (totalFiles == 0) { log.info(getPrintableDuration(Duration.between(startTime, Instant.now()))); @@ -105,36 +113,107 @@ protected void runOperations(String directoryPath, UseCase useCase, String[] sou Set operations = useCase.getOperations(); int parallelism = useCase.getParallelism(); + int batchSize = useCase.getBatchSize(); Predicate contentPrefilteringPredicate = useCase.getContentPrefilteringPredicate(); - log.info("Processing [" + totalFiles + "] files with [" + parallelism + "] thread(s)"); + log.info("Processing [" + totalFiles + "] files with [" + parallelism + "] thread(s), batch size [" + batchSize + "]"); + // Process files in fixed-size chunks to keep peak heap bounded. For each chunk we read + // content, apply the content-prefiltering predicate, and batch-parse only the files that + // pass. 
The per-chunk Maps and CompilationUnit objects are eligible for GC as soon as the + // chunk's futures have been waited on, so peak heap scales with batchSize — not totalFiles. List fileErrors = new ArrayList<>(); ExecutorService executor = Executors.newFixedThreadPool(parallelism); + int numberOfChunks = (int) Math.ceil((double) allPaths.size() / batchSize); + if (numberOfChunks > 1) { + log.info("Processing in [" + numberOfChunks + "] chunk(s) of up to [" + batchSize + "] file(s) each"); + } + try { - // Second walk: stream paths directly into the executor as they are encountered, without - // materialising every path into a List first. This avoids holding all paths in memory at - // once on very large source trees. - List> futures = new ArrayList<>(); - try (Stream walk = Files.walk(sourcePath)) { - walk.filter(fileFilter) - .forEach(f -> futures.add(executor.submit(() -> applyOperationsAndSave(f, operations, sources, classPath, contentPrefilteringPredicate)))); - } + for (int chunkIndex = 0; chunkIndex < numberOfChunks; chunkIndex++) { + int chunkStart = chunkIndex * batchSize; + int chunkEnd = Math.min(chunkStart + batchSize, allPaths.size()); + List chunk = allPaths.subList(chunkStart, chunkEnd); + + // Read and content-prefilter only this chunk's files. Limiting content reads to one + // chunk at a time keeps peak heap proportional to batchSize rather than totalFiles. 
+ List chunkToParse = new ArrayList<>(); + Map chunkContent = new LinkedHashMap<>(); + List chunkContentFiltered = new ArrayList<>(); + Map chunkReadFailures = new LinkedHashMap<>(); + + for (Path path : chunk) { + try { + String content = new String(Files.readAllBytes(path.toAbsolutePath())); + if (contentPrefilteringPredicate.test(content)) { + chunkToParse.add(path); + chunkContent.put(path.toAbsolutePath().normalize().toString(), content); + } else { + log.debug("Skipping [{}] — excluded by content pre-filtering predicate", path); + chunkContentFiltered.add(path); + } + } catch (IOException e) { + chunkReadFailures.put(path, new RuntimeException( + "Failed to read file [" + path + "]: " + e.getMessage(), e)); + } + } - for (Future future : futures) { - try { - future.get(); - } catch (ExecutionException e) { - Throwable cause = e.getCause(); - log.error("Failed to process file: " + cause.getMessage(), cause); - fileErrors.add(cause); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new IOException("File processing was interrupted", e); + if (numberOfChunks > 1) { + log.info("Batch parsing chunk [" + (chunkIndex + 1) + "/" + numberOfChunks + "] — " + chunkToParse.size() + " file(s)"); + } else { + log.info("Batch parsing [" + chunkToParse.size() + "] file(s) with shared compilation environment"); } - long idx = currentFileIndex.incrementAndGet(); - long newPct = idx * 100 / totalFiles; - if (currentPercentage.getAndSet(newPct) != newPct) { - logProgress(idx, newPct, startTime, totalFiles); + + Map parsedUnits = batchParseFiles(chunkToParse, sources, classPath); + + List> chunkFutures = new ArrayList<>(); + + // Submit work futures for files that were batch-parsed. 
+ for (Path path : chunkToParse) { + String key = path.toAbsolutePath().normalize().toString(); + CompilationUnit cu = parsedUnits.get(key); + String content = chunkContent.get(key); + if (cu != null && content != null) { + chunkFutures.add(executor.submit(() -> + applyOperationsAndSaveWithPreParsedCompilationUnit(path, content, cu, operations, sources, classPath))); + } else { + // Defensive fallback: batch parse did not return a CU (should not happen with JDT). + log.warn("Batch parse produced no CompilationUnit for [{}]; falling back to per-file parse", path); + chunkFutures.add(executor.submit(() -> + applyOperationsAndSave(path, operations, sources, classPath, s -> true))); + } + } + + // Submit failure futures for files that could not be read (surfaces them via future.get()). + for (Map.Entry entry : chunkReadFailures.entrySet()) { + RuntimeException ex = entry.getValue(); + chunkFutures.add(executor.submit((Runnable) () -> { throw ex; })); + } + + // Submit no-op futures for content-filtered files so that they count toward the progress + // denominator, preserving the same progress behaviour as the previous per-file code path. + for (int i = 0; i < chunkContentFiltered.size(); i++) { + chunkFutures.add(executor.submit(() -> {})); + } + + // Wait for all futures in this chunk before parsing the next chunk. This ensures that + // the CompilationUnit objects captured by the submitted tasks can be garbage-collected + // before the next chunk is loaded, keeping peak heap proportional to batchSize. 
+ for (Future future : chunkFutures) { + try { + future.get(); + } catch (ExecutionException e) { + Throwable cause = e.getCause(); + log.error("Failed to process file: " + cause.getMessage(), cause); + fileErrors.add(cause); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new IOException("File processing was interrupted", e); + } + long idx = currentFileIndex.incrementAndGet(); + long newPct = idx * 100 / totalFiles; + if (currentPercentage.getAndSet(newPct) != newPct) { + logProgress(idx, newPct, startTime, totalFiles); + } } } } finally { @@ -155,6 +234,105 @@ protected void runOperations(String directoryPath, UseCase useCase, String[] sou } + /** + * Batch-parses one chunk of source files using a single shared JDT compilation environment. + * Called once per chunk during a run; the number of paths is bounded by + * {@link UseCase#getBatchSize()}. + * + *

A single {@link ASTParser} is configured with the supplied classpath and source paths, + * and {@link ASTParser#createASTs} is called with the chunk's file paths in one shot. JDT + * initialises its internal {@code LookupEnvironment} — which involves scanning every JAR + * and source root on the classpath — exactly once for the chunk rather than once per + * file, which is the primary cost saving over the per-file {@code createAST()} API. + * + *

Thread safety: {@code createASTs()} processes files sequentially on + * the calling thread, calling back into {@code acceptAST()} for each one. Bindings are + * resolved eagerly during parsing; by the time this method returns the returned + * {@link CompilationUnit} objects are fully resolved and can be read safely from multiple + * worker threads in the subsequent parallel operation-application phase — provided those + * threads do not themselves trigger new binding lookups that write to the shared + * {@code LookupEnvironment}. In practice, all Astra operations only read + * already-resolved bindings, so concurrent operation application is safe. + * + * @return a map from normalised absolute path string to {@link CompilationUnit}; the path + * strings are exactly those returned by + * {@link Path#toAbsolutePath()}{@code .normalize().toString()} for each input path. + */ + private static Map batchParseFiles( + List paths, String[] sources, String[] classPath) { + + Map result = new HashMap<>(paths.size() * 2); + + if (paths.isEmpty()) { + return result; + } + + ASTParser parser = AstraUtils.createBatchParser(sources, classPath); + + String[] absolutePaths = paths.stream() + .map(p -> p.toAbsolutePath().normalize().toString()) + .toArray(String[]::new); + String[] fileEncodings = new String[absolutePaths.length]; + Arrays.fill(fileEncodings, "UTF-8"); + + parser.createASTs(absolutePaths, fileEncodings, new String[0], + new FileASTRequestor() { + @Override + public void acceptAST(String sourceFilePath, CompilationUnit ast) { + // sourceFilePath is exactly what we passed in (absolute + normalised). 
+ ast.setProperty(CompilationUnitProperty.ABSOLUTE_PATH, + Paths.get(sourceFilePath).toAbsolutePath()); + ast.recordModifications(); + result.put(sourceFilePath, ast); + } + }, null); + + return result; + } + + + /** + * Applies {@code operations} to a file whose {@link CompilationUnit} was already produced by + * the batch parse, then runs import cleanup and writes the file back if content changed. + * + *

This mirrors the logic in {@link #applyOperationsAndSave} but skips the per-file + * {@link ASTParser} / {@link AstraUtils#readAsCompilationUnit} call that would otherwise + * re-initialise the JDT classpath environment for this individual file. + * + *

This method is designed to be called from multiple worker threads in parallel; each + * invocation operates exclusively on its own {@link CompilationUnit} and {@link ASTRewrite}, + * so there is no shared mutable state between threads. + */ + private void applyOperationsAndSaveWithPreParsedCompilationUnit( + Path javaFile, + String fileContentBefore, + CompilationUnit preParseUnit, + Set operations, + String[] sources, + String[] classpath) { + try { + ASTRewrite rewriter = runOperations(operations, preParseUnit); + String fileContentAfter = makeChangesFromAST(fileContentBefore, rewriter); + + if (fileContentAfter.equals(fileContentBefore)) { + return; + } + + // File was changed: run import cleanup and write back. + fileContentAfter = applyOperationsToSource( + new HashSet<>(Arrays.asList(new UnusedImportRefactor())), + sources, classpath, javaFile, fileContentAfter); + + if (!fileContentAfter.equals(fileContentBefore)) { + Files.write(javaFile.toAbsolutePath(), fileContentAfter.getBytes(), + StandardOpenOption.TRUNCATE_EXISTING); + } + } catch (IOException | BadLocationException | IllegalArgumentException e) { + throw new RuntimeException("Failed to process file [" + javaFile + "]: " + e.getMessage(), e); + } + } + + /** * Builds the single, shared file-selection filter applied to every {@link Path} encountered when * walking the source directory. 
This combines the {@code .java} file check with the path-level diff --git a/astra-core/src/main/java/org/alfasoftware/astra/core/utils/AstraUtils.java b/astra-core/src/main/java/org/alfasoftware/astra/core/utils/AstraUtils.java index 3b04034f..0075eaba 100644 --- a/astra-core/src/main/java/org/alfasoftware/astra/core/utils/AstraUtils.java +++ b/astra-core/src/main/java/org/alfasoftware/astra/core/utils/AstraUtils.java @@ -1,5 +1,6 @@ package org.alfasoftware.astra.core.utils; +import java.io.File; import java.lang.reflect.Modifier; import java.nio.file.Path; import java.util.Arrays; @@ -12,8 +13,6 @@ import java.util.stream.Collectors; import org.alfasoftware.astra.core.matchers.AnnotationMatcher; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.eclipse.jdt.core.JavaCore; import org.eclipse.jdt.core.Signature; import org.eclipse.jdt.core.dom.AST; @@ -49,6 +48,8 @@ import org.eclipse.jdt.core.dom.rewrite.ListRewrite; import org.eclipse.jdt.core.formatter.DefaultCodeFormatterConstants; import org.eclipse.jface.text.BadLocationException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Utility functions for working with ASTs including creation of ASTs from source files, writing changes back to the source file, @@ -68,12 +69,11 @@ public static CompilationUnit readAsCompilationUnit(Path file, String fileSource return compilationUnit; } - private static final String JAVA_VERSION = JavaCore.VERSION_17; + private static final String JAVA_VERSION = JavaCore.latestSupportedJavaVersion(); public static ASTParser createParser(String fileSource, String[] sources, String[] classPath) { - @SuppressWarnings("deprecation") // This is just saying "use a newer Java version" - ASTParser parser = ASTParser.newParser(AST.JLS17); + ASTParser parser = ASTParser.newParser(AST.getJLSLatest()); parser.setResolveBindings(true); parser.setBindingsRecovery(true); parser.setStatementsRecovery(true); @@ -93,6 +93,76 @@ public static ASTParser 
createParser(String fileSource, String[] sources, String } + /** + * Creates an {@link ASTParser} configured for batch processing via + * {@link ASTParser#createASTs(String[], String[], String[], org.eclipse.jdt.core.dom.FileASTRequestor, org.eclipse.core.runtime.IProgressMonitor)}. + * + *

The returned parser has binding resolution, binding recovery, and statement recovery + * enabled, and is configured with the supplied classpath and source paths. Unlike + * {@link #createParser}, it does not call {@code setSource()}, {@code setUnitName()}, + * or {@code setKind()} — those are per-file settings that are ignored (and must not be set) + * in batch mode. + * + *

The shared environment set up here — classpath scanning, JAR index loading — is + * amortised across every file passed to {@code createASTs()}, rather than being repeated + * once per file as in the single-file {@code createAST()} path. + */ + public static ASTParser createBatchParser(String[] sources, String[] classPath) { + ASTParser parser = ASTParser.newParser(AST.getJLSLatest()); + parser.setResolveBindings(true); + parser.setBindingsRecovery(true); + parser.setStatementsRecovery(true); + + HashMap javaCoreOptions = new HashMap<>(JavaCore.getOptions()); + JavaCore.setComplianceOptions(JAVA_VERSION, javaCoreOptions); + parser.setCompilerOptions(javaCoreOptions); + + final String[] encodings = new String[sources.length]; + Arrays.fill(encodings, "UTF-8"); + parser.setEnvironment(filterClassPath(classPath), sources, encodings, true); + return parser; + } + + + /** + * Filters classpath entries to those that JDT can open as a JAR archive or source directory, + * discarding any entry that is neither. + * + *

The per-file {@link ASTParser#createAST} API silently ignores unreadable classpath + * entries, but {@link ASTParser#createASTs} initialises the entire classpath environment + * upfront and propagates any {@link java.util.zip.ZipException} thrown when it encounters a + * non-archive file (e.g. a Maven {@code .pom} BOM artifact) in the classpath. Filtering + * here restores the tolerant behaviour while logging, at debug level, each existing entry + * that was discarded. + * + *

Entries ending in {@code .jar} or {@code .zip} are kept without an existence check (see + * {@link AstraCore#validateSourceAndClasspath}); any other non-directory entry is dropped, silently if it does not exist. + */ + public static String[] filterClassPath(String[] classPath) { + return Arrays.stream(classPath) + .filter(entry -> { + if (entry == null || entry.isEmpty()) { + return false; + } + File f = new File(entry); + if (f.isDirectory()) { + return true; + } + String lower = entry.toLowerCase(); + if (lower.endsWith(".jar") || lower.endsWith(".zip")) { + return true; + } + if (f.exists()) { + log.debug("Excluding classpath entry [{}] from batch parser — not a .jar, .zip, or directory. " + + "POM-only dependencies and other non-archive entries cannot be opened by JDT " + + "and would cause a ZipException during classpath initialisation.", entry); + } + return false; + }) + .toArray(String[]::new); + } + + + /** * Apply the recorded changes from the ASTRewrite to the source file, and return the result. * diff --git a/astra-core/src/main/java/org/alfasoftware/astra/core/utils/ClassVisitor.java b/astra-core/src/main/java/org/alfasoftware/astra/core/utils/ClassVisitor.java index 170e4ee9..f55c3164 100644 --- a/astra-core/src/main/java/org/alfasoftware/astra/core/utils/ClassVisitor.java +++ b/astra-core/src/main/java/org/alfasoftware/astra/core/utils/ClassVisitor.java @@ -7,8 +7,6 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; import org.eclipse.jdt.core.dom.ASTNode; import org.eclipse.jdt.core.dom.ASTVisitor; import org.eclipse.jdt.core.dom.AbstractTypeDeclaration; @@ -22,6 +20,7 @@ import org.eclipse.jdt.core.dom.FieldDeclaration; import org.eclipse.jdt.core.dom.IDocElement; import org.eclipse.jdt.core.dom.ImportDeclaration; +import org.eclipse.jdt.core.dom.InfixExpression; import org.eclipse.jdt.core.dom.Javadoc; import org.eclipse.jdt.core.dom.MarkerAnnotation; import org.eclipse.jdt.core.dom.MethodDeclaration; @@ -41,6 
+40,8 @@ import org.eclipse.jdt.core.dom.TypeParameter; import org.eclipse.jdt.core.dom.VariableDeclarationFragment; import org.eclipse.jdt.core.dom.VariableDeclarationStatement; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * Tracks what is seen when visiting all nodes in a compilation unit. @@ -63,6 +64,7 @@ public class ClassVisitor extends ASTVisitor { private final List markerAnnotations = new ArrayList<>(); private final List classInstanceCreations = new ArrayList<>(); private final List tagElements = new ArrayList<>(); + private final List infixExpressions = new ArrayList<>(); private final List patternInstanceofExpressions = new ArrayList<>(); private final List fieldAccesses = new ArrayList<>(); @@ -118,6 +120,13 @@ public boolean visit(RecordDeclaration node) { return super.visit(node); } + @Override + public boolean visit(InfixExpression node) { + log.debug("Infix: " + node); + infixExpressions.add(node); + return super.visit(node); + } + @Override public boolean visit(PatternInstanceofExpression node) { log.debug("Pattern instanceof: " + node); @@ -298,6 +307,10 @@ public List getRecordDeclarations() { .collect(Collectors.toList()); } + public List getInfixExpressions() { + return infixExpressions; + } + public List getPatternInstanceofExpressions() { return patternInstanceofExpressions; } @@ -420,6 +433,7 @@ public Set getVisitedNodes() { getImports(), getFieldAccesses(), getCastExpressions(), + getInfixExpressions(), getPatternInstanceofExpressions()) .flatMap(Collection::stream) .collect(Collectors.toSet()); diff --git a/astra-core/src/test/java/org/alfasoftware/astra/core/utils/TestAstraCoreSharedEnvironment.java b/astra-core/src/test/java/org/alfasoftware/astra/core/utils/TestAstraCoreSharedEnvironment.java new file mode 100644 index 00000000..777df34c --- /dev/null +++ b/astra-core/src/test/java/org/alfasoftware/astra/core/utils/TestAstraCoreSharedEnvironment.java @@ -0,0 +1,343 @@ +package org.alfasoftware.astra.core.utils; + 
+import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Comparator; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Predicate; + +import org.alfasoftware.astra.core.refactoring.UseCase; +import org.eclipse.jdt.core.dom.ASTNode; +import org.eclipse.jdt.core.dom.CompilationUnit; +import org.eclipse.jdt.core.dom.FieldDeclaration; +import org.eclipse.jdt.core.dom.ITypeBinding; +import org.eclipse.jdt.core.dom.rewrite.ASTRewrite; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Tests for the shared JDT compilation environment introduced in {@link AstraCore}. + * + *

The batch parse path (invoked via {@link AstraCore#run}) amortises JDT classpath + * initialisation across all files in each batch of a run by using + * {@code ASTParser.createASTs()} with one shared environment per batch rather than calling + * {@code ASTParser.createAST()} — and therefore {@code setEnvironment()} — once per file. + * + *

These tests verify: + *

    + *
  • All files are processed through the batch path.
  • + *
  • Binding resolution works correctly for batch-parsed compilation units.
  • + *
  • Content prefiltering interacts correctly with batch parsing (skipped files are not + * parsed or visited).
  • + *
  • Batch parsing composes correctly with parallel operation application.
  • + *
+ */ +public class TestAstraCoreSharedEnvironment { + + private Path tempDir; + + @Before + public void setUp() throws IOException { + tempDir = Files.createTempDirectory("astra-shared-env-test"); + } + + @After + public void tearDown() throws IOException { + Files.walk(tempDir) + .sorted(Comparator.reverseOrder()) + .forEach(path -> path.toFile().delete()); + } + + + /** + * Verifies that the batch parse path processes every Java file in the target directory. + * This is the primary smoke test for the shared-environment path. + */ + @Test + public void testBatchParsingVisitsAllFiles() throws IOException { + int fileCount = 6; + for (int i = 1; i <= fileCount; i++) { + Files.writeString(tempDir.resolve("Shared" + i + ".java"), + "public class Shared" + i + " {}"); + } + + Set visited = ConcurrentHashMap.newKeySet(); + + AstraCore.run(tempDir.toString(), new UseCase() { + @Override + public Set getOperations() { + return Set.of((cu, node, rewriter) -> { + Path p = (Path) cu.getProperty(CompilationUnitProperty.ABSOLUTE_PATH); + if (p != null) { + visited.add(p); + } + }); + } + }); + + assertEquals("Batch parse should visit every file", fileCount, visited.size()); + } + + + /** + * Verifies that binding resolution works correctly for compilation units produced by the + * batch parse path. Files that declare a field of type {@code java.util.List} should have + * that field's type binding resolve to {@code java.util.List} (not be recovered/unknown). + * + *

The test runs with {@code parallelism = 2} to confirm that concurrent reads of + * already-resolved bindings are safe after the sequential batch parse completes. + */ + @Test + public void testBatchParsingResolvesBindingsCorrectly() throws IOException { + int fileCount = 4; + for (int i = 1; i <= fileCount; i++) { + Files.writeString(tempDir.resolve("BindingFile" + i + ".java"), + "import java.util.List;\n" + + "public class BindingFile" + i + " {\n" + + " List items;\n" + + "}"); + } + + Set resolvedQualifiedNames = ConcurrentHashMap.newKeySet(); + Set recoveredTypeNames = ConcurrentHashMap.newKeySet(); + + AstraCore.run(tempDir.toString(), new UseCase() { + @Override + public Set getOperations() { + return Set.of((CompilationUnit cu, ASTNode node, ASTRewrite rewriter) -> { + if (!(node instanceof FieldDeclaration)) { + return; + } + FieldDeclaration fd = (FieldDeclaration) node; + ITypeBinding binding = fd.getType().resolveBinding(); + if (binding == null) { + return; + } + if (binding.isRecovered()) { + recoveredTypeNames.add(fd.getType().toString()); + } else { + resolvedQualifiedNames.add(binding.getErasure().getQualifiedName()); + } + }); + } + + @Override + public int getParallelism() { + return 2; + } + }); + + assertTrue("java.util.List should resolve correctly in batch-parsed CUs", + resolvedQualifiedNames.contains("java.util.List")); + assertTrue("No bindings should be recovered (unresolvable) for standard library types", + recoveredTypeNames.isEmpty()); + } + + + /** + * Verifies that content prefiltering interacts correctly with batch parsing. + * Files whose content does not pass the predicate must not be visited by any AST operation, + * and their content must remain unchanged. 
+ */ + @Test + public void testContentPrefilteringSkipsFilesBeforeBatchParse() throws IOException { + String token = "BATCH_TOKEN"; + String matchingContent = "public class WithToken { /* " + token + " */ }"; + String nonMatchingContent = "public class WithoutToken { /* no token here */ }"; + + Path matchingFile = tempDir.resolve("WithToken.java"); + Path nonMatchingFile = tempDir.resolve("WithoutToken.java"); + Files.writeString(matchingFile, matchingContent); + Files.writeString(nonMatchingFile, nonMatchingContent); + + Set visitedPaths = ConcurrentHashMap.newKeySet(); + + AstraCore.run(tempDir.toString(), new UseCase() { + @Override + public Predicate getContentPrefilteringPredicate() { + return content -> content.contains(token); + } + + @Override + public Set getOperations() { + return Set.of((cu, node, rewriter) -> { + Path p = (Path) cu.getProperty(CompilationUnitProperty.ABSOLUTE_PATH); + if (p != null) { + visitedPaths.add(p); + } + }); + } + + @Override + public int getParallelism() { + return 1; + } + }); + + assertTrue("File containing the token should be visited", + visitedPaths.stream().anyMatch(p -> p.getFileName().toString().equals("WithToken.java"))); + assertFalse("File without the token should not be visited", + visitedPaths.stream().anyMatch(p -> p.getFileName().toString().equals("WithoutToken.java"))); + assertEquals("Non-matching file content must be unchanged", + nonMatchingContent, Files.readString(nonMatchingFile)); + } + + + /** + * Verifies that batch parsing composes correctly with parallel operation application: + * all files should be visited even when multiple worker threads apply operations + * concurrently on the batch-parsed compilation units. 
+ */ + @Test + public void testBatchParsingWithParallelOpsVisitsAllFiles() throws IOException { + int fileCount = 8; + for (int i = 1; i <= fileCount; i++) { + Files.writeString(tempDir.resolve("Par" + i + ".java"), + "public class Par" + i + " {}"); + } + + Set visited = ConcurrentHashMap.newKeySet(); + + AstraCore.run(tempDir.toString(), new UseCase() { + @Override + public Set getOperations() { + return Set.of((cu, node, rewriter) -> { + Path p = (Path) cu.getProperty(CompilationUnitProperty.ABSOLUTE_PATH); + if (p != null) { + visited.add(p); + } + }); + } + + @Override + public int getParallelism() { + return 4; + } + }); + + assertEquals("All files should be visited with batch parse + parallelism=4", + fileCount, visited.size()); + } + + + /** + * Verifies that a per-file operation error in the batch path surfaces correctly and does + * not prevent other files from being processed. + */ + @Test + public void testBatchParsingContinuesAfterPerFileError() throws IOException { + int totalFiles = 4; + for (int i = 1; i <= totalFiles; i++) { + Files.writeString(tempDir.resolve("ErrFile" + i + ".java"), + "public class ErrFile" + i + " {}"); + } + + Set successfulFiles = ConcurrentHashMap.newKeySet(); + + UseCase useCase = new UseCase() { + @Override + public Set getOperations() { + return Set.of((cu, node, rewriter) -> { + Path p = (Path) cu.getProperty(CompilationUnitProperty.ABSOLUTE_PATH); + if (p != null && p.getFileName().toString().equals("ErrFile2.java")) { + throw new RuntimeException("Intentional batch-path failure for ErrFile2"); + } + if (p != null) { + successfulFiles.add(p); + } + }); + } + + @Override + public int getParallelism() { + return 2; + } + }; + + try { + AstraCore.run(tempDir.toString(), useCase); + fail("Expected a RuntimeException due to ErrFile2 failing"); + } catch (RuntimeException e) { + assertNotNull(e.getMessage()); + } + + assertEquals("Non-failing files should still be processed", totalFiles - 1, successfulFiles.size()); + } + + + 
/** + * Verifies that files are processed correctly when the file count exceeds the batch size, + * exercising the multi-chunk code path. The test overrides {@link UseCase#getBatchSize()} to + * return {@code 3} and creates {@code 7} files, forcing three chunks (3 + 3 + 1). All seven + * files should be visited exactly once. + */ + @Test + public void testChunkedBatchingVisitsAllFiles() throws IOException { + int fileCount = 7; + for (int i = 1; i <= fileCount; i++) { + Files.writeString(tempDir.resolve("Chunked" + i + ".java"), + "public class Chunked" + i + " {}"); + } + + Set visited = ConcurrentHashMap.newKeySet(); + + AstraCore.run(tempDir.toString(), new UseCase() { + @Override + public Set getOperations() { + return Set.of((cu, node, rewriter) -> { + Path p = (Path) cu.getProperty(CompilationUnitProperty.ABSOLUTE_PATH); + if (p != null) { + visited.add(p); + } + }); + } + + @Override + public int getBatchSize() { + return 3; // force multiple chunks: ceil(7/3) = 3 chunks + } + }); + + assertEquals("All files should be visited across multiple chunks", fileCount, visited.size()); + } + + + /** + * Verifies that a run with zero files after content prefiltering (all files excluded) completes + * cleanly without errors, and the batch parse is not called with an empty file list. 
+ */ + @Test + public void testBatchParsingWithAllFilesContentFiltered() throws IOException { + Files.writeString(tempDir.resolve("FilteredOut.java"), "public class FilteredOut {}"); + + Set visited = ConcurrentHashMap.newKeySet(); + + AstraCore.run(tempDir.toString(), new UseCase() { + @Override + public Predicate getContentPrefilteringPredicate() { + return content -> false; // reject everything + } + + @Override + public Set getOperations() { + return Set.of((cu, node, rewriter) -> { + Path p = (Path) cu.getProperty(CompilationUnitProperty.ABSOLUTE_PATH); + if (p != null) { + visited.add(p); + } + }); + } + }); + + assertTrue("No files should be visited when content predicate rejects all", visited.isEmpty()); + } +}