Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 79 additions & 0 deletions Clava-JS/src-api/clava/code/StatementDecomposer.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
import { registerSourceCode } from "@specs-feup/lara/jest/jestHelpers.js";
import Query from "@specs-feup/lara/api/weaver/Query.js";
import { FunctionJp, Vardecl } from "../../Joinpoints.js";
import NormalizeToSubset from "../opt/NormalizeToSubset.js";

// C fixture: `foo` already declares locals named decomp_0 and decomp_1, i.e. names
// that use the decomposer's default "decomp_" prefix, and then initializes `result`
// with a compound expression. Used below to check that normalization does not end up
// with two declarations sharing one name.
const codeWithExistingDecompVars = `
int foo(int a) {
int decomp_0 = 5;
int decomp_1 = 10;
int result = (a + 1) * (decomp_0 + decomp_1);
return result;
}`;

describe("StatementDecomposer duplicate symbols", () => {
registerSourceCode(codeWithExistingDecompVars);

it("should not create duplicate decomp_ symbols when normalizing", () => {
const functionJp = Query.search(FunctionJp, { name: "foo" }).first();

if (functionJp === undefined) {
fail("Function not found");
}

// Get all variable names before normalization
const varsBefore = Query.searchFrom(functionJp, Vardecl).map(v => v.name);
expect(varsBefore).toContain("decomp_0");
expect(varsBefore).toContain("decomp_1");

// Apply normalization
NormalizeToSubset(functionJp);

// Get all variable names after normalization
const varsAfter = Query.searchFrom(functionJp, Vardecl).map(v => v.name);

// Check that all variable names are unique
const uniqueVars = new Set(varsAfter);
expect(uniqueVars.size).toBe(varsAfter.length);

// Ensure no duplicate decomp_0 or decomp_1
const decompCounts: Record<string, number> = {};
for (const varName of varsAfter) {
if (varName.startsWith("decomp_")) {
decompCounts[varName] = (decompCounts[varName] || 0) + 1;
}
}

for (const [varName, count] of Object.entries(decompCounts)) {
Copy link

Copilot AI Nov 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unused variable varName.

Suggested change
for (const [varName, count] of Object.entries(decompCounts)) {
for (const [, count] of Object.entries(decompCounts)) {

Copilot uses AI. Check for mistakes.
expect(count).toBe(1);
}
});
});

// C fixture: `bar` returns a compound expression and declares no locals of its own.
// Used below to normalize the same function twice in a row.
const codeMultipleNormalization = `
int bar(int x) {
return (x + 1) * (x + 2);
}`;

// Regression suite: applying NormalizeToSubset twice to the same function must not
// leave two variable declarations with the same name.
describe("StatementDecomposer multiple normalizations", () => {
  registerSourceCode(codeMultipleNormalization);

  it("should not create duplicate symbols when normalizing multiple times", () => {
    const functionJp = Query.search(FunctionJp, { name: "bar" }).first();

    // `fail` is a Jasmine global that Jest's default runner (jest-circus) does not
    // define, so calling it would surface as "ReferenceError: fail is not defined".
    // Throwing fails the test with the intended message and still narrows the type.
    if (functionJp === undefined) {
      throw new Error("Function not found");
    }

    // Each call builds its own StatementDecomposer, so the second run starts
    // counting from scratch and must avoid the names the first run declared.
    NormalizeToSubset(functionJp);
    NormalizeToSubset(functionJp);

    const vars = Query.searchFrom(functionJp, Vardecl).map((v) => v.name);

    // A Set drops repeated names, so equal sizes mean every name is unique.
    const uniqueVars = new Set(vars);
    expect(uniqueVars.size).toBe(vars.length);
  });
});
85 changes: 78 additions & 7 deletions Clava-JS/src-api/clava/code/StatementDecomposer.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
import { debug } from "@specs-feup/lara/api/lara/core/LaraCore.js";
import IdGenerator from "@specs-feup/lara/api/lara/util/IdGenerator.js";
import Query from "@specs-feup/lara/api/weaver/Query.js";
import {
BinaryOp,
Call,
Case,
Decl,
type Decl,
DeclStmt,
EmptyStmt,
ExprStmt,
Expression,
Joinpoint,
type Expression,
FunctionJp,
type Joinpoint,
LabelStmt,
MemberCall,
type Param,
ReturnStmt,
Scope,
Statement,
Expand All @@ -25,17 +29,81 @@ import DecomposeResult from "./DecomposeResult.js";
* Decomposes complex statements into several simpler ones.
*/
export default class StatementDecomposer {
tempPrefix;
startIndex;
public tempPrefix;
public startIndex;
public useGlobalIds;
private symbolTable: Set<string> | undefined;
private currentFunction: FunctionJp | undefined;

constructor(tempPrefix: string = "decomp_", startIndex: number = 0) {
/**
* Creates a new StatementDecomposer.
*
* Only stores the three settings; the symbol table and current function fields are
* left unset here.
*
* @param tempPrefix - Prefix for temporary variable names (defaults to "decomp_")
* @param startIndex - Index used for the first name produced by the local counter (defaults to 0; ignored if useGlobalIds is true)
* @param useGlobalIds - If true, names are taken from IdGenerator.next(tempPrefix) instead of the local counter, to avoid duplicate symbols across multiple normalizations (defaults to false)
*/
public constructor(tempPrefix = "decomp_", startIndex = 0, useGlobalIds = false) {
this.tempPrefix = tempPrefix;
this.startIndex = startIndex;
this.useGlobalIds = useGlobalIds;
}

/**
* Builds a symbol table of existing variable declarations and parameters in the given scope.
* This is used to avoid creating duplicate variable names.
*
* @param $scope - The scope to build the symbol table from (typically a function)
*/
private buildSymbolTable($scope: Joinpoint): void {
// Get the enclosing function if we're not already at a function
const $function = $scope instanceof FunctionJp
? $scope
: $scope.getAncestor("function") as FunctionJp | undefined;

// If we're in the same function as before, reuse the symbol table
if ($function === this.currentFunction && this.symbolTable !== undefined) {
return;
}

Comment on lines +63 to +67
Copy link

Copilot AI Nov 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The symbol table caching logic can go stale. When the same StatementDecomposer instance is used to process the same function multiple times (e.g., by different passes in NormalizeToSubset), the symbol table is cached and reused. Names produced by the local counter in newTempVarname() are added to the cached set, but declarations that other code adds to the function between calls are not, and neither are names produced when useGlobalIds is true. A later temporary can therefore duplicate one of those names.

The condition if ($function === this.currentFunction && this.symbolTable !== undefined) should be removed, and the symbol table should be rebuilt on every call to ensure it accurately reflects the current state of the AST.

Suggested change
// If we're in the same function as before, reuse the symbol table
if ($function === this.currentFunction && this.symbolTable !== undefined) {
return;
}
// Always rebuild the symbol table to reflect the current state of the AST

Copilot uses AI. Check for mistakes.
// Reset for new function
this.currentFunction = $function;
this.symbolTable = new Set<string>();

if ($function === undefined) {
return;
}

// Add parameter names to symbol table
for (const $param of $function.params as Param[]) {
this.symbolTable.add($param.name);
}

// Add all variable declarations in the function to symbol table
for (const $vardecl of Query.searchFrom($function, Vardecl)) {
this.symbolTable.add($vardecl.name);
}
}

private newTempVarname() {
const varName = `${this.tempPrefix}${this.startIndex}`;
if (this.useGlobalIds) {
// Use global IdGenerator to ensure unique names across multiple normalizations
return IdGenerator.next(this.tempPrefix);
Copy link

Copilot AI Nov 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When useGlobalIds is true, the newTempVarname() method uses IdGenerator.next() but doesn't check the generated name against existing variables in the symbol table. This could create naming conflicts if the source code already contains variables like decomp_N that happen to match names generated by the IdGenerator.

Consider adding a check against the symbol table even when using global IDs:

if (this.useGlobalIds) {
  let varName = IdGenerator.next(this.tempPrefix);
  // Ensure uniqueness against existing variables
  if (this.symbolTable !== undefined) {
    while (this.symbolTable.has(varName)) {
      varName = IdGenerator.next(this.tempPrefix);
    }
    this.symbolTable.add(varName);
  }
  return varName;
}
Suggested change
return IdGenerator.next(this.tempPrefix);
let varName = IdGenerator.next(this.tempPrefix);
// Ensure uniqueness against existing variables in the symbol table
if (this.symbolTable !== undefined) {
while (this.symbolTable.has(varName)) {
varName = IdGenerator.next(this.tempPrefix);
}
this.symbolTable.add(varName);
}
return varName;

Copilot uses AI. Check for mistakes.
}

// Use local counter with symbol table checking to avoid duplicates
let varName = `${this.tempPrefix}${this.startIndex}`;
this.startIndex++;

// If we have a symbol table, ensure the name is unique
if (this.symbolTable !== undefined) {
while (this.symbolTable.has(varName)) {
varName = `${this.tempPrefix}${this.startIndex}`;
this.startIndex++;
}
// Add the new name to the symbol table
this.symbolTable.add(varName);
}

return varName;
}

Expand Down Expand Up @@ -94,6 +162,9 @@ export default class StatementDecomposer {
* @returns An array with the new statements, or an empty array if no decomposition could be made
*/
decompose($stmt: Statement): Statement[] {
// Build/refresh symbol table for the current function to avoid duplicate variable names
this.buildSymbolTable($stmt);

try {
return this.decomposeStmt($stmt);
} catch (e) {
Expand Down
67 changes: 67 additions & 0 deletions Clava-JS/src-api/clava/opt/NormalizeToSubset.example.md
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't add random docs.

Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# NormalizeToSubset Usage Examples

## Basic Usage

```typescript
import NormalizeToSubset from "./NormalizeToSubset.js";
import Query from "@specs-feup/lara/api/weaver/Query.js";

// Apply normalization to the entire program
NormalizeToSubset(Query.root());
```

## With Options

### Using Global IDs (Recommended for Multiple Normalizations)

When you need to apply normalization multiple times over the same AST region, use the `useGlobalIds` option to prevent duplicate symbol generation:

```typescript
import NormalizeToSubset from "./NormalizeToSubset.js";
import Query from "@specs-feup/lara/api/weaver/Query.js";
import { FunctionJp } from "../../Joinpoints.js";

const functionJp = Query.search(FunctionJp, { name: "myFunction" }).first();

// Apply normalization with global ID generation
NormalizeToSubset(functionJp, { useGlobalIds: true });

// Safe to apply again - will not create duplicate symbols
NormalizeToSubset(functionJp, { useGlobalIds: true });
```

### Custom Loop Simplification

```typescript
import NormalizeToSubset from "./NormalizeToSubset.js";
import Query from "@specs-feup/lara/api/weaver/Query.js";

// Customize loop simplification behavior
NormalizeToSubset(Query.root(), {
simplifyLoops: { forToWhile: false },
useGlobalIds: true
});
```

## Symbol Table Management

The StatementDecomposer automatically builds a symbol table of existing variable declarations and parameters before generating new variable names. This prevents conflicts with existing `decomp_N` variables in your code.

### Example

Given this code:
```c
int foo(int a) {
int decomp_0 = 5; // Existing variable
int result = (a + 1) * (2 + 3);
return result;
}
```

When normalized, the transformation will create `decomp_1`, `decomp_2`, etc., skipping `decomp_0` to avoid conflicts.

## Implementation Details

- **Symbol Table**: Collects all parameter and variable declaration names from the enclosing function
- **Local Counter**: By default, uses a local counter that checks against the symbol table
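- **Global ID Generator**: When `useGlobalIds: true`, names come from `IdGenerator.next("decomp_")`, so generated names are unique among themselves across the entire transformation session. This path does not consult the symbol table, so a `decomp_N` variable that already exists in your code can still collide with a generated name.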
27 changes: 18 additions & 9 deletions Clava-JS/src-api/clava/opt/NormalizeToSubset.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { LaraJoinPoint } from "@specs-feup/lara/api/LaraJoinPoint.js";
import Query from "@specs-feup/lara/api/weaver/Query.js";
import { BinaryOp, Joinpoint } from "../../Joinpoints.js";
import type { Joinpoint } from "../../Joinpoints.js";
import { BinaryOp } from "../../Joinpoints.js";
Comment on lines +2 to +3
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
import type { Joinpoint } from "../../Joinpoints.js";
import { BinaryOp } from "../../Joinpoints.js";
import { BinaryOp, type Joinpoint } from "../../Joinpoints.js";

import SimplifyAssignment from "../code/SimplifyAssignment.js";
import StatementDecomposer from "../code/StatementDecomposer.js";
import DecomposeDeclStmt from "../pass/DecomposeDeclStmt.js";
Expand All @@ -11,22 +11,31 @@ import SimplifyReturnStmts from "../pass/SimplifyReturnStmts.js";
import SimplifySelectionStmts from "../pass/SimplifySelectionStmts.js";

/**
*
* @param $startJp -
* @param options - Object with options. See default value for supported options.
* Normalizes code to a simpler subset of C/C++.
*
* @param $startJp - Starting join point for normalization
* @param options - Configuration options for normalization
*/
export default function NormalizeToSubset(
$startJp: Joinpoint,
options = { simplifyLoops: { forToWhile: true } }
options: { simplifyLoops?: { forToWhile: boolean }, useGlobalIds?: boolean } = {}
) {
const _options = options;
const _options = {
simplifyLoops: { forToWhile: true },
useGlobalIds: false,
...options
Comment on lines +24 to +26
Copy link

Copilot AI Nov 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The options merging logic is shallow, so it doesn't preserve the simplifyLoops default whenever the caller supplies that key. The spread of options replaces the whole default { forToWhile: true } object: if a user passes { simplifyLoops: {} } (possible from untyped JavaScript) or { simplifyLoops: undefined }, _options.simplifyLoops ends up as {} or undefined, so forToWhile is no longer true.

Consider using a deeper merge strategy:

const _options = {
  simplifyLoops: { forToWhile: true, ...options.simplifyLoops },
  useGlobalIds: options.useGlobalIds ?? false,
};
Suggested change
simplifyLoops: { forToWhile: true },
useGlobalIds: false,
...options
simplifyLoops: { forToWhile: true, ...(options.simplifyLoops ?? {}) },
useGlobalIds: options.useGlobalIds ?? false,

Copilot uses AI. Check for mistakes.
};

const declStmt = new DecomposeDeclStmt();
const varDecls = new DecomposeVarDeclarations();
const statementDecomposer = new StatementDecomposer();
const statementDecomposer = new StatementDecomposer(
"decomp_",
0,
_options.useGlobalIds
);
const simplifyLoops = new SimplifyLoops(
statementDecomposer,
_options["simplifyLoops"]
_options.simplifyLoops
);
const simplifyIfs = new SimplifySelectionStmts(statementDecomposer);
const simplifyReturns = new SimplifyReturnStmts(statementDecomposer);
Expand Down
Loading