-
Notifications
You must be signed in to change notification settings - Fork 11
Fix NormalizeToSubset creating duplicate decomp_N symbols #216
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| import { registerSourceCode } from "@specs-feup/lara/jest/jestHelpers.js"; | ||
| import Query from "@specs-feup/lara/api/weaver/Query.js"; | ||
| import { FunctionJp, Vardecl } from "../../Joinpoints.js"; | ||
| import NormalizeToSubset from "../opt/NormalizeToSubset.js"; | ||
|
|
||
| const codeWithExistingDecompVars = ` | ||
| int foo(int a) { | ||
| int decomp_0 = 5; | ||
| int decomp_1 = 10; | ||
| int result = (a + 1) * (decomp_0 + decomp_1); | ||
| return result; | ||
| }`; | ||
|
|
||
| describe("StatementDecomposer duplicate symbols", () => { | ||
| registerSourceCode(codeWithExistingDecompVars); | ||
|
|
||
| it("should not create duplicate decomp_ symbols when normalizing", () => { | ||
| const functionJp = Query.search(FunctionJp, { name: "foo" }).first(); | ||
|
|
||
| if (functionJp === undefined) { | ||
| fail("Function not found"); | ||
| } | ||
|
|
||
| // Get all variable names before normalization | ||
| const varsBefore = Query.searchFrom(functionJp, Vardecl).map(v => v.name); | ||
| expect(varsBefore).toContain("decomp_0"); | ||
| expect(varsBefore).toContain("decomp_1"); | ||
|
|
||
| // Apply normalization | ||
| NormalizeToSubset(functionJp); | ||
|
|
||
| // Get all variable names after normalization | ||
| const varsAfter = Query.searchFrom(functionJp, Vardecl).map(v => v.name); | ||
|
|
||
| // Check that all variable names are unique | ||
| const uniqueVars = new Set(varsAfter); | ||
| expect(uniqueVars.size).toBe(varsAfter.length); | ||
|
|
||
| // Ensure no duplicate decomp_0 or decomp_1 | ||
| const decompCounts: Record<string, number> = {}; | ||
| for (const varName of varsAfter) { | ||
| if (varName.startsWith("decomp_")) { | ||
| decompCounts[varName] = (decompCounts[varName] || 0) + 1; | ||
| } | ||
| } | ||
|
|
||
| for (const [varName, count] of Object.entries(decompCounts)) { | ||
| expect(count).toBe(1); | ||
| } | ||
| }); | ||
| }); | ||
|
|
||
| const codeMultipleNormalization = ` | ||
| int bar(int x) { | ||
| return (x + 1) * (x + 2); | ||
| }`; | ||
|
|
||
| describe("StatementDecomposer multiple normalizations", () => { | ||
| registerSourceCode(codeMultipleNormalization); | ||
|
|
||
| it("should not create duplicate symbols when normalizing multiple times", () => { | ||
| const functionJp = Query.search(FunctionJp, { name: "bar" }).first(); | ||
|
|
||
| if (functionJp === undefined) { | ||
| fail("Function not found"); | ||
| } | ||
|
|
||
| // Apply normalization twice | ||
| NormalizeToSubset(functionJp); | ||
| NormalizeToSubset(functionJp); | ||
|
|
||
| // Get all variable names | ||
| const vars = Query.searchFrom(functionJp, Vardecl).map(v => v.name); | ||
|
|
||
| // Check that all variable names are unique | ||
| const uniqueVars = new Set(vars); | ||
| expect(uniqueVars.size).toBe(vars.length); | ||
| }); | ||
| }); | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,16 +1,20 @@ | ||||||||||||||||||||||
| import { debug } from "@specs-feup/lara/api/lara/core/LaraCore.js"; | ||||||||||||||||||||||
| import IdGenerator from "@specs-feup/lara/api/lara/util/IdGenerator.js"; | ||||||||||||||||||||||
| import Query from "@specs-feup/lara/api/weaver/Query.js"; | ||||||||||||||||||||||
| import { | ||||||||||||||||||||||
| BinaryOp, | ||||||||||||||||||||||
| Call, | ||||||||||||||||||||||
| Case, | ||||||||||||||||||||||
| Decl, | ||||||||||||||||||||||
| type Decl, | ||||||||||||||||||||||
| DeclStmt, | ||||||||||||||||||||||
| EmptyStmt, | ||||||||||||||||||||||
| ExprStmt, | ||||||||||||||||||||||
| Expression, | ||||||||||||||||||||||
| Joinpoint, | ||||||||||||||||||||||
| type Expression, | ||||||||||||||||||||||
| FunctionJp, | ||||||||||||||||||||||
| type Joinpoint, | ||||||||||||||||||||||
| LabelStmt, | ||||||||||||||||||||||
| MemberCall, | ||||||||||||||||||||||
| type Param, | ||||||||||||||||||||||
| ReturnStmt, | ||||||||||||||||||||||
| Scope, | ||||||||||||||||||||||
| Statement, | ||||||||||||||||||||||
|
|
@@ -25,17 +29,81 @@ import DecomposeResult from "./DecomposeResult.js"; | |||||||||||||||||||||
| * Decomposes complex statements into several simpler ones. | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
| export default class StatementDecomposer { | ||||||||||||||||||||||
| tempPrefix; | ||||||||||||||||||||||
| startIndex; | ||||||||||||||||||||||
| public tempPrefix; | ||||||||||||||||||||||
| public startIndex; | ||||||||||||||||||||||
| public useGlobalIds; | ||||||||||||||||||||||
| private symbolTable: Set<string> | undefined; | ||||||||||||||||||||||
| private currentFunction: FunctionJp | undefined; | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| constructor(tempPrefix: string = "decomp_", startIndex: number = 0) { | ||||||||||||||||||||||
| /** | ||||||||||||||||||||||
| * Creates a new StatementDecomposer. | ||||||||||||||||||||||
| * | ||||||||||||||||||||||
| * @param tempPrefix - Prefix for temporary variable names | ||||||||||||||||||||||
| * @param startIndex - Starting index for temporary variable names (ignored if useGlobalIds is true) | ||||||||||||||||||||||
| * @param useGlobalIds - If true, uses global IdGenerator to avoid duplicate symbols across multiple normalizations | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
| public constructor(tempPrefix = "decomp_", startIndex = 0, useGlobalIds = false) { | ||||||||||||||||||||||
| this.tempPrefix = tempPrefix; | ||||||||||||||||||||||
| this.startIndex = startIndex; | ||||||||||||||||||||||
| this.useGlobalIds = useGlobalIds; | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| /** | ||||||||||||||||||||||
| * Builds a symbol table of existing variable declarations and parameters in the given scope. | ||||||||||||||||||||||
| * This is used to avoid creating duplicate variable names. | ||||||||||||||||||||||
| * | ||||||||||||||||||||||
| * @param $scope - The scope to build the symbol table from (typically a function) | ||||||||||||||||||||||
| */ | ||||||||||||||||||||||
| private buildSymbolTable($scope: Joinpoint): void { | ||||||||||||||||||||||
| // Get the enclosing function if we're not already at a function | ||||||||||||||||||||||
| const $function = $scope instanceof FunctionJp | ||||||||||||||||||||||
| ? $scope | ||||||||||||||||||||||
| : $scope.getAncestor("function") as FunctionJp | undefined; | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
| // If we're in the same function as before, reuse the symbol table | ||||||||||||||||||||||
| if ($function === this.currentFunction && this.symbolTable !== undefined) { | ||||||||||||||||||||||
| return; | ||||||||||||||||||||||
| } | ||||||||||||||||||||||
|
|
||||||||||||||||||||||
|
Comment on lines
+63
to
+67
|
||||||||||||||||||||||
| // If we're in the same function as before, reuse the symbol table | |
| if ($function === this.currentFunction && this.symbolTable !== undefined) { | |
| return; | |
| } | |
| // Always rebuild the symbol table to reflect the current state of the AST | |
Copilot
AI
Nov 6, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When useGlobalIds is true, the newTempVarname() method uses IdGenerator.next() but doesn't check the generated name against existing variables in the symbol table. This could create naming conflicts if the source code already contains variables like decomp_N that happen to match names generated by the IdGenerator.
Consider adding a check against the symbol table even when using global IDs:
if (this.useGlobalIds) {
let varName = IdGenerator.next(this.tempPrefix);
// Ensure uniqueness against existing variables
if (this.symbolTable !== undefined) {
while (this.symbolTable.has(varName)) {
varName = IdGenerator.next(this.tempPrefix);
}
this.symbolTable.add(varName);
}
return varName;
}| return IdGenerator.next(this.tempPrefix); | |
| let varName = IdGenerator.next(this.tempPrefix); | |
| // Ensure uniqueness against existing variables in the symbol table | |
| if (this.symbolTable !== undefined) { | |
| while (this.symbolTable.has(varName)) { | |
| varName = IdGenerator.next(this.tempPrefix); | |
| } | |
| this.symbolTable.add(varName); | |
| } | |
| return varName; |
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Don't add random docs. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,67 @@ | ||
| # NormalizeToSubset Usage Examples | ||
|
|
||
| ## Basic Usage | ||
|
|
||
| ```typescript | ||
| import NormalizeToSubset from "./NormalizeToSubset.js"; | ||
| import Query from "@specs-feup/lara/api/weaver/Query.js"; | ||
|
|
||
| // Apply normalization to the entire program | ||
| NormalizeToSubset(Query.root()); | ||
| ``` | ||
|
|
||
| ## With Options | ||
|
|
||
| ### Using Global IDs (Recommended for Multiple Normalizations) | ||
|
|
||
| When you need to apply normalization multiple times over the same AST region, use the `useGlobalIds` option to prevent duplicate symbol generation: | ||
|
|
||
| ```typescript | ||
| import NormalizeToSubset from "./NormalizeToSubset.js"; | ||
| import Query from "@specs-feup/lara/api/weaver/Query.js"; | ||
| import { FunctionJp } from "../../Joinpoints.js"; | ||
|
|
||
| const functionJp = Query.search(FunctionJp, { name: "myFunction" }).first(); | ||
|
|
||
| // Apply normalization with global ID generation | ||
| NormalizeToSubset(functionJp, { useGlobalIds: true }); | ||
|
|
||
| // Safe to apply again - will not create duplicate symbols | ||
| NormalizeToSubset(functionJp, { useGlobalIds: true }); | ||
| ``` | ||
|
|
||
| ### Custom Loop Simplification | ||
|
|
||
| ```typescript | ||
| import NormalizeToSubset from "./NormalizeToSubset.js"; | ||
| import Query from "@specs-feup/lara/api/weaver/Query.js"; | ||
|
|
||
| // Customize loop simplification behavior | ||
| NormalizeToSubset(Query.root(), { | ||
| simplifyLoops: { forToWhile: false }, | ||
| useGlobalIds: true | ||
| }); | ||
| ``` | ||
|
|
||
| ## Symbol Table Management | ||
|
|
||
| The StatementDecomposer automatically builds a symbol table of existing variable declarations and parameters before generating new variable names. This prevents conflicts with existing `decomp_N` variables in your code. | ||
|
|
||
| ### Example | ||
|
|
||
| Given this code: | ||
| ```c | ||
| int foo(int a) { | ||
| int decomp_0 = 5; // Existing variable | ||
| int result = (a + 1) * (2 + 3); | ||
| return result; | ||
| } | ||
| ``` | ||
|
|
||
| When normalized, the transformation will create `decomp_1`, `decomp_2`, etc., skipping `decomp_0` to avoid conflicts. | ||
|
|
||
| ## Implementation Details | ||
|
|
||
| - **Symbol Table**: Collects all parameter and variable declaration names from the enclosing function | ||
| - **Local Counter**: By default, uses a local counter that checks against the symbol table | ||
| - **Global ID Generator**: When `useGlobalIds: true`, uses `IdGenerator.next("decomp_")` for globally unique names across the entire transformation session |
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -1,6 +1,6 @@ | ||||||||||||
| import { LaraJoinPoint } from "@specs-feup/lara/api/LaraJoinPoint.js"; | ||||||||||||
| import Query from "@specs-feup/lara/api/weaver/Query.js"; | ||||||||||||
| import { BinaryOp, Joinpoint } from "../../Joinpoints.js"; | ||||||||||||
| import type { Joinpoint } from "../../Joinpoints.js"; | ||||||||||||
| import { BinaryOp } from "../../Joinpoints.js"; | ||||||||||||
|
Comment on lines
+2
to
+3
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
| import SimplifyAssignment from "../code/SimplifyAssignment.js"; | ||||||||||||
| import StatementDecomposer from "../code/StatementDecomposer.js"; | ||||||||||||
| import DecomposeDeclStmt from "../pass/DecomposeDeclStmt.js"; | ||||||||||||
|
|
@@ -11,22 +11,31 @@ import SimplifyReturnStmts from "../pass/SimplifyReturnStmts.js"; | |||||||||||
| import SimplifySelectionStmts from "../pass/SimplifySelectionStmts.js"; | ||||||||||||
|
|
||||||||||||
| /** | ||||||||||||
| * | ||||||||||||
| * @param $startJp - | ||||||||||||
| * @param options - Object with options. See default value for supported options. | ||||||||||||
| * Normalizes code to a simpler subset of C/C++. | ||||||||||||
| * | ||||||||||||
| * @param $startJp - Starting join point for normalization | ||||||||||||
| * @param options - Configuration options for normalization | ||||||||||||
| */ | ||||||||||||
| export default function NormalizeToSubset( | ||||||||||||
| $startJp: Joinpoint, | ||||||||||||
| options = { simplifyLoops: { forToWhile: true } } | ||||||||||||
| options: { simplifyLoops?: { forToWhile: boolean }, useGlobalIds?: boolean } = {} | ||||||||||||
| ) { | ||||||||||||
| const _options = options; | ||||||||||||
| const _options = { | ||||||||||||
| simplifyLoops: { forToWhile: true }, | ||||||||||||
| useGlobalIds: false, | ||||||||||||
| ...options | ||||||||||||
|
Comment on lines
+24
to
+26
|
||||||||||||
| simplifyLoops: { forToWhile: true }, | |
| useGlobalIds: false, | |
| ...options | |
| simplifyLoops: { forToWhile: true, ...(options.simplifyLoops ?? {}) }, | |
| useGlobalIds: options.useGlobalIds ?? false, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unused variable varName.