Patch: make SMT-LIB identifiers ASCII-safe in the EvoMaster core solver.

Table and column names are folded to ASCII (StringUtils.convertToAscii) before they are
emitted as SMT-LIB symbols. SmtTable pre-computes the converted names per table, and
SMTLibZ3DbConstraintSolver resolves the ASCII column names found in the solver response
back to the schema's column names when it builds genes.

Review notes:
- Line breaks, indentation and blank context lines were rebuilt from a whitespace-collapsed
  copy of this patch so that every hunk matches its @@ line counts. The exact original
  whitespace and blank-line positions may differ; if context does not match, try
  `git apply --ignore-whitespace`.
- Generic type arguments that were missing in that copy were restored. `Map<String, String>`
  for parseQueryCondition's `tableAliases` is inferred -- TODO confirm against the source file.
- StringUtils: the IJ/ij ligature keys are written as '\u0132' / '\u0133' (a Char literal must be
  a single code point), the non-ASCII regex is compiled once, and a duplicated OE-ligature entry
  was removed from ASCII_FOLD_MAP. That hunk's new-side count is therefore 77; the `index`
  lines are kept as received.

diff --git a/core/src/main/kotlin/org/evomaster/core/solver/SMTConditionVisitor.kt b/core/src/main/kotlin/org/evomaster/core/solver/SMTConditionVisitor.kt
index eb12789a79..91364728db 100644
--- a/core/src/main/kotlin/org/evomaster/core/solver/SMTConditionVisitor.kt
+++ b/core/src/main/kotlin/org/evomaster/core/solver/SMTConditionVisitor.kt
@@ -1,6 +1,7 @@
 package org.evomaster.core.solver
 
 import org.evomaster.client.java.controller.api.dto.database.schema.TableDto
+import org.evomaster.core.utils.StringUtils.convertToAscii
 import org.evomaster.dbconstraint.ast.*
 import org.evomaster.solver.smtlib.AssertSMTNode
 import org.evomaster.solver.smtlib.EmptySMTNode
@@ -26,12 +27,18 @@ class SMTConditionVisitor(
     /**
      * Constructs a column reference string for SMT-LIB from a table name and column name.
      *
+     * Both names are converted to ASCII because SMT-LIB unquoted symbols only allow ASCII characters.
+     * Table and column names may come directly from SQL query text (e.g., a WHERE clause), which can
+     * contain non-ASCII characters if the schema uses them. The conversion must happen here because,
+     * unlike schema-derived names that are pre-converted via [SmtTable], query-derived names are
+     * parsed at runtime from raw SQL strings.
+     *
      * @param tableName The name of the table.
      * @param columnName The name of the column.
      * @return The SMT-LIB column reference string.
      */
     private fun getColumnReference(tableName: String, columnName: String): String {
-        return "(${columnName.uppercase()} ${tableName.lowercase()}$rowIndex)"
+        return "(${convertToAscii(columnName).uppercase()} ${convertToAscii(tableName).lowercase()}$rowIndex)"
     }
 
     /**
diff --git a/core/src/main/kotlin/org/evomaster/core/solver/SMTLibZ3DbConstraintSolver.kt b/core/src/main/kotlin/org/evomaster/core/solver/SMTLibZ3DbConstraintSolver.kt
index f2cbbc287e..13fdb06665 100644
--- a/core/src/main/kotlin/org/evomaster/core/solver/SMTLibZ3DbConstraintSolver.kt
+++ b/core/src/main/kotlin/org/evomaster/core/solver/SMTLibZ3DbConstraintSolver.kt
@@ -20,6 +20,7 @@ import org.evomaster.core.search.gene.sql.SqlPrimaryKeyGene
 import org.evomaster.core.search.gene.string.StringGene
 import org.evomaster.core.sql.SqlAction
 import org.evomaster.core.sql.schema.*
+import org.evomaster.core.utils.StringUtils.convertToAscii
 import org.evomaster.solver.Z3DockerExecutor
 import org.evomaster.solver.smtlib.SMTLib
 import org.evomaster.solver.smtlib.value.*
@@ -152,18 +153,24 @@ class SMTLibZ3DbConstraintSolver() : DbConstraintSolver {
 
             // Create the list of genes with the values
             val genes = mutableListOf<Gene>()
-            for (columnName in columns.fields) {
-                var gene: Gene = IntegerGene(columnName, 0)
-                when (val columnValue = columns.getField(columnName)) {
+            // smtColumn is the Ascii version from SmtLib; resolve back to original DB column name
+            for (smtColumn in columns.fields) {
+                val dbColumn = table.columns.firstOrNull {
+                    convertToAscii(it.name).equals(smtColumn, ignoreCase = true)
+                }
+                val dbColumnName = dbColumn?.name ?: smtColumn
+
+                var gene: Gene = IntegerGene(dbColumnName, 0)
+                when (val columnValue = columns.getField(smtColumn)) {
                     is StringValue -> {
-                        gene = if (hasColumnType(schemaDto, table, columnName, "BOOLEAN")) {
-                            BooleanGene(columnName, toBoolean(columnValue.value))
+                        gene = if (hasColumnType(schemaDto, table, dbColumnName, "BOOLEAN")) {
+                            BooleanGene(dbColumnName, toBoolean(columnValue.value))
                         } else {
-                            StringGene(columnName, columnValue.value)
+                            StringGene(dbColumnName, columnValue.value)
                         }
                     }
                     is LongValue -> {
-                        gene = if (hasColumnType(schemaDto, table, columnName, "TIMESTAMP")) {
+                        gene = if (hasColumnType(schemaDto, table, dbColumnName, "TIMESTAMP")) {
                             val epochSeconds = columnValue.value.toLong()
                             val localDateTime = LocalDateTime.ofInstant(
                                 Instant.ofEpochSecond(epochSeconds), ZoneOffset.UTC
@@ -171,18 +178,17 @@ class SMTLibZ3DbConstraintSolver() : DbConstraintSolver {
                             val formatted = localDateTime.format(
                                 DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
                             )
-                            ImmutableDataHolderGene(columnName, formatted, inQuotes = true)
+                            ImmutableDataHolderGene(dbColumnName, formatted, inQuotes = true)
                         } else {
-                            IntegerGene(columnName, columnValue.value.toInt())
+                            IntegerGene(dbColumnName, columnValue.value.toInt())
                         }
                     }
                     is RealValue -> {
-                        gene = DoubleGene(columnName, columnValue.value)
+                        gene = DoubleGene(dbColumnName, columnValue.value)
                     }
                 }
-                val currentColumn = table.columns.firstOrNull(){ it.name.equals(columnName, ignoreCase = true) }
-                if (currentColumn != null && currentColumn.primaryKey) {
-                    gene = SqlPrimaryKeyGene(columnName, table.id, gene, actionId)
+                if (dbColumn != null && dbColumn.primaryKey) {
+                    gene = SqlPrimaryKeyGene(dbColumnName, table.id, gene, actionId)
                 }
                 gene.markAllAsInitialized()
                 genes.add(gene)
diff --git a/core/src/main/kotlin/org/evomaster/core/solver/SmtLibGenerator.kt b/core/src/main/kotlin/org/evomaster/core/solver/SmtLibGenerator.kt
index a963e53dec..26c76bb849 100644
--- a/core/src/main/kotlin/org/evomaster/core/solver/SmtLibGenerator.kt
+++ b/core/src/main/kotlin/org/evomaster/core/solver/SmtLibGenerator.kt
@@ -11,15 +11,14 @@ import org.evomaster.client.java.controller.api.dto.database.schema.DbInfoDto
 import org.evomaster.client.java.controller.api.dto.database.schema.ForeignKeyDto
 import org.evomaster.client.java.controller.api.dto.database.schema.TableDto
 import org.evomaster.core.logging.LoggingUtil
-import org.evomaster.core.utils.StringUtils
 import org.evomaster.dbconstraint.ConstraintDatabaseType
 import org.evomaster.dbconstraint.ast.SqlCondition
 import net.sf.jsqlparser.JSQLParserException
+import org.evomaster.core.utils.StringUtils.convertToAscii
 import org.evomaster.dbconstraint.parser.SqlConditionParserException
 import org.evomaster.dbconstraint.parser.jsql.JSqlConditionParser
 import org.evomaster.solver.smtlib.*
 import org.evomaster.solver.smtlib.assertion.*
-import java.util.*
 
 /**
  * Generates SMT-LIB constraints from SQL queries and schema definitions.
@@ -31,6 +30,9 @@ class SmtLibGenerator(private val schema: DbInfoDto, private val numberOfRows: I
 
     private var parser = JSqlConditionParser()
 
+    private val smtTables: List<SmtTable> = schema.tables.map { SmtTable(it) }
+    private val smtTableByOriginalName: Map<String, SmtTable> = smtTables.associateBy { it.originalName }
+
     /**
      * Main method to generate SMT-LIB representation from SQL query.
      *
@@ -57,17 +59,15 @@ class SmtLibGenerator(private val schema: DbInfoDto, private val numberOfRows: I
      * @param smt The SMT-LIB object to which table definitions are added.
      */
     private fun appendTableDefinitions(smt: SMTLib) {
-        for (table in schema.tables) {
-            val dataTypeName = "${StringUtils.capitalization(table.id.name)}Row"
-
+        for (smtTable in smtTables) {
             // Declare datatype for the table
             smt.addNode(
-                DeclareDatatypeSMTNode(dataTypeName, getConstructors(table))
+                DeclareDatatypeSMTNode(smtTable.dataTypeName, getConstructors(smtTable))
             )
 
             // Declare constants for each row
             for (i in 1..numberOfRows) {
-                smt.addNode(DeclareConstSMTNode("${table.id.name.lowercase()}$i", dataTypeName))
+                smt.addNode(DeclareConstSMTNode("${smtTable.smtName}$i", smtTable.dataTypeName))
             }
         }
     }
@@ -78,9 +78,9 @@ class SmtLibGenerator(private val schema: DbInfoDto, private val numberOfRows: I
      * @param smt The SMT-LIB object to which table constraints are added.
      */
     private fun appendTableConstraints(smt: SMTLib) {
-        for (table in schema.tables) {
-            appendUniqueConstraints(smt, table)
-            appendCheckConstraints(smt, table)
+        for (smtTable in smtTables) {
+            appendUniqueConstraints(smt, smtTable)
+            appendCheckConstraints(smt, smtTable)
         }
     }
 
@@ -88,13 +88,12 @@ class SmtLibGenerator(private val schema: DbInfoDto, private val numberOfRows: I
      * Appends unique constraints for each table to the SMT-LIB.
      *
      * @param smt The SMT-LIB object to which unique constraints are added.
-     * @param table The table for which unique constraints are added.
+     * @param smtTable The table for which unique constraints are added.
      */
-    private fun appendUniqueConstraints(smt: SMTLib, table: TableDto) {
-        val tableName = table.id.name.lowercase()
-        for (column in table.columns) {
+    private fun appendUniqueConstraints(smt: SMTLib, smtTable: SmtTable) {
+        for (column in smtTable.dto.columns) {
             if (column.unique) {
-                val nodes = assertForDistinctField(column.name, tableName)
+                val nodes = assertForDistinctField(smtTable.smtColumnName(column.name), smtTable.smtName)
                 smt.addNodes(nodes)
             }
         }
@@ -104,14 +103,14 @@ class SmtLibGenerator(private val schema: DbInfoDto, private val numberOfRows: I
      * Appends check constraints for each table to the SMT-LIB.
      *
      * @param smt The SMT-LIB object to which check constraints are added.
-     * @param table The table for which check constraints are added.
+     * @param smtTable The table for which check constraints are added.
      */
-    private fun appendCheckConstraints(smt: SMTLib, table: TableDto) {
-        for (check in table.tableCheckExpressions) {
+    private fun appendCheckConstraints(smt: SMTLib, smtTable: SmtTable) {
+        for (check in smtTable.dto.tableCheckExpressions) {
             try {
                 val condition: SqlCondition = parser.parse(check.sqlCheckExpression, toDBType(schema.databaseType))
                 for (i in 1..numberOfRows) {
-                    val constraint: SMTNode = parseCheckExpression(table, condition, i)
+                    val constraint: SMTNode = parseCheckExpression(smtTable, condition, i)
                     smt.addNode(constraint)
                 }
             } catch (e: SqlConditionParserException) {
@@ -125,13 +124,13 @@ class SmtLibGenerator(private val schema: DbInfoDto, private val numberOfRows: I
     /**
      * Parses a check expression and returns the corresponding SMT node.
      *
-     * @param table The table containing the check expression.
+     * @param smtTable The table containing the check expression.
      * @param condition The SQL condition to be parsed.
      * @param index The index of the row.
      * @return The corresponding SMT node.
      */
-    private fun parseCheckExpression(table: TableDto, condition: SqlCondition, index: Int): SMTNode {
-        val visitor = SMTConditionVisitor(table.id.name.lowercase(), emptyMap(), schema.tables, index)
+    private fun parseCheckExpression(smtTable: SmtTable, condition: SqlCondition, index: Int): SMTNode {
+        val visitor = SMTConditionVisitor(smtTable.smtName, emptyMap(), schema.tables, index)
         return condition.accept(visitor, null) as SMTNode
     }
 
@@ -159,29 +158,28 @@ class SmtLibGenerator(private val schema: DbInfoDto, private val numberOfRows: I
      * @param smt The SMT-LIB object to which key constraints are added.
      */
     private fun appendKeyConstraints(smt: SMTLib) {
-        for (table in schema.tables) {
-            appendPrimaryKeyConstraints(smt, table)
-            appendForeignKeyConstraints(smt, table)
+        for (smtTable in smtTables) {
+            appendPrimaryKeyConstraints(smt, smtTable)
+            appendForeignKeyConstraints(smt, smtTable)
         }
     }
 
     private fun appendBooleanConstraints(smt: SMTLib) {
-        for (table in schema.tables) {
-            val tableName = table.id.name.lowercase()
-            for (column in table.columns) {
+        for (smtTable in smtTables) {
+            for (column in smtTable.dto.columns) {
                 if (column.type.equals("BOOLEAN", ignoreCase = true)) {
-                    val columnName = column.name.uppercase()
+                    val columnName = smtTable.smtColumnName(column.name).uppercase()
                     for (i in 1..numberOfRows) {
                         smt.addNode(
                             AssertSMTNode(
                                 OrAssertion(
                                     listOf(
-                                        EqualsAssertion(listOf("($columnName $tableName$i)", "\"true\"")),
-                                        EqualsAssertion(listOf("($columnName $tableName$i)", "\"True\"")),
-                                        EqualsAssertion(listOf("($columnName $tableName$i)", "\"TRUE\"")),
-                                        EqualsAssertion(listOf("($columnName $tableName$i)", "\"false\"")),
-                                        EqualsAssertion(listOf("($columnName $tableName$i)", "\"False\"")),
-                                        EqualsAssertion(listOf("($columnName $tableName$i)", "\"FALSE\""))
+                                        EqualsAssertion(listOf("($columnName ${smtTable.smtName}$i)", "\"true\"")),
+                                        EqualsAssertion(listOf("($columnName ${smtTable.smtName}$i)", "\"True\"")),
+                                        EqualsAssertion(listOf("($columnName ${smtTable.smtName}$i)", "\"TRUE\"")),
+                                        EqualsAssertion(listOf("($columnName ${smtTable.smtName}$i)", "\"false\"")),
+                                        EqualsAssertion(listOf("($columnName ${smtTable.smtName}$i)", "\"False\"")),
+                                        EqualsAssertion(listOf("($columnName ${smtTable.smtName}$i)", "\"FALSE\""))
                                     )
                                 )
                             )
@@ -193,11 +191,10 @@ class SmtLibGenerator(private val schema: DbInfoDto, private val numberOfRows: I
     }
 
     private fun appendTimestampConstraints(smt: SMTLib) {
-        for (table in schema.tables) {
-            val tableName = table.id.name.lowercase()
-            for (column in table.columns) {
+        for (smtTable in smtTables) {
+            for (column in smtTable.dto.columns) {
                 if (column.type.equals("TIMESTAMP", ignoreCase = true)) {
-                    val columnName = column.name.uppercase()
+                    val columnName = smtTable.smtColumnName(column.name).uppercase()
                     val lowerBound = 0 // Example for Unix epoch start
                     val upperBound = 32503680000 // Example for year 3000 in seconds
 
@@ -205,7 +202,7 @@ class SmtLibGenerator(private val schema: DbInfoDto, private val numberOfRows: I
                         smt.addNode(
                             AssertSMTNode(
                                 GreaterThanOrEqualsAssertion(
-                                    "($columnName $tableName$i)",
+                                    "($columnName ${smtTable.smtName}$i)",
                                     lowerBound.toString()
                                 )
                             )
@@ -213,7 +210,7 @@ class SmtLibGenerator(private val schema: DbInfoDto, private val numberOfRows: I
                         smt.addNode(
                             AssertSMTNode(
                                 LessThanOrEqualsAssertion(
-                                    "($columnName $tableName$i)",
+                                    "($columnName ${smtTable.smtName}$i)",
                                     upperBound.toString()
                                 )
                             )
@@ -229,14 +226,13 @@ class SmtLibGenerator(private val schema: DbInfoDto, private val numberOfRows: I
      * Appends primary key constraints for each table to the SMT-LIB.
      *
      * @param smt The SMT-LIB object to which primary key constraints are added.
-     * @param table The table for which primary key constraints are added.
+     * @param smtTable The table for which primary key constraints are added.
      */
-    private fun appendPrimaryKeyConstraints(smt: SMTLib, table: TableDto) {
-        val tableName = table.id.name.lowercase()
-        val primaryKeys = table.columns.filter { it.primaryKey }
+    private fun appendPrimaryKeyConstraints(smt: SMTLib, smtTable: SmtTable) {
+        val primaryKeys = smtTable.dto.columns.filter { it.primaryKey }
 
         for (primaryKey in primaryKeys) {
-            val nodes = assertForDistinctField(primaryKey.name, tableName)
+            val nodes = assertForDistinctField(smtTable.smtColumnName(primaryKey.name), smtTable.smtName)
             smt.addNodes(nodes)
         }
     }
@@ -271,20 +267,19 @@ class SmtLibGenerator(private val schema: DbInfoDto, private val numberOfRows: I
      * Appends foreign key constraints for each table to the SMT-LIB.
      *
      * @param smt The SMT-LIB object to which foreign key constraints are added.
-     * @param table The table for which foreign key constraints are added.
+     * @param smtTable The table for which foreign key constraints are added.
      */
-    private fun appendForeignKeyConstraints(smt: SMTLib, table: TableDto) {
-        val sourceTableName = table.id.name.lowercase()
-
-        for (foreignKey in table.foreignKeys) {
-            val referencedTable = findReferencedTable(foreignKey)
-            val referencedTableName = referencedTable.id.name.lowercase()
-            val referencedColumnSelector = findReferencedPKSelector(table, referencedTable, foreignKey)
+    private fun appendForeignKeyConstraints(smt: SMTLib, smtTable: SmtTable) {
+        for (foreignKey in smtTable.dto.foreignKeys) {
+            val referencedSmtTable = findReferencedSmtTable(foreignKey)
+            val referencedColumnSelector = referencedSmtTable.smtColumnName(
+                findReferencedPKSelector(smtTable.dto, referencedSmtTable.dto, foreignKey)
+            )
 
             for (sourceColumn in foreignKey.sourceColumns) {
                 val nodes = assertForEqualsAny(
-                    sourceColumn, sourceTableName,
-                    referencedColumnSelector, referencedTableName
+                    smtTable.smtColumnName(sourceColumn), smtTable.smtName,
+                    referencedColumnSelector, referencedSmtTable.smtName
                 )
                 smt.addNodes(nodes)
             }
@@ -352,13 +347,13 @@ class SmtLibGenerator(private val schema: DbInfoDto, private val numberOfRows: I
     }
 
     /**
-     * Finds the referenced table based on the foreign key.
+     * Finds the [SmtTable] for the table referenced by the given foreign key.
      *
      * @param foreignKey The foreign key constraint.
-     * @return The referenced table.
+     * @return The referenced [SmtTable].
      */
-    private fun findReferencedTable(foreignKey: ForeignKeyDto): TableDto {
-        return schema.tables.firstOrNull { it.id.name.equals(foreignKey.targetTable, ignoreCase = true) }
+    private fun findReferencedSmtTable(foreignKey: ForeignKeyDto): SmtTable {
+        return smtTableByOriginalName[foreignKey.targetTable.lowercase()]
             ?: throw RuntimeException("Referenced table not found: ${foreignKey.targetTable}")
     }
 
@@ -434,7 +429,9 @@ class SmtLibGenerator(private val schema: DbInfoDto, private val numberOfRows: I
      * @return The corresponding SMT node.
      */
     private fun parseQueryCondition(tableAliases: Map<String, String>, defaultTableName: String, condition: SqlCondition, index: Int): SMTNode {
-        val visitor = SMTConditionVisitor(defaultTableName, tableAliases, schema.tables, index)
+        val smtDefaultTableName = smtTableByOriginalName[defaultTableName.lowercase()]?.smtName
+            ?: convertToAscii(defaultTableName)
+        val visitor = SMTConditionVisitor(smtDefaultTableName, tableAliases, schema.tables, index)
         return condition.accept(visitor, null) as SMTNode
     }
 
@@ -517,11 +514,10 @@ class SmtLibGenerator(private val schema: DbInfoDto, private val numberOfRows: I
         }
 
         // Only add GetValueSMTNode for the mentioned tables
-        for (table in schema.tables) {
-            val tableNameLower = table.id.name.lowercase()
-            if (tablesMentioned.contains(tableNameLower)) {
+        for (smtTable in smtTables) {
+            if (tablesMentioned.contains(smtTable.originalName)) {
                 for (i in 1..numberOfRows) {
-                    smt.addNode(GetValueSMTNode("$tableNameLower$i"))
+                    smt.addNode(GetValueSMTNode("${smtTable.smtName}$i"))
                 }
             }
         }
@@ -530,29 +526,34 @@ class SmtLibGenerator(private val schema: DbInfoDto, private val numberOfRows: I
     /**
      * Gets the constructors for a table's columns to be used in SMT-LIB.
      *
-     * @param table The table for which constructors are generated.
+     * @param smtTable The table for which constructors are generated.
      * @return A list of SMT nodes for column constructors.
      */
-    private fun getConstructors(table: TableDto): List<DeclareConstSMTNode> {
-        return table.columns.map { c ->
+    private fun getConstructors(smtTable: SmtTable): List<DeclareConstSMTNode> {
+        return smtTable.dto.columns.map { c ->
             val smtType = TYPE_MAP[c.type.uppercase()]
                 ?: throw RuntimeException("Unsupported column type: ${c.type}")
-            DeclareConstSMTNode(c.name, smtType)
+            DeclareConstSMTNode(smtTable.smtColumnName(c.name), smtType)
         }
     }
 
     companion object {
 
+        // Maps database column types to SMT-LIB types
         private val TYPE_MAP = mapOf(
             "BIGINT" to "Int",
             "BIT" to "Int",
             "INTEGER" to "Int",
+            "INT" to "Int",
             "INT2" to "Int",
             "INT4" to "Int",
             "INT8" to "Int",
             "TINYINT" to "Int",
             "SMALLINT" to "Int",
             "NUMERIC" to "Int",
+            "SERIAL" to "Int",
+            "SMALLSERIAL" to "Int",
+            "BIGSERIAL" to "Int",
             "TIMESTAMP" to "Int",
             "DATE" to "Int",
             "FLOAT" to "Real",
diff --git a/core/src/main/kotlin/org/evomaster/core/solver/SmtTable.kt b/core/src/main/kotlin/org/evomaster/core/solver/SmtTable.kt
new file mode 100644
index 0000000000..06b5e5bdc8
--- /dev/null
+++ b/core/src/main/kotlin/org/evomaster/core/solver/SmtTable.kt
@@ -0,0 +1,30 @@
+package org.evomaster.core.solver
+
+import org.evomaster.client.java.controller.api.dto.database.schema.TableDto
+import org.evomaster.core.utils.StringUtils
+import org.evomaster.core.utils.StringUtils.convertToAscii
+
+/**
+ * A view of a [TableDto] with pre-computed SMT-safe identifiers.
+ *
+ * All table and column names are converted to ASCII once at construction time,
+ * avoiding repeated [convertToAscii] calls throughout [SmtLibGenerator].
+ */
+class SmtTable(val dto: TableDto) {
+
+    /** Original lowercase table name, used to match against SQL query table references. */
+    val originalName: String = dto.id.name.lowercase()
+
+    /** SMT-safe lowercase identifier used in row constant declarations (e.g., "person1", "person2"). */
+    val smtName: String = convertToAscii(dto.id.name).lowercase()
+
+    /** SMT-LIB datatype name for this table's rows (e.g., "PersonRow"). */
+    val dataTypeName: String = "${StringUtils.capitalization(smtName)}Row"
+
+    private val columnSmtNames: Map<String, String> =
+        dto.columns.associate { col -> col.name to convertToAscii(col.name) }
+
+    /** Returns the SMT-safe identifier for the given column name. */
+    fun smtColumnName(columnName: String): String =
+        columnSmtNames[columnName] ?: convertToAscii(columnName)
+}
diff --git a/core/src/main/kotlin/org/evomaster/core/utils/StringUtils.kt b/core/src/main/kotlin/org/evomaster/core/utils/StringUtils.kt
index 426d1a8737..7557b4ccfa 100644
--- a/core/src/main/kotlin/org/evomaster/core/utils/StringUtils.kt
+++ b/core/src/main/kotlin/org/evomaster/core/utils/StringUtils.kt
@@ -1,7 +1,5 @@
 package org.evomaster.core.utils
 
-import java.util.*
-
 object StringUtils {
 
     /**
@@ -82,4 +80,77 @@ object StringUtils {
         }
         return lines
     }
+
+    /** Matches every character outside the 7-bit ASCII range; compiled once and reused by [convertToAscii]. */
+    private val NON_ASCII_REGEX = Regex("[^\\x00-\\x7F]")
+
+    /**
+     * Converts a string to a valid ASCII identifier for use in SMT-LIB.
+     * SMT-LIB unquoted symbols are restricted to ASCII.
+     *
+     * The conversion uses two complementary steps:
+     * 1. An explicit folding map for characters that have no canonical decomposition under NFD
+     *    (e.g., Ø→O, Æ→AE, ß→ss, ð→d, þ→th, Ł→L, Œ→OE, ŋ→n, ħ→h, ı→i, …),
+     *    covering non-decomposable characters from the Unicode Latin Extended blocks.
+     * 2. NFD normalization followed by stripping of non-ASCII combining marks, which handles
+     *    all accented characters that do decompose (e.g., é→e, ü→u, ñ→n, Ä→A, ö→o, å→a).
+     *
+     * Any remaining non-ASCII characters (e.g., from non-Latin scripts) are dropped.
+     */
+    fun convertToAscii(name: String): String {
+        val sb = StringBuilder(name.length * 2)
+        for (ch in name) {
+            sb.append(ASCII_FOLD_MAP[ch] ?: ch.toString())
+        }
+        return java.text.Normalizer.normalize(sb.toString(), java.text.Normalizer.Form.NFD)
+            .replace(NON_ASCII_REGEX, "")
+    }
+
+    /**
+     * Explicit ASCII replacements for Unicode characters that do not decompose under NFD normalization.
+     * Covers non-decomposable characters from the Unicode Latin-1 Supplement and Latin Extended-A/B blocks.
+     * Characters that DO decompose under NFD (e.g., Ä, ö, å, é, ü, ñ) are handled by the NFD step in
+     * [convertToAscii] and need no entry here.
+     */
+    private val ASCII_FOLD_MAP: Map<Char, String> = mapOf(
+        // Latin-1 Supplement
+        'Æ' to "AE", 'æ' to "ae",   // AE ligature (Danish, Norwegian, Old English)
+        'Ð' to "D", 'ð' to "d",     // Eth (Icelandic, Old English)
+        'Ø' to "O", 'ø' to "o",     // O with stroke (Danish, Norwegian)
+        'Þ' to "TH", 'þ' to "th",   // Thorn (Icelandic, Old English)
+        'ß' to "ss",                // Sharp S (German)
+        // Latin Extended-A
+        'Ħ' to "H", 'ħ' to "h",     // H with stroke (Maltese)
+        'ı' to "i",                 // Dotless i (Turkish, Azerbaijani)
+        '\u0132' to "IJ", '\u0133' to "ij", // IJ digraph U+0132/U+0133 (Dutch); escaped so each key stays one Char
+        'ĸ' to "k",                 // Kra (Greenlandic)
+        'Ł' to "L", 'ł' to "l",     // L with stroke (Polish, Croatian, Sorbian)
+        'Ŋ' to "N", 'ŋ' to "n",     // Eng (Sami, African languages)
+        'Œ' to "OE", 'œ' to "oe",   // OE ligature (French)
+        'Ŧ' to "T", 'ŧ' to "t",     // T with stroke (Sami)
+        // Latin Extended-B
+        'ƀ' to "b", 'Ƀ' to "B",     // B with stroke
+        'Ɓ' to "B",                 // B with hook
+        'Ƈ' to "C", 'ƈ' to "c",     // C with hook
+        'Ɗ' to "D",                 // D with hook
+        'ƌ' to "d",                 // D with topbar
+        'Ƒ' to "F", 'ƒ' to "f",     // F with hook
+        'Ɠ' to "G",                 // G with hook
+        'Ɨ' to "I",                 // I with stroke
+        'Ƙ' to "K", 'ƙ' to "k",     // K with hook
+        'ƚ' to "l",                 // L with bar
+        'Ɲ' to "N", 'ƞ' to "n",     // N with hook / N with long right leg
+        'Ƥ' to "P", 'ƥ' to "p",     // P with hook
+        'ƫ' to "t",                 // T with palatal hook
+        'Ƭ' to "T", 'ƭ' to "t",     // T with hook
+        'Ʈ' to "T",                 // T with retroflex hook
+        'Ư' to "U", 'ư' to "u",     // U with horn (Vietnamese)
+        'Ʋ' to "V",                 // V with hook
+        'Ƴ' to "Y", 'ƴ' to "y",     // Y with hook
+        'Ƶ' to "Z", 'ƶ' to "z",     // Z with stroke
+        'Ǝ' to "E", 'ǝ' to "e",     // Reversed E / Schwa
+        'Ɵ' to "O",                 // O with middle tilde
+        'Ȼ' to "C", 'ȼ' to "c",     // C with stroke
+        'Ɇ' to "E", 'ɇ' to "e",     // E with stroke
+        'Ɉ' to "J", 'ɉ' to "j",     // J with stroke
+        'Ɋ' to "Q", 'ɋ' to "q",     // Q with hook tail
+        'Ɍ' to "R", 'ɍ' to "r",     // R with stroke
+        'Ɏ' to "Y", 'ɏ' to "y",     // Y with stroke
+    )
 }
diff --git a/core/src/test/kotlin/org/evomaster/core/utils/StringUtilsTest.kt b/core/src/test/kotlin/org/evomaster/core/utils/StringUtilsTest.kt
index 6575a6f981..ebe7bbb7d5 100644
--- a/core/src/test/kotlin/org/evomaster/core/utils/StringUtilsTest.kt
+++ b/core/src/test/kotlin/org/evomaster/core/utils/StringUtilsTest.kt
@@ -19,4 +19,66 @@ class StringUtilsTest{
         assertEquals(", Hello",lines[2])
         assertEquals(", C, D",lines[3])
     }
+
+    @Test
+    fun testConvertToAsciiPlainAsciiUnchanged() {
+        assertEquals("hello_world", StringUtils.convertToAscii("hello_world"))
+        assertEquals("FooBar123", StringUtils.convertToAscii("FooBar123"))
+    }
+
+    @Test
+    fun testConvertToAsciiNorwegianDanish() {
+        // Ø/ø and Æ/æ do not decompose under NFD — handled by explicit map
+        assertEquals("O", StringUtils.convertToAscii("Ø"))
+        assertEquals("o", StringUtils.convertToAscii("ø"))
+        assertEquals("AE", StringUtils.convertToAscii("Æ"))
+        assertEquals("ae", StringUtils.convertToAscii("æ"))
+        // Å/å decomposes under NFD
+        assertEquals("A", StringUtils.convertToAscii("Å"))
+        assertEquals("a", StringUtils.convertToAscii("å"))
+    }
+
+    @Test
+    fun testConvertToAsciiSwedishGerman() {
+        // These all decompose under NFD (base letter + combining diacritic)
+        assertEquals("A", StringUtils.convertToAscii("Ä"))
+        assertEquals("a", StringUtils.convertToAscii("ä"))
+        assertEquals("O", StringUtils.convertToAscii("Ö"))
+        assertEquals("o", StringUtils.convertToAscii("ö"))
+        assertEquals("U", StringUtils.convertToAscii("Ü"))
+        assertEquals("u", StringUtils.convertToAscii("ü"))
+        // ß does not decompose under NFD — handled by explicit map
+        assertEquals("ss", StringUtils.convertToAscii("ß"))
+    }
+
+    @Test
+    fun testConvertToAsciiIcelandic() {
+        assertEquals("D", StringUtils.convertToAscii("Ð"))
+        assertEquals("d", StringUtils.convertToAscii("ð"))
+        assertEquals("TH", StringUtils.convertToAscii("Þ"))
+        assertEquals("th", StringUtils.convertToAscii("þ"))
+    }
+
+    @Test
+    fun testConvertToAsciiPolishFrench() {
+        assertEquals("L", StringUtils.convertToAscii("Ł"))
+        assertEquals("l", StringUtils.convertToAscii("ł"))
+        assertEquals("OE", StringUtils.convertToAscii("Œ"))
+        assertEquals("oe", StringUtils.convertToAscii("œ"))
+    }
+
+    @Test
+    fun testConvertToAsciiOtherAccented() {
+        // Common accented characters that decompose under NFD
+        assertEquals("e", StringUtils.convertToAscii("é"))
+        assertEquals("e", StringUtils.convertToAscii("è"))
+        assertEquals("n", StringUtils.convertToAscii("ñ"))
+        assertEquals("c", StringUtils.convertToAscii("ç"))
+    }
+
+    @Test
+    fun testConvertToAsciiMixedString() {
+        assertEquals("StromsAElv", StringUtils.convertToAscii("StrømsÆlv"))
+        assertEquals("Malostranke_namesti", StringUtils.convertToAscii("Malostranké_náměstí"))
+    }
 }
\ No newline at end of file