From 8c03d8beb63803d4409cd112459589407ca38479 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Wed, 22 Oct 2025 17:12:27 -0300 Subject: [PATCH 01/26] added Java support for missing predefined character classes escapes (\v, \V, \h, \H) --- .../org/evomaster/core/parser/RegexJava.g4 | 2 +- .../gene/regex/CharacterClassEscapeRxGene.kt | 6 ++- .../core/search/service/Randomness.kt | 44 ++++++++++++++++--- .../core/parser/GeneRegexJavaVisitorTest.kt | 5 +++ 4 files changed, 48 insertions(+), 9 deletions(-) diff --git a/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 b/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 index 1f0ca5683f..37bf68641c 100644 --- a/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 +++ b/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 @@ -234,7 +234,7 @@ AtomEscape fragment CharacterClassEscape //one of d D s S w W - : [dDsSwW] + : [dDsSwWvVhH] ; diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt index f58cab48bc..a61a487407 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt @@ -31,7 +31,7 @@ class CharacterClassEscapeRxGene( var value: String = "" init { - if (!listOf("w", "W", "d", "D", "s", "S").contains(type)) { + if (!listOf("w", "W", "d", "D", "s", "S", "v", "V", "h", "H").contains(type)) { throw IllegalArgumentException("Invalid type: $type") } } @@ -61,6 +61,10 @@ class CharacterClassEscapeRxGene( "W" -> randomness.nextNonWordChar() "s" -> randomness.nextSpaceChar() "S" -> randomness.nextNonSpaceChar() + "v" -> randomness.nextVerticalSpaceChar() + "V" -> randomness.nextNonVerticalSpaceChar() + "h" -> randomness.nextHorizontalSpaceChar() + "H" -> randomness.nextNonHorizontalSpaceChar() else -> //this should never 
happen due to check in init throw IllegalStateException("Type '\\$type' not supported yet") diff --git a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt index 34e744a9f3..7abe5e26e2 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt @@ -36,17 +36,24 @@ class Randomness { private val digitSet = "0123456789" private val asciiLetterSet = "abcdefghilmnopqrstuvzjkwxyABCDEFGHILMNOPQRSTUVZJKWXY" - private val norwegianLetterSet = "æøåÆØÅ" private val wordSet = "_$digitSet$asciiLetterSet" - private val spaceSet = " \t\r\n" - private val punctuationSet = "!@#$%^&*()[]{}<>:;|" + private val spaceSet = " \t\r\n\u000C\u000b" + private val verticalSpaceSet = "\n\u000B\u000C\r\u0085\u2028\u2029" + private val horizontalSpaceSet = " \t\u00A0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000" - private val allSet = "$wordSet$spaceSet$norwegianLetterSet$punctuationSet" + // does not actually include all characters but covers ASCII + private val allSet = (0x00..0xFF).map { it.toChar() }.joinToString("") - private val nonWordSet = allSet.replace(wordSet,"") - private val nonDigitSet = allSet.replace(digitSet, "") - private val nonSpaceSet = allSet.replace(spaceSet, "") + // used to complement sets as they may have repeated characters + private fun complementSet(base: String, remove: String): String = + base.filterNot { it in remove } + + private val nonWordSet = complementSet(allSet, wordSet) + private val nonDigitSet = complementSet(allSet, digitSet) + private val nonSpaceSet = complementSet(allSet, spaceSet) + private val nonVerticalSpaceSet = complementSet(allSet, verticalSpaceSet) + private val nonHorizontalSpaceSet = complementSet(allSet, horizontalSpaceSet) private val wordChars = wordSet.map { it.toInt() }.sorted() @@ -312,6 +319,29 @@ 
class Randomness { return k } + fun nextVerticalSpaceChar(): Char { + val k = nextFromStringSet(verticalSpaceSet) + log.trace("nextVerticalSpaceChar(): {}", k) + return k + } + + fun nextNonVerticalSpaceChar(): Char { + val k = nextFromStringSet(nonVerticalSpaceSet) + log.trace("nextNonVerticalSpaceChar(): {}", k) + return k + } + + fun nextHorizontalSpaceChar(): Char { + val k = nextFromStringSet(horizontalSpaceSet) + log.trace("nextHorizontalSpaceChar(): {}", k) + return k + } + + fun nextNonHorizontalSpaceChar(): Char { + val k = nextFromStringSet(nonHorizontalSpaceSet) + log.trace("nextNonHorizontalSpaceChar(): {}", k) + return k + } fun wordCharPool() = wordChars diff --git a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt index a54a3cae5d..5a3f1452b5 100644 --- a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt +++ b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt @@ -92,4 +92,9 @@ class GeneRegexJavaVisitorTest : GeneRegexEcma262VisitorTest() { override fun testControlLetterEscape() { checkSameAsJava("""cac!\ca\cg\cz\cA\cG\cZ\c@\c[\c\\c]\c^\c\c_\c?""") } + + @Test + fun testJavaCharClassEscape(){ + checkSameAsJava("""\v\V\h\H""") + } } \ No newline at end of file From 147d40c00d3911756cb76aaac3f3ffe3bb3ad4bf Mon Sep 17 00:00:00 2001 From: lmasroca Date: Thu, 23 Oct 2025 19:15:43 -0300 Subject: [PATCH 02/26] added Java support for POSIX character classes (\p{Lower}, \p{Print}, etc.) 
--- .../org/evomaster/core/parser/RegexJava.g4 | 17 +++++++++++ .../core/parser/GeneRegexJavaVisitor.kt | 2 +- .../gene/regex/CharacterClassEscapeRxGene.kt | 26 +++++++++-------- .../core/search/service/Randomness.kt | 29 ++++++++++++++++++- .../core/parser/GeneRegexJavaVisitorTest.kt | 6 ++++ 5 files changed, 66 insertions(+), 14 deletions(-) diff --git a/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 b/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 index 37bf68641c..e4225b4b7a 100644 --- a/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 +++ b/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 @@ -125,9 +125,26 @@ fragment CharacterEscape | HexEscapeSequence | UnicodeEscapeSequence | OctalEscapeSequence + | 'p' BRACE_open PosixCharacterClassLabel BRACE_close //| IdentityEscape ; +fragment PosixCharacterClassLabel + : 'Lower' + | 'Upper' + | 'ASCII' + | 'Alpha' + | 'Digit' + | 'Alnum' + | 'Punct' + | 'Graph' + | 'Print' + | 'Blank' + | 'Cntrl' + | 'XDigit' + | 'Space' + ; + fragment ControlEscape //one of f n r t v : [aefnrt] diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt index 73682332f5..e3fbcb3f36 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt @@ -227,7 +227,7 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ ) ) } - else -> return VisitResult(CharacterClassEscapeRxGene(txt[1].toString())) + else -> return VisitResult(CharacterClassEscapeRxGene(txt.substring(1))) } } diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt index a61a487407..bb6f417a45 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt +++ 
b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt @@ -19,6 +19,7 @@ import org.slf4j.LoggerFactory \D Find a non-digit character \s Find a whitespace character \S Find a non-whitespace character +\p{X} Find a character from X POSIX character class (eg:\p{Lower}) */ class CharacterClassEscapeRxGene( val type: String @@ -31,7 +32,7 @@ class CharacterClassEscapeRxGene( var value: String = "" init { - if (!listOf("w", "W", "d", "D", "s", "S", "v", "V", "h", "H").contains(type)) { + if (!listOf("w", "W", "d", "D", "s", "S", "v", "V", "h", "H").contains(type) && 'p' != type[0]) { throw IllegalArgumentException("Invalid type: $type") } } @@ -54,17 +55,18 @@ class CharacterClassEscapeRxGene( val previous = value - value = when(type){ - "d" -> randomness.nextDigitChar() - "D" -> randomness.nextNonDigitChar() - "w" -> randomness.nextWordChar() - "W" -> randomness.nextNonWordChar() - "s" -> randomness.nextSpaceChar() - "S" -> randomness.nextNonSpaceChar() - "v" -> randomness.nextVerticalSpaceChar() - "V" -> randomness.nextNonVerticalSpaceChar() - "h" -> randomness.nextHorizontalSpaceChar() - "H" -> randomness.nextNonHorizontalSpaceChar() + value = when(type[0]){ + 'd' -> randomness.nextDigitChar() + 'D' -> randomness.nextNonDigitChar() + 'w' -> randomness.nextWordChar() + 'W' -> randomness.nextNonWordChar() + 's' -> randomness.nextSpaceChar() + 'S' -> randomness.nextNonSpaceChar() + 'v' -> randomness.nextVerticalSpaceChar() + 'V' -> randomness.nextNonVerticalSpaceChar() + 'h' -> randomness.nextHorizontalSpaceChar() + 'H' -> randomness.nextNonHorizontalSpaceChar() + 'p' -> randomness.nextPosixCharClassChar(type) else -> //this should never happen due to check in init throw IllegalStateException("Type '\\$type' not supported yet") diff --git a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt index 7abe5e26e2..e087d2cef4 100644 --- 
a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt @@ -40,7 +40,9 @@ class Randomness { private val wordSet = "_$digitSet$asciiLetterSet" private val spaceSet = " \t\r\n\u000C\u000b" private val verticalSpaceSet = "\n\u000B\u000C\r\u0085\u2028\u2029" - private val horizontalSpaceSet = " \t\u00A0\u1680\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000" + private val horizontalSpaceSet = " \t\u00A0\u1680\u180e\u2000\u2001\u2002\u2003" + + "\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000" + private val punctuationSet = "!\"#\$%&'()*+,-./:;<=>?@[\\]^_`{|}~" // does not actually include all characters but covers ASCII private val allSet = (0x00..0xFF).map { it.toChar() }.joinToString("") @@ -55,6 +57,22 @@ class Randomness { private val nonVerticalSpaceSet = complementSet(allSet, verticalSpaceSet) private val nonHorizontalSpaceSet = complementSet(allSet, horizontalSpaceSet) + private val posixCharClassSet = mapOf( + "Lower" to ('a'..'z').joinToString(""), + "Upper" to ('A'..'Z').joinToString(""), + "ASCII" to (0x00..0x7F).map { it.toChar() }.joinToString(""), + "Alpha" to asciiLetterSet, + "Digit" to digitSet, + "Alnum" to "$digitSet$asciiLetterSet", + "Punct" to punctuationSet, + "Graph" to "$digitSet$asciiLetterSet$punctuationSet", + "Print" to "$digitSet$asciiLetterSet$punctuationSet\u0020", + "Blank" to " \t", + "Cntrl" to (0x00..0x1F).map { it.toChar() }.joinToString("") + 0x7F.toChar(), + "XDigit" to "0123456789abcdefABCDEF", + "Space" to spaceSet + ) + private val wordChars = wordSet.map { it.toInt() }.sorted() /** @@ -343,6 +361,15 @@ class Randomness { return k } + fun nextPosixCharClassChar(type: String): Char { + if (type.substring(2,type.length-1) !in posixCharClassSet){ + throw IllegalArgumentException("$type invalid/unsupported POSIX character class") + } + val k = 
nextFromStringSet(posixCharClassSet[type.substring(2,type.length-1)]!!) + log.trace("nextPosixCharClassChar({}): {}", type, k) + return k + } + fun wordCharPool() = wordChars fun validNextWordChars(min: Int, max: Int): List { diff --git a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt index 5a3f1452b5..58307b6651 100644 --- a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt +++ b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt @@ -97,4 +97,10 @@ class GeneRegexJavaVisitorTest : GeneRegexEcma262VisitorTest() { fun testJavaCharClassEscape(){ checkSameAsJava("""\v\V\h\H""") } + + @Test + fun testPosixCharacterClasses(){ + checkSameAsJava("""\p{Lower}\p{Upper}\p{ASCII}\p{Alpha}\p{Digit}\p{Alnum}\p{Punct}\p{Graph} + |\p{Print}\p{Blank}\p{Cntrl}\p{XDigit}\p{Space}""".trimMargin()) + } } \ No newline at end of file From fa88731a59ca1d5e5e473eabde03b142bfd09d0d Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 28 Oct 2025 15:24:02 -0300 Subject: [PATCH 03/26] added a comment --- .../main/kotlin/org/evomaster/core/search/service/Randomness.kt | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt index e087d2cef4..b176706b2f 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt @@ -44,6 +44,7 @@ class Randomness { "\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000" private val punctuationSet = "!\"#\$%&'()*+,-./:;<=>?@[\\]^_`{|}~" + // TODO this is neither efficient nor complete; this will be modified // does not actually include all characters but covers ASCII private val allSet = (0x00..0xFF).map { it.toChar() }.joinToString("") From 
2c9793c97baf44f1bf80416e0c1cec3d1997454f Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 4 Nov 2025 20:43:39 -0300 Subject: [PATCH 04/26] Regex support for negated character classes, improved and optimized complement for predefined character classes --- .../search/gene/regex/CharacterRangeRxGene.kt | 123 +++++++++++++----- .../core/search/service/Randomness.kt | 121 +++++++++-------- .../parser/GeneRegexEcma262VisitorTest.kt | 10 ++ 3 files changed, 163 insertions(+), 91 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt index a029a474b5..b5cbd6d5b5 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt @@ -11,29 +11,35 @@ import org.evomaster.core.search.service.mutator.MutationWeightControl import org.evomaster.core.search.service.mutator.genemutation.AdditionalGeneMutationInfo import org.evomaster.core.search.service.mutator.genemutation.SubsetGeneMutationSelectionStrategy import org.slf4j.LoggerFactory -import kotlin.math.max -import kotlin.math.min class CharacterRangeRxGene( - val negated: Boolean, - ranges: List> + val negated: Boolean, + val ranges: List> ) : RxAtom, SimpleGene("."){ companion object{ private val log = LoggerFactory.getLogger(CharacterRangeRxGene::class.java) } - init { - //TODO this will need to be supported - if(negated){ - throw IllegalArgumentException("Negated ranges are not supported yet") - } + private var internalRanges = mutableListOf>() + init { if(ranges.isEmpty()){ throw IllegalArgumentException("No defined ranges") } + if(negated) internalRanges.add(Pair(Character.MIN_VALUE,Character.MAX_VALUE)) + for (range in ranges) { + val max = maxOf(range.first, range.second) + val min = minOf(range.first, range.second) + if(negated){ + remove(Pair(min, max)) + } else { + 
add(Pair(min, max)) + } + } + ranges.forEach { if(it.first.code > it.second.code){ LoggingUtil.uniqueWarn(log, "Issue with Regex range, where '${it.first}' is greater than '${it.second}'") @@ -41,20 +47,57 @@ class CharacterRangeRxGene( } } - var value : Char = ranges[0].first + var value : Char = internalRanges[0].first + + private fun add(toAdd: Pair) { + val newInternalRanges = mutableListOf>() + var currentStart = toAdd.first + var currentEnd = toAdd.second + var merged = false + + for ((start, end) in internalRanges.sortedBy { it.first }){ + when { + end < currentStart - 1 -> newInternalRanges += start to end + start > currentEnd + 1 -> { + if (!merged) { + newInternalRanges += currentStart to currentEnd + merged = true + } + newInternalRanges += start to end + } + else -> { + currentStart = minOf(currentStart, start) + currentEnd = maxOf(currentEnd, end) + } + } + } + + if (!merged) { + newInternalRanges += currentStart to currentEnd + } + + internalRanges = newInternalRanges + } - /** - * As inputs might be unsorted, we make sure first <= second - */ - val ranges = ranges.map { Pair(min(it.first.code,it.second.code).toChar(), max(it.first.code, it.second.code).toChar()) } + private fun remove(toRemove: Pair) { + internalRanges = internalRanges.flatMap { r -> + when { + toRemove.second < r.first || toRemove.first > r.second -> + listOf(r) + else -> buildList { + if (toRemove.first > r.first) add(Pair(r.first, toRemove.first - 1)) + if (toRemove.second < r.second) add(Pair(toRemove.second + 1, r.second)) + } + } + }.toMutableList() + } override fun checkForLocallyValidIgnoringChildren() : Boolean{ - //TODO negated - return ranges.any { value.code >= it.first.code && value.code <= it.second.code } + return internalRanges.any { value.code >= it.first.code && value.code <= it.second.code } } override fun isMutable(): Boolean { - return ranges.size > 1 || ranges[0].let { it.first != it.second } + return internalRanges.size > 1 || internalRanges[0].let { it.first 
!= it.second } } override fun copyContent(): Gene { @@ -64,28 +107,36 @@ class CharacterRangeRxGene( } override fun setValueWithRawString(value: String) { - // need to check val c = value.toCharArray().firstOrNull() - if (c!= null) + if (c!= null){ + val prev = this.value this.value = c + if (!isLocallyValid()) this.value = prev + } } override fun randomize(randomness: Randomness, tryToForceNewValue: Boolean) { - - /* - TODO current is very simple, biased implementation. - Should rather have uniform sampling among all valid chars - */ - val range = randomness.choose(ranges) - - value = randomness.nextChar(range.first, range.second) + val total = internalRanges.sumOf { it.second.code - it.first.code + 1 } + val sampledValue = randomness.nextInt(total) + var currentRangeMinValue = 0 + for (r in internalRanges) { + val currentRangeMaxValue = currentRangeMinValue + r.second.code - r.first.code + 1 + if (sampledValue < currentRangeMaxValue) { + val codePoint = r.first.code + (sampledValue - currentRangeMinValue) + // is it necessary to log this? 
+ log.trace("using Int {} as character selector for character class, resulting in character number: {}, {}", sampledValue, codePoint, codePoint.toChar()) + value = codePoint.toChar() + return + } + currentRangeMinValue = currentRangeMaxValue + } + throw IllegalArgumentException("No defined ranges") } override fun shallowMutate(randomness: Randomness, apc: AdaptiveParameterControl, mwc: MutationWeightControl, selectionStrategy: SubsetGeneMutationSelectionStrategy, enableAdaptiveGeneMutation: Boolean, additionalGeneMutationInfo: AdditionalGeneMutationInfo?): Boolean { - var t = 0 - for(i in 0 until ranges.size){ - val p = ranges[i] + for(i in 0 until internalRanges.size){ + val p = internalRanges[i] if(value >= p.first && value <= p.second){ t = i break @@ -94,18 +145,18 @@ class CharacterRangeRxGene( val delta = randomness.choose(listOf(1,-1)) - if(value + delta > ranges[t].second){ + if(value + delta > internalRanges[t].second){ /* going over current max range. check next range and take its minimum */ - val next = (t+1) % ranges.size - value = ranges[next].first + val next = (t+1) % internalRanges.size + value = internalRanges[next].first - } else if(value + delta < ranges[t].first){ + } else if(value + delta < internalRanges[t].first){ - val previous = (t - 1 + ranges.size) % ranges.size - value = ranges[previous].second + val previous = (t - 1 + internalRanges.size) % internalRanges.size + value = internalRanges[previous].second } else { value += delta diff --git a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt index b176706b2f..69b472d1ac 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt @@ -2,6 +2,7 @@ package org.evomaster.core.search.service import com.google.inject.Inject import org.evomaster.core.EMConfig +import 
org.evomaster.core.search.gene.regex.CharacterRangeRxGene import org.evomaster.core.utils.NumberCalculationUtil import org.evomaster.core.utils.NumberCalculationUtil.calculateIncrement import org.slf4j.Logger @@ -14,6 +15,10 @@ class Randomness { companion object { private val log: Logger = LoggerFactory.getLogger(Randomness::class.java) + + private fun stringToListOfCharPairs(s: String) : List> { + return s.map { it to it } + } } @Inject @@ -34,45 +39,45 @@ class Randomness { updateSeed(configuration.seed) } - private val digitSet = "0123456789" - private val asciiLetterSet = "abcdefghilmnopqrstuvzjkwxyABCDEFGHILMNOPQRSTUVZJKWXY" - - private val wordSet = "_$digitSet$asciiLetterSet" - private val spaceSet = " \t\r\n\u000C\u000b" - private val verticalSpaceSet = "\n\u000B\u000C\r\u0085\u2028\u2029" - private val horizontalSpaceSet = " \t\u00A0\u1680\u180e\u2000\u2001\u2002\u2003" + - "\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000" - private val punctuationSet = "!\"#\$%&'()*+,-./:;<=>?@[\\]^_`{|}~" - - // TODO this is neither efficient nor complete; this will be modified - // does not actually include all characters but covers ASCII - private val allSet = (0x00..0xFF).map { it.toChar() }.joinToString("") - - // used to complement sets as they may have repeated characters - private fun complementSet(base: String, remove: String): String = - base.filterNot { it in remove } - - private val nonWordSet = complementSet(allSet, wordSet) - private val nonDigitSet = complementSet(allSet, digitSet) - private val nonSpaceSet = complementSet(allSet, spaceSet) - private val nonVerticalSpaceSet = complementSet(allSet, verticalSpaceSet) - private val nonHorizontalSpaceSet = complementSet(allSet, horizontalSpaceSet) - - private val posixCharClassSet = mapOf( - "Lower" to ('a'..'z').joinToString(""), - "Upper" to ('A'..'Z').joinToString(""), - "ASCII" to (0x00..0x7F).map { it.toChar() }.joinToString(""), - "Alpha" to asciiLetterSet, - "Digit" to digitSet, - "Alnum" 
to "$digitSet$asciiLetterSet", - "Punct" to punctuationSet, - "Graph" to "$digitSet$asciiLetterSet$punctuationSet", - "Print" to "$digitSet$asciiLetterSet$punctuationSet\u0020", - "Blank" to " \t", - "Cntrl" to (0x00..0x1F).map { it.toChar() }.joinToString("") + 0x7F.toChar(), - "XDigit" to "0123456789abcdefABCDEF", - "Space" to spaceSet - ) + private val digitS = listOf('0' to '9') + private val asciiLetterS = listOf('a' to 'z', 'A' to 'Z') + private val wordS = listOf('_' to '_') + asciiLetterS + digitS + private val spaceS = stringToListOfCharPairs(" \t\r\n\u000C\u000b") + private val horizontalSpaceS = listOf(0x2000.toChar() to 0x200a.toChar()) + + stringToListOfCharPairs(" \t\u00A0\u1680\u180e\u202f\u205f\u3000") + private val verticalSpaceS = stringToListOfCharPairs("\n\u000B\u000C\r\u0085\u2028\u2029") + private val punctuationS = stringToListOfCharPairs("""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~""") + + private val digitCC = CharacterRangeRxGene(false, digitS) + private val asciiLetterCC = CharacterRangeRxGene(false, asciiLetterS) + private val wordCC = CharacterRangeRxGene(false, wordS) + private val spaceCC = CharacterRangeRxGene(false, spaceS) + private val horizontalSpaceCC = CharacterRangeRxGene(false, horizontalSpaceS) + private val verticalSpaceCC = CharacterRangeRxGene(false, verticalSpaceS) + + private val nonDigitCC = CharacterRangeRxGene(true, digitS) + private val nonWordCC = CharacterRangeRxGene(true, wordS) + private val nonSpaceCC = CharacterRangeRxGene(true, spaceS) + private val nonHorizontalSpaceCC = CharacterRangeRxGene(true, horizontalSpaceS) + private val nonVerticalSpaceCC = CharacterRangeRxGene(true, verticalSpaceS) + + private val posixCharClassCC = mapOf( + "Lower" to listOf('a' to 'z'), + "Upper" to listOf('A' to 'Z'), + "ASCII" to listOf(0.toChar() to 0x7f.toChar()), + "Alpha" to asciiLetterS, + "Digit" to digitS, + "Alnum" to digitS + asciiLetterS, + "Punct" to punctuationS, + "Graph" to digitS + asciiLetterS + punctuationS, + "Print" 
to digitS + asciiLetterS + punctuationS + stringToListOfCharPairs("\u0020"), + "Blank" to stringToListOfCharPairs(" \t"), + "Cntrl" to listOf(0.toChar() to 0x1f.toChar()) + stringToListOfCharPairs("\u007f"), + "XDigit" to listOf('0' to '9', 'a' to 'f', 'A' to 'F'), + "Space" to spaceS + ).mapValues { (_, value) -> CharacterRangeRxGene(false, value) } + + private val wordSet = "_0123456789abcdefghilmnopqrstuvzjkwxyABCDEFGHILMNOPQRSTUVZJKWXY" private val wordChars = wordSet.map { it.toInt() }.sorted() @@ -291,9 +296,7 @@ class Randomness { fun nextLetter(): Char { - val characters = asciiLetterSet - - val k = characters[random.nextInt(characters.length)] + val k = nextFromCharClass(asciiLetterCC) log.trace("nextLetter(): {}", k) return k } @@ -302,71 +305,79 @@ class Randomness { return set[random.nextInt(set.length)] } + fun nextFromCharClass(cc: CharacterRangeRxGene) : Char{ + cc.randomize(this, false) + val k = cc.value + // is it necessary to log this? + log.trace("nextFromCharClass(): {}", k) + return k + } + fun nextWordChar(): Char { - val k = nextFromStringSet(wordSet) + val k = nextFromCharClass(wordCC) log.trace("nextWordChar(): {}", k) return k } - fun nextNonWordChar() : Char{ - val k = nextFromStringSet(nonWordSet) + fun nextNonWordChar() : Char { + val k = nextFromCharClass(nonWordCC) log.trace("nextNonWordChar(): {}", k) return k } fun nextDigitChar(): Char { - val k = nextFromStringSet(digitSet) + val k = nextFromCharClass(digitCC) log.trace("nextDigitChar(): {}", k) return k } fun nextNonDigitChar(): Char { - val k = nextFromStringSet(nonDigitSet) + val k = nextFromCharClass(nonDigitCC) log.trace("nextNonDigitChar(): {}", k) return k } fun nextSpaceChar(): Char { - val k = nextFromStringSet(spaceSet) + val k = nextFromCharClass(spaceCC) log.trace("nextSpaceChar(): {}", k) return k } fun nextNonSpaceChar(): Char { - val k = nextFromStringSet(nonSpaceSet) + val k = nextFromCharClass(nonSpaceCC) log.trace("nextNonSpaceChar(): {}", k) return k } fun 
nextVerticalSpaceChar(): Char { - val k = nextFromStringSet(verticalSpaceSet) + val k = nextFromCharClass(verticalSpaceCC) log.trace("nextVerticalSpaceChar(): {}", k) return k } fun nextNonVerticalSpaceChar(): Char { - val k = nextFromStringSet(nonVerticalSpaceSet) + val k = nextFromCharClass(nonVerticalSpaceCC) log.trace("nextNonVerticalSpaceChar(): {}", k) return k } fun nextHorizontalSpaceChar(): Char { - val k = nextFromStringSet(horizontalSpaceSet) + val k = nextFromCharClass(horizontalSpaceCC) log.trace("nextHorizontalSpaceChar(): {}", k) return k } fun nextNonHorizontalSpaceChar(): Char { - val k = nextFromStringSet(nonHorizontalSpaceSet) + val k = nextFromCharClass(nonHorizontalSpaceCC) log.trace("nextNonHorizontalSpaceChar(): {}", k) return k } fun nextPosixCharClassChar(type: String): Char { - if (type.substring(2,type.length-1) !in posixCharClassSet){ + if (type.substring(2,type.length-1) !in posixCharClassCC){ throw IllegalArgumentException("$type invalid/unsupported POSIX character class") } - val k = nextFromStringSet(posixCharClassSet[type.substring(2,type.length-1)]!!) + val k = nextFromCharClass(posixCharClassCC[type.substring(2,type.length-1)]!!) log.trace("nextPosixCharClassChar({}): {}", type, k) return k } diff --git a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexEcma262VisitorTest.kt b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexEcma262VisitorTest.kt index 12a1a832dd..6c4a839530 100644 --- a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexEcma262VisitorTest.kt +++ b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexEcma262VisitorTest.kt @@ -348,4 +348,14 @@ open class GeneRegexEcma262VisitorTest : RegexTestTemplate(){ // The following escape sequences behave differently in Java and JavaScript. 
checkCanSample("""\ca\cg\cz""","\u0001\u0007\u001A",10_000) } + + @Test + fun testNegatedCharClasses(){ + checkSameAsJava("""[^a-zA-Z0-9_,]""") + } + + @Test + fun testComplementCompleteness(){ + checkCanSample("""\D""", listOf("\u0000", "\uffff"), 1_000_000) + } } \ No newline at end of file From 3a863c7dc24448a2c937d6eabf0f08daff8089f0 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Fri, 7 Nov 2025 00:44:12 -0300 Subject: [PATCH 05/26] added some comments --- .../core/search/gene/regex/CharacterRangeRxGene.kt | 5 ++++- .../org/evomaster/core/search/service/Randomness.kt | 12 +++++++----- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt index b5cbd6d5b5..2e2765627f 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt @@ -29,6 +29,9 @@ class CharacterRangeRxGene( throw IllegalArgumentException("No defined ranges") } + // this limits the character class complements to 0xffff instead of allowing up to 0x10ffff, but values over + // 0xffff are not permitted on Char as they need 2 Chars to be represented; to allow this, we would need to + // use String or Int in every possible step as methods which return a single Char cannot return these characters if(negated) internalRanges.add(Pair(Character.MIN_VALUE,Character.MAX_VALUE)) for (range in ranges) { val max = maxOf(range.first, range.second) @@ -124,7 +127,7 @@ class CharacterRangeRxGene( if (sampledValue < currentRangeMaxValue) { val codePoint = r.first.code + (sampledValue - currentRangeMinValue) // is it necessary to log this? 
- log.trace("using Int {} as character selector for character class, resulting in character number: {}, {}", sampledValue, codePoint, codePoint.toChar()) + log.trace("using Int {} as character selector for character class, resulting in code point: {}, which is: {}", sampledValue, codePoint, codePoint.toChar()) value = codePoint.toChar() return } diff --git a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt index 69b472d1ac..10dfb9f33c 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt @@ -49,8 +49,6 @@ class Randomness { private val punctuationS = stringToListOfCharPairs("""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~""") private val digitCC = CharacterRangeRxGene(false, digitS) - private val asciiLetterCC = CharacterRangeRxGene(false, asciiLetterS) - private val wordCC = CharacterRangeRxGene(false, wordS) private val spaceCC = CharacterRangeRxGene(false, spaceS) private val horizontalSpaceCC = CharacterRangeRxGene(false, horizontalSpaceS) private val verticalSpaceCC = CharacterRangeRxGene(false, verticalSpaceS) @@ -61,6 +59,7 @@ class Randomness { private val nonHorizontalSpaceCC = CharacterRangeRxGene(true, horizontalSpaceS) private val nonVerticalSpaceCC = CharacterRangeRxGene(true, verticalSpaceS) + // US-ASCII POSIX character classes (\p{X}) private val posixCharClassCC = mapOf( "Lower" to listOf('a' to 'z'), "Upper" to listOf('A' to 'Z'), @@ -77,7 +76,8 @@ class Randomness { "Space" to spaceS ).mapValues { (_, value) -> CharacterRangeRxGene(false, value) } - private val wordSet = "_0123456789abcdefghilmnopqrstuvzjkwxyABCDEFGHILMNOPQRSTUVZJKWXY" + private val asciiLetterSet = "abcdefghilmnopqrstuvzjkwxyABCDEFGHILMNOPQRSTUVZJKWXY" + private val wordSet = "_0123456789$asciiLetterSet" private val wordChars = wordSet.map { it.toInt() }.sorted() @@ -296,7 +296,9 @@ class Randomness 
{ fun nextLetter(): Char { - val k = nextFromCharClass(asciiLetterCC) + val characters = asciiLetterSet + + val k = characters[random.nextInt(characters.length)] log.trace("nextLetter(): {}", k) return k } @@ -314,7 +316,7 @@ class Randomness { } fun nextWordChar(): Char { - val k = nextFromCharClass(wordCC) + val k = nextFromStringSet(wordSet) log.trace("nextWordChar(): {}", k) return k } From 8c3190770177f68f44f089dbb0e8de6c450e3cc1 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 11 Nov 2025 13:39:38 -0300 Subject: [PATCH 06/26] trying a fix --- .../core/search/service/Randomness.kt | 40 ++++++------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt index 10dfb9f33c..c4052ce6e7 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt @@ -48,17 +48,6 @@ class Randomness { private val verticalSpaceS = stringToListOfCharPairs("\n\u000B\u000C\r\u0085\u2028\u2029") private val punctuationS = stringToListOfCharPairs("""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~""") - private val digitCC = CharacterRangeRxGene(false, digitS) - private val spaceCC = CharacterRangeRxGene(false, spaceS) - private val horizontalSpaceCC = CharacterRangeRxGene(false, horizontalSpaceS) - private val verticalSpaceCC = CharacterRangeRxGene(false, verticalSpaceS) - - private val nonDigitCC = CharacterRangeRxGene(true, digitS) - private val nonWordCC = CharacterRangeRxGene(true, wordS) - private val nonSpaceCC = CharacterRangeRxGene(true, spaceS) - private val nonHorizontalSpaceCC = CharacterRangeRxGene(true, horizontalSpaceS) - private val nonVerticalSpaceCC = CharacterRangeRxGene(true, verticalSpaceS) - // US-ASCII POSIX character classes (\p{X}) private val posixCharClassCC = mapOf( "Lower" to listOf('a' to 'z'), @@ -74,7 +63,7 @@ class 
Randomness { "Cntrl" to listOf(0.toChar() to 0x1f.toChar()) + stringToListOfCharPairs("\u007f"), "XDigit" to listOf('0' to '9', 'a' to 'f', 'A' to 'F'), "Space" to spaceS - ).mapValues { (_, value) -> CharacterRangeRxGene(false, value) } + ) private val asciiLetterSet = "abcdefghilmnopqrstuvzjkwxyABCDEFGHILMNOPQRSTUVZJKWXY" private val wordSet = "_0123456789$asciiLetterSet" @@ -295,10 +284,7 @@ class Randomness { } fun nextLetter(): Char { - - val characters = asciiLetterSet - - val k = characters[random.nextInt(characters.length)] + val k = nextFromCharClass(CharacterRangeRxGene(false, asciiLetterS)) log.trace("nextLetter(): {}", k) return k } @@ -316,61 +302,61 @@ class Randomness { } fun nextWordChar(): Char { - val k = nextFromStringSet(wordSet) + val k = nextFromCharClass(CharacterRangeRxGene(false, wordS)) log.trace("nextWordChar(): {}", k) return k } fun nextNonWordChar() : Char { - val k = nextFromCharClass(nonWordCC) + val k = nextFromCharClass(CharacterRangeRxGene(true, wordS)) log.trace("nextNonWordChar(): {}", k) return k } fun nextDigitChar(): Char { - val k = nextFromCharClass(digitCC) + val k = nextFromCharClass(CharacterRangeRxGene(false, digitS)) log.trace("nextDigitChar(): {}", k) return k } fun nextNonDigitChar(): Char { - val k = nextFromCharClass(nonDigitCC) + val k = nextFromCharClass(CharacterRangeRxGene(true, digitS)) log.trace("nextNonDigitChar(): {}", k) return k } fun nextSpaceChar(): Char { - val k = nextFromCharClass(spaceCC) + val k = nextFromCharClass(CharacterRangeRxGene(false, spaceS)) log.trace("nextSpaceChar(): {}", k) return k } fun nextNonSpaceChar(): Char { - val k = nextFromCharClass(nonSpaceCC) + val k = nextFromCharClass(CharacterRangeRxGene(true, spaceS)) log.trace("nextNonSpaceChar(): {}", k) return k } fun nextVerticalSpaceChar(): Char { - val k = nextFromCharClass(verticalSpaceCC) + val k = nextFromCharClass(CharacterRangeRxGene(false, verticalSpaceS)) log.trace("nextVerticalSpaceChar(): {}", k) return k } fun 
nextNonVerticalSpaceChar(): Char { - val k = nextFromCharClass(nonVerticalSpaceCC) + val k = nextFromCharClass(CharacterRangeRxGene(true, verticalSpaceS)) log.trace("nextNonVerticalSpaceChar(): {}", k) return k } fun nextHorizontalSpaceChar(): Char { - val k = nextFromCharClass(horizontalSpaceCC) + val k = nextFromCharClass(CharacterRangeRxGene(false, horizontalSpaceS)) log.trace("nextHorizontalSpaceChar(): {}", k) return k } fun nextNonHorizontalSpaceChar(): Char { - val k = nextFromCharClass(nonHorizontalSpaceCC) + val k = nextFromCharClass(CharacterRangeRxGene(true, horizontalSpaceS)) log.trace("nextNonHorizontalSpaceChar(): {}", k) return k } @@ -379,7 +365,7 @@ class Randomness { if (type.substring(2,type.length-1) !in posixCharClassCC){ throw IllegalArgumentException("$type invalid/unsupported POSIX character class") } - val k = nextFromCharClass(posixCharClassCC[type.substring(2,type.length-1)]!!) + val k = nextFromCharClass(CharacterRangeRxGene(false, posixCharClassCC[type.substring(2,type.length-1)]!!)) log.trace("nextPosixCharClassChar({}): {}", type, k) return k } From 897f04bc0669834bb455a5ea81325d0ffbc44509 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 25 Nov 2025 15:04:40 -0300 Subject: [PATCH 07/26] increasing iterations for an e2e test to account for changes in randomization --- .../language/LanguageServerExampleEMTest.java | 2 +- .../core/search/service/Randomness.kt | 39 ++++++++++++------- 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/core-tests/e2e-tests/spring/emb-json/src/test/java/org/evomaster/e2etests/emb/json/language/LanguageServerExampleEMTest.java b/core-tests/e2e-tests/spring/emb-json/src/test/java/org/evomaster/e2etests/emb/json/language/LanguageServerExampleEMTest.java index cb00192bf6..1067d6934b 100644 --- a/core-tests/e2e-tests/spring/emb-json/src/test/java/org/evomaster/e2etests/emb/json/language/LanguageServerExampleEMTest.java +++ 
b/core-tests/e2e-tests/spring/emb-json/src/test/java/org/evomaster/e2etests/emb/json/language/LanguageServerExampleEMTest.java @@ -24,7 +24,7 @@ public void runEMTest() throws Throwable { runTestHandlingFlakyAndCompilation( "LanguageServerExampleGeneratedEMTest", "org.foo.LanguageServerExampleGeneratedEMTest", - 4_000, + 5_000, true, (args) -> { diff --git a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt index c4052ce6e7..5013c1291b 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt @@ -48,6 +48,19 @@ class Randomness { private val verticalSpaceS = stringToListOfCharPairs("\n\u000B\u000C\r\u0085\u2028\u2029") private val punctuationS = stringToListOfCharPairs("""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~""") + private val digitCC = CharacterRangeRxGene(false, digitS) + private val asciiLetterCC = CharacterRangeRxGene(false, asciiLetterS) + private val wordCC = CharacterRangeRxGene(false, wordS) + private val spaceCC = CharacterRangeRxGene(false, spaceS) + private val horizontalSpaceCC = CharacterRangeRxGene(false, horizontalSpaceS) + private val verticalSpaceCC = CharacterRangeRxGene(false, verticalSpaceS) + + private val nonDigitCC = CharacterRangeRxGene(true, digitS) + private val nonWordCC = CharacterRangeRxGene(true, wordS) + private val nonSpaceCC = CharacterRangeRxGene(true, spaceS) + private val nonHorizontalSpaceCC = CharacterRangeRxGene(true, horizontalSpaceS) + private val nonVerticalSpaceCC = CharacterRangeRxGene(true, verticalSpaceS) + // US-ASCII POSIX character classes (\p{X}) private val posixCharClassCC = mapOf( "Lower" to listOf('a' to 'z'), @@ -63,7 +76,7 @@ class Randomness { "Cntrl" to listOf(0.toChar() to 0x1f.toChar()) + stringToListOfCharPairs("\u007f"), "XDigit" to listOf('0' to '9', 'a' to 'f', 'A' to 'F'), "Space" to spaceS - ) + ).mapValues { (_, 
value) -> CharacterRangeRxGene(false, value) } private val asciiLetterSet = "abcdefghilmnopqrstuvzjkwxyABCDEFGHILMNOPQRSTUVZJKWXY" private val wordSet = "_0123456789$asciiLetterSet" @@ -284,7 +297,7 @@ class Randomness { } fun nextLetter(): Char { - val k = nextFromCharClass(CharacterRangeRxGene(false, asciiLetterS)) + val k = nextFromCharClass(asciiLetterCC) log.trace("nextLetter(): {}", k) return k } @@ -302,61 +315,61 @@ class Randomness { } fun nextWordChar(): Char { - val k = nextFromCharClass(CharacterRangeRxGene(false, wordS)) + val k = nextFromCharClass(wordCC) log.trace("nextWordChar(): {}", k) return k } fun nextNonWordChar() : Char { - val k = nextFromCharClass(CharacterRangeRxGene(true, wordS)) + val k = nextFromCharClass(nonWordCC) log.trace("nextNonWordChar(): {}", k) return k } fun nextDigitChar(): Char { - val k = nextFromCharClass(CharacterRangeRxGene(false, digitS)) + val k = nextFromCharClass(digitCC) log.trace("nextDigitChar(): {}", k) return k } fun nextNonDigitChar(): Char { - val k = nextFromCharClass(CharacterRangeRxGene(true, digitS)) + val k = nextFromCharClass(nonDigitCC) log.trace("nextNonDigitChar(): {}", k) return k } fun nextSpaceChar(): Char { - val k = nextFromCharClass(CharacterRangeRxGene(false, spaceS)) + val k = nextFromCharClass(spaceCC) log.trace("nextSpaceChar(): {}", k) return k } fun nextNonSpaceChar(): Char { - val k = nextFromCharClass(CharacterRangeRxGene(true, spaceS)) + val k = nextFromCharClass(nonSpaceCC) log.trace("nextNonSpaceChar(): {}", k) return k } fun nextVerticalSpaceChar(): Char { - val k = nextFromCharClass(CharacterRangeRxGene(false, verticalSpaceS)) + val k = nextFromCharClass(verticalSpaceCC) log.trace("nextVerticalSpaceChar(): {}", k) return k } fun nextNonVerticalSpaceChar(): Char { - val k = nextFromCharClass(CharacterRangeRxGene(true, verticalSpaceS)) + val k = nextFromCharClass(nonVerticalSpaceCC) log.trace("nextNonVerticalSpaceChar(): {}", k) return k } fun nextHorizontalSpaceChar(): Char { - val k 
= nextFromCharClass(CharacterRangeRxGene(false, horizontalSpaceS)) + val k = nextFromCharClass(horizontalSpaceCC) log.trace("nextHorizontalSpaceChar(): {}", k) return k } fun nextNonHorizontalSpaceChar(): Char { - val k = nextFromCharClass(CharacterRangeRxGene(true, horizontalSpaceS)) + val k = nextFromCharClass(nonHorizontalSpaceCC) log.trace("nextNonHorizontalSpaceChar(): {}", k) return k } @@ -365,7 +378,7 @@ class Randomness { if (type.substring(2,type.length-1) !in posixCharClassCC){ throw IllegalArgumentException("$type invalid/unsupported POSIX character class") } - val k = nextFromCharClass(CharacterRangeRxGene(false, posixCharClassCC[type.substring(2,type.length-1)]!!)) + val k = nextFromCharClass(posixCharClassCC[type.substring(2,type.length-1)]!!) log.trace("nextPosixCharClassChar({}): {}", type, k) return k } From 56a7462fd6237a15a912f654db7dd2e9313ab3fb Mon Sep 17 00:00:00 2001 From: lmasroca Date: Fri, 5 Dec 2025 11:00:11 -0300 Subject: [PATCH 08/26] Disabled dto for TaintNestedEMTest from openapi-v2 E2E tests as CI currently fails otherwise --- .../spring/examples/taintnested/TaintNestedEMTest.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v2/src/test/java/org/evomaster/e2etests/spring/examples/taintnested/TaintNestedEMTest.java b/core-tests/e2e-tests/spring/spring-rest-openapi-v2/src/test/java/org/evomaster/e2etests/spring/examples/taintnested/TaintNestedEMTest.java index 38cee273c0..ac0abc0f26 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v2/src/test/java/org/evomaster/e2etests/spring/examples/taintnested/TaintNestedEMTest.java +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v2/src/test/java/org/evomaster/e2etests/spring/examples/taintnested/TaintNestedEMTest.java @@ -43,6 +43,9 @@ After the fix in Gene.doInitialize to use requiresRandomInitialization(), setOption(args, "extraQueryParam", "false"); setOption(args, "extraHeader", "false"); + // TODO disabled dto for this 
test as CI currently fails, remove next line when fixed + setOption(args, "dtoForRequestPayload","false"); + Solution solution = initAndRun(args); assertTrue(solution.getIndividuals().size() >= 1); From a1d1e32d2df88e64d99e5074f8df491adecdd051 Mon Sep 17 00:00:00 2001 From: Philip Garrett Date: Sat, 14 Mar 2026 12:44:33 -0300 Subject: [PATCH 09/26] Add required field to spec --- .../src/main/resources/static/openapi-dto-reflective-assert.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/main/resources/static/openapi-dto-reflective-assert.yaml b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/main/resources/static/openapi-dto-reflective-assert.yaml index e975247445..07ca6d3dbe 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/main/resources/static/openapi-dto-reflective-assert.yaml +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/main/resources/static/openapi-dto-reflective-assert.yaml @@ -97,6 +97,7 @@ paths: - aDouble - aFloat - aBoolean + - aNullableString responses: '200': description: OK From e9780d9ca19a3b7e827285ff5cb5dd86b3411993 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Mon, 16 Mar 2026 22:08:45 -0300 Subject: [PATCH 10/26] Revert "Disabled dto for TaintNestedEMTest from openapi-v2 E2E tests as CI currently fails otherwise" This reverts commit 56a7462fd6237a15a912f654db7dd2e9313ab3fb. 
--- .../spring/examples/taintnested/TaintNestedEMTest.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/core-tests/jdk-8/spring-rest-openapi-v2-tests/src/test/java/org/evomaster/e2etests/spring/examples/taintnested/TaintNestedEMTest.java b/core-tests/jdk-8/spring-rest-openapi-v2-tests/src/test/java/org/evomaster/e2etests/spring/examples/taintnested/TaintNestedEMTest.java index ac0abc0f26..38cee273c0 100644 --- a/core-tests/jdk-8/spring-rest-openapi-v2-tests/src/test/java/org/evomaster/e2etests/spring/examples/taintnested/TaintNestedEMTest.java +++ b/core-tests/jdk-8/spring-rest-openapi-v2-tests/src/test/java/org/evomaster/e2etests/spring/examples/taintnested/TaintNestedEMTest.java @@ -43,9 +43,6 @@ After the fix in Gene.doInitialize to use requiresRandomInitialization(), setOption(args, "extraQueryParam", "false"); setOption(args, "extraHeader", "false"); - // TODO disabled dto for this test as CI currently fails, remove next line when fixed - setOption(args, "dtoForRequestPayload","false"); - Solution solution = initAndRun(args); assertTrue(solution.getIndividuals().size() >= 1); From fededcf1461b21051a4705d23d1cf2cc8e88b9a0 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Wed, 18 Mar 2026 14:04:35 -0300 Subject: [PATCH 11/26] Requested changes pt.1 --- .../org/evomaster/core/parser/RegexJava.g4 | 2 +- .../search/gene/regex/CharacterRangeRxGene.kt | 75 ++++++++++------ .../core/search/service/Randomness.kt | 89 +++++++++---------- 3 files changed, 91 insertions(+), 75 deletions(-) diff --git a/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 b/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 index e4225b4b7a..e885746dec 100644 --- a/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 +++ b/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 @@ -250,7 +250,7 @@ AtomEscape ; fragment CharacterClassEscape - //one of d D s S w W + //one of d D s S w W v V h H : [dDsSwWvVhH] ; diff --git 
a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt index 7b091da2a2..fbcc597553 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt @@ -12,6 +12,11 @@ import org.evomaster.core.search.service.mutator.genemutation.AdditionalGeneMuta import org.evomaster.core.search.service.mutator.genemutation.SubsetGeneMutationSelectionStrategy import org.slf4j.LoggerFactory +data class CharacterRange(val start: Char, val end: Char){ + val size: Int + get() = end.code - start.code + 1 + operator fun contains(char: Char): Boolean = char in start..end +} class CharacterRangeRxGene( val negated: Boolean, @@ -22,7 +27,7 @@ class CharacterRangeRxGene( private val log = LoggerFactory.getLogger(CharacterRangeRxGene::class.java) } - private var internalRanges = mutableListOf>() + private var internalRanges = mutableListOf() init { if(ranges.isEmpty()){ @@ -32,14 +37,14 @@ class CharacterRangeRxGene( // this limits the character class complements to 0xffff instead of allowing up to 0x10ffff, but values over // 0xffff are not permitted on Char as they need 2 Chars to be represented; to allow this, we would need to // use String or Int in every possible step as methods which return a single Char cannot return these characters - if(negated) internalRanges.add(Pair(Character.MIN_VALUE,Character.MAX_VALUE)) + if(negated) internalRanges.add(CharacterRange(Character.MIN_VALUE,Character.MAX_VALUE)) for (range in ranges) { val max = maxOf(range.first, range.second) val min = minOf(range.first, range.second) if(negated){ - remove(Pair(min, max)) + remove(CharacterRange(min, max)) } else { - add(Pair(min, max)) + add(CharacterRange(min, max)) } } @@ -50,23 +55,30 @@ class CharacterRangeRxGene( } } - var value : Char = internalRanges[0].first - - private fun 
add(toAdd: Pair) { - val newInternalRanges = mutableListOf>() - var currentStart = toAdd.first - var currentEnd = toAdd.second + var value : Char = internalRanges[0].start + + /** + * Adds a character range to a [org.evomaster.core.search.gene.regex.CharacterRangeRxGene]. + * + * The range is added to the character class in a way that does not generate repeated elements. + * + * @param toAdd The character range to be added to the character class. + */ + private fun add(toAdd: CharacterRange) { + val newInternalRanges = mutableListOf() + var currentStart = toAdd.start + var currentEnd = toAdd.end var merged = false - for ((start, end) in internalRanges.sortedBy { it.first }){ + for ((start, end) in internalRanges.sortedBy { it.start }){ when { - end < currentStart - 1 -> newInternalRanges += start to end + end < currentStart - 1 -> newInternalRanges += CharacterRange(start, end) start > currentEnd + 1 -> { if (!merged) { - newInternalRanges += currentStart to currentEnd + newInternalRanges += CharacterRange(currentStart, currentEnd) merged = true } - newInternalRanges += start to end + newInternalRanges += CharacterRange(start, end) } else -> { currentStart = minOf(currentStart, start) @@ -76,31 +88,36 @@ class CharacterRangeRxGene( } if (!merged) { - newInternalRanges += currentStart to currentEnd + newInternalRanges += CharacterRange(currentStart, currentEnd) } internalRanges = newInternalRanges } - private fun remove(toRemove: Pair) { + /** + * Safely removes a character range from a [org.evomaster.core.search.gene.regex.CharacterRangeRxGene]. + * + * @param toRemove The character range to be removed from the character class. 
+ */ + private fun remove(toRemove: CharacterRange) { internalRanges = internalRanges.flatMap { r -> when { - toRemove.second < r.first || toRemove.first > r.second -> + toRemove.end < r.start || toRemove.start > r.end -> listOf(r) else -> buildList { - if (toRemove.first > r.first) add(Pair(r.first, toRemove.first - 1)) - if (toRemove.second < r.second) add(Pair(toRemove.second + 1, r.second)) + if (toRemove.start > r.start) add(CharacterRange(r.start, toRemove.start - 1)) + if (toRemove.end < r.end) add(CharacterRange(toRemove.end + 1, r.end)) } } }.toMutableList() } override fun checkForLocallyValidIgnoringChildren() : Boolean{ - return internalRanges.any { value.code >= it.first.code && value.code <= it.second.code } + return internalRanges.any { value in it } } override fun isMutable(): Boolean { - return internalRanges.size > 1 || internalRanges[0].let { it.first != it.second } + return internalRanges.size > 1 || internalRanges[0].let { it.start != it.end } } override fun copyContent(): Gene { @@ -120,13 +137,13 @@ class CharacterRangeRxGene( } override fun randomize(randomness: Randomness, tryToForceNewValue: Boolean) { - val total = internalRanges.sumOf { it.second.code - it.first.code + 1 } + val total = internalRanges.sumOf { it.size } val sampledValue = randomness.nextInt(total) var currentRangeMinValue = 0 for (r in internalRanges) { - val currentRangeMaxValue = currentRangeMinValue + r.second.code - r.first.code + 1 + val currentRangeMaxValue = currentRangeMinValue + r.size if (sampledValue < currentRangeMaxValue) { - val codePoint = r.first.code + (sampledValue - currentRangeMinValue) + val codePoint = r.start.code + (sampledValue - currentRangeMinValue) // is it necessary to log this? 
log.trace("using Int {} as character selector for character class, resulting in code point: {}, which is: {}", sampledValue, codePoint, codePoint.toChar()) value = codePoint.toChar() @@ -141,7 +158,7 @@ class CharacterRangeRxGene( var t = 0 for(i in 0 until internalRanges.size){ val p = internalRanges[i] - if(value >= p.first && value <= p.second){ + if(value in p){ t = i break } @@ -149,18 +166,18 @@ class CharacterRangeRxGene( val delta = randomness.choose(listOf(1,-1)) - if(value + delta > internalRanges[t].second){ + if(value + delta > internalRanges[t].end){ /* going over current max range. check next range and take its minimum */ val next = (t+1) % internalRanges.size - value = internalRanges[next].first + value = internalRanges[next].start - } else if(value + delta < internalRanges[t].first){ + } else if(value + delta < internalRanges[t].start){ val previous = (t - 1 + internalRanges.size) % internalRanges.size - value = internalRanges[previous].second + value = internalRanges[previous].end } else { value += delta diff --git a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt index 84e40cc26f..812255cd1d 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt @@ -3,7 +3,6 @@ package org.evomaster.core.search.service import com.google.inject.Inject import org.evomaster.core.EMConfig import org.evomaster.core.search.gene.regex.CharacterRangeRxGene -import org.evomaster.core.utils.NumberCalculationUtil import org.evomaster.core.utils.NumberCalculationUtil.calculateIncrement import org.slf4j.Logger import org.slf4j.LoggerFactory @@ -39,49 +38,49 @@ class Randomness { updateSeed(configuration.seed) } - private val digitS = listOf('0' to '9') - private val asciiLetterS = listOf('a' to 'z', 'A' to 'Z') - private val wordS = listOf('_' to '_') + asciiLetterS + digitS - private 
val spaceS = stringToListOfCharPairs(" \t\r\n\u000C\u000b") - private val horizontalSpaceS = listOf(0x2000.toChar() to 0x200a.toChar()) + + private val digitSet = listOf('0' to '9') + private val asciiLetterSet = listOf('a' to 'z', 'A' to 'Z') + private val wordSet = listOf('_' to '_') + asciiLetterSet + digitSet + private val spaceSet = stringToListOfCharPairs(" \t\r\n\u000C\u000b") + private val horizontalSpaceSet = listOf(0x2000.toChar() to 0x200a.toChar()) + stringToListOfCharPairs(" \t\u00A0\u1680\u180e\u202f\u205f\u3000") - private val verticalSpaceS = stringToListOfCharPairs("\n\u000B\u000C\r\u0085\u2028\u2029") - private val punctuationS = stringToListOfCharPairs("""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~""") - - private val digitCC = CharacterRangeRxGene(false, digitS) - private val asciiLetterCC = CharacterRangeRxGene(false, asciiLetterS) - private val wordCC = CharacterRangeRxGene(false, wordS) - private val spaceCC = CharacterRangeRxGene(false, spaceS) - private val horizontalSpaceCC = CharacterRangeRxGene(false, horizontalSpaceS) - private val verticalSpaceCC = CharacterRangeRxGene(false, verticalSpaceS) - - private val nonDigitCC = CharacterRangeRxGene(true, digitS) - private val nonWordCC = CharacterRangeRxGene(true, wordS) - private val nonSpaceCC = CharacterRangeRxGene(true, spaceS) - private val nonHorizontalSpaceCC = CharacterRangeRxGene(true, horizontalSpaceS) - private val nonVerticalSpaceCC = CharacterRangeRxGene(true, verticalSpaceS) + private val verticalSpaceSet = stringToListOfCharPairs("\n\u000B\u000C\r\u0085\u2028\u2029") + private val punctuationSet = stringToListOfCharPairs("""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~""") + + private val digitCharClass = CharacterRangeRxGene(false, digitSet) + private val asciiLetterCharClass = CharacterRangeRxGene(false, asciiLetterSet) + private val wordCharClass = CharacterRangeRxGene(false, wordSet) + private val spaceCharClass = CharacterRangeRxGene(false, spaceSet) + private val horizontalSpaceCharClass = 
CharacterRangeRxGene(false, horizontalSpaceSet) + private val verticalSpaceCharClass = CharacterRangeRxGene(false, verticalSpaceSet) + + private val nonDigitCharClass = CharacterRangeRxGene(true, digitSet) + private val nonWordCharClass = CharacterRangeRxGene(true, wordSet) + private val nonSpaceCharClass = CharacterRangeRxGene(true, spaceSet) + private val nonHorizontalSpaceCharClass = CharacterRangeRxGene(true, horizontalSpaceSet) + private val nonVerticalSpaceCharClass = CharacterRangeRxGene(true, verticalSpaceSet) // US-ASCII POSIX character classes (\p{X}) - private val posixCharClassCC = mapOf( + private val posixCharClasses = mapOf( "Lower" to listOf('a' to 'z'), "Upper" to listOf('A' to 'Z'), "ASCII" to listOf(0.toChar() to 0x7f.toChar()), - "Alpha" to asciiLetterS, - "Digit" to digitS, - "Alnum" to digitS + asciiLetterS, - "Punct" to punctuationS, - "Graph" to digitS + asciiLetterS + punctuationS, - "Print" to digitS + asciiLetterS + punctuationS + stringToListOfCharPairs("\u0020"), + "Alpha" to asciiLetterSet, + "Digit" to digitSet, + "Alnum" to digitSet + asciiLetterSet, + "Punct" to punctuationSet, + "Graph" to digitSet + asciiLetterSet + punctuationSet, + "Print" to digitSet + asciiLetterSet + punctuationSet + stringToListOfCharPairs("\u0020"), "Blank" to stringToListOfCharPairs(" \t"), "Cntrl" to listOf(0.toChar() to 0x1f.toChar()) + stringToListOfCharPairs("\u007f"), "XDigit" to listOf('0' to '9', 'a' to 'f', 'A' to 'F'), - "Space" to spaceS + "Space" to spaceSet ).mapValues { (_, value) -> CharacterRangeRxGene(false, value) } - private val asciiLetterSet = "abcdefghilmnopqrstuvzjkwxyABCDEFGHILMNOPQRSTUVZJKWXY" - private val wordSet = "_0123456789$asciiLetterSet" + private val simpleAsciiLetterSet = "abcdefghilmnopqrstuvzjkwxyABCDEFGHILMNOPQRSTUVZJKWXY" + private val simpleWordSet = "_0123456789$simpleAsciiLetterSet" - private val wordChars = wordSet.map { it.toInt() }.sorted() + private val wordChars = simpleWordSet.map { it.toInt() }.sorted() /** * 
A negative value means the current CPU time clock is used instead @@ -297,7 +296,7 @@ class Randomness { } fun nextLetter(): Char { - val k = nextFromCharClass(asciiLetterCC) + val k = nextFromCharClass(asciiLetterCharClass) log.trace("nextLetter(): {}", k) return k } @@ -315,70 +314,70 @@ class Randomness { } fun nextWordChar(): Char { - val k = nextFromCharClass(wordCC) + val k = nextFromCharClass(wordCharClass) log.trace("nextWordChar(): {}", k) return k } fun nextNonWordChar() : Char { - val k = nextFromCharClass(nonWordCC) + val k = nextFromCharClass(nonWordCharClass) log.trace("nextNonWordChar(): {}", k) return k } fun nextDigitChar(): Char { - val k = nextFromCharClass(digitCC) + val k = nextFromCharClass(digitCharClass) log.trace("nextDigitChar(): {}", k) return k } fun nextNonDigitChar(): Char { - val k = nextFromCharClass(nonDigitCC) + val k = nextFromCharClass(nonDigitCharClass) log.trace("nextNonDigitChar(): {}", k) return k } fun nextSpaceChar(): Char { - val k = nextFromCharClass(spaceCC) + val k = nextFromCharClass(spaceCharClass) log.trace("nextSpaceChar(): {}", k) return k } fun nextNonSpaceChar(): Char { - val k = nextFromCharClass(nonSpaceCC) + val k = nextFromCharClass(nonSpaceCharClass) log.trace("nextNonSpaceChar(): {}", k) return k } fun nextVerticalSpaceChar(): Char { - val k = nextFromCharClass(verticalSpaceCC) + val k = nextFromCharClass(verticalSpaceCharClass) log.trace("nextVerticalSpaceChar(): {}", k) return k } fun nextNonVerticalSpaceChar(): Char { - val k = nextFromCharClass(nonVerticalSpaceCC) + val k = nextFromCharClass(nonVerticalSpaceCharClass) log.trace("nextNonVerticalSpaceChar(): {}", k) return k } fun nextHorizontalSpaceChar(): Char { - val k = nextFromCharClass(horizontalSpaceCC) + val k = nextFromCharClass(horizontalSpaceCharClass) log.trace("nextHorizontalSpaceChar(): {}", k) return k } fun nextNonHorizontalSpaceChar(): Char { - val k = nextFromCharClass(nonHorizontalSpaceCC) + val k = 
nextFromCharClass(nonHorizontalSpaceCharClass) log.trace("nextNonHorizontalSpaceChar(): {}", k) return k } fun nextPosixCharClassChar(type: String): Char { - if (type.substring(2,type.length-1) !in posixCharClassCC){ + if (type.substring(2,type.length-1) !in posixCharClasses){ throw IllegalArgumentException("$type invalid/unsupported POSIX character class") } - val k = nextFromCharClass(posixCharClassCC[type.substring(2,type.length-1)]!!) + val k = nextFromCharClass(posixCharClasses[type.substring(2,type.length-1)]!!) log.trace("nextPosixCharClassChar({}): {}", type, k) return k } From 9822f49a629e2670af4510b3b26b63ad9725a53b Mon Sep 17 00:00:00 2001 From: lmasroca Date: Wed, 18 Mar 2026 17:11:54 -0300 Subject: [PATCH 12/26] Requested changes pt.2 --- .../parser/GenePostgresSimilarToVisitor.kt | 24 +++++++++---------- .../core/parser/GeneRegexEcma262Visitor.kt | 24 +++++++++---------- .../core/parser/GeneRegexJavaVisitor.kt | 24 +++++++++---------- .../search/gene/regex/CharacterRangeRxGene.kt | 10 ++++---- .../core/search/service/Randomness.kt | 23 +++++++++--------- .../core/search/gene/GeneSamplerForTests.kt | 2 +- .../gene/RegexGeneStructureTest.kt | 4 ++-- 7 files changed, 56 insertions(+), 55 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GenePostgresSimilarToVisitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GenePostgresSimilarToVisitor.kt index 13c3150e4d..99e37d24d0 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GenePostgresSimilarToVisitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GenePostgresSimilarToVisitor.kt @@ -181,7 +181,7 @@ class GenePostgresSimilarToVisitor : PostgresSimilarToBaseVisitor() val negated = ctx.CARET() != null - val ranges = ctx.classRanges().accept(this).data as List> + val ranges = ctx.classRanges().accept(this).data as List val gene = CharacterRangeRxGene(negated, ranges) @@ -191,10 +191,10 @@ class GenePostgresSimilarToVisitor : PostgresSimilarToBaseVisitor() override 
fun visitClassRanges(ctx: PostgresSimilarToParser.ClassRangesContext): VisitResult { val res = VisitResult() - val list = mutableListOf>() + val list = mutableListOf() if(ctx.nonemptyClassRanges() != null){ - val ranges = ctx.nonemptyClassRanges().accept(this).data as List> + val ranges = ctx.nonemptyClassRanges().accept(this).data as List list.addAll(ranges) } @@ -205,7 +205,7 @@ class GenePostgresSimilarToVisitor : PostgresSimilarToBaseVisitor() override fun visitNonemptyClassRanges(ctx: PostgresSimilarToParser.NonemptyClassRangesContext): VisitResult { - val list = mutableListOf>() + val list = mutableListOf() val startText = ctx.classAtom()[0].text assert(startText.length == 1) // single chars @@ -218,15 +218,15 @@ class GenePostgresSimilarToVisitor : PostgresSimilarToBaseVisitor() start } - list.add(Pair(start, end)) + list.add(CharacterRange(start, end)) if(ctx.nonemptyClassRangesNoDash() != null){ - val ranges = ctx.nonemptyClassRangesNoDash().accept(this).data as List> + val ranges = ctx.nonemptyClassRangesNoDash().accept(this).data as List list.addAll(ranges) } if(ctx.classRanges() != null){ - val ranges = ctx.classRanges().accept(this).data as List> + val ranges = ctx.classRanges().accept(this).data as List list.addAll(ranges) } @@ -239,27 +239,27 @@ class GenePostgresSimilarToVisitor : PostgresSimilarToBaseVisitor() override fun visitNonemptyClassRangesNoDash(ctx: PostgresSimilarToParser.NonemptyClassRangesNoDashContext): VisitResult { - val list = mutableListOf>() + val list = mutableListOf() if(ctx.MINUS() != null){ val start = ctx.classAtomNoDash().text[0] val end = ctx.classAtom().text[0] - list.add(Pair(start, end)) + list.add(CharacterRange(start, end)) } else { val char = (ctx.classAtom() ?: ctx.classAtomNoDash()).text[0] - list.add(Pair(char, char)) + list.add(CharacterRange(char, char)) } if(ctx.nonemptyClassRangesNoDash() != null){ - val ranges = ctx.nonemptyClassRangesNoDash().accept(this).data as List> + val ranges = 
ctx.nonemptyClassRangesNoDash().accept(this).data as List list.addAll(ranges) } if(ctx.classRanges() != null){ - val ranges = ctx.classRanges().accept(this).data as List> + val ranges = ctx.classRanges().accept(this).data as List list.addAll(ranges) } diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexEcma262Visitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexEcma262Visitor.kt index 6000e5054a..ae53e44e41 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexEcma262Visitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexEcma262Visitor.kt @@ -238,7 +238,7 @@ class GeneRegexEcma262Visitor : RegexEcma262BaseVisitor(){ val negated = ctx.CARET() != null - val ranges = ctx.classRanges().accept(this).data as List> + val ranges = ctx.classRanges().accept(this).data as List val gene = CharacterRangeRxGene(negated, ranges) @@ -248,10 +248,10 @@ class GeneRegexEcma262Visitor : RegexEcma262BaseVisitor(){ override fun visitClassRanges(ctx: RegexEcma262Parser.ClassRangesContext): VisitResult { val res = VisitResult() - val list = mutableListOf>() + val list = mutableListOf() if(ctx.nonemptyClassRanges() != null){ - val ranges = ctx.nonemptyClassRanges().accept(this).data as List> + val ranges = ctx.nonemptyClassRanges().accept(this).data as List list.addAll(ranges) } @@ -262,7 +262,7 @@ class GeneRegexEcma262Visitor : RegexEcma262BaseVisitor(){ override fun visitNonemptyClassRanges(ctx: RegexEcma262Parser.NonemptyClassRangesContext): VisitResult { - val list = mutableListOf>() + val list = mutableListOf() val startText = ctx.classAtom()[0].text assert(startText.length == 1) // single chars @@ -275,15 +275,15 @@ class GeneRegexEcma262Visitor : RegexEcma262BaseVisitor(){ start } - list.add(Pair(start, end)) + list.add(CharacterRange(start, end)) if(ctx.nonemptyClassRangesNoDash() != null){ - val ranges = ctx.nonemptyClassRangesNoDash().accept(this).data as List> + val ranges = 
ctx.nonemptyClassRangesNoDash().accept(this).data as List list.addAll(ranges) } if(ctx.classRanges() != null){ - val ranges = ctx.classRanges().accept(this).data as List> + val ranges = ctx.classRanges().accept(this).data as List list.addAll(ranges) } @@ -296,27 +296,27 @@ class GeneRegexEcma262Visitor : RegexEcma262BaseVisitor(){ override fun visitNonemptyClassRangesNoDash(ctx: RegexEcma262Parser.NonemptyClassRangesNoDashContext): VisitResult { - val list = mutableListOf>() + val list = mutableListOf() if(ctx.MINUS() != null){ val start = ctx.classAtomNoDash().text[0] val end = ctx.classAtom().text[0] - list.add(Pair(start, end)) + list.add(CharacterRange(start, end)) } else { val char = (ctx.classAtom() ?: ctx.classAtomNoDash()).text[0] - list.add(Pair(char, char)) + list.add(CharacterRange(char, char)) } if(ctx.nonemptyClassRangesNoDash() != null){ - val ranges = ctx.nonemptyClassRangesNoDash().accept(this).data as List> + val ranges = ctx.nonemptyClassRangesNoDash().accept(this).data as List list.addAll(ranges) } if(ctx.classRanges() != null){ - val ranges = ctx.classRanges().accept(this).data as List> + val ranges = ctx.classRanges().accept(this).data as List list.addAll(ranges) } diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt index 6e33b07d72..5d811571f9 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt @@ -281,7 +281,7 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ val negated = ctx.CARET() != null - val ranges = ctx.classRanges().accept(this).data as List> + val ranges = ctx.classRanges().accept(this).data as List val gene = CharacterRangeRxGene(negated, ranges) @@ -291,10 +291,10 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ override fun visitClassRanges(ctx: RegexJavaParser.ClassRangesContext): VisitResult { val res = 
VisitResult() - val list = mutableListOf>() + val list = mutableListOf() if(ctx.nonemptyClassRanges() != null){ - val ranges = ctx.nonemptyClassRanges().accept(this).data as List> + val ranges = ctx.nonemptyClassRanges().accept(this).data as List list.addAll(ranges) } @@ -305,7 +305,7 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ override fun visitNonemptyClassRanges(ctx: RegexJavaParser.NonemptyClassRangesContext): VisitResult { - val list = mutableListOf>() + val list = mutableListOf() val startText = ctx.classAtom()[0].text assert(startText.length == 1 || startText.length==2) // single chars or \+ and \. escaped chars @@ -330,15 +330,15 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ end = start } - list.add(Pair(start, end)) + list.add(CharacterRange(start, end)) if(ctx.nonemptyClassRangesNoDash() != null){ - val ranges = ctx.nonemptyClassRangesNoDash().accept(this).data as List> + val ranges = ctx.nonemptyClassRangesNoDash().accept(this).data as List list.addAll(ranges) } if(ctx.classRanges() != null){ - val ranges = ctx.classRanges().accept(this).data as List> + val ranges = ctx.classRanges().accept(this).data as List list.addAll(ranges) } @@ -351,27 +351,27 @@ class GeneRegexJavaVisitor : RegexJavaBaseVisitor(){ override fun visitNonemptyClassRangesNoDash(ctx: RegexJavaParser.NonemptyClassRangesNoDashContext): VisitResult { - val list = mutableListOf>() + val list = mutableListOf() if(ctx.MINUS() != null){ val start = ctx.classAtomNoDash().text[0] val end = ctx.classAtom().text[0] - list.add(Pair(start, end)) + list.add(CharacterRange(start, end)) } else { val char = (ctx.classAtom() ?: ctx.classAtomNoDash()).text[0] - list.add(Pair(char, char)) + list.add(CharacterRange(char, char)) } if(ctx.nonemptyClassRangesNoDash() != null){ - val ranges = ctx.nonemptyClassRangesNoDash().accept(this).data as List> + val ranges = ctx.nonemptyClassRangesNoDash().accept(this).data as List list.addAll(ranges) } if(ctx.classRanges() != null){ - val ranges = 
ctx.classRanges().accept(this).data as List> + val ranges = ctx.classRanges().accept(this).data as List list.addAll(ranges) } diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt index fbcc597553..29fec75208 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt @@ -20,7 +20,7 @@ data class CharacterRange(val start: Char, val end: Char){ class CharacterRangeRxGene( val negated: Boolean, - val ranges: List> + val ranges: List ) : RxAtom, SimpleGene("."){ companion object{ @@ -39,8 +39,8 @@ class CharacterRangeRxGene( // use String or Int in every possible step as methods which return a single Char cannot return these characters if(negated) internalRanges.add(CharacterRange(Character.MIN_VALUE,Character.MAX_VALUE)) for (range in ranges) { - val max = maxOf(range.first, range.second) - val min = minOf(range.first, range.second) + val max = maxOf(range.start, range.end) + val min = minOf(range.start, range.end) if(negated){ remove(CharacterRange(min, max)) } else { @@ -49,8 +49,8 @@ class CharacterRangeRxGene( } ranges.forEach { - if(it.first.code > it.second.code){ - LoggingUtil.uniqueWarn(log, "Issue with Regex range, where '${it.first}' is greater than '${it.second}'") + if(it.start.code > it.end.code){ + LoggingUtil.uniqueWarn(log, "Issue with Regex range, where '${it.start}' is greater than '${it.end}'") } } } diff --git a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt index 812255cd1d..4e63fd6aff 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt @@ -3,6 +3,7 @@ package org.evomaster.core.search.service import 
com.google.inject.Inject import org.evomaster.core.EMConfig import org.evomaster.core.search.gene.regex.CharacterRangeRxGene +import org.evomaster.core.search.gene.regex.CharacterRange import org.evomaster.core.utils.NumberCalculationUtil.calculateIncrement import org.slf4j.Logger import org.slf4j.LoggerFactory @@ -15,8 +16,8 @@ class Randomness { companion object { private val log: Logger = LoggerFactory.getLogger(Randomness::class.java) - private fun stringToListOfCharPairs(s: String) : List> { - return s.map { it to it } + private fun stringToListOfCharPairs(s: String) : List { + return s.map { CharacterRange(it, it) } } } @@ -38,11 +39,11 @@ class Randomness { updateSeed(configuration.seed) } - private val digitSet = listOf('0' to '9') - private val asciiLetterSet = listOf('a' to 'z', 'A' to 'Z') - private val wordSet = listOf('_' to '_') + asciiLetterSet + digitSet + private val digitSet = listOf(CharacterRange('0', '9')) + private val asciiLetterSet = listOf(CharacterRange('a', 'z'), CharacterRange('A', 'Z')) + private val wordSet = listOf(CharacterRange('_', '_')) + asciiLetterSet + digitSet private val spaceSet = stringToListOfCharPairs(" \t\r\n\u000C\u000b") - private val horizontalSpaceSet = listOf(0x2000.toChar() to 0x200a.toChar()) + + private val horizontalSpaceSet = listOf(CharacterRange(0x2000.toChar(), 0x200a.toChar())) + stringToListOfCharPairs(" \t\u00A0\u1680\u180e\u202f\u205f\u3000") private val verticalSpaceSet = stringToListOfCharPairs("\n\u000B\u000C\r\u0085\u2028\u2029") private val punctuationSet = stringToListOfCharPairs("""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~""") @@ -62,9 +63,9 @@ class Randomness { // US-ASCII POSIX character classes (\p{X}) private val posixCharClasses = mapOf( - "Lower" to listOf('a' to 'z'), - "Upper" to listOf('A' to 'Z'), - "ASCII" to listOf(0.toChar() to 0x7f.toChar()), + "Lower" to listOf(CharacterRange('a', 'z')), + "Upper" to listOf(CharacterRange('A', 'Z')), + "ASCII" to listOf(CharacterRange(0.toChar(), 
0x7f.toChar())), "Alpha" to asciiLetterSet, "Digit" to digitSet, "Alnum" to digitSet + asciiLetterSet, @@ -72,8 +73,8 @@ class Randomness { "Graph" to digitSet + asciiLetterSet + punctuationSet, "Print" to digitSet + asciiLetterSet + punctuationSet + stringToListOfCharPairs("\u0020"), "Blank" to stringToListOfCharPairs(" \t"), - "Cntrl" to listOf(0.toChar() to 0x1f.toChar()) + stringToListOfCharPairs("\u007f"), - "XDigit" to listOf('0' to '9', 'a' to 'f', 'A' to 'F'), + "Cntrl" to listOf(CharacterRange(0.toChar(), 0x1f.toChar())) + stringToListOfCharPairs("\u007f"), + "XDigit" to listOf(CharacterRange('0', '9'), CharacterRange('a', 'f'), CharacterRange('A', 'F')), "Space" to spaceSet ).mapValues { (_, value) -> CharacterRangeRxGene(false, value) } diff --git a/core/src/test/kotlin/org/evomaster/core/search/gene/GeneSamplerForTests.kt b/core/src/test/kotlin/org/evomaster/core/search/gene/GeneSamplerForTests.kt index 8e9dc935e9..64ac5cec5d 100644 --- a/core/src/test/kotlin/org/evomaster/core/search/gene/GeneSamplerForTests.kt +++ b/core/src/test/kotlin/org/evomaster/core/search/gene/GeneSamplerForTests.kt @@ -461,7 +461,7 @@ object GeneSamplerForTests { fun sampleCharacterRangeRxGene(rand: Randomness): CharacterRangeRxGene { return CharacterRangeRxGene( negated = false, // TODO update once fixed - ranges = listOf(Pair('a', 'z')) + ranges = listOf(CharacterRange('a', 'z')) ) } diff --git a/core/src/test/kotlin/org/evomaster/core/search/structuralelement/gene/RegexGeneStructureTest.kt b/core/src/test/kotlin/org/evomaster/core/search/structuralelement/gene/RegexGeneStructureTest.kt index de82ab5e9b..81b0e869e8 100644 --- a/core/src/test/kotlin/org/evomaster/core/search/structuralelement/gene/RegexGeneStructureTest.kt +++ b/core/src/test/kotlin/org/evomaster/core/search/structuralelement/gene/RegexGeneStructureTest.kt @@ -51,7 +51,7 @@ class CharacterClassEscapeRxGeneStructureTest : GeneStructuralElementBaseTest() } class CharacterRangeRxGeneStructureTest : 
GeneStructuralElementBaseTest() { - override fun getCopyFromTemplate(): Gene = CharacterRangeRxGene(false, listOf(Pair('0','9'))).apply { value='2'} + override fun getCopyFromTemplate(): Gene = CharacterRangeRxGene(false, listOf(CharacterRange('0','9'))).apply { value='2'} override fun assertCopyFrom(base: Gene) { assertTrue(base is CharacterRangeRxGene) @@ -59,7 +59,7 @@ class CharacterRangeRxGeneStructureTest : GeneStructuralElementBaseTest() { assertEquals('2', (base as CharacterRangeRxGene).value) } - override fun getStructuralElement(): CharacterRangeRxGene = CharacterRangeRxGene(false, listOf(Pair('0','z'))).apply { value= 'w' } + override fun getStructuralElement(): CharacterRangeRxGene = CharacterRangeRxGene(false, listOf(CharacterRange('0','z'))).apply { value= 'w' } override fun getExpectedChildrenSize(): Int = 0 } From 719c1481e7416f07ea666a598dc56276e7ed6443 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Thu, 19 Mar 2026 11:32:40 -0300 Subject: [PATCH 13/26] Solved TODO related to negated character classes in GeneSamplerForTests.kt --- .../org/evomaster/core/search/gene/GeneSamplerForTests.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/test/kotlin/org/evomaster/core/search/gene/GeneSamplerForTests.kt b/core/src/test/kotlin/org/evomaster/core/search/gene/GeneSamplerForTests.kt index 64ac5cec5d..84745a1e45 100644 --- a/core/src/test/kotlin/org/evomaster/core/search/gene/GeneSamplerForTests.kt +++ b/core/src/test/kotlin/org/evomaster/core/search/gene/GeneSamplerForTests.kt @@ -460,7 +460,7 @@ object GeneSamplerForTests { fun sampleCharacterRangeRxGene(rand: Randomness): CharacterRangeRxGene { return CharacterRangeRxGene( - negated = false, // TODO update once fixed + negated = rand.nextBoolean(), ranges = listOf(CharacterRange('a', 'z')) ) } From fc407bf5716c2b2395d5b5900e7747b30ec0c9a7 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Fri, 20 Mar 2026 15:48:53 -0300 Subject: [PATCH 14/26] Increasing iterations for e2e test --- 
.../rest/redis/lettuce/findhash/RedisLettuceFindHashEMTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core-tests/e2e-tests/spring/spring-rest-redis/src/test/java/org/evomaster/e2etests/spring/rest/redis/lettuce/findhash/RedisLettuceFindHashEMTest.java b/core-tests/e2e-tests/spring/spring-rest-redis/src/test/java/org/evomaster/e2etests/spring/rest/redis/lettuce/findhash/RedisLettuceFindHashEMTest.java index 0d8f6ebe18..95dfa3be9c 100644 --- a/core-tests/e2e-tests/spring/spring-rest-redis/src/test/java/org/evomaster/e2etests/spring/rest/redis/lettuce/findhash/RedisLettuceFindHashEMTest.java +++ b/core-tests/e2e-tests/spring/spring-rest-redis/src/test/java/org/evomaster/e2etests/spring/rest/redis/lettuce/findhash/RedisLettuceFindHashEMTest.java @@ -27,7 +27,7 @@ public void testFindHashEM() throws Throwable { runTestHandlingFlakyAndCompilation( "RedisLettuceFindHashEM", "org.foo.spring.rest.redis.RedisLettuceFindHashEM", - 1000, + 2000, true, (args) -> { setOption(args, "heuristicsForRedis", "true"); From abefd645b1a55190b15fd12f5a1e7301a992ff74 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Thu, 26 Mar 2026 14:12:17 -0300 Subject: [PATCH 15/26] Requested changes --- .../org/evomaster/core/parser/RegexJava.g4 | 3 ++ .../search/gene/regex/CharacterRangeRxGene.kt | 43 ++++++++++++------- .../core/search/service/Randomness.kt | 17 ++++---- 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 b/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 index e885746dec..b71fb3123a 100644 --- a/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 +++ b/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 @@ -129,6 +129,8 @@ fragment CharacterEscape //| IdentityEscape ; +// basic US-ASCII only predefined POSIX character classes +// 
https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html#:~:text=character%3A%20%5B%5E%5Cw%5D-,POSIX,-character%20classes%20(US fragment PosixCharacterClassLabel : 'Lower' | 'Upper' @@ -251,6 +253,7 @@ AtomEscape fragment CharacterClassEscape //one of d D s S w W v V h H + // v, V, h and H are java8 exclusive, they represent vertical spaces and horizaontal spaces respectively : [dDsSwWvVhH] ; diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt index 29fec75208..a6ba5421ed 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt @@ -12,10 +12,21 @@ import org.evomaster.core.search.service.mutator.genemutation.AdditionalGeneMuta import org.evomaster.core.search.service.mutator.genemutation.SubsetGeneMutationSelectionStrategy import org.slf4j.LoggerFactory -data class CharacterRange(val start: Char, val end: Char){ +class CharacterRange(val start: Char, val end: Char){ + companion object { + operator fun invoke(a: Char, b: Char): CharacterRange = + if (a <= b) CharacterRange(a, b) else CharacterRange(b, a) + + operator fun invoke(a: Int, b: Int): CharacterRange = + invoke(a.toChar(), b.toChar()) + } + val size: Int get() = end.code - start.code + 1 operator fun contains(char: Char): Boolean = char in start..end + + operator fun component1(): Char = start + operator fun component2(): Char = end } class CharacterRangeRxGene( @@ -27,6 +38,9 @@ class CharacterRangeRxGene( private val log = LoggerFactory.getLogger(CharacterRangeRxGene::class.java) } + /** + * this represents the valid ranges for a character class, removing overlaps and applying negation + */ private var internalRanges = mutableListOf() init { @@ -37,21 +51,20 @@ class CharacterRangeRxGene( // this limits the character class complements to 
0xffff instead of allowing up to 0x10ffff, but values over // 0xffff are not permitted on Char as they need 2 Chars to be represented; to allow this, we would need to // use String or Int in every possible step as methods which return a single Char cannot return these characters - if(negated) internalRanges.add(CharacterRange(Character.MIN_VALUE,Character.MAX_VALUE)) + if(negated) { + internalRanges.add(CharacterRange(Character.MIN_VALUE, Character.MAX_VALUE)) + } for (range in ranges) { - val max = maxOf(range.start, range.end) - val min = minOf(range.start, range.end) if(negated){ - remove(CharacterRange(min, max)) + remove(CharacterRange(range.start, range.end)) } else { - add(CharacterRange(min, max)) + add(CharacterRange(range.start, range.end)) } } - ranges.forEach { - if(it.start.code > it.end.code){ - LoggingUtil.uniqueWarn(log, "Issue with Regex range, where '${it.start}' is greater than '${it.end}'") - } + // this could happen for example if we got a character class like [^\u0000-\uffff] + if(internalRanges.isEmpty()){ + throw IllegalArgumentException("No defined ranges") } } @@ -117,7 +130,7 @@ class CharacterRangeRxGene( } override fun isMutable(): Boolean { - return internalRanges.size > 1 || internalRanges[0].let { it.start != it.end } + return internalRanges.size > 1 || internalRanges[0].size > 1 } override fun copyContent(): Gene { @@ -128,12 +141,10 @@ class CharacterRangeRxGene( } override fun setValueWithRawString(value: String) { + // need to check val c = value.toCharArray().firstOrNull() - if (c!= null){ - val prev = this.value + if (c!= null) this.value = c - if (!isLocallyValid()) this.value = prev - } } override fun randomize(randomness: Randomness, tryToForceNewValue: Boolean) { @@ -151,7 +162,7 @@ class CharacterRangeRxGene( } currentRangeMinValue = currentRangeMaxValue } - throw IllegalArgumentException("No defined ranges") + assert(false) // internalRanges being empty should never happen } override fun shallowMutate(randomness: 
Randomness, apc: AdaptiveParameterControl, mwc: MutationWeightControl, selectionStrategy: SubsetGeneMutationSelectionStrategy, enableAdaptiveGeneMutation: Boolean, additionalGeneMutationInfo: AdditionalGeneMutationInfo?): Boolean { diff --git a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt index 4e63fd6aff..e707036017 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt @@ -16,7 +16,7 @@ class Randomness { companion object { private val log: Logger = LoggerFactory.getLogger(Randomness::class.java) - private fun stringToListOfCharPairs(s: String) : List { + private fun stringToListOfCharacterRanges(s: String) : List { return s.map { CharacterRange(it, it) } } } @@ -42,11 +42,12 @@ class Randomness { private val digitSet = listOf(CharacterRange('0', '9')) private val asciiLetterSet = listOf(CharacterRange('a', 'z'), CharacterRange('A', 'Z')) private val wordSet = listOf(CharacterRange('_', '_')) + asciiLetterSet + digitSet - private val spaceSet = stringToListOfCharPairs(" \t\r\n\u000C\u000b") + private val spaceSet = stringToListOfCharacterRanges(" \t\r\n\u000C\u000b") // u000b, u000c being line + // tabulation (VT) & form feed (FF, \f) respectively private val horizontalSpaceSet = listOf(CharacterRange(0x2000.toChar(), 0x200a.toChar())) + - stringToListOfCharPairs(" \t\u00A0\u1680\u180e\u202f\u205f\u3000") - private val verticalSpaceSet = stringToListOfCharPairs("\n\u000B\u000C\r\u0085\u2028\u2029") - private val punctuationSet = stringToListOfCharPairs("""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~""") + stringToListOfCharacterRanges(" \t\u00A0\u1680\u180e\u202f\u205f\u3000") + private val verticalSpaceSet = stringToListOfCharacterRanges("\n\u000B\u000C\r\u0085\u2028\u2029") + private val punctuationSet = stringToListOfCharacterRanges("""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~""") 
private val digitCharClass = CharacterRangeRxGene(false, digitSet) private val asciiLetterCharClass = CharacterRangeRxGene(false, asciiLetterSet) @@ -71,9 +72,9 @@ class Randomness { "Alnum" to digitSet + asciiLetterSet, "Punct" to punctuationSet, "Graph" to digitSet + asciiLetterSet + punctuationSet, - "Print" to digitSet + asciiLetterSet + punctuationSet + stringToListOfCharPairs("\u0020"), - "Blank" to stringToListOfCharPairs(" \t"), - "Cntrl" to listOf(CharacterRange(0.toChar(), 0x1f.toChar())) + stringToListOfCharPairs("\u007f"), + "Print" to digitSet + asciiLetterSet + punctuationSet + stringToListOfCharacterRanges("\u0020"), + "Blank" to stringToListOfCharacterRanges(" \t"), + "Cntrl" to listOf(CharacterRange(0.toChar(), 0x1f.toChar())) + stringToListOfCharacterRanges("\u007f"), "XDigit" to listOf(CharacterRange('0', '9'), CharacterRange('a', 'f'), CharacterRange('A', 'F')), "Space" to spaceSet ).mapValues { (_, value) -> CharacterRangeRxGene(false, value) } From f848529f585974f7c85f87ed76ac924d39d6bd9a Mon Sep 17 00:00:00 2001 From: lmasroca Date: Fri, 27 Mar 2026 12:28:31 -0300 Subject: [PATCH 16/26] Moved CharacterRange class to src/main/kotlin/org/evomaster/core/utils --- .../parser/GenePostgresSimilarToVisitor.kt | 1 + .../core/parser/GeneRegexEcma262Visitor.kt | 1 + .../core/parser/GeneRegexJavaVisitor.kt | 1 + .../search/gene/regex/CharacterRangeRxGene.kt | 18 +----------------- .../core/search/service/Randomness.kt | 8 ++++---- .../org/evomaster/core/utils/CharacterRange.kt | 18 ++++++++++++++++++ .../core/search/gene/GeneSamplerForTests.kt | 1 + .../gene/RegexGeneStructureTest.kt | 10 ++++++++-- 8 files changed, 35 insertions(+), 23 deletions(-) create mode 100644 core/src/main/kotlin/org/evomaster/core/utils/CharacterRange.kt diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GenePostgresSimilarToVisitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GenePostgresSimilarToVisitor.kt index 99e37d24d0..b9239c4fca 100644 --- 
a/core/src/main/kotlin/org/evomaster/core/parser/GenePostgresSimilarToVisitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GenePostgresSimilarToVisitor.kt @@ -2,6 +2,7 @@ package org.evomaster.core.parser import org.evomaster.core.search.gene.Gene import org.evomaster.core.search.gene.regex.* +import org.evomaster.core.utils.CharacterRange /** * Created by arcuri82 on 12-Jun-19. diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexEcma262Visitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexEcma262Visitor.kt index ae53e44e41..3729b55c39 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexEcma262Visitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexEcma262Visitor.kt @@ -1,6 +1,7 @@ package org.evomaster.core.parser import org.evomaster.core.search.gene.regex.* +import org.evomaster.core.utils.CharacterRange private const val EOF_TOKEN = "" /** diff --git a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt index 5d811571f9..04dc9d48fe 100644 --- a/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt +++ b/core/src/main/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitor.kt @@ -1,6 +1,7 @@ package org.evomaster.core.parser import org.evomaster.core.search.gene.regex.* +import org.evomaster.core.utils.CharacterRange private const val EOF_TOKEN = "" /** diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt index a6ba5421ed..8de8f419e7 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt @@ -10,25 +10,9 @@ import org.evomaster.core.search.service.Randomness import org.evomaster.core.search.service.mutator.MutationWeightControl 
import org.evomaster.core.search.service.mutator.genemutation.AdditionalGeneMutationInfo import org.evomaster.core.search.service.mutator.genemutation.SubsetGeneMutationSelectionStrategy +import org.evomaster.core.utils.CharacterRange import org.slf4j.LoggerFactory -class CharacterRange(val start: Char, val end: Char){ - companion object { - operator fun invoke(a: Char, b: Char): CharacterRange = - if (a <= b) CharacterRange(a, b) else CharacterRange(b, a) - - operator fun invoke(a: Int, b: Int): CharacterRange = - invoke(a.toChar(), b.toChar()) - } - - val size: Int - get() = end.code - start.code + 1 - operator fun contains(char: Char): Boolean = char in start..end - - operator fun component1(): Char = start - operator fun component2(): Char = end -} - class CharacterRangeRxGene( val negated: Boolean, val ranges: List diff --git a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt index e707036017..933708e147 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt @@ -3,7 +3,7 @@ package org.evomaster.core.search.service import com.google.inject.Inject import org.evomaster.core.EMConfig import org.evomaster.core.search.gene.regex.CharacterRangeRxGene -import org.evomaster.core.search.gene.regex.CharacterRange +import org.evomaster.core.utils.CharacterRange import org.evomaster.core.utils.NumberCalculationUtil.calculateIncrement import org.slf4j.Logger import org.slf4j.LoggerFactory @@ -44,7 +44,7 @@ class Randomness { private val wordSet = listOf(CharacterRange('_', '_')) + asciiLetterSet + digitSet private val spaceSet = stringToListOfCharacterRanges(" \t\r\n\u000C\u000b") // u000b, u000c being line // tabulation (VT) & form feed (FF, \f) respectively - private val horizontalSpaceSet = listOf(CharacterRange(0x2000.toChar(), 0x200a.toChar())) + + private val horizontalSpaceSet 
= listOf(CharacterRange(0x2000, 0x200a)) + stringToListOfCharacterRanges(" \t\u00A0\u1680\u180e\u202f\u205f\u3000") private val verticalSpaceSet = stringToListOfCharacterRanges("\n\u000B\u000C\r\u0085\u2028\u2029") private val punctuationSet = stringToListOfCharacterRanges("""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~""") @@ -66,7 +66,7 @@ class Randomness { private val posixCharClasses = mapOf( "Lower" to listOf(CharacterRange('a', 'z')), "Upper" to listOf(CharacterRange('A', 'Z')), - "ASCII" to listOf(CharacterRange(0.toChar(), 0x7f.toChar())), + "ASCII" to listOf(CharacterRange(0, 0x7f)), "Alpha" to asciiLetterSet, "Digit" to digitSet, "Alnum" to digitSet + asciiLetterSet, @@ -74,7 +74,7 @@ class Randomness { "Graph" to digitSet + asciiLetterSet + punctuationSet, "Print" to digitSet + asciiLetterSet + punctuationSet + stringToListOfCharacterRanges("\u0020"), "Blank" to stringToListOfCharacterRanges(" \t"), - "Cntrl" to listOf(CharacterRange(0.toChar(), 0x1f.toChar())) + stringToListOfCharacterRanges("\u007f"), + "Cntrl" to listOf(CharacterRange(0, 0x1f)) + stringToListOfCharacterRanges("\u007f"), "XDigit" to listOf(CharacterRange('0', '9'), CharacterRange('a', 'f'), CharacterRange('A', 'F')), "Space" to spaceSet ).mapValues { (_, value) -> CharacterRangeRxGene(false, value) } diff --git a/core/src/main/kotlin/org/evomaster/core/utils/CharacterRange.kt b/core/src/main/kotlin/org/evomaster/core/utils/CharacterRange.kt new file mode 100644 index 0000000000..41f75519cb --- /dev/null +++ b/core/src/main/kotlin/org/evomaster/core/utils/CharacterRange.kt @@ -0,0 +1,18 @@ +package org.evomaster.core.utils + +class CharacterRange private constructor(val start: Char, val end: Char){ + companion object { + operator fun invoke(a: Char, b: Char): CharacterRange = + if (a <= b) CharacterRange(a, b) else CharacterRange(b, a) + + operator fun invoke(a: Int, b: Int): CharacterRange = + invoke(a.toChar(), b.toChar()) + } + + val size: Int + get() = end.code - start.code + 1 + operator fun 
contains(char: Char): Boolean = char in start..end + + operator fun component1(): Char = start + operator fun component2(): Char = end +} \ No newline at end of file diff --git a/core/src/test/kotlin/org/evomaster/core/search/gene/GeneSamplerForTests.kt b/core/src/test/kotlin/org/evomaster/core/search/gene/GeneSamplerForTests.kt index f424cf1fdd..9b253caea2 100644 --- a/core/src/test/kotlin/org/evomaster/core/search/gene/GeneSamplerForTests.kt +++ b/core/src/test/kotlin/org/evomaster/core/search/gene/GeneSamplerForTests.kt @@ -28,6 +28,7 @@ import org.evomaster.core.search.gene.uri.UriGene import org.evomaster.core.search.gene.uri.UrlHttpGene import org.evomaster.core.search.gene.utils.NumberMutatorUtils import org.evomaster.core.search.service.Randomness +import org.evomaster.core.utils.CharacterRange import java.io.File import java.math.BigDecimal import java.math.BigInteger diff --git a/core/src/test/kotlin/org/evomaster/core/search/structuralelement/gene/RegexGeneStructureTest.kt b/core/src/test/kotlin/org/evomaster/core/search/structuralelement/gene/RegexGeneStructureTest.kt index 81b0e869e8..78b3a2aee1 100644 --- a/core/src/test/kotlin/org/evomaster/core/search/structuralelement/gene/RegexGeneStructureTest.kt +++ b/core/src/test/kotlin/org/evomaster/core/search/structuralelement/gene/RegexGeneStructureTest.kt @@ -3,6 +3,7 @@ package org.evomaster.core.search.structuralelement.gene import org.evomaster.core.parser.RegexHandler import org.evomaster.core.search.gene.Gene import org.evomaster.core.search.gene.regex.* +import org.evomaster.core.utils.CharacterRange import org.junit.jupiter.api.Assertions.* import org.junit.jupiter.api.Test @@ -51,7 +52,7 @@ class CharacterClassEscapeRxGeneStructureTest : GeneStructuralElementBaseTest() } class CharacterRangeRxGeneStructureTest : GeneStructuralElementBaseTest() { - override fun getCopyFromTemplate(): Gene = CharacterRangeRxGene(false, listOf(CharacterRange('0','9'))).apply { value='2'} + override fun 
getCopyFromTemplate(): Gene = CharacterRangeRxGene(false, listOf(CharacterRange('0', '9'))).apply { value='2'} override fun assertCopyFrom(base: Gene) { assertTrue(base is CharacterRangeRxGene) @@ -59,7 +60,12 @@ class CharacterRangeRxGeneStructureTest : GeneStructuralElementBaseTest() { assertEquals('2', (base as CharacterRangeRxGene).value) } - override fun getStructuralElement(): CharacterRangeRxGene = CharacterRangeRxGene(false, listOf(CharacterRange('0','z'))).apply { value= 'w' } + override fun getStructuralElement(): CharacterRangeRxGene = CharacterRangeRxGene(false, listOf( + CharacterRange( + '0', + 'z' + ) + )).apply { value= 'w' } override fun getExpectedChildrenSize(): Int = 0 } From 0c881c0d94e63d6192a67cef3cdd2f349fb9c05f Mon Sep 17 00:00:00 2001 From: lmasroca Date: Mon, 30 Mar 2026 00:45:32 -0300 Subject: [PATCH 17/26] Moved predefined character class definitions from Randomness.kt to CharacterClassEscapeRxGene.kt, restore old Randomness.kt --- .../gene/regex/CharacterClassEscapeRxGene.kt | 73 +++++++--- .../core/search/service/Randomness.kt | 125 ++++-------------- 2 files changed, 83 insertions(+), 115 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt index 714293936c..c05fc7322d 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt @@ -10,7 +10,9 @@ import org.evomaster.core.search.service.Randomness import org.evomaster.core.search.service.mutator.MutationWeightControl import org.evomaster.core.search.service.mutator.genemutation.AdditionalGeneMutationInfo import org.evomaster.core.search.service.mutator.genemutation.SubsetGeneMutationSelectionStrategy +import org.evomaster.core.utils.CharacterRange import org.slf4j.LoggerFactory +import 
kotlin.collections.contains /* \w Find a word character @@ -27,14 +29,65 @@ class CharacterClassEscapeRxGene( companion object{ private val log = LoggerFactory.getLogger(CharacterRangeRxGene::class.java) + + private fun stringToListOfCharacterRanges(s: String) : List { + return s.map { CharacterRange(it, it) } + } + + private val digitSet = listOf(CharacterRange('0', '9')) + private val asciiLetterSet = listOf(CharacterRange('a', 'z'), CharacterRange('A', 'Z')) + private val wordSet = listOf(CharacterRange('_', '_')) + asciiLetterSet + digitSet + private val spaceSet = stringToListOfCharacterRanges(" \t\r\n\u000C\u000b") // u000b, u000c being line + // tabulation (VT) & form feed (FF, \f) respectively + private val horizontalSpaceSet = listOf(CharacterRange(0x2000, 0x200a)) + + stringToListOfCharacterRanges(" \t\u00A0\u1680\u180e\u202f\u205f\u3000") + private val verticalSpaceSet = stringToListOfCharacterRanges("\n\u000B\u000C\r\u0085\u2028\u2029") + private val punctuationSet = stringToListOfCharacterRanges("""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~""") + + // US-ASCII POSIX character classes (\p{X}) + private val posixSets = mapOf( + "Lower" to listOf(CharacterRange('a', 'z')), + "Upper" to listOf(CharacterRange('A', 'Z')), + "ASCII" to listOf(CharacterRange(0, 0x7f)), + "Alpha" to asciiLetterSet, + "Digit" to digitSet, + "Alnum" to digitSet + asciiLetterSet, + "Punct" to punctuationSet, + "Graph" to digitSet + asciiLetterSet + punctuationSet, + "Print" to digitSet + asciiLetterSet + punctuationSet + stringToListOfCharacterRanges("\u0020"), + "Blank" to stringToListOfCharacterRanges(" \t"), + "Cntrl" to listOf(CharacterRange(0, 0x1f)) + stringToListOfCharacterRanges("\u007f"), + "XDigit" to listOf(CharacterRange('0', '9'), CharacterRange('a', 'f'), CharacterRange('A', 'F')), + "Space" to spaceSet + ) } var value: String = "" + private var charClass: CharacterRangeRxGene init { - if (!listOf("w", "W", "d", "D", "s", "S", "v", "V", "h", "H").contains(type) && 'p' != 
type[0]) { + if (type[0] !in "wWdDsSvVhHp") { throw IllegalArgumentException("Invalid type: $type") } + + val charSet = when(type[0]){ + 'w', 'W' -> wordSet + 'd', 'D' -> digitSet + 's', 'S' -> spaceSet + 'v', 'V' -> verticalSpaceSet + 'h', 'H' -> horizontalSpaceSet + 'p' -> + if (type.substring(2, type.length - 1) !in posixSets){ + throw IllegalArgumentException("$type invalid/unsupported POSIX character class") + } else { + posixSets[type.substring(2, type.length - 1)]!! + } + else -> //this should never happen due to check in init + throw IllegalStateException("Type '\\$type' not supported yet") + } + + val negated = type[0].isUpperCase() + charClass = CharacterRangeRxGene(negated, charSet) } override fun checkForLocallyValidIgnoringChildren() : Boolean{ @@ -56,22 +109,8 @@ class CharacterClassEscapeRxGene( val previous = value - value = when(type[0]){ - 'd' -> randomness.nextDigitChar() - 'D' -> randomness.nextNonDigitChar() - 'w' -> randomness.nextWordChar() - 'W' -> randomness.nextNonWordChar() - 's' -> randomness.nextSpaceChar() - 'S' -> randomness.nextNonSpaceChar() - 'v' -> randomness.nextVerticalSpaceChar() - 'V' -> randomness.nextNonVerticalSpaceChar() - 'h' -> randomness.nextHorizontalSpaceChar() - 'H' -> randomness.nextNonHorizontalSpaceChar() - 'p' -> randomness.nextPosixCharClassChar(type) - else -> - //this should never happen due to check in init - throw IllegalStateException("Type '\\$type' not supported yet") - }.toString() + charClass.randomize(randomness, tryToForceNewValue) + value = charClass.value.toString() if(tryToForceNewValue && previous == value){ randomize(randomness, tryToForceNewValue) diff --git a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt index 933708e147..00d7cdd3f9 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/service/Randomness.kt @@ -2,8 +2,7 @@ 
package org.evomaster.core.search.service import com.google.inject.Inject import org.evomaster.core.EMConfig -import org.evomaster.core.search.gene.regex.CharacterRangeRxGene -import org.evomaster.core.utils.CharacterRange +import org.evomaster.core.utils.NumberCalculationUtil import org.evomaster.core.utils.NumberCalculationUtil.calculateIncrement import org.slf4j.Logger import org.slf4j.LoggerFactory @@ -15,10 +14,6 @@ class Randomness { companion object { private val log: Logger = LoggerFactory.getLogger(Randomness::class.java) - - private fun stringToListOfCharacterRanges(s: String) : List { - return s.map { CharacterRange(it, it) } - } } @Inject @@ -39,50 +34,21 @@ class Randomness { updateSeed(configuration.seed) } - private val digitSet = listOf(CharacterRange('0', '9')) - private val asciiLetterSet = listOf(CharacterRange('a', 'z'), CharacterRange('A', 'Z')) - private val wordSet = listOf(CharacterRange('_', '_')) + asciiLetterSet + digitSet - private val spaceSet = stringToListOfCharacterRanges(" \t\r\n\u000C\u000b") // u000b, u000c being line - // tabulation (VT) & form feed (FF, \f) respectively - private val horizontalSpaceSet = listOf(CharacterRange(0x2000, 0x200a)) + - stringToListOfCharacterRanges(" \t\u00A0\u1680\u180e\u202f\u205f\u3000") - private val verticalSpaceSet = stringToListOfCharacterRanges("\n\u000B\u000C\r\u0085\u2028\u2029") - private val punctuationSet = stringToListOfCharacterRanges("""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~""") - - private val digitCharClass = CharacterRangeRxGene(false, digitSet) - private val asciiLetterCharClass = CharacterRangeRxGene(false, asciiLetterSet) - private val wordCharClass = CharacterRangeRxGene(false, wordSet) - private val spaceCharClass = CharacterRangeRxGene(false, spaceSet) - private val horizontalSpaceCharClass = CharacterRangeRxGene(false, horizontalSpaceSet) - private val verticalSpaceCharClass = CharacterRangeRxGene(false, verticalSpaceSet) - - private val nonDigitCharClass = 
CharacterRangeRxGene(true, digitSet) - private val nonWordCharClass = CharacterRangeRxGene(true, wordSet) - private val nonSpaceCharClass = CharacterRangeRxGene(true, spaceSet) - private val nonHorizontalSpaceCharClass = CharacterRangeRxGene(true, horizontalSpaceSet) - private val nonVerticalSpaceCharClass = CharacterRangeRxGene(true, verticalSpaceSet) - - // US-ASCII POSIX character classes (\p{X}) - private val posixCharClasses = mapOf( - "Lower" to listOf(CharacterRange('a', 'z')), - "Upper" to listOf(CharacterRange('A', 'Z')), - "ASCII" to listOf(CharacterRange(0, 0x7f)), - "Alpha" to asciiLetterSet, - "Digit" to digitSet, - "Alnum" to digitSet + asciiLetterSet, - "Punct" to punctuationSet, - "Graph" to digitSet + asciiLetterSet + punctuationSet, - "Print" to digitSet + asciiLetterSet + punctuationSet + stringToListOfCharacterRanges("\u0020"), - "Blank" to stringToListOfCharacterRanges(" \t"), - "Cntrl" to listOf(CharacterRange(0, 0x1f)) + stringToListOfCharacterRanges("\u007f"), - "XDigit" to listOf(CharacterRange('0', '9'), CharacterRange('a', 'f'), CharacterRange('A', 'F')), - "Space" to spaceSet - ).mapValues { (_, value) -> CharacterRangeRxGene(false, value) } - - private val simpleAsciiLetterSet = "abcdefghilmnopqrstuvzjkwxyABCDEFGHILMNOPQRSTUVZJKWXY" - private val simpleWordSet = "_0123456789$simpleAsciiLetterSet" - - private val wordChars = simpleWordSet.map { it.toInt() }.sorted() + private val digitSet = "0123456789" + private val asciiLetterSet = "abcdefghilmnopqrstuvzjkwxyABCDEFGHILMNOPQRSTUVZJKWXY" + private val norwegianLetterSet = "æøåÆØÅ" + + private val wordSet = "_$digitSet$asciiLetterSet" + private val spaceSet = " \t\r\n" + private val punctuationSet = "!@#$%^&*()[]{}<>:;|" + + private val allSet = "$wordSet$spaceSet$norwegianLetterSet$punctuationSet" + + private val nonWordSet = allSet.replace(wordSet,"") + private val nonDigitSet = allSet.replace(digitSet, "") + private val nonSpaceSet = allSet.replace(spaceSet, "") + + private val 
wordChars = wordSet.map { it.toInt() }.sorted() /** * A negative value means the current CPU time clock is used instead @@ -298,7 +264,10 @@ class Randomness { } fun nextLetter(): Char { - val k = nextFromCharClass(asciiLetterCharClass) + + val characters = asciiLetterSet + + val k = characters[random.nextInt(characters.length)] log.trace("nextLetter(): {}", k) return k } @@ -307,82 +276,42 @@ class Randomness { return set[random.nextInt(set.length)] } - fun nextFromCharClass(cc: CharacterRangeRxGene) : Char{ - cc.randomize(this, false) - val k = cc.value - // is it necessary to log this? - log.trace("nextFromCharClass(): {}", k) - return k - } - fun nextWordChar(): Char { - val k = nextFromCharClass(wordCharClass) + val k = nextFromStringSet(wordSet) log.trace("nextWordChar(): {}", k) return k } - fun nextNonWordChar() : Char { - val k = nextFromCharClass(nonWordCharClass) + fun nextNonWordChar() : Char{ + val k = nextFromStringSet(nonWordSet) log.trace("nextNonWordChar(): {}", k) return k } fun nextDigitChar(): Char { - val k = nextFromCharClass(digitCharClass) + val k = nextFromStringSet(digitSet) log.trace("nextDigitChar(): {}", k) return k } fun nextNonDigitChar(): Char { - val k = nextFromCharClass(nonDigitCharClass) + val k = nextFromStringSet(nonDigitSet) log.trace("nextNonDigitChar(): {}", k) return k } fun nextSpaceChar(): Char { - val k = nextFromCharClass(spaceCharClass) + val k = nextFromStringSet(spaceSet) log.trace("nextSpaceChar(): {}", k) return k } fun nextNonSpaceChar(): Char { - val k = nextFromCharClass(nonSpaceCharClass) + val k = nextFromStringSet(nonSpaceSet) log.trace("nextNonSpaceChar(): {}", k) return k } - fun nextVerticalSpaceChar(): Char { - val k = nextFromCharClass(verticalSpaceCharClass) - log.trace("nextVerticalSpaceChar(): {}", k) - return k - } - - fun nextNonVerticalSpaceChar(): Char { - val k = nextFromCharClass(nonVerticalSpaceCharClass) - log.trace("nextNonVerticalSpaceChar(): {}", k) - return k - } - - fun 
nextHorizontalSpaceChar(): Char { - val k = nextFromCharClass(horizontalSpaceCharClass) - log.trace("nextHorizontalSpaceChar(): {}", k) - return k - } - - fun nextNonHorizontalSpaceChar(): Char { - val k = nextFromCharClass(nonHorizontalSpaceCharClass) - log.trace("nextNonHorizontalSpaceChar(): {}", k) - return k - } - - fun nextPosixCharClassChar(type: String): Char { - if (type.substring(2,type.length-1) !in posixCharClasses){ - throw IllegalArgumentException("$type invalid/unsupported POSIX character class") - } - val k = nextFromCharClass(posixCharClasses[type.substring(2,type.length-1)]!!) - log.trace("nextPosixCharClassChar({}): {}", type, k) - return k - } fun wordCharPool() = wordChars From bfb0acae6654ccf26684b3701e0aa160869bad06 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Mon, 30 Mar 2026 00:46:26 -0300 Subject: [PATCH 18/26] Reversed iterations increase as new changes made the increase unnecessary --- .../e2etests/emb/json/language/LanguageServerExampleEMTest.java | 2 +- .../rest/redis/lettuce/findhash/RedisLettuceFindHashEMTest.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core-tests/e2e-tests/spring/emb-json/src/test/java/org/evomaster/e2etests/emb/json/language/LanguageServerExampleEMTest.java b/core-tests/e2e-tests/spring/emb-json/src/test/java/org/evomaster/e2etests/emb/json/language/LanguageServerExampleEMTest.java index 7c8b93e342..552439df3b 100644 --- a/core-tests/e2e-tests/spring/emb-json/src/test/java/org/evomaster/e2etests/emb/json/language/LanguageServerExampleEMTest.java +++ b/core-tests/e2e-tests/spring/emb-json/src/test/java/org/evomaster/e2etests/emb/json/language/LanguageServerExampleEMTest.java @@ -32,7 +32,7 @@ public void runEMTest() throws Throwable { runTestHandlingFlakyAndCompilation( "LanguageServerExampleGeneratedEMTest", "org.foo.LanguageServerExampleGeneratedEMTest", - 5_000, + 4_000, true, (args) -> { diff --git 
a/core-tests/e2e-tests/spring/spring-rest-redis/src/test/java/org/evomaster/e2etests/spring/rest/redis/lettuce/findhash/RedisLettuceFindHashEMTest.java b/core-tests/e2e-tests/spring/spring-rest-redis/src/test/java/org/evomaster/e2etests/spring/rest/redis/lettuce/findhash/RedisLettuceFindHashEMTest.java index 95dfa3be9c..0d8f6ebe18 100644 --- a/core-tests/e2e-tests/spring/spring-rest-redis/src/test/java/org/evomaster/e2etests/spring/rest/redis/lettuce/findhash/RedisLettuceFindHashEMTest.java +++ b/core-tests/e2e-tests/spring/spring-rest-redis/src/test/java/org/evomaster/e2etests/spring/rest/redis/lettuce/findhash/RedisLettuceFindHashEMTest.java @@ -27,7 +27,7 @@ public void testFindHashEM() throws Throwable { runTestHandlingFlakyAndCompilation( "RedisLettuceFindHashEM", "org.foo.spring.rest.redis.RedisLettuceFindHashEM", - 2000, + 1000, true, (args) -> { setOption(args, "heuristicsForRedis", "true"); From 2c114aa66196e718e058d0e5ac9dba4578b7143e Mon Sep 17 00:00:00 2001 From: lmasroca Date: Mon, 30 Mar 2026 03:24:24 -0300 Subject: [PATCH 19/26] Moved some logic regarding the construction of valid ranges from CharacterRangeRxGene.kt to new class src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange --- .../search/gene/regex/CharacterRangeRxGene.kt | 122 +++--------------- .../evomaster/core/utils/CharacterRange.kt | 5 +- .../core/utils/MultiCharacterRange.kt | 94 ++++++++++++++ 3 files changed, 117 insertions(+), 104 deletions(-) create mode 100644 core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt index 8de8f419e7..58974ae91a 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt @@ -11,114 +11,34 @@ import 
org.evomaster.core.search.service.mutator.MutationWeightControl import org.evomaster.core.search.service.mutator.genemutation.AdditionalGeneMutationInfo import org.evomaster.core.search.service.mutator.genemutation.SubsetGeneMutationSelectionStrategy import org.evomaster.core.utils.CharacterRange +import org.evomaster.core.utils.MultiCharacterRange import org.slf4j.LoggerFactory -class CharacterRangeRxGene( - val negated: Boolean, - val ranges: List -) : RxAtom, SimpleGene("."){ - - companion object{ - private val log = LoggerFactory.getLogger(CharacterRangeRxGene::class.java) - } - +class CharacterRangeRxGene private constructor( /** * this represents the valid ranges for a character class, removing overlaps and applying negation */ - private var internalRanges = mutableListOf() - - init { - if(ranges.isEmpty()){ - throw IllegalArgumentException("No defined ranges") - } - - // this limits the character class complements to 0xffff instead of allowing up to 0x10ffff, but values over - // 0xffff are not permitted on Char as they need 2 Chars to be represented; to allow this, we would need to - // use String or Int in every possible step as methods which return a single Char cannot return these characters - if(negated) { - internalRanges.add(CharacterRange(Character.MIN_VALUE, Character.MAX_VALUE)) - } - for (range in ranges) { - if(negated){ - remove(CharacterRange(range.start, range.end)) - } else { - add(CharacterRange(range.start, range.end)) - } - } - - // this could happen for example if we got a character class like [^\u0000-\uffff] - if(internalRanges.isEmpty()){ - throw IllegalArgumentException("No defined ranges") - } - } - - var value : Char = internalRanges[0].start - - /** - * Adds a character range to a [org.evomaster.core.search.gene.regex.CharacterRangeRxGene]. - * - * The range is added to the character class in a way that does not generate repeated elements. - * - * @param toAdd The character range to be added to the character class. 
- */ - private fun add(toAdd: CharacterRange) { - val newInternalRanges = mutableListOf() - var currentStart = toAdd.start - var currentEnd = toAdd.end - var merged = false - - for ((start, end) in internalRanges.sortedBy { it.start }){ - when { - end < currentStart - 1 -> newInternalRanges += CharacterRange(start, end) - start > currentEnd + 1 -> { - if (!merged) { - newInternalRanges += CharacterRange(currentStart, currentEnd) - merged = true - } - newInternalRanges += CharacterRange(start, end) - } - else -> { - currentStart = minOf(currentStart, start) - currentEnd = maxOf(currentEnd, end) - } - } - } + val validRanges: MultiCharacterRange +) : RxAtom, SimpleGene("."){ - if (!merged) { - newInternalRanges += CharacterRange(currentStart, currentEnd) - } + constructor(negated: Boolean, ranges: List) : this(MultiCharacterRange(negated, ranges)) - internalRanges = newInternalRanges + companion object{ + private val log = LoggerFactory.getLogger(CharacterRangeRxGene::class.java) } - /** - * Safely removes a character range from a [org.evomaster.core.search.gene.regex.CharacterRangeRxGene]. - * - * @param toRemove The character range to be removed from the character class. 
- */ - private fun remove(toRemove: CharacterRange) { - internalRanges = internalRanges.flatMap { r -> - when { - toRemove.end < r.start || toRemove.start > r.end -> - listOf(r) - else -> buildList { - if (toRemove.start > r.start) add(CharacterRange(r.start, toRemove.start - 1)) - if (toRemove.end < r.end) add(CharacterRange(toRemove.end + 1, r.end)) - } - } - }.toMutableList() - } + var value : Char = validRanges[0].start override fun checkForLocallyValidIgnoringChildren() : Boolean{ - return internalRanges.any { value in it } + return validRanges.any { value in it } } override fun isMutable(): Boolean { - return internalRanges.size > 1 || internalRanges[0].size > 1 + return validRanges.size > 1 || validRanges[0].size > 1 } override fun copyContent(): Gene { - val copy = CharacterRangeRxGene(negated, ranges) + val copy = CharacterRangeRxGene(validRanges) copy.value = this.value copy.name = this.name //in case name is changed from its default return copy @@ -132,10 +52,10 @@ class CharacterRangeRxGene( } override fun randomize(randomness: Randomness, tryToForceNewValue: Boolean) { - val total = internalRanges.sumOf { it.size } + val total = validRanges.sumOf { it.size } val sampledValue = randomness.nextInt(total) var currentRangeMinValue = 0 - for (r in internalRanges) { + for (r in validRanges) { val currentRangeMaxValue = currentRangeMinValue + r.size if (sampledValue < currentRangeMaxValue) { val codePoint = r.start.code + (sampledValue - currentRangeMinValue) @@ -151,8 +71,8 @@ class CharacterRangeRxGene( override fun shallowMutate(randomness: Randomness, apc: AdaptiveParameterControl, mwc: MutationWeightControl, selectionStrategy: SubsetGeneMutationSelectionStrategy, enableAdaptiveGeneMutation: Boolean, additionalGeneMutationInfo: AdditionalGeneMutationInfo?): Boolean { var t = 0 - for(i in 0 until internalRanges.size){ - val p = internalRanges[i] + for(i in 0 until validRanges.size){ + val p = validRanges[i] if(value in p){ t = i break @@ -161,18 +81,18 @@ 
class CharacterRangeRxGene( val delta = randomness.choose(listOf(1,-1)) - if(value + delta > internalRanges[t].end){ + if(value + delta > validRanges[t].end){ /* going over current max range. check next range and take its minimum */ - val next = (t+1) % internalRanges.size - value = internalRanges[next].start + val next = (t+1) % validRanges.size + value = validRanges[next].start - } else if(value + delta < internalRanges[t].start){ + } else if(value + delta < validRanges[t].start){ - val previous = (t - 1 + internalRanges.size) % internalRanges.size - value = internalRanges[previous].end + val previous = (t - 1 + validRanges.size) % validRanges.size + value = validRanges[previous].end } else { value += delta diff --git a/core/src/main/kotlin/org/evomaster/core/utils/CharacterRange.kt b/core/src/main/kotlin/org/evomaster/core/utils/CharacterRange.kt index 41f75519cb..a14e9472bd 100644 --- a/core/src/main/kotlin/org/evomaster/core/utils/CharacterRange.kt +++ b/core/src/main/kotlin/org/evomaster/core/utils/CharacterRange.kt @@ -4,11 +4,10 @@ class CharacterRange private constructor(val start: Char, val end: Char){ companion object { operator fun invoke(a: Char, b: Char): CharacterRange = if (a <= b) CharacterRange(a, b) else CharacterRange(b, a) - - operator fun invoke(a: Int, b: Int): CharacterRange = - invoke(a.toChar(), b.toChar()) } + constructor(a: Int, b: Int) : this(a.toChar(), b.toChar()) + val size: Int get() = end.code - start.code + 1 operator fun contains(char: Char): Boolean = char in start..end diff --git a/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt b/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt new file mode 100644 index 0000000000..225c3baede --- /dev/null +++ b/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt @@ -0,0 +1,94 @@ +package org.evomaster.core.utils + +class MultiCharacterRange private constructor(val ranges: List) { + + companion object { + operator fun invoke(negated: 
Boolean, ranges: List): MultiCharacterRange { + if (ranges.isEmpty()) { + throw IllegalArgumentException("No defined ranges") + } + + var internalRanges = mutableListOf() + + if (negated) { + internalRanges.add(CharacterRange(Character.MIN_VALUE, Character.MAX_VALUE)) + } + for (range in ranges) { + internalRanges = if (negated) { + remove(internalRanges, CharacterRange(range.start, range.end)) + } else { + add(internalRanges, CharacterRange(range.start, range.end)) + } + } + + if (internalRanges.isEmpty()) { + throw IllegalArgumentException("No defined ranges") + } + + return MultiCharacterRange(internalRanges) + } + + /** + * Adds a character range to the given list of ranges without generating overlaps. + * + * @param internalRanges The current list of character ranges. + * @param toAdd The character range to add. + * @return A new list of character ranges with [toAdd] merged in. + */ + private fun add(internalRanges: MutableList, toAdd: CharacterRange): MutableList { + val newInternalRanges = mutableListOf() + var currentStart = toAdd.start + var currentEnd = toAdd.end + var merged = false + + for ((start, end) in internalRanges.sortedBy { it.start }) { + when { + end < currentStart - 1 -> newInternalRanges += CharacterRange(start, end) + start > currentEnd + 1 -> { + if (!merged) { + newInternalRanges += CharacterRange(currentStart, currentEnd) + merged = true + } + newInternalRanges += CharacterRange(start, end) + } + else -> { + currentStart = minOf(currentStart, start) + currentEnd = maxOf(currentEnd, end) + } + } + } + + if (!merged) { + newInternalRanges += CharacterRange(currentStart, currentEnd) + } + + return newInternalRanges + } + + /** + * Removes a character range from the given list of ranges, splitting existing ranges if necessary. + * + * @param internalRanges The current list of character ranges. + * @param toRemove The character range to remove. + * @return A new list of character ranges with [toRemove] excluded. 
+ */ + private fun remove(internalRanges: MutableList, toRemove: CharacterRange): MutableList { + return internalRanges.flatMap { r -> + when { + toRemove.end < r.start || toRemove.start > r.end -> + listOf(r) + else -> buildList { + if (toRemove.start > r.start) add(CharacterRange(r.start, toRemove.start - 1)) + if (toRemove.end < r.end) add(CharacterRange(toRemove.end + 1, r.end)) + } + } + }.toMutableList() + } + } + + val size: Int get() = ranges.size + operator fun get(index: Int): CharacterRange = ranges[index] + operator fun iterator(): Iterator = ranges.iterator() + fun sumOf(selector: (CharacterRange) -> Int): Int = ranges.sumOf(selector) + fun any(predicate: (CharacterRange) -> Boolean): Boolean = ranges.any(predicate) +} \ No newline at end of file From df807cc13292286a221778acaf6680edbe2df4d1 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Mon, 30 Mar 2026 16:04:32 -0300 Subject: [PATCH 20/26] Increasing iterations for e2e test --- .../spring/rest/mongo/findoneby/MongoFindOneByEMTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core-tests/e2e-tests/spring/spring-rest-mongo/src/test/java/org/evomaster/e2etests/spring/rest/mongo/findoneby/MongoFindOneByEMTest.java b/core-tests/e2e-tests/spring/spring-rest-mongo/src/test/java/org/evomaster/e2etests/spring/rest/mongo/findoneby/MongoFindOneByEMTest.java index a48d0366d1..50d671989a 100644 --- a/core-tests/e2e-tests/spring/spring-rest-mongo/src/test/java/org/evomaster/e2etests/spring/rest/mongo/findoneby/MongoFindOneByEMTest.java +++ b/core-tests/e2e-tests/spring/spring-rest-mongo/src/test/java/org/evomaster/e2etests/spring/rest/mongo/findoneby/MongoFindOneByEMTest.java @@ -34,7 +34,7 @@ public void testFindOneOnGivenEndpoint(String endpoint) throws Throwable { runTestHandlingFlaky( "MongoFindOneByEM_" + id, "org.foo.spring.rest.mongo.MongoFindOneByEM"+id, - 1000, + 2000, true, (args) -> { setOption(args, "taintForceSelectionOfGenesWithSpecialization", "true"); From 
3a0afa1bdd2f32851b76a5334225a4f0a46c33ff Mon Sep 17 00:00:00 2001 From: lmasroca Date: Mon, 30 Mar 2026 16:43:30 -0300 Subject: [PATCH 21/26] Added some comments --- core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 b/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 index b71fb3123a..dd7a62ee65 100644 --- a/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 +++ b/core/src/main/antlr4/org/evomaster/core/parser/RegexJava.g4 @@ -125,7 +125,8 @@ fragment CharacterEscape | HexEscapeSequence | UnicodeEscapeSequence | OctalEscapeSequence - | 'p' BRACE_open PosixCharacterClassLabel BRACE_close + | 'p' BRACE_open PosixCharacterClassLabel BRACE_close // this is only implemented in Java at the moment as on JS this + // is allowed only while certain flags are enabled //| IdentityEscape ; @@ -254,6 +255,7 @@ AtomEscape fragment CharacterClassEscape //one of d D s S w W v V h H // v, V, h and H are java8 exclusive, they represent vertical spaces and horizaontal spaces respectively + // see https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html for more information : [dDsSwWvVhH] ; From 359f2e5c95f471ade99805ad00736bac97b9e375 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 31 Mar 2026 20:20:30 -0300 Subject: [PATCH 22/26] Moved character sampling logic from CharacterRangeRxGene.randomize(...) to new method MultiCharacterRange.sample(...)
--- .../search/gene/regex/CharacterRangeRxGene.kt | 21 +++++-------- .../core/utils/MultiCharacterRange.kt | 31 +++++++++++++++++-- 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt index 58974ae91a..ee52e0678b 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterRangeRxGene.kt @@ -52,21 +52,14 @@ class CharacterRangeRxGene private constructor( } override fun randomize(randomness: Randomness, tryToForceNewValue: Boolean) { - val total = validRanges.sumOf { it.size } - val sampledValue = randomness.nextInt(total) - var currentRangeMinValue = 0 - for (r in validRanges) { - val currentRangeMaxValue = currentRangeMinValue + r.size - if (sampledValue < currentRangeMaxValue) { - val codePoint = r.start.code + (sampledValue - currentRangeMinValue) - // is it necessary to log this? 
- log.trace("using Int {} as character selector for character class, resulting in code point: {}, which is: {}", sampledValue, codePoint, codePoint.toChar()) - value = codePoint.toChar() - return - } - currentRangeMinValue = currentRangeMaxValue + + val previous = value + + value = validRanges.sample(randomness) + + if(tryToForceNewValue && previous == value){ + randomize(randomness, tryToForceNewValue) } - assert(false) // internalRanges being empty should never happen } override fun shallowMutate(randomness: Randomness, apc: AdaptiveParameterControl, mwc: MutationWeightControl, selectionStrategy: SubsetGeneMutationSelectionStrategy, enableAdaptiveGeneMutation: Boolean, additionalGeneMutationInfo: AdditionalGeneMutationInfo?): Boolean { diff --git a/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt b/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt index 225c3baede..12d66c0817 100644 --- a/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt +++ b/core/src/main/kotlin/org/evomaster/core/utils/MultiCharacterRange.kt @@ -1,8 +1,13 @@ package org.evomaster.core.utils +import org.evomaster.core.search.service.Randomness +import org.slf4j.LoggerFactory + class MultiCharacterRange private constructor(val ranges: List) { companion object { + private val log = LoggerFactory.getLogger(MultiCharacterRange::class.java) + operator fun invoke(negated: Boolean, ranges: List): MultiCharacterRange { if (ranges.isEmpty()) { throw IllegalArgumentException("No defined ranges") @@ -86,9 +91,31 @@ class MultiCharacterRange private constructor(val ranges: List) } } + /** + * Uniformly samples a random character from the valid characters in a MultiCharacterRange. + * + * @param randomness The randomness source used to perform the uniform sampling. + * @return The sampled character. 
+ */ + fun sample(randomness: Randomness): Char { + val total = ranges.sumOf { it.size } + val sampledValue = randomness.nextInt(total) + var currentRangeMinValue = 0 + for (r in ranges) { + val currentRangeMaxValue = currentRangeMinValue + r.size + if (sampledValue < currentRangeMaxValue) { + val codePoint = r.start.code + (sampledValue - currentRangeMinValue) + // is it necessary to log this? + log.trace("using Int {} as character selector for character class, resulting in code point: {}, which is: {}", sampledValue, codePoint, codePoint.toChar()) + return codePoint.toChar() + } + currentRangeMinValue = currentRangeMaxValue + } + assert(false) // internal ranges being empty should never happen + return '0' + } + val size: Int get() = ranges.size operator fun get(index: Int): CharacterRange = ranges[index] - operator fun iterator(): Iterator = ranges.iterator() - fun sumOf(selector: (CharacterRange) -> Int): Int = ranges.sumOf(selector) fun any(predicate: (CharacterRange) -> Boolean): Boolean = ranges.any(predicate) } \ No newline at end of file From 4a361da074b96c90047d71eb696d5da712a209fa Mon Sep 17 00:00:00 2001 From: lmasroca Date: Tue, 31 Mar 2026 20:49:02 -0300 Subject: [PATCH 23/26] Made all CharacterClassEscapeRxGene objects of same type share a stateless MultiCharacterRange to sample characters instead of creating a different CharacterRangeRxGene for each CharacterClassEscapeRxGene instance --- .../gene/regex/CharacterClassEscapeRxGene.kt | 50 ++++++++++++------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt index c05fc7322d..1eb4669e92 100644 --- a/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt +++ b/core/src/main/kotlin/org/evomaster/core/search/gene/regex/CharacterClassEscapeRxGene.kt @@ -11,6 +11,7 @@ import 
org.evomaster.core.search.service.mutator.MutationWeightControl import org.evomaster.core.search.service.mutator.genemutation.AdditionalGeneMutationInfo import org.evomaster.core.search.service.mutator.genemutation.SubsetGeneMutationSelectionStrategy import org.evomaster.core.utils.CharacterRange +import org.evomaster.core.utils.MultiCharacterRange import org.slf4j.LoggerFactory import kotlin.collections.contains @@ -24,7 +25,7 @@ import kotlin.collections.contains \p{X} Find a character from X POSIX character class (eg:\p{Lower}) */ class CharacterClassEscapeRxGene( - val type: String + val type: String ) : RxAtom, SimpleGene("\\$type") { companion object{ @@ -44,8 +45,20 @@ class CharacterClassEscapeRxGene( private val verticalSpaceSet = stringToListOfCharacterRanges("\n\u000B\u000C\r\u0085\u2028\u2029") private val punctuationSet = stringToListOfCharacterRanges("""!"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~""") + private val digitMultiCharRange = MultiCharacterRange(false, digitSet) + private val wordMultiCharRange = MultiCharacterRange(false, wordSet) + private val spaceMultiCharRange = MultiCharacterRange(false, spaceSet) + private val horizontalSpaceMultiCharRange = MultiCharacterRange(false, horizontalSpaceSet) + private val verticalSpaceMultiCharRange = MultiCharacterRange(false, verticalSpaceSet) + + private val nonDigitMultiCharRange = MultiCharacterRange(true, digitSet) + private val nonWordMultiCharRange = MultiCharacterRange(true, wordSet) + private val nonSpaceMultiCharRange = MultiCharacterRange(true, spaceSet) + private val nonHorizontalSpaceMultiCharRange = MultiCharacterRange(true, horizontalSpaceSet) + private val nonVerticalSpaceMultiCharRange = MultiCharacterRange(true, verticalSpaceSet) + // US-ASCII POSIX character classes (\p{X}) - private val posixSets = mapOf( + private val posixMultiCharRanges = mapOf( "Lower" to listOf(CharacterRange('a', 'z')), "Upper" to listOf(CharacterRange('A', 'Z')), "ASCII" to listOf(CharacterRange(0, 0x7f)), @@ -59,35 
+72,37 @@ class CharacterClassEscapeRxGene( "Cntrl" to listOf(CharacterRange(0, 0x1f)) + stringToListOfCharacterRanges("\u007f"), "XDigit" to listOf(CharacterRange('0', '9'), CharacterRange('a', 'f'), CharacterRange('A', 'F')), "Space" to spaceSet - ) + ).mapValues { (_, value) -> MultiCharacterRange(false, value) } } var value: String = "" - private var charClass: CharacterRangeRxGene + private var multiCharRange: MultiCharacterRange init { if (type[0] !in "wWdDsSvVhHp") { throw IllegalArgumentException("Invalid type: $type") } - val charSet = when(type[0]){ - 'w', 'W' -> wordSet - 'd', 'D' -> digitSet - 's', 'S' -> spaceSet - 'v', 'V' -> verticalSpaceSet - 'h', 'H' -> horizontalSpaceSet + multiCharRange = when(type[0]){ + 'w' -> wordMultiCharRange + 'W' -> nonWordMultiCharRange + 'd' -> digitMultiCharRange + 'D' -> nonDigitMultiCharRange + 's' -> spaceMultiCharRange + 'S' -> nonSpaceMultiCharRange + 'v' -> verticalSpaceMultiCharRange + 'V' -> nonVerticalSpaceMultiCharRange + 'h' -> horizontalSpaceMultiCharRange + 'H' -> nonHorizontalSpaceMultiCharRange 'p' -> - if (type.substring(2, type.length - 1) !in posixSets){ + if (type.substring(2, type.length - 1) !in posixMultiCharRanges){ throw IllegalArgumentException("$type invalid/unsupported POSIX character class") } else { - posixSets[type.substring(2, type.length - 1)]!! + posixMultiCharRanges[type.substring(2, type.length - 1)]!! 
} else -> //this should never happen due to check in init throw IllegalStateException("Type '\\$type' not supported yet") } - - val negated = type[0].isUpperCase() - charClass = CharacterRangeRxGene(negated, charSet) } override fun checkForLocallyValidIgnoringChildren() : Boolean{ @@ -109,8 +124,7 @@ class CharacterClassEscapeRxGene( val previous = value - charClass.randomize(randomness, tryToForceNewValue) - value = charClass.value.toString() + value = multiCharRange.sample(randomness).toString() if(tryToForceNewValue && previous == value){ randomize(randomness, tryToForceNewValue) @@ -134,7 +148,7 @@ class CharacterClassEscapeRxGene( } override fun getValueAsPrintableString(previousGenes: List, mode: GeneUtils.EscapeMode?, targetFormat: OutputFormat?, extraCheck: Boolean): String { - return value + return value } From c3b38e028c62241231874199451f08972b482a17 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Wed, 1 Apr 2026 15:29:14 -0300 Subject: [PATCH 24/26] Made decreasing ranges inside character classes illegal to comply with Java/JS regex standards. 
--- .../org/evomaster/core/utils/CharacterRange.kt | 13 +++++++------ .../core/parser/GeneRegexJavaVisitorTest.kt | 7 ------- .../org/evomaster/core/parser/RegexHandlerTest.kt | 2 ++ 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/core/src/main/kotlin/org/evomaster/core/utils/CharacterRange.kt b/core/src/main/kotlin/org/evomaster/core/utils/CharacterRange.kt index a14e9472bd..3e8da0e2ab 100644 --- a/core/src/main/kotlin/org/evomaster/core/utils/CharacterRange.kt +++ b/core/src/main/kotlin/org/evomaster/core/utils/CharacterRange.kt @@ -1,13 +1,14 @@ package org.evomaster.core.utils -class CharacterRange private constructor(val start: Char, val end: Char){ - companion object { - operator fun invoke(a: Char, b: Char): CharacterRange = - if (a <= b) CharacterRange(a, b) else CharacterRange(b, a) - } - +class CharacterRange constructor(val start: Char, val end: Char){ constructor(a: Int, b: Int) : this(a.toChar(), b.toChar()) + init { + if (start > end){ + throw IllegalArgumentException("Range out of order in character class") + } + } + val size: Int get() = end.code - start.code + 1 operator fun contains(char: Char): Boolean = char in start..end diff --git a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt index afb628b096..29cb9f1b29 100644 --- a/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt +++ b/core/src/test/kotlin/org/evomaster/core/parser/GeneRegexJavaVisitorTest.kt @@ -66,13 +66,6 @@ class GeneRegexJavaVisitorTest : GeneRegexEcma262VisitorTest() { checkSameAsJava("[ -!]") } - @Test - fun testDecreasingRange(){ - //checkSameAsJava("[!- ]") //not valid in Java - //checkSameAsJava("[9-1]") //not valid in Java - checkCanSample("[9-1]", listOf("1","5","9"),200) - } - @Test fun testJavaHexEscape(){ checkSameAsJava("""x{3}\x{0}\x{FFFf}\x{0FFFf}\x{01FFFf}\x{10FFFf}""") diff --git 
a/core/src/test/kotlin/org/evomaster/core/parser/RegexHandlerTest.kt b/core/src/test/kotlin/org/evomaster/core/parser/RegexHandlerTest.kt index 1fe87b5887..569d8f63e8 100644 --- a/core/src/test/kotlin/org/evomaster/core/parser/RegexHandlerTest.kt +++ b/core/src/test/kotlin/org/evomaster/core/parser/RegexHandlerTest.kt @@ -160,6 +160,7 @@ internal class RegexHandlerTest{ assertThrows(IllegalArgumentException::class.java) { RegexHandler.createGeneForJVM("\\x{ffffff}") } assertThrows(ParseCancellationException::class.java) { RegexHandler.createGeneForJVM("\\0") } assertThrows(ParseCancellationException::class.java) { RegexHandler.createGeneForJVM("\\09") } + assertThrows(IllegalArgumentException::class.java) { RegexHandler.createGeneForJVM("[9-1]") } } @Test @@ -167,5 +168,6 @@ internal class RegexHandlerTest{ assertThrows(ParseCancellationException::class.java) { RegexHandler.createGeneForEcma262("\\xR") } assertThrows(ParseCancellationException::class.java) { RegexHandler.createGeneForJVM("\\ugggg") } + assertThrows(IllegalArgumentException::class.java) { RegexHandler.createGeneForJVM("[9-1]") } } } From 44762e614d5bb7fc878a43108a0f8c91dc07dff6 Mon Sep 17 00:00:00 2001 From: lmasroca Date: Wed, 1 Apr 2026 23:05:20 -0300 Subject: [PATCH 25/26] Increasing iterations for e2e test --- .../openapi/v3/flakinessdetect/FlakinessDetectBlackboxEMTest.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/flakinessdetect/FlakinessDetectBlackboxEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/flakinessdetect/FlakinessDetectBlackboxEMTest.kt index e12da56a2a..90ee47b9f5 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/flakinessdetect/FlakinessDetectBlackboxEMTest.kt +++ 
b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/flakinessdetect/FlakinessDetectBlackboxEMTest.kt @@ -29,7 +29,7 @@ class FlakinessDetectBlackboxEMTest : SpringTestBase() { runTestHandlingFlakyAndCompilation( outputFolder, outputClass, - 100 + 200 ) { args: MutableList -> From 66504e41a5e9ac4241d42f4b3f1cc767492af61a Mon Sep 17 00:00:00 2001 From: lmasroca Date: Thu, 2 Apr 2026 19:56:58 -0300 Subject: [PATCH 26/26] Increasing iterations for e2e test --- .../openapi/v3/flakinessdetect/FlakinessDetectBlackboxEMTest.kt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/flakinessdetect/FlakinessDetectBlackboxEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/flakinessdetect/FlakinessDetectBlackboxEMTest.kt index 90ee47b9f5..3dc94d3fca 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/flakinessdetect/FlakinessDetectBlackboxEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/flakinessdetect/FlakinessDetectBlackboxEMTest.kt @@ -29,7 +29,7 @@ class FlakinessDetectBlackboxEMTest : SpringTestBase() { runTestHandlingFlakyAndCompilation( outputFolder, outputClass, - 200 + 1000 ) { args: MutableList ->