diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/AIClassificationEMTestBase.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/AIClassificationEMTestBase.kt index ce5d590bf3..0d5ae426f7 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/AIClassificationEMTestBase.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/AIClassificationEMTestBase.kt @@ -39,31 +39,53 @@ abstract class AIClassificationEMTestBase : SpringTestBase(){ return ei } + private fun isWeakClassifier( + model: AIResponseClassifier, + action: RestCallAction, + weaknessThreshold: Double + ): Boolean { + + val metrics = model.estimateMetrics(action.endpoint) + + return metrics.precision400 <= weaknessThreshold + || metrics.sensitivity400 <= weaknessThreshold + || metrics.specificity <= weaknessThreshold + || metrics.npv <= weaknessThreshold + } + protected fun verifyModel( injector: Injector, ok2xx: List, fail400: List, - threshold: Double = injector.getInstance(EMConfig::class.java).classificationRepairThreshold + repairThreshold: Double = injector.getInstance(EMConfig::class.java).classificationRepairThreshold, + weaknessThreshold: Double = injector.getInstance(EMConfig::class.java).aIResponseClassifierWeaknessThreshold ) { val model = injector.getInstance(AIResponseClassifier::class.java) model.disableLearning() // no side-effects + for(ok in ok2xx){ + + if (isWeakClassifier(model, ok, weaknessThreshold)) continue + val resOK = evaluateAction(injector, ok) assertTrue(resOK.getStatusCode() in 200..299) val mOK= model.classify(ok) assertTrue( - mOK.probabilityOf400() < threshold, + mOK.probabilityOf400() < repairThreshold, "Too high probability of 400 for OK ${ok.getName()}: ${mOK.probabilityOf400()}") } for(fail in fail400) { + + if (isWeakClassifier(model, fail, weaknessThreshold)) continue + val resFail = evaluateAction(injector, fail) assertEquals(400, resFail.getStatusCode()) val mFail = model.classify(fail) assertTrue( - mFail.probabilityOf400() >= threshold, + mFail.probabilityOf400() >= repairThreshold, "Too low probability of 400 for Fail ${fail.getName()}: ${mFail.probabilityOf400()}" ) } diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/allornone/ACAllOrNoneEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/allornone/ACAllOrNoneEMTest.kt index f8192f790d..01e8a98c51 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/allornone/ACAllOrNoneEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/allornone/ACAllOrNoneEMTest.kt @@ -27,7 +27,7 @@ class ACAllOrNoneEMTest : AIClassificationEMTestBase() { testRunEM(AIResponseClassifierModel.DETERMINISTIC) } - @Disabled + @Test fun testRunGaussian(){ testRunEM(AIResponseClassifierModel.GAUSSIAN) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/arithmetic/ACArithmeticEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/arithmetic/ACArithmeticEMTest.kt index 588147fff1..eff0c37bca 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/arithmetic/ACArithmeticEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/arithmetic/ACArithmeticEMTest.kt @@ -27,7 +27,7 @@ class ACArithmeticEMTest : AIClassificationEMTestBase() { testRunEM(AIResponseClassifierModel.DETERMINISTIC) } - @Disabled + @Test fun testRunGaussian(){ testRunEM(AIResponseClassifierModel.GAUSSIAN) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/basic/ACBasicEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/basic/ACBasicEMTest.kt index 56480e4490..cc378138fa 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/basic/ACBasicEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/basic/ACBasicEMTest.kt @@ -21,12 +21,13 @@ class ACBasicEMTest : AIClassificationEMTestBase() { } } + @Disabled @Test fun testRunDeterministic(){ testRunEM(AIResponseClassifierModel.DETERMINISTIC) } - @Disabled + @Test fun testRunGaussian(){ testRunEM(AIResponseClassifierModel.GAUSSIAN) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/imply/ACImplyEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/imply/ACImplyEMTest.kt index d6edcff61e..5351e6e26c 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/imply/ACImplyEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/imply/ACImplyEMTest.kt @@ -35,7 +35,7 @@ class ACImplyEMTest : AIClassificationEMTestBase() { testRunEM(AIResponseClassifierModel.DETERMINISTIC) } - @Disabled + @Test fun testRunGaussian(){ testRunEM(AIResponseClassifierModel.GAUSSIAN) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/mixed/ACMixedEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/mixed/ACMixedEMTest.kt index 44c36ee82b..d4482d6f86 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/mixed/ACMixedEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/mixed/ACMixedEMTest.kt @@ -36,7 +36,7 @@ class ACMixedEMTest : AIClassificationEMTestBase() { testRunEM(AIResponseClassifierModel.DETERMINISTIC) } - @Disabled + @Test fun testRunGaussian(){ testRunEM(AIResponseClassifierModel.GAUSSIAN) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/onlyone/ACOnlyOneEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/onlyone/ACOnlyOneEMTest.kt index 1b87a2a099..79ec3a7a7e 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/onlyone/ACOnlyOneEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/onlyone/ACOnlyOneEMTest.kt @@ -33,7 +33,7 @@ class ACOnlyOneEMTest : AIClassificationEMTestBase() { testRunEM(AIResponseClassifierModel.DETERMINISTIC) } - @Disabled + @Test fun testRunGaussian(){ testRunEM(AIResponseClassifierModel.GAUSSIAN) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/or/ACOrEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/or/ACOrEMTest.kt index 59e1ee7b2d..7352b3704b 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/or/ACOrEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/or/ACOrEMTest.kt @@ -32,7 +32,7 @@ class ACOrEMTest : AIClassificationEMTestBase() { testRunEM(AIResponseClassifierModel.DETERMINISTIC) } - @Disabled + @Test fun testRunGaussian(){ testRunEM(AIResponseClassifierModel.GAUSSIAN) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/required/ACRequiredEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/required/ACRequiredEMTest.kt index 4ea48f3823..32185990b0 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/required/ACRequiredEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/required/ACRequiredEMTest.kt @@ -31,7 +31,7 @@ class ACRequiredEMTest : AIClassificationEMTestBase() { testRunEM(AIResponseClassifierModel.DETERMINISTIC) } - @Disabled + @Test fun testRunGaussian(){ testRunEM(AIResponseClassifierModel.GAUSSIAN) diff --git a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/zeroorone/ACZeroOrOneEMTest.kt b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/zeroorone/ACZeroOrOneEMTest.kt index 1d4eb762ad..b7c86e7c0e 100644 --- a/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/zeroorone/ACZeroOrOneEMTest.kt +++ b/core-tests/e2e-tests/spring/spring-rest-openapi-v3/src/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/zeroorone/ACZeroOrOneEMTest.kt @@ -35,7 +35,7 @@ class ACZeroOrOneEMTest : AIClassificationEMTestBase() { testRunEM(AIResponseClassifierModel.DETERMINISTIC) } - @Disabled + @Test fun testRunGaussian(){ testRunEM(AIResponseClassifierModel.GAUSSIAN) diff --git a/core/src/main/kotlin/org/evomaster/core/EMConfig.kt b/core/src/main/kotlin/org/evomaster/core/EMConfig.kt index 6bd096e17f..2fb8f2c38e 100644 --- a/core/src/main/kotlin/org/evomaster/core/EMConfig.kt +++ b/core/src/main/kotlin/org/evomaster/core/EMConfig.kt @@ -1563,7 +1563,7 @@ class EMConfig { @PercentageAsProbability(false) @Cfg("If using THRESHOLD for AI Classification Repair, specify its value." + " All classifications with probability equal or above such threshold value will be accepted.") - var classificationRepairThreshold = 0.8 + var classificationRepairThreshold = 0.5 @Experimental @Cfg("Specify how the classification of actions's response will be used to execute a possible repair on the action.") @@ -1602,7 +1602,7 @@ class EMConfig { @Experimental @Cfg("Minimum confidence threshold required for the AI response classifier to decide" + "whether to send a request as-is or attempt a repair.") - var aIResponseClassifierWeaknessThreshold = 0.4 + var aIResponseClassifierWeaknessThreshold = 0.8 @Cfg("Output a JSON file representing statistics of the fuzzing session, written in the WFC Report format." + " This also includes a index.html web application to visualize such data.") diff --git a/docs/options.md b/docs/options.md index 7f5c5369b0..f7461e6c1f 100644 --- a/docs/options.md +++ b/docs/options.md @@ -245,7 +245,7 @@ There are 3 types of options: |Options|Description| |---|---| |`aIClassificationMetrics`| __Enum__. Determines which metric-tracking strategy is used by the AI response classifier. *Valid values*: `TIME_WINDOW, FULL_HISTORY`. *Default value*: `FULL_HISTORY`.| -|`aIResponseClassifierWeaknessThreshold`| __Double__. Minimum confidence threshold required for the AI response classifier to decidewhether to send a request as-is or attempt a repair. *Default value*: `0.4`.| +|`aIResponseClassifierWeaknessThreshold`| __Double__. Minimum confidence threshold required for the AI response classifier to decidewhether to send a request as-is or attempt a repair. *Default value*: `0.8`.| |`abstractInitializationGeneToMutate`| __Boolean__. During mutation, whether to abstract genes for repeated SQL actions. *Default value*: `false`.| |`aiClassifierRepairActivation`| __Enum__. Specify how the classification of actions's response will be used to execute a possible repair on the action. *Valid values*: `PROBABILITY, THRESHOLD`. *Default value*: `THRESHOLD`.| |`aiEncoderType`| __Enum__. The encoding strategy applied to transform raw data to the encoded version. *Valid values*: `RAW, NORMAL, UNIT_NORMAL`. *Default value*: `RAW`.| @@ -259,7 +259,7 @@ There are 3 types of options: |`breederTruncationFraction`| __Double__. Breeder GA: fraction of top individuals to keep in parents pool (truncation). *Constraints*: `probability 0.0-1.0`. *Default value*: `0.5`.| |`callbackURLHostname`| __String__. HTTP callback verifier hostname. Default is set to 'localhost'. If the SUT is running inside a container (i.e., Docker), 'localhost' will refer to the container. This can be used to change the hostname. *Default value*: `localhost`.| |`cgaNeighborhoodModel`| __Enum__. Cellular GA: neighborhood model (RING, L5, C9, C13). *Valid values*: `RING, L5, C9, C13`. *Default value*: `RING`.| -|`classificationRepairThreshold`| __Double__. If using THRESHOLD for AI Classification Repair, specify its value. All classifications with probability equal or above such threshold value will be accepted. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.8`.| +|`classificationRepairThreshold`| __Double__. If using THRESHOLD for AI Classification Repair, specify its value. All classifications with probability equal or above such threshold value will be accepted. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.5`.| |`discoveredInfoRewardedInFitness`| __Boolean__. If there is new discovered information from a test execution, reward it in the fitness function. *Default value*: `false`.| |`dockerLocalhost`| __Boolean__. Replace references to 'localhost' to point to the actual host machine. Only needed when running EvoMaster inside Docker. *Default value*: `false`.| |`dpcTargetTestSize`| __Int__. Specify a max size of a test to be targeted when either DPC_INCREASING or DPC_DECREASING is enabled. *Default value*: `1`.|