WebFuzzing · MohsenTaheriShalmani · Mar 7, 2026 · Mar 7, 2026 · arcuri82 · Mar 8, 2026
diff --git a/...n/org/evomaster/e2etests/spring/openapi/v3/aiclassification/AIClassificationEMTestBase.kt b/...n/org/evomaster/e2etests/spring/openapi/v3/aiclassification/AIClassificationEMTestBase.kt
@@ -39,31 +39,53 @@ abstract class AIClassificationEMTestBase : SpringTestBase(){
         return ei
     }
 
+    /**
+     * Reads the metrics that [model] estimates for the endpoint of [action] and returns true as soon as
+     * one of precision400, sensitivity400, specificity or npv is at or below [weaknessThreshold].
+     */
+    private fun isWeakClassifier(model: AIResponseClassifier, action: RestCallAction, weaknessThreshold: Double): Boolean {
+        val estimated = model.estimateMetrics(action.endpoint)
+        return when {
+            estimated.precision400 <= weaknessThreshold -> true
+            estimated.sensitivity400 <= weaknessThreshold -> true
+            estimated.specificity <= weaknessThreshold -> true
+            else -> estimated.npv <= weaknessThreshold
+        }
+    }
+
     protected fun verifyModel(
         injector: Injector,
         ok2xx: List<RestCallAction>,
         fail400: List<RestCallAction>,
-        threshold: Double = injector.getInstance(EMConfig::class.java).classificationRepairThreshold
+        repairThreshold: Double = injector.getInstance(EMConfig::class.java).classificationRepairThreshold,
+        weaknessThreshold: Double = injector.getInstance(EMConfig::class.java).aIResponseClassifierWeaknessThreshold
     ) {
 
         val model = injector.getInstance(AIResponseClassifier::class.java)
         model.disableLearning() // no side-effects
 
+
         for(ok in ok2xx){
+
+            if (isWeakClassifier(model, ok, weaknessThreshold)) continue
+
             val resOK = evaluateAction(injector, ok)
             assertTrue(resOK.getStatusCode() in 200..299)
             val mOK= model.classify(ok)
             assertTrue(
-                mOK.probabilityOf400() < threshold,
+                mOK.probabilityOf400() < repairThreshold,
                 "Too high probability of 400 for OK ${ok.getName()}: ${mOK.probabilityOf400()}")
         }
 
         for(fail in fail400) {
+
+            if (isWeakClassifier(model, fail, weaknessThreshold)) continue
+
             val resFail = evaluateAction(injector, fail)
             assertEquals(400, resFail.getStatusCode())
             val mFail = model.classify(fail)
             assertTrue(
-                mFail.probabilityOf400() >= threshold,
+                mFail.probabilityOf400() >= repairThreshold,
                 "Too low probability of 400 for Fail ${fail.getName()}: ${mFail.probabilityOf400()}"
             )
         }

diff --git a/.../org/evomaster/e2etests/spring/openapi/v3/aiclassification/allornone/ACAllOrNoneEMTest.kt b/.../org/evomaster/e2etests/spring/openapi/v3/aiclassification/allornone/ACAllOrNoneEMTest.kt
@@ -27,7 +27,7 @@ class ACAllOrNoneEMTest : AIClassificationEMTestBase() {
         testRunEM(AIResponseClassifierModel.DETERMINISTIC)
     }
 
-    @Disabled
+
     @Test
     fun testRunGaussian(){
         testRunEM(AIResponseClassifierModel.GAUSSIAN)

diff --git a/...rg/evomaster/e2etests/spring/openapi/v3/aiclassification/arithmetic/ACArithmeticEMTest.kt b/...rg/evomaster/e2etests/spring/openapi/v3/aiclassification/arithmetic/ACArithmeticEMTest.kt
@@ -27,7 +27,7 @@ class ACArithmeticEMTest : AIClassificationEMTestBase() {
         testRunEM(AIResponseClassifierModel.DETERMINISTIC)
     }
 
-    @Disabled
+
     @Test
     fun testRunGaussian(){
         testRunEM(AIResponseClassifierModel.GAUSSIAN)

diff --git a/...t/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/basic/ACBasicEMTest.kt b/...t/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/basic/ACBasicEMTest.kt
@@ -21,12 +21,13 @@ class ACBasicEMTest : AIClassificationEMTestBase() {
         }
     }
 
+    @Disabled
     @Test
     fun testRunDeterministic(){
         testRunEM(AIResponseClassifierModel.DETERMINISTIC)
     }
 
-    @Disabled
+
     @Test
     fun testRunGaussian(){
         testRunEM(AIResponseClassifierModel.GAUSSIAN)

diff --git a/...t/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/imply/ACImplyEMTest.kt b/...t/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/imply/ACImplyEMTest.kt
@@ -35,7 +35,7 @@ class ACImplyEMTest : AIClassificationEMTestBase() {
         testRunEM(AIResponseClassifierModel.DETERMINISTIC)
     }
 
-    @Disabled
+
     @Test
     fun testRunGaussian(){
         testRunEM(AIResponseClassifierModel.GAUSSIAN)

diff --git a/...t/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/mixed/ACMixedEMTest.kt b/...t/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/mixed/ACMixedEMTest.kt
@@ -36,7 +36,7 @@ class ACMixedEMTest : AIClassificationEMTestBase() {
         testRunEM(AIResponseClassifierModel.DETERMINISTIC)
     }
 
-    @Disabled
+
     @Test
     fun testRunGaussian(){
         testRunEM(AIResponseClassifierModel.GAUSSIAN)

diff --git a/...tlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/onlyone/ACOnlyOneEMTest.kt b/...tlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/onlyone/ACOnlyOneEMTest.kt
@@ -33,7 +33,7 @@ class ACOnlyOneEMTest : AIClassificationEMTestBase() {
         testRunEM(AIResponseClassifierModel.DETERMINISTIC)
     }
 
-    @Disabled
+
     @Test
     fun testRunGaussian(){
         testRunEM(AIResponseClassifierModel.GAUSSIAN)

diff --git a/...rc/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/or/ACOrEMTest.kt b/...rc/test/kotlin/org/evomaster/e2etests/spring/openapi/v3/aiclassification/or/ACOrEMTest.kt
@@ -32,7 +32,7 @@ class ACOrEMTest : AIClassificationEMTestBase() {
         testRunEM(AIResponseClassifierModel.DETERMINISTIC)
     }
 
-    @Disabled
+
     @Test
     fun testRunGaussian(){
         testRunEM(AIResponseClassifierModel.GAUSSIAN)

diff --git a/...in/org/evomaster/e2etests/spring/openapi/v3/aiclassification/required/ACRequiredEMTest.kt b/...in/org/evomaster/e2etests/spring/openapi/v3/aiclassification/required/ACRequiredEMTest.kt
@@ -31,7 +31,7 @@ class ACRequiredEMTest : AIClassificationEMTestBase() {
         testRunEM(AIResponseClassifierModel.DETERMINISTIC)
     }
 
-    @Disabled
+
     @Test
     fun testRunGaussian(){
         testRunEM(AIResponseClassifierModel.GAUSSIAN)

diff --git a/.../org/evomaster/e2etests/spring/openapi/v3/aiclassification/zeroorone/ACZeroOrOneEMTest.kt b/.../org/evomaster/e2etests/spring/openapi/v3/aiclassification/zeroorone/ACZeroOrOneEMTest.kt
@@ -35,7 +35,7 @@ class ACZeroOrOneEMTest : AIClassificationEMTestBase() {
         testRunEM(AIResponseClassifierModel.DETERMINISTIC)
     }
 
-    @Disabled
+
     @Test
     fun testRunGaussian(){
         testRunEM(AIResponseClassifierModel.GAUSSIAN)

diff --git a/core/src/main/kotlin/org/evomaster/core/EMConfig.kt b/core/src/main/kotlin/org/evomaster/core/EMConfig.kt
@@ -1563,7 +1563,7 @@ class EMConfig {
     @PercentageAsProbability(false)
     @Cfg("If using THRESHOLD for AI Classification Repair, specify its value." +
             " All classifications with probability equal or above such threshold value will be accepted.")
-    var classificationRepairThreshold = 0.8
+    var classificationRepairThreshold = 0.5
 
     @Experimental
     @Cfg("Specify how the classification of actions's response will be used to execute a possible repair on the action.")
@@ -1602,7 +1602,7 @@ class EMConfig {
     @Experimental
     @Cfg("Minimum confidence threshold required for the AI response classifier to decide" +
             "whether to send a request as-is or attempt a repair.")
-    var aIResponseClassifierWeaknessThreshold = 0.4
+    var aIResponseClassifierWeaknessThreshold = 0.8
 
     @Cfg("Output a JSON file representing statistics of the fuzzing session, written in the WFC Report format." +
             " This also includes a index.html web application to visualize such data.")

diff --git a/docs/options.md b/docs/options.md
@@ -245,7 +245,7 @@ There are 3 types of options:
 |Options|Description|
 |---|---|
 |`aIClassificationMetrics`| __Enum__. Determines which metric-tracking strategy is used by the AI response classifier. *Valid values*: `TIME_WINDOW, FULL_HISTORY`. *Default value*: `FULL_HISTORY`.|
-|`aIResponseClassifierWeaknessThreshold`| __Double__. Minimum confidence threshold required for the AI response classifier to decidewhether to send a request as-is or attempt a repair. *Default value*: `0.4`.|
+|`aIResponseClassifierWeaknessThreshold`| __Double__. Minimum confidence threshold required for the AI response classifier to decidewhether to send a request as-is or attempt a repair. *Default value*: `0.8`.|
 |`abstractInitializationGeneToMutate`| __Boolean__. During mutation, whether to abstract genes for repeated SQL actions. *Default value*: `false`.|
 |`aiClassifierRepairActivation`| __Enum__. Specify how the classification of actions's response will be used to execute a possible repair on the action. *Valid values*: `PROBABILITY, THRESHOLD`. *Default value*: `THRESHOLD`.|
 |`aiEncoderType`| __Enum__. The encoding strategy applied to transform raw data to the encoded version. *Valid values*: `RAW, NORMAL, UNIT_NORMAL`. *Default value*: `RAW`.|
@@ -259,7 +259,7 @@ There are 3 types of options:
 |`breederTruncationFraction`| __Double__. Breeder GA: fraction of top individuals to keep in parents pool (truncation). *Constraints*: `probability 0.0-1.0`. *Default value*: `0.5`.|
 |`callbackURLHostname`| __String__. HTTP callback verifier hostname. Default is set to 'localhost'. If the SUT is running inside a container (i.e., Docker), 'localhost' will refer to the container. This can be used to change the hostname. *Default value*: `localhost`.|
 |`cgaNeighborhoodModel`| __Enum__. Cellular GA: neighborhood model (RING, L5, C9, C13). *Valid values*: `RING, L5, C9, C13`. *Default value*: `RING`.|
-|`classificationRepairThreshold`| __Double__. If using THRESHOLD for AI Classification Repair, specify its value. All classifications with probability equal or above such threshold value will be accepted. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.8`.|
+|`classificationRepairThreshold`| __Double__. If using THRESHOLD for AI Classification Repair, specify its value. All classifications with probability equal or above such threshold value will be accepted. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.5`.|
 |`discoveredInfoRewardedInFitness`| __Boolean__. If there is new discovered information from a test execution, reward it in the fitness function. *Default value*: `false`.|
 |`dockerLocalhost`| __Boolean__. Replace references to 'localhost' to point to the actual host machine. Only needed when running EvoMaster inside Docker. *Default value*: `false`.|
 |`dpcTargetTestSize`| __Int__. Specify a max size of a test to be targeted when either DPC_INCREASING or DPC_DECREASING is enabled. *Default value*: `1`.|