Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -39,31 +39,53 @@ abstract class AIClassificationEMTestBase : SpringTestBase(){
return ei
}

/**
 * Tells whether the classifier should be treated as weak for the endpoint of [action].
 *
 * The metrics estimated by [model] for that endpoint are inspected one at a time, in the
 * order precision400, sensitivity400, specificity, npv. The check stops at the first
 * metric found to be less than or equal to [weaknessThreshold].
 *
 * @param model classifier whose estimated metrics are inspected
 * @param action call whose endpoint selects the metrics to estimate
 * @param weaknessThreshold inclusive upper bound under which a metric counts as weak
 * @return `true` if at least one of the four metrics is at or below [weaknessThreshold]
 */
private fun isWeakClassifier(
    model: AIResponseClassifier,
    action: RestCallAction,
    weaknessThreshold: Double
): Boolean {
    val estimated = model.estimateMetrics(action.endpoint)

    // Guard clauses keep the same left-to-right, short-circuit evaluation order.
    if (estimated.precision400 <= weaknessThreshold) return true
    if (estimated.sensitivity400 <= weaknessThreshold) return true
    if (estimated.specificity <= weaknessThreshold) return true
    return estimated.npv <= weaknessThreshold
}

protected fun verifyModel(
injector: Injector,
ok2xx: List<RestCallAction>,
fail400: List<RestCallAction>,
threshold: Double = injector.getInstance(EMConfig::class.java).classificationRepairThreshold
repairThreshold: Double = injector.getInstance(EMConfig::class.java).classificationRepairThreshold,
weaknessThreshold: Double = injector.getInstance(EMConfig::class.java).aIResponseClassifierWeaknessThreshold
) {

val model = injector.getInstance(AIResponseClassifier::class.java)
model.disableLearning() // no side-effects


for(ok in ok2xx){

if (isWeakClassifier(model, ok, weaknessThreshold)) continue
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm unsure about this... we will need to discuss. For example, if the model is always weak, would it mean this test will always pass? That would be against the point of having an E2E test. Or is Gaussian not able to reliably solve these simple APIs in these E2Es?


val resOK = evaluateAction(injector, ok)
assertTrue(resOK.getStatusCode() in 200..299)
val mOK= model.classify(ok)
assertTrue(
mOK.probabilityOf400() < threshold,
mOK.probabilityOf400() < repairThreshold,
"Too high probability of 400 for OK ${ok.getName()}: ${mOK.probabilityOf400()}")
}

for(fail in fail400) {

if (isWeakClassifier(model, fail, weaknessThreshold)) continue

val resFail = evaluateAction(injector, fail)
assertEquals(400, resFail.getStatusCode())
val mFail = model.classify(fail)
assertTrue(
mFail.probabilityOf400() >= threshold,
mFail.probabilityOf400() >= repairThreshold,
"Too low probability of 400 for Fail ${fail.getName()}: ${mFail.probabilityOf400()}"
)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class ACAllOrNoneEMTest : AIClassificationEMTestBase() {
testRunEM(AIResponseClassifierModel.DETERMINISTIC)
}

@Disabled

@Test
fun testRunGaussian(){
testRunEM(AIResponseClassifierModel.GAUSSIAN)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class ACArithmeticEMTest : AIClassificationEMTestBase() {
testRunEM(AIResponseClassifierModel.DETERMINISTIC)
}

@Disabled

@Test
fun testRunGaussian(){
testRunEM(AIResponseClassifierModel.GAUSSIAN)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@ class ACBasicEMTest : AIClassificationEMTestBase() {
}
}

@Disabled
@Test
fun testRunDeterministic(){
testRunEM(AIResponseClassifierModel.DETERMINISTIC)
}

@Disabled

@Test
fun testRunGaussian(){
testRunEM(AIResponseClassifierModel.GAUSSIAN)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class ACImplyEMTest : AIClassificationEMTestBase() {
testRunEM(AIResponseClassifierModel.DETERMINISTIC)
}

@Disabled

@Test
fun testRunGaussian(){
testRunEM(AIResponseClassifierModel.GAUSSIAN)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class ACMixedEMTest : AIClassificationEMTestBase() {
testRunEM(AIResponseClassifierModel.DETERMINISTIC)
}

@Disabled

@Test
fun testRunGaussian(){
testRunEM(AIResponseClassifierModel.GAUSSIAN)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class ACOnlyOneEMTest : AIClassificationEMTestBase() {
testRunEM(AIResponseClassifierModel.DETERMINISTIC)
}

@Disabled

@Test
fun testRunGaussian(){
testRunEM(AIResponseClassifierModel.GAUSSIAN)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class ACOrEMTest : AIClassificationEMTestBase() {
testRunEM(AIResponseClassifierModel.DETERMINISTIC)
}

@Disabled

@Test
fun testRunGaussian(){
testRunEM(AIResponseClassifierModel.GAUSSIAN)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ class ACRequiredEMTest : AIClassificationEMTestBase() {
testRunEM(AIResponseClassifierModel.DETERMINISTIC)
}

@Disabled

@Test
fun testRunGaussian(){
testRunEM(AIResponseClassifierModel.GAUSSIAN)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class ACZeroOrOneEMTest : AIClassificationEMTestBase() {
testRunEM(AIResponseClassifierModel.DETERMINISTIC)
}

@Disabled

@Test
fun testRunGaussian(){
testRunEM(AIResponseClassifierModel.GAUSSIAN)
Expand Down
4 changes: 2 additions & 2 deletions core/src/main/kotlin/org/evomaster/core/EMConfig.kt
Original file line number Diff line number Diff line change
Expand Up @@ -1563,7 +1563,7 @@ class EMConfig {
@PercentageAsProbability(false)
@Cfg("If using THRESHOLD for AI Classification Repair, specify its value." +
" All classifications with probability equal or above such threshold value will be accepted.")
var classificationRepairThreshold = 0.8
var classificationRepairThreshold = 0.5
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are these changes based on latest experiments?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah... i see you wrote it in the description of this PR... :)


@Experimental
@Cfg("Specify how the classification of actions's response will be used to execute a possible repair on the action.")
Expand Down Expand Up @@ -1602,7 +1602,7 @@ class EMConfig {
@Experimental
@Cfg("Minimum confidence threshold required for the AI response classifier to decide" +
"whether to send a request as-is or attempt a repair.")
var aIResponseClassifierWeaknessThreshold = 0.4
var aIResponseClassifierWeaknessThreshold = 0.8
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are these changes based on latest experiments?


@Cfg("Output a JSON file representing statistics of the fuzzing session, written in the WFC Report format." +
" This also includes a index.html web application to visualize such data.")
Expand Down
4 changes: 2 additions & 2 deletions docs/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ There are 3 types of options:
|Options|Description|
|---|---|
|`aIClassificationMetrics`| __Enum__. Determines which metric-tracking strategy is used by the AI response classifier. *Valid values*: `TIME_WINDOW, FULL_HISTORY`. *Default value*: `FULL_HISTORY`.|
|`aIResponseClassifierWeaknessThreshold`| __Double__. Minimum confidence threshold required for the AI response classifier to decide whether to send a request as-is or attempt a repair. *Default value*: `0.4`.|
|`aIResponseClassifierWeaknessThreshold`| __Double__. Minimum confidence threshold required for the AI response classifier to decide whether to send a request as-is or attempt a repair. *Default value*: `0.8`.|
|`abstractInitializationGeneToMutate`| __Boolean__. During mutation, whether to abstract genes for repeated SQL actions. *Default value*: `false`.|
|`aiClassifierRepairActivation`| __Enum__. Specify how the classification of an action's response will be used to execute a possible repair on the action. *Valid values*: `PROBABILITY, THRESHOLD`. *Default value*: `THRESHOLD`.|
|`aiEncoderType`| __Enum__. The encoding strategy applied to transform raw data to the encoded version. *Valid values*: `RAW, NORMAL, UNIT_NORMAL`. *Default value*: `RAW`.|
Expand All @@ -259,7 +259,7 @@ There are 3 types of options:
|`breederTruncationFraction`| __Double__. Breeder GA: fraction of top individuals to keep in parents pool (truncation). *Constraints*: `probability 0.0-1.0`. *Default value*: `0.5`.|
|`callbackURLHostname`| __String__. HTTP callback verifier hostname. Default is set to 'localhost'. If the SUT is running inside a container (i.e., Docker), 'localhost' will refer to the container. This can be used to change the hostname. *Default value*: `localhost`.|
|`cgaNeighborhoodModel`| __Enum__. Cellular GA: neighborhood model (RING, L5, C9, C13). *Valid values*: `RING, L5, C9, C13`. *Default value*: `RING`.|
|`classificationRepairThreshold`| __Double__. If using THRESHOLD for AI Classification Repair, specify its value. All classifications with probability equal or above such threshold value will be accepted. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.8`.|
|`classificationRepairThreshold`| __Double__. If using THRESHOLD for AI Classification Repair, specify its value. All classifications with probability equal or above such threshold value will be accepted. *Constraints*: `probability 0.0-1.0`. *Default value*: `0.5`.|
|`discoveredInfoRewardedInFitness`| __Boolean__. If there is new discovered information from a test execution, reward it in the fitness function. *Default value*: `false`.|
|`dockerLocalhost`| __Boolean__. Replace references to 'localhost' to point to the actual host machine. Only needed when running EvoMaster inside Docker. *Default value*: `false`.|
|`dpcTargetTestSize`| __Int__. Specify a max size of a test to be targeted when either DPC_INCREASING or DPC_DECREASING is enabled. *Default value*: `1`.|
Expand Down