use of org.kie.kogito.explainability.model.DataDistribution in project kogito-apps by kiegroup.
the class DummyModelsLimeExplainerTest method testMapOneFeatureToOutputRegression.
/**
 * Explains a single prediction of the model returned by {@code TestUtils.getFeaturePassModel(idx)}
 * with LIME, then checks the produced saliencies, the explanation stability and the local
 * saliency precision / recall / F1 computed over 100 mocked inputs.
 *
 * @param seed seed handed to the {@code PerturbationContext} used by the explainer
 * @throws Exception if a prediction or an explanation cannot be obtained
 */
@ParameterizedTest
@ValueSource(longs = { 0 })
void testMapOneFeatureToOutputRegression(long seed) throws Exception {
    Random random = new Random();
    int idx = 1;

    // ArrayList (as already used for the sampled inputs below) instead of LinkedList:
    // the list is only appended to and then read, so cheap indexed access is preferable.
    List<Feature> features = new ArrayList<>();
    features.add(TestUtils.getMockedNumericFeature(100));
    features.add(TestUtils.getMockedNumericFeature(20));
    features.add(TestUtils.getMockedNumericFeature(0.1));
    PredictionInput input = new PredictionInput(features);

    // Build the prediction to be explained from the model's own output for the input.
    PredictionProvider model = TestUtils.getFeaturePassModel(idx);
    List<PredictionOutput> outputs = model.predictAsync(List.of(input))
            .get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit());
    Prediction prediction = new SimplePrediction(input, outputs.get(0));

    LimeConfig limeConfig = new LimeConfig()
            .withSamples(100)
            .withPerturbationContext(new PerturbationContext(seed, random, 1));
    LimeExplainer limeExplainer = new LimeExplainer(limeConfig);
    Map<String, Saliency> saliencyMap = limeExplainer.explainAsync(prediction, model)
            .get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit());

    // Every saliency must rank all three features and reach an impact score of exactly 1.
    for (Saliency saliency : saliencyMap.values()) {
        assertNotNull(saliency);
        List<FeatureImportance> topFeatures = saliency.getTopFeatures(3);
        assertEquals(3, topFeatures.size());
        assertEquals(1d, ExplainabilityMetrics.impactScore(model, prediction, topFeatures));
    }

    int topK = 1;
    double minimumPositiveStabilityRate = 0.5;
    double minimumNegativeStabilityRate = 0.5;
    TestUtils.assertLimeStability(model, prediction, limeExplainer, topK, minimumPositiveStabilityRate, minimumNegativeStabilityRate);

    // 100 inputs of three mocked numeric features each back the data distribution used by the metrics.
    List<PredictionInput> inputs = new ArrayList<>();
    for (int i = 0; i < 100; i++) {
        List<Feature> fs = new ArrayList<>();
        fs.add(TestUtils.getMockedNumericFeature());
        fs.add(TestUtils.getMockedNumericFeature());
        fs.add(TestUtils.getMockedNumericFeature());
        inputs.add(new PredictionInput(fs));
    }
    DataDistribution distribution = new PredictionInputsDataDistribution(inputs);

    int k = 2;
    int chunkSize = 10;
    String decision = "feature-" + idx;
    double precision = ExplainabilityMetrics.getLocalSaliencyPrecision(decision, model, limeExplainer, distribution, k, chunkSize);
    assertThat(precision).isZero();
    double recall = ExplainabilityMetrics.getLocalSaliencyRecall(decision, model, limeExplainer, distribution, k, chunkSize);
    assertThat(recall).isEqualTo(1);
    double f1 = ExplainabilityMetrics.getLocalSaliencyF1(decision, model, limeExplainer, distribution, k, chunkSize);
    assertThat(f1).isZero();
}
use of org.kie.kogito.explainability.model.DataDistribution in project kogito-apps by kiegroup.
the class HighScoreNumericFeatureZonesProviderTest method testNonEmptyData.
/**
 * Creates four numerical features, each backed by a distribution of four random numeric values,
 * and checks that the high score feature zones computed for them form a non-null map of size four.
 */
@Test
void testNonEmptyData() {
    int featureCount = 4;
    // Fixed seed so that the random values generated below are the same on every run.
    PerturbationContext perturbationContext = new PerturbationContext(new Random(0), 1);
    PredictionProvider predictionProvider = TestUtils.getSumThresholdModel(0.1, 0.1);

    List<Feature> features = new ArrayList<>();
    List<FeatureDistribution> featureDistributions = new ArrayList<>();
    for (int featureIndex = 0; featureIndex < featureCount; featureIndex++) {
        Feature feature = FeatureFactory.newNumericalFeature("f-" + featureIndex, Double.NaN);
        features.add(feature);

        List<Value> samples = new ArrayList<>();
        while (samples.size() < 4) {
            samples.add(Type.NUMBER.randomValue(perturbationContext));
        }
        featureDistributions.add(new GenericFeatureDistribution(feature, samples));
    }

    DataDistribution dataDistribution = new IndependentFeaturesDataDistribution(featureDistributions);
    Map<String, HighScoreNumericFeatureZones> zonesByFeature =
            HighScoreNumericFeatureZonesProvider.getHighScoreFeatureZones(dataDistribution, predictionProvider, features, 10);

    assertThat(zonesByFeature).isNotNull().hasSize(featureCount);
}
use of org.kie.kogito.explainability.model.DataDistribution in project kogito-apps by kiegroup.
the class DataUtilsTest method testBootstrap.
/**
 * Bootstraps the feature distributions of a data distribution holding one mocked numeric feature
 * with four random values, and checks that the result contains a distribution for that feature
 * with exactly 10 samples.
 */
@Test
void testBootstrap() {
    PerturbationContext perturbationContext = new PerturbationContext(random, 1);
    List<Value> values = new ArrayList<>();
    for (int count = 0; count < 4; count++) {
        values.add(Type.NUMBER.randomValue(perturbationContext));
    }

    Feature feature = TestUtils.getMockedNumericFeature();
    FeatureDistribution sourceDistribution = new GenericFeatureDistribution(feature, values);
    DataDistribution dataDistribution = new IndependentFeaturesDataDistribution(List.of(sourceDistribution));

    Map<String, FeatureDistribution> bootstrapped =
            DataUtils.boostrapFeatureDistributions(dataDistribution, perturbationContext, 10, 1, 500, new HashMap<>());
    assertThat(bootstrapped).isNotNull().isNotEmpty();

    // The bootstrapped distributions are looked up by feature name.
    FeatureDistribution actual = bootstrapped.get(feature.getName());
    assertThat(actual).isNotNull();
    assertThat(actual.getAllSamples()).isNotNull().hasSize(10);
}
use of org.kie.kogito.explainability.model.DataDistribution in project kogito-apps by kiegroup.
the class DataUtilsTest method testRandomDistributionGeneration.
/**
 * Checks that a randomly generated data distribution, its list of feature distributions and
 * every element of that list are non-null.
 */
@Test
void testRandomDistributionGeneration() {
    DataDistribution generated = DataUtils.generateRandomDataDistribution(10, 10, random);
    assertNotNull(generated);

    List<FeatureDistribution> featureDistributions = generated.asFeatureDistributions();
    assertNotNull(featureDistributions);
    for (FeatureDistribution each : featureDistributions) {
        assertNotNull(each);
    }
}
use of org.kie.kogito.explainability.model.DataDistribution in project kogito-apps by kiegroup.
the class LimeConfigOptimizerTest method testSameConfig.
/**
 * Runs the LIME config optimizer twice, each time starting from an identically seeded initial
 * config and the same predictions, and checks that both runs produce the same config values.
 *
 * @throws ExecutionException if computing the model predictions fails
 * @throws InterruptedException if the thread is interrupted while waiting for the predictions
 */
@Test
void testSameConfig() throws ExecutionException, InterruptedException {
    long seed = 0;
    List<LimeConfig> optimizedConfigs = new ArrayList<>();
    PredictionProvider model = TestUtils.getSumSkipModel(1);

    // Seed the generator so the test data does not change from run to run; an unseeded Random
    // here made any failure impossible to replay.
    // NOTE(review): sample(...) may use its own source of randomness - confirm if full
    // reproducibility of the sampled inputs is required.
    DataDistribution dataDistribution = DataUtils.generateRandomDataDistribution(5, 100, new Random(seed));
    List<PredictionInput> samples = dataDistribution.sample(3);
    List<PredictionOutput> predictionOutputs = model.predictAsync(samples).get();
    List<Prediction> predictions = DataUtils.getPredictions(samples, predictionOutputs);

    // Two independent optimizer runs, each with a fresh Random and the same seed.
    for (int i = 0; i < 2; i++) {
        Random random = new Random();
        LimeConfig initialConfig = new LimeConfig()
                .withSamples(10)
                .withPerturbationContext(new PerturbationContext(seed, random, 1));
        LimeConfigOptimizer limeConfigOptimizer = new LimeConfigOptimizer()
                .withDeterministicExecution(true)
                .withStepCountLimit(10)
                .withTimeLimit(10);
        LimeConfig optimizedConfig = limeConfigOptimizer.optimize(initialConfig, predictions, model);
        optimizedConfigs.add(optimizedConfig);
    }

    LimeConfig first = optimizedConfigs.get(0);
    LimeConfig second = optimizedConfigs.get(1);
    assertThat(first.getNoOfRetries()).isEqualTo(second.getNoOfRetries());
    assertThat(first.getNoOfSamples()).isEqualTo(second.getNoOfSamples());
    assertThat(first.getProximityFilteredDatasetMinimum()).isEqualTo(second.getProximityFilteredDatasetMinimum());
    assertThat(first.getProximityKernelWidth()).isEqualTo(second.getProximityKernelWidth());
    assertThat(first.getProximityThreshold()).isEqualTo(second.getProximityThreshold());
    assertThat(first.isProximityFilter()).isEqualTo(second.isProximityFilter());
    assertThat(first.isAdaptDatasetVariance()).isEqualTo(second.isAdaptDatasetVariance());
    assertThat(first.isPenalizeBalanceSparse()).isEqualTo(second.isPenalizeBalanceSparse());
    assertThat(first.getEncodingParams().getNumericTypeClusterGaussianFilterWidth()).isEqualTo(second.getEncodingParams().getNumericTypeClusterGaussianFilterWidth());
    assertThat(first.getEncodingParams().getNumericTypeClusterThreshold()).isEqualTo(second.getEncodingParams().getNumericTypeClusterThreshold());
    assertThat(first.getSeparableDatasetRatio()).isEqualTo(second.getSeparableDatasetRatio());
    assertThat(first.getPerturbationContext().getNoOfPerturbations()).isEqualTo(second.getPerturbationContext().getNoOfPerturbations());
}
Aggregations