use of org.kie.kogito.explainability.model.DataDistribution in project kogito-apps by kiegroup.
the class DummyModelsLimeExplainerTest method testTextSpamClassification.
/**
 * Explains the dummy text classifier's prediction for three full-text features with LIME,
 * then checks the impact score of the top positive feature, the stability of the explanation,
 * and the local saliency precision / recall / F1 for the "spam" decision.
 */
@ParameterizedTest
@ValueSource(longs = { 0 })
void testTextSpamClassification(long seed) throws Exception {
    // Whitespace tokenizer shared by all three text features.
    Function<String, List<String>> splitOnSpaces = text -> Arrays.asList(text.split(" "));

    List<Feature> textFeatures = new LinkedList<>();
    textFeatures.add(FeatureFactory.newFulltextFeature("f1", "we go here and there", splitOnSpaces));
    textFeatures.add(FeatureFactory.newFulltextFeature("f2", "please give me some money", splitOnSpaces));
    textFeatures.add(FeatureFactory.newFulltextFeature("f3", "dear friend, please reply", splitOnSpaces));
    PredictionInput input = new PredictionInput(textFeatures);

    PredictionProvider model = TestUtils.getDummyTextClassifier();
    List<PredictionOutput> outputs = model.predictAsync(List.of(input))
            .get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit());
    Prediction prediction = new SimplePrediction(input, outputs.get(0));

    PerturbationContext perturbationContext = new PerturbationContext(seed, new Random(), 1);
    LimeConfig limeConfig = new LimeConfig()
            .withSamples(100)
            .withPerturbationContext(perturbationContext);
    LimeExplainer limeExplainer = new LimeExplainer(limeConfig);

    Map<String, Saliency> saliencyMap = limeExplainer.explainAsync(prediction, model)
            .toCompletableFuture()
            .get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit());

    // Each saliency must expose exactly one top positive feature with an impact score of 1.
    for (Saliency current : saliencyMap.values()) {
        assertNotNull(current);
        List<FeatureImportance> strongestPositive = current.getPositiveFeatures(1);
        assertEquals(1, strongestPositive.size());
        assertEquals(1d, ExplainabilityMetrics.impactScore(model, prediction, strongestPositive));
    }

    // Stability check: top-1 features, positive rate >= 0.5, negative rate >= 0.2.
    TestUtils.assertLimeStability(model, prediction, limeExplainer, 1, 0.5, 0.2);

    // Build 100 inputs of three mocked numeric features each to back the data distribution.
    List<PredictionInput> mockedInputs = new ArrayList<>();
    for (int sample = 0; sample < 100; sample++) {
        List<Feature> mockedFeatures = new LinkedList<>();
        for (int slot = 0; slot < 3; slot++) {
            mockedFeatures.add(TestUtils.getMockedNumericFeature());
        }
        mockedInputs.add(new PredictionInput(mockedFeatures));
    }
    DataDistribution distribution = new PredictionInputsDataDistribution(mockedInputs);

    String decision = "spam";
    int topFeatureCount = 2;
    int chunkSize = 10;

    double precision = ExplainabilityMetrics.getLocalSaliencyPrecision(decision, model, limeExplainer, distribution, topFeatureCount, chunkSize);
    assertThat(precision).isEqualTo(1);

    double recall = ExplainabilityMetrics.getLocalSaliencyRecall(decision, model, limeExplainer, distribution, topFeatureCount, chunkSize);
    assertThat(recall).isEqualTo(1);

    double f1 = ExplainabilityMetrics.getLocalSaliencyF1(decision, model, limeExplainer, distribution, topFeatureCount, chunkSize);
    assertThat(f1).isEqualTo(1);
}
use of org.kie.kogito.explainability.model.DataDistribution in project kogito-apps by kiegroup.
the class HighScoreNumericFeatureZonesProviderTest method testEmptyData.
/**
 * With no features and an empty independent-features distribution, the provider must
 * return a non-null, empty map of high score feature zones.
 */
@Test
void testEmptyData() {
    List<Feature> noFeatures = new ArrayList<>();
    PredictionProvider model = TestUtils.getSumThresholdModel(0.1, 0.1);
    DataDistribution emptyDistribution = new IndependentFeaturesDataDistribution(new ArrayList<>());

    Map<String, HighScoreNumericFeatureZones> zones =
            HighScoreNumericFeatureZonesProvider.getHighScoreFeatureZones(emptyDistribution, model, noFeatures, 10);

    assertThat(zones).isNotNull().isEmpty();
}
use of org.kie.kogito.explainability.model.DataDistribution in project kogito-apps by kiegroup.
the class LimeConfigOptimizerTest method assertConfigOptimized.
/**
 * Runs the given optimizer against predictions sampled from a random data distribution and
 * verifies that it returns a non-null configuration that is a different instance from the
 * initial one.
 *
 * @param limeConfigOptimizer the optimizer under test
 */
private void assertConfigOptimized(LimeConfigOptimizer limeConfigOptimizer) throws InterruptedException, java.util.concurrent.ExecutionException {
    // Fixed seed so the generated distribution is reproducible across runs.
    Random seededRandom = new Random(4);
    DataDistribution distribution = DataUtils.generateRandomDataDistribution(5, 100, seededRandom);
    List<PredictionInput> sampledInputs = distribution.sample(10);

    PredictionProvider model = TestUtils.getSumSkipModel(1);
    // NOTE(review): this get() waits without a timeout.
    List<PredictionOutput> sampledOutputs = model.predictAsync(sampledInputs).get();
    List<Prediction> predictions = DataUtils.getPredictions(sampledInputs, sampledOutputs);

    LimeConfig startingConfig = new LimeConfig().withSamples(10);
    LimeConfig tunedConfig = limeConfigOptimizer.optimize(startingConfig, predictions, model);

    assertThat(tunedConfig).isNotNull();
    Assertions.assertThat(tunedConfig).isNotSameAs(startingConfig);
}
use of org.kie.kogito.explainability.model.DataDistribution in project kogito-apps by kiegroup.
the class DataUtilsTest method testReadCsv.
/**
 * Reads the {@code /mini-train.csv} classpath resource with a 13-column schema and checks
 * that the resulting data distribution is non-null and holds 10 samples.
 */
@Test
void testReadCsv() throws IOException, java.net.URISyntaxException {
    // Schema: one categorical column, ten boolean columns, two numeric columns.
    List<Type> schema = new ArrayList<>();
    schema.add(Type.CATEGORICAL);
    for (int i = 0; i < 10; i++) {
        schema.add(Type.BOOLEAN);
    }
    schema.add(Type.NUMBER);
    schema.add(Type.NUMBER);

    java.net.URL resource = getClass().getResource("/mini-train.csv");
    // Fail with a clear assertion instead of a NullPointerException when the resource is missing.
    assertThat(resource).isNotNull();

    // URL.getFile() returns the raw, still URL-encoded path (e.g. "%20" for spaces, "/C:/..." on
    // Windows), which Paths.get(String) cannot resolve; going through the URI decodes it properly.
    DataDistribution dataDistribution = DataUtils.readCSV(Paths.get(resource.toURI()), schema);

    assertThat(dataDistribution).isNotNull();
    assertThat(dataDistribution.getAllSamples()).hasSize(10);
}
use of org.kie.kogito.explainability.model.DataDistribution in project kogito-apps by kiegroup.
the class PmmlScorecardCategoricalLimeExplainerTest method testPMMLScorecardCategorical.
/**
 * Explains a prediction of the model returned by {@code getModel()} with LIME and checks that
 * the top-2 features of every saliency have a positive impact score, that local saliency
 * stability validation does not throw, and that the local saliency F1 lies in [0, 1].
 */
@Test
void testPMMLScorecardCategorical() throws Exception {
    PredictionInput input = getTestInput();

    PerturbationContext perturbationContext = new PerturbationContext(0L, new Random(), 1);
    LimeConfig limeConfig = new LimeConfig()
            .withSamples(10)
            .withPerturbationContext(perturbationContext);
    LimeExplainer limeExplainer = new LimeExplainer(limeConfig);

    PredictionProvider model = getModel();
    List<PredictionOutput> modelOutputs = model.predictAsync(List.of(input))
            .get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit());
    assertThat(modelOutputs).isNotNull().isNotEmpty();

    PredictionOutput firstOutput = modelOutputs.get(0);
    assertThat(firstOutput).isNotNull();
    Prediction prediction = new SimplePrediction(input, firstOutput);

    Map<String, Saliency> saliencyMap = limeExplainer.explainAsync(prediction, model)
            .get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit());

    // The two top features of each saliency must have a strictly positive impact score.
    for (Saliency current : saliencyMap.values()) {
        assertThat(current).isNotNull();
        double impact = ExplainabilityMetrics.impactScore(model, prediction, current.getTopFeatures(2));
        assertThat(impact).isGreaterThan(0d);
    }

    // Stability validation: top-1 features, 0.4 for both the positive and negative thresholds.
    assertDoesNotThrow(() -> ValidationUtils.validateLocalSaliencyStability(model, prediction, limeExplainer, 1, 0.4, 0.4));

    DataDistribution distribution = new PredictionInputsDataDistribution(getSamples());
    // F1 for the "score" decision, using k = 1 and chunks of 2.
    double f1 = ExplainabilityMetrics.getLocalSaliencyF1("score", model, limeExplainer, distribution, 1, 2);
    AssertionsForClassTypes.assertThat(f1).isBetween(0d, 1d);
}
Aggregations