use of org.kie.kogito.explainability.model.DataDistribution in project kogito-apps by kiegroup.
the class LimeConfigTest method testDataDistribution.
/**
 * Checks that the {@link DataDistribution} handed to
 * {@code withDataDistribution} is the one later returned by
 * {@code getDataDistribution}.
 */
@Test
void testDataDistribution() {
    DataDistribution expectedDistribution = mock(DataDistribution.class);
    LimeConfig baseConfig = new LimeConfig();
    // assert on the instance returned by the "with" call, exactly as a caller chaining it would
    LimeConfig configured = baseConfig.withDataDistribution(expectedDistribution);
    assertThat(configured.getDataDistribution()).isEqualTo(expectedDistribution);
}
use of org.kie.kogito.explainability.model.DataDistribution in project kogito-apps by kiegroup.
the class PartialDependencePlotExplainer method explainFromDataDistribution.
/**
 * Builds partial dependence graphs from a data distribution: one graph for every
 * combination of feature distribution and output index.
 *
 * @param model the prediction provider passed on to {@code getPartialDependenceGraph}
 * @param outputSize number of outputs; a graph is created for each index in {@code [0, outputSize)}
 * @param dataDistribution source of the per-feature distributions and of the sampled training data
 * @return the generated graphs, grouped by feature and, within a feature, ordered by output index
 * @throws InterruptedException propagated from {@code getPartialDependenceGraph}
 * @throws ExecutionException propagated from {@code getPartialDependenceGraph}
 * @throws TimeoutException propagated from {@code getPartialDependenceGraph}
 */
private List<PartialDependenceGraph> explainFromDataDistribution(PredictionProvider model, int outputSize, DataDistribution dataDistribution) throws InterruptedException, ExecutionException, TimeoutException {
    long start = System.currentTimeMillis();
    List<PartialDependenceGraph> pdps = new ArrayList<>();
    List<FeatureDistribution> featureDistributions = dataDistribution.asFeatureDistributions();
    // fetch entire data distributions for all features
    List<PredictionInput> trainingData = dataDistribution.sample(config.getSeriesLength());
    // Single loop-invariant ordering: natural numeric order first, then alphanumeric order
    // among values whose numbers compare equal (e.g. when Value#asNumber is NaN).
    // This yields the same order as a stable sort by string followed by a stable sort by number,
    // without sorting twice or allocating a comparator per comparison.
    Comparator<Value> byNumberThenString = Comparator.comparingDouble(Value::asNumber).thenComparing(Value::asString);
    // create a PDP for each feature
    for (FeatureDistribution featureDistribution : featureDistributions) {
        // generate (further) samples for the feature under analysis
        // TBD: maybe just reuse trainingData
        List<Value> xsValues = featureDistribution.sample(config.getSeriesLength()).stream()
                .sorted(byNumberThenString)
                .distinct()
                .collect(Collectors.toList());
        // transform sampled Values into Features
        List<Feature> featureXSvalues = xsValues.stream()
                .map(v -> FeatureFactory.copyOf(featureDistribution.getFeature(), v))
                .collect(Collectors.toList());
        // create a PDP for each feature and each output
        for (int outputIndex = 0; outputIndex < outputSize; outputIndex++) {
            PartialDependenceGraph partialDependenceGraph = getPartialDependenceGraph(model, trainingData, xsValues, featureXSvalues, outputIndex);
            pdps.add(partialDependenceGraph);
        }
    }
    long end = System.currentTimeMillis();
    LOGGER.debug("explanation time: {}ms", (end - start));
    return pdps;
}
use of org.kie.kogito.explainability.model.DataDistribution in project kogito-apps by kiegroup.
the class LimeExplainer method getPerturbedInputs.
/**
 * Produces {@code executionConfig.getNoOfSamples()} perturbed copies of the given features.
 * <p>
 * When the configured data distribution is not empty, feature distributions are bootstrapped
 * from it (optionally using high score numeric feature zones) and handed to the perturbation
 * step; otherwise an empty distribution map is used.
 *
 * @param features the features to perturb
 * @param executionConfig supplies sample count, data distribution, perturbation context and bootstrap settings
 * @param predictionProvider model used only when high score feature zones are computed
 * @return one {@link PredictionInput} per requested sample
 */
private List<PredictionInput> getPerturbedInputs(List<Feature> features, LimeConfig executionConfig, PredictionProvider predictionProvider) {
    final int size = executionConfig.getNoOfSamples();
    final DataDistribution dataDistribution = executionConfig.getDataDistribution();
    final PerturbationContext perturbationContext = executionConfig.getPerturbationContext();

    final Map<String, FeatureDistribution> featureDistributionsMap;
    if (dataDistribution.isEmpty()) {
        // nothing to bootstrap from
        featureDistributionsMap = new HashMap<>();
    } else {
        final int max = executionConfig.getBoostrapInputs();
        final Map<String, HighScoreNumericFeatureZones> numericFeatureZonesMap = executionConfig.isHighScoreFeatureZones()
                ? HighScoreNumericFeatureZonesProvider.getHighScoreFeatureZones(dataDistribution, predictionProvider, features, max)
                : new HashMap<>();
        // generate feature distributions, if possible
        featureDistributionsMap = DataUtils.boostrapFeatureDistributions(dataDistribution, perturbationContext, 2 * size, 1, Math.min(size, max), numericFeatureZonesMap);
    }

    final List<PredictionInput> perturbedInputs = new ArrayList<>();
    int remaining = size;
    while (remaining > 0) {
        perturbedInputs.add(new PredictionInput(DataUtils.perturbFeatures(features, perturbationContext, featureDistributionsMap)));
        remaining--;
    }
    return perturbedInputs;
}
use of org.kie.kogito.explainability.model.DataDistribution in project kogito-apps by kiegroup.
the class TrafficViolationDmnLimeExplainerTest method testTrafficViolationDMNExplanation.
/**
 * Explains a prediction with LIME (10 samples) and checks that:
 * every saliency is non-null and lists "Actual Speed" or "Speed Limit" among its top 3 features,
 * local saliency stability validation does not throw, and the local saliency F1 for the
 * "Fine" decision over 10 perturbed inputs lies in [0.5, 1].
 */
@Test
void testTrafficViolationDMNExplanation() throws ExecutionException, InterruptedException, TimeoutException {
    PredictionProvider model = getModel();
    PredictionInput predictionInput = getTestInput();
    List<PredictionOutput> outputs = model.predictAsync(List.of(predictionInput))
            .get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit());
    Prediction prediction = new SimplePrediction(predictionInput, outputs.get(0));

    // the same perturbation context (and its Random) is shared by the explainer and the sampling below
    PerturbationContext perturbationContext = new PerturbationContext(0L, new Random(), 1);
    LimeExplainer limeExplainer = new LimeExplainer(
            new LimeConfig().withSamples(10).withPerturbationContext(perturbationContext));

    Map<String, Saliency> saliencies = limeExplainer.explainAsync(prediction, model)
            .get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit());
    for (Saliency saliency : saliencies.values()) {
        assertNotNull(saliency);
        List<String> topFeatureNames = saliency.getTopFeatures(3).stream()
                .map(f -> f.getFeature().getName())
                .collect(Collectors.toList());
        boolean mentionsSpeed = topFeatureNames.contains("Actual Speed") || topFeatureNames.contains("Speed Limit");
        assertTrue(mentionsSpeed);
    }

    assertDoesNotThrow(() -> ValidationUtils.validateLocalSaliencyStability(model, prediction, limeExplainer, 1, 0.3, 0.3));

    // build a small data distribution out of perturbed variants of the test input
    List<PredictionInput> perturbedInputs = new ArrayList<>();
    int produced = 0;
    while (produced < 10) {
        perturbedInputs.add(new PredictionInput(DataUtils.perturbFeatures(predictionInput.getFeatures(), perturbationContext)));
        produced++;
    }
    DataDistribution distribution = new PredictionInputsDataDistribution(perturbedInputs);

    String decision = "Fine";
    int topK = 2;
    int chunkSize = 5;
    double f1 = ExplainabilityMetrics.getLocalSaliencyF1(decision, model, limeExplainer, distribution, topK, chunkSize);
    AssertionsForClassTypes.assertThat(f1).isBetween(0.5d, 1d);
}
use of org.kie.kogito.explainability.model.DataDistribution in project kogito-apps by kiegroup.
the class PmmlRegressionCategoricalLimeExplainerTest method testPMMLRegressionCategorical.
/**
 * Explains a prediction with LIME (10 samples, adaptive variance) and checks that:
 * the model returns at least one non-null output, every saliency is non-null with an
 * impact score of 1 for its top 2 features, local saliency stability validation does not
 * throw, and the local saliency F1 for the "result" decision lies in [0, 1].
 * Currently disabled, see KOGITO-6154.
 */
@Disabled("See KOGITO-6154")
@Test
void testPMMLRegressionCategorical() throws Exception {
    PredictionInput input = getTestInput();
    PerturbationContext perturbationContext = new PerturbationContext(0L, new Random(), 1);
    LimeExplainer limeExplainer = new LimeExplainer(
            new LimeConfig().withSamples(10).withAdaptiveVariance(true).withPerturbationContext(perturbationContext));
    PredictionProvider model = getModel();

    List<PredictionOutput> outputs = model.predictAsync(List.of(input))
            .get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit());
    assertThat(outputs).isNotNull().isNotEmpty();
    PredictionOutput firstOutput = outputs.get(0);
    assertThat(firstOutput).isNotNull();
    Prediction prediction = new SimplePrediction(input, firstOutput);

    Map<String, Saliency> saliencies = limeExplainer.explainAsync(prediction, model)
            .get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit());
    for (Saliency saliency : saliencies.values()) {
        assertThat(saliency).isNotNull();
        double impact = ExplainabilityMetrics.impactScore(model, prediction, saliency.getTopFeatures(2));
        assertThat(impact).isEqualTo(1d);
    }

    assertDoesNotThrow(() -> ValidationUtils.validateLocalSaliencyStability(model, prediction, limeExplainer, 1, 0.5, 0.5));

    // F1 of the local saliencies over the sample inputs
    DataDistribution distribution = new PredictionInputsDataDistribution(getSamples());
    String decision = "result";
    int topK = 1;
    int chunkSize = 2;
    double f1 = ExplainabilityMetrics.getLocalSaliencyF1(decision, model, limeExplainer, distribution, topK, chunkSize);
    AssertionsForClassTypes.assertThat(f1).isBetween(0d, 1d);
}
Aggregations