use of org.kie.kogito.explainability.model.PredictionInputsDataDistribution in project kogito-apps by kiegroup.
the class DataUtils method readCSV.
/**
 * Parse a CSV file into a {@link DataDistribution}.
 * <p>
 * The first record of the file is consumed as the header and provides the feature names. Every following
 * record becomes one {@link PredictionInput}: the i-th cell is wrapped in a {@link Value} and paired with the
 * i-th {@link Type} of the schema and the i-th header name.
 *
 * @param file the path to the CSV file
 * @param schema the ordered {@link Type}s (one per column) used to assign a {@link Type} to each feature
 * @return a {@link DataDistribution} holding one {@link PredictionInput} per CSV record
 * @throws IOException if the CSV file cannot be read
 * @throws MalformedInputException if the number of cells of a record differs from the size of the schema
 */
public static DataDistribution readCSV(Path file, List<Type> schema) throws IOException {
    List<PredictionInput> parsedInputs = new ArrayList<>();
    try (BufferedReader csvReader = Files.newBufferedReader(file)) {
        Iterable<CSVRecord> csvRecords = CSVFormat.RFC4180.withFirstRecordAsHeader().parse(csvReader);
        for (CSVRecord csvRecord : csvRecords) {
            int columns = csvRecord.size();
            // reject the record right away when it does not line up with the schema
            if (schema.size() != columns) {
                throw new MalformedInputException(columns);
            }
            List<Feature> rowFeatures = new ArrayList<>();
            for (int column = 0; column < columns; column++) {
                String cell = csvRecord.get(column);
                Type columnType = schema.get(column);
                String columnName = csvRecord.getParser().getHeaderNames().get(column);
                rowFeatures.add(new Feature(columnName, columnType, new Value(cell)));
            }
            parsedInputs.add(new PredictionInput(rowFeatures));
        }
    }
    return new PredictionInputsDataDistribution(parsedInputs);
}
use of org.kie.kogito.explainability.model.PredictionInputsDataDistribution in project kogito-apps by kiegroup.
the class HighScoreNumericFeatureZonesProvider method getHighScoreFeatureZones.
/**
 * Get a map of feature-name -> high score feature zones.
 * <p>
 * Up to {@code maxNoOfSamples} inputs are sampled from the data distribution and turned into predictions sorted
 * by score through {@code DataUtils.getScoreSortedPredictions}. The score of a prediction is the sum of the
 * scores of its outputs. The mean score is used as a threshold: only the predictions whose score is strictly
 * greater than the mean are kept. For each feature of type {@code Type.NUMBER}, the distinct values found at the
 * same position in the inputs of the kept predictions become the high score points, with an associated tolerance
 * equal to half the stdDev of those points.
 * <p>
 * Failures while obtaining the sorted predictions are logged and not rethrown. An empty map is returned when no
 * prediction is available or when the first and the last sorted predictions have the same score.
 *
 * @param dataDistribution a data distribution
 * @param predictionProvider the model used to score the inputs
 * @param features the list of features to associate high score points with
 * @param maxNoOfSamples max no. of inputs used for discovering high score zones
 * @return a map feature name -> high score numeric feature zones
 */
public static Map<String, HighScoreNumericFeatureZones> getHighScoreFeatureZones(DataDistribution dataDistribution, PredictionProvider predictionProvider, List<Feature> features, int maxNoOfSamples) {
    Map<String, HighScoreNumericFeatureZones> numericFeatureZonesMap = new HashMap<>();
    List<Prediction> scoreSortedPredictions = new ArrayList<>();
    try {
        scoreSortedPredictions.addAll(DataUtils.getScoreSortedPredictions(predictionProvider, new PredictionInputsDataDistribution(dataDistribution.sample(maxNoOfSamples))));
    } catch (ExecutionException e) {
        // the exception is passed as trailing argument so that stack trace and cause are not lost
        LOGGER.error("Could not sort predictions by score {}", e.getMessage(), e);
    } catch (InterruptedException e) {
        LOGGER.error("Interrupted while waiting for sorting predictions by score {}", e.getMessage(), e);
        // restore the interrupt flag for the callers
        Thread.currentThread().interrupt();
    } catch (TimeoutException e) {
        LOGGER.error("Timed out while waiting for sorting predictions by score", e);
    }
    if (scoreSortedPredictions.isEmpty()) {
        return numericFeatureZonesMap;
    }

    // the list is sorted by score: first and last elements are read as max and min scores
    double max = sumOfOutputScores(scoreSortedPredictions.get(0));
    double min = sumOfOutputScores(scoreSortedPredictions.get(scoreSortedPredictions.size() - 1));
    if (max == min) {
        // no score spread, hence no high score zone can be identified
        return numericFeatureZonesMap;
    }

    // the mean score is the threshold separating high score predictions from the others
    double threshold = scoreSortedPredictions.stream().mapToDouble(p -> sumOfOutputScores(p)).average().orElse((max + min) / 2);
    // filter out predictions whose score is in [min, threshold]
    List<Prediction> highScorePredictions = scoreSortedPredictions.stream().filter(p -> sumOfOutputScores(p) > threshold).collect(Collectors.toList());

    for (int j = 0; j < features.size(); j++) {
        Feature feature = features.get(j);
        if (Type.NUMBER.equals(feature.getType())) {
            int finalJ = j;
            // get the distinct feature values associated with high score inputs (looked up by position)
            double[] highScoreFeaturePoints = highScorePredictions.stream()
                    .map(prediction -> prediction.getInput().getFeatures().get(finalJ).getValue().asNumber())
                    .distinct()
                    .mapToDouble(Double::doubleValue)
                    .toArray();
            // tolerance is half the stdDev of the high score points around their mean
            double center = DataUtils.getMean(highScoreFeaturePoints);
            double tolerance = DataUtils.getStdDev(highScoreFeaturePoints, center) / 2;
            numericFeatureZonesMap.put(feature.getName(), new HighScoreNumericFeatureZones(highScoreFeaturePoints, tolerance));
        }
    }
    return numericFeatureZonesMap;
}

/**
 * Sum the scores of all the outputs of the given prediction.
 *
 * @param prediction the prediction to score
 * @return the sum of the scores of the prediction outputs
 */
private static double sumOfOutputScores(Prediction prediction) {
    return prediction.getOutput().getOutputs().stream().mapToDouble(Output::getScore).sum();
}
use of org.kie.kogito.explainability.model.PredictionInputsDataDistribution in project kogito-apps by kiegroup.
the class DummyDmnModelsLimeExplainerTest method testAllTypesDMNExplanation.
/**
 * Runs LIME on a prediction of the 'allTypes' DMN model: checks that every saliency is non-null, that the
 * local saliency stability validation does not throw, and that local saliency precision, recall and F1
 * computed over a distribution of perturbed inputs all fall within [0, 1].
 */
@Test
void testAllTypesDMNExplanation() throws ExecutionException, InterruptedException, TimeoutException {
    // load the DMN model and wrap it as a prediction provider
    DMNRuntime runtime = DMNKogito.createGenericDMNRuntime(new InputStreamReader(getClass().getResourceAsStream("/dmn/allTypes.dmn")));
    assertThat(runtime.getModels().size()).isEqualTo(1);
    final String modelNamespace = "https://kiegroup.org/dmn/_24B9EC8C-2F02-40EB-B6BB-E8CDE82FBF08";
    final String modelName = "new-file";
    PredictionProvider model = new DecisionModelWrapper(new DmnDecisionModel(runtime, modelNamespace, modelName));

    // nested structure reused both as a single complex input and as a list element
    Map<String, Object> nestedInput = new HashMap<>();
    nestedInput.put("aNestedListOfNumbers", Collections.singletonList(1));
    nestedInput.put("aNestedString", "test");
    nestedInput.put("aNestedComplexInput", Collections.singletonMap("doubleNestedNumber", 1));

    // one entry per input of the model
    Map<String, Object> contextValues = new HashMap<>();
    contextValues.put("stringInput", "test");
    contextValues.put("listOfStringInput", Collections.singletonList("test"));
    contextValues.put("numberInput", 1);
    contextValues.put("listOfNumbersInput", Collections.singletonList(1));
    contextValues.put("booleanInput", true);
    contextValues.put("listOfBooleansInput", Collections.singletonList(true));
    contextValues.put("timeInput", "h09:00");
    contextValues.put("dateInput", "2020-04-02");
    contextValues.put("dateAndTimeInput", "2020-04-02T09:00:00");
    contextValues.put("daysAndTimeDurationInput", "P1DT1H");
    contextValues.put("yearsAndMonthDurationInput", "P1Y1M");
    contextValues.put("complexInput", nestedInput);
    contextValues.put("listOfComplexInput", Collections.singletonList(nestedInput));

    List<Feature> features = new ArrayList<>();
    features.add(FeatureFactory.newCompositeFeature("context", contextValues));

    // prediction to be explained
    PredictionInput input = new PredictionInput(features);
    PredictionOutput output = model.predictAsync(List.of(input)).get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit()).get(0);
    Prediction prediction = new SimplePrediction(input, output);

    // LIME explainer configured with 10 samples
    PerturbationContext perturbationContext = new PerturbationContext(0L, new Random(), 3);
    LimeExplainer limeExplainer = new LimeExplainer(new LimeConfig().withSamples(10).withPerturbationContext(perturbationContext));

    Map<String, Saliency> saliencies = limeExplainer.explainAsync(prediction, model).get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit());
    saliencies.values().forEach(saliency -> assertThat(saliency).isNotNull());

    assertThatCode(() -> ValidationUtils.validateLocalSaliencyStability(model, prediction, limeExplainer, 1, 0.5, 0.2)).doesNotThrowAnyException();

    // build a distribution of 10 perturbed inputs for the metrics below
    List<PredictionInput> perturbedInputs = new ArrayList<>();
    while (perturbedInputs.size() < 10) {
        perturbedInputs.add(new PredictionInput(DataUtils.perturbFeatures(features, perturbationContext)));
    }
    DataDistribution dataDistribution = new PredictionInputsDataDistribution(perturbedInputs);

    String decision = "myDecision";
    int k = 2;
    int chunkSize = 5;
    double precision = ExplainabilityMetrics.getLocalSaliencyPrecision(decision, model, limeExplainer, dataDistribution, k, chunkSize);
    assertThat(precision).isBetween(0d, 1d);
    double recall = ExplainabilityMetrics.getLocalSaliencyRecall(decision, model, limeExplainer, dataDistribution, k, chunkSize);
    assertThat(recall).isBetween(0d, 1d);
    double f1 = ExplainabilityMetrics.getLocalSaliencyF1(decision, model, limeExplainer, dataDistribution, k, chunkSize);
    assertThat(f1).isBetween(0d, 1d);
}
use of org.kie.kogito.explainability.model.PredictionInputsDataDistribution in project kogito-apps by kiegroup.
the class DummyDmnModelsLimeExplainerTest method testFunctional1DMNExplanation.
/**
 * Runs LIME on a prediction of the 'functionalTest1' DMN model: checks that for every saliency the top
 * positive feature is 'booleanInput', that the local saliency stability validation does not throw, and that
 * local saliency precision, recall and F1 computed over a distribution of perturbed inputs all fall within
 * [0, 1].
 */
@Test
void testFunctional1DMNExplanation() throws ExecutionException, InterruptedException, TimeoutException {
    // load the DMN model and wrap it as a prediction provider
    DMNRuntime runtime = DMNKogito.createGenericDMNRuntime(new InputStreamReader(getClass().getResourceAsStream("/dmn/functionalTest1.dmn")));
    assertThat(runtime.getModels().size()).isEqualTo(1);
    final String modelNamespace = "https://kiegroup.org/dmn/_049CD980-1310-4B02-9E90-EFC57059F44A";
    final String modelName = "functionalTest1";
    PredictionProvider model = new DecisionModelWrapper(new DmnDecisionModel(runtime, modelNamespace, modelName));

    Map<String, Object> contextValues = new HashMap<>();
    contextValues.put("booleanInput", true);
    contextValues.put("notUsedInput", 1);
    List<Feature> features = new ArrayList<>();
    features.add(FeatureFactory.newCompositeFeature("context", contextValues));

    // prediction to be explained
    PredictionInput input = new PredictionInput(features);
    PredictionOutput output = model.predictAsync(List.of(input)).get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit()).get(0);
    Prediction prediction = new SimplePrediction(input, output);

    // LIME explainer configured with 10 samples
    PerturbationContext perturbationContext = new PerturbationContext(0L, new Random(), 1);
    LimeExplainer limeExplainer = new LimeExplainer(new LimeConfig().withSamples(10).withPerturbationContext(perturbationContext));

    Map<String, Saliency> saliencies = limeExplainer.explainAsync(prediction, model).get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit());
    saliencies.values().forEach(saliency -> {
        assertThat(saliency).isNotNull();
        // the first of the (up to 2) positive features is expected to be the boolean input
        List<FeatureImportance> positiveFeatures = saliency.getPositiveFeatures(2);
        assertThat(positiveFeatures.isEmpty()).isFalse();
        assertThat(positiveFeatures.get(0).getFeature().getName()).isEqualTo("booleanInput");
    });

    assertThatCode(() -> ValidationUtils.validateLocalSaliencyStability(model, prediction, limeExplainer, 1, 0.5, 0.5)).doesNotThrowAnyException();

    // build a distribution of 10 perturbed inputs for the metrics below
    List<PredictionInput> perturbedInputs = new ArrayList<>();
    while (perturbedInputs.size() < 10) {
        perturbedInputs.add(new PredictionInput(DataUtils.perturbFeatures(features, perturbationContext)));
    }
    DataDistribution dataDistribution = new PredictionInputsDataDistribution(perturbedInputs);

    String decision = "decision";
    int k = 2;
    int chunkSize = 5;
    double precision = ExplainabilityMetrics.getLocalSaliencyPrecision(decision, model, limeExplainer, dataDistribution, k, chunkSize);
    assertThat(precision).isBetween(0d, 1d);
    double recall = ExplainabilityMetrics.getLocalSaliencyRecall(decision, model, limeExplainer, dataDistribution, k, chunkSize);
    assertThat(recall).isBetween(0d, 1d);
    double f1 = ExplainabilityMetrics.getLocalSaliencyF1(decision, model, limeExplainer, dataDistribution, k, chunkSize);
    assertThat(f1).isBetween(0d, 1d);
}
use of org.kie.kogito.explainability.model.PredictionInputsDataDistribution in project kogito-apps by kiegroup.
the class DummyDmnModelsLimeExplainerTest method testFunctional2DMNExplanation.
/**
 * Runs LIME on a prediction of the 'functionalTest2' DMN model: checks that for every saliency the top
 * positive feature is 'numberInput', that the local saliency stability validation does not throw, and that
 * local saliency precision, recall and F1 computed over a distribution of perturbed inputs all fall within
 * [0, 1].
 */
@Test
void testFunctional2DMNExplanation() throws ExecutionException, InterruptedException, TimeoutException {
    // load the DMN model and wrap it as a prediction provider
    DMNRuntime runtime = DMNKogito.createGenericDMNRuntime(new InputStreamReader(getClass().getResourceAsStream("/dmn/functionalTest2.dmn")));
    assertThat(runtime.getModels().size()).isEqualTo(1);
    final String modelNamespace = "https://kiegroup.org/dmn/_049CD980-1310-4B02-9E90-EFC57059F44A";
    final String modelName = "new-file";
    PredictionProvider model = new DecisionModelWrapper(new DmnDecisionModel(runtime, modelNamespace, modelName));

    Map<String, Object> contextValues = new HashMap<>();
    contextValues.put("numberInput", 1);
    contextValues.put("notUsedInput", 1);
    List<Feature> features = new ArrayList<>();
    features.add(FeatureFactory.newCompositeFeature("context", contextValues));

    // prediction to be explained
    PredictionInput input = new PredictionInput(features);
    PredictionOutput output = model.predictAsync(List.of(input)).get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit()).get(0);
    Prediction prediction = new SimplePrediction(input, output);

    // LIME explainer configured with 10 samples
    PerturbationContext perturbationContext = new PerturbationContext(0L, new Random(), 1);
    LimeExplainer limeExplainer = new LimeExplainer(new LimeConfig().withSamples(10).withPerturbationContext(perturbationContext));

    Map<String, Saliency> saliencies = limeExplainer.explainAsync(prediction, model).get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit());
    saliencies.values().forEach(saliency -> {
        assertThat(saliency).isNotNull();
        // the first of the (up to 2) positive features is expected to be the number input
        List<FeatureImportance> positiveFeatures = saliency.getPositiveFeatures(2);
        assertThat(positiveFeatures.isEmpty()).isFalse();
        assertThat(positiveFeatures.get(0).getFeature().getName()).isEqualTo("numberInput");
    });

    assertThatCode(() -> ValidationUtils.validateLocalSaliencyStability(model, prediction, limeExplainer, 1, 0.5, 0.5)).doesNotThrowAnyException();

    // build a distribution of 10 perturbed inputs for the metrics below
    List<PredictionInput> perturbedInputs = new ArrayList<>();
    while (perturbedInputs.size() < 10) {
        perturbedInputs.add(new PredictionInput(DataUtils.perturbFeatures(features, perturbationContext)));
    }
    DataDistribution dataDistribution = new PredictionInputsDataDistribution(perturbedInputs);

    String decision = "decision";
    int k = 2;
    int chunkSize = 5;
    double precision = ExplainabilityMetrics.getLocalSaliencyPrecision(decision, model, limeExplainer, dataDistribution, k, chunkSize);
    assertThat(precision).isBetween(0d, 1d);
    double recall = ExplainabilityMetrics.getLocalSaliencyRecall(decision, model, limeExplainer, dataDistribution, k, chunkSize);
    assertThat(recall).isBetween(0d, 1d);
    double f1 = ExplainabilityMetrics.getLocalSaliencyF1(decision, model, limeExplainer, dataDistribution, k, chunkSize);
    assertThat(f1).isBetween(0d, 1d);
}
Aggregations