Search in sources :

Example 11 with PredictionInput

use of org.kie.kogito.explainability.model.PredictionInput in project kogito-apps by kiegroup.

the class CounterfactualScoreCalculatorTest method testGoalSizeSmaller.

/**
 * Verifies that scoring a counterfactual solution is rejected when fewer goals (1) are supplied
 * than the model produces outputs (2).
 * The score calculator is expected to throw an {@link IllegalArgumentException} carrying the
 * message {@code "Prediction size must be equal to goal size"}.
 *
 * @throws ExecutionException if the asynchronous model prediction fails
 * @throws InterruptedException if the thread is interrupted while waiting for the prediction
 */
@Test
void testGoalSizeSmaller() throws ExecutionException, InterruptedException {
    final CounterFactualScoreCalculator scoreCalculator = new CounterFactualScoreCalculator();
    PredictionProvider model = TestUtils.getFeatureSkipModel(0);

    // Three input features: two numerical (f-1, f-2) and one boolean (f-3)
    List<Feature> features = new ArrayList<>();
    features.add(FeatureFactory.newNumericalFeature("f-1", 1.0));
    features.add(FeatureFactory.newNumericalFeature("f-2", 2.0));
    features.add(FeatureFactory.newBooleanFeature("f-3", true));

    PredictionInput input = new PredictionInput(features);
    List<CounterfactualEntity> entities = CounterfactualEntityFactory.createEntities(input);

    // Only one goal is supplied on purpose, so that it mismatches the two outputs checked below
    List<Output> goal = new ArrayList<>();
    goal.add(new Output("f-2", Type.NUMBER, new Value(2.0), 0.0));

    List<PredictionOutput> predictionOutputs = model.predictAsync(List.of(input)).get();
    assertEquals(1, goal.size());
    // A single prediction is expected
    assertEquals(1, predictionOutputs.size());
    // Single prediction with two features
    assertEquals(2, predictionOutputs.get(0).getOutputs().size());

    final CounterfactualSolution solution = new CounterfactualSolution(entities, features, model, goal, UUID.randomUUID(), UUID.randomUUID(), 0.0);
    IllegalArgumentException exception = assertThrows(IllegalArgumentException.class, () -> scoreCalculator.calculateScore(solution));
    assertEquals("Prediction size must be equal to goal size", exception.getMessage());
}
Also used : PredictionInput(org.kie.kogito.explainability.model.PredictionInput) ArrayList(java.util.ArrayList) EmptyFeatureDomain(org.kie.kogito.explainability.model.domain.EmptyFeatureDomain) PredictionFeatureDomain(org.kie.kogito.explainability.model.PredictionFeatureDomain) NumericalFeatureDomain(org.kie.kogito.explainability.model.domain.NumericalFeatureDomain) FeatureDomain(org.kie.kogito.explainability.model.domain.FeatureDomain) PredictionProvider(org.kie.kogito.explainability.model.PredictionProvider) Feature(org.kie.kogito.explainability.model.Feature) CounterfactualEntity(org.kie.kogito.explainability.local.counterfactual.entities.CounterfactualEntity) PredictionFeatureDomain(org.kie.kogito.explainability.model.PredictionFeatureDomain) PredictionOutput(org.kie.kogito.explainability.model.PredictionOutput) PredictionOutput(org.kie.kogito.explainability.model.PredictionOutput) Output(org.kie.kogito.explainability.model.Output) Value(org.kie.kogito.explainability.model.Value) Test(org.junit.jupiter.api.Test) ParameterizedTest(org.junit.jupiter.params.ParameterizedTest)

Example 12 with PredictionInput

use of org.kie.kogito.explainability.model.PredictionInput in project kogito-apps by kiegroup.

the class DataUtils method linearizeInputs.

/**
 * Build a new list of prediction inputs in which every input carries the linearized version of its features.
 * The incoming list and its inputs are only read; one new {@link PredictionInput} is created per element,
 * in the same order.
 *
 * @param predictionInputs the prediction inputs whose features should be linearized
 * @return a new list holding one prediction input with linearized features per original input
 */
public static List<PredictionInput> linearizeInputs(List<PredictionInput> predictionInputs) {
    List<PredictionInput> linearized = new LinkedList<>();
    predictionInputs.forEach(original -> {
        // flatten the features of this input and wrap them into a fresh prediction input
        List<Feature> flattened = getLinearizedFeatures(original.getFeatures());
        linearized.add(new PredictionInput(flattened));
    });
    return linearized;
}
Also used : PredictionInput(org.kie.kogito.explainability.model.PredictionInput) Feature(org.kie.kogito.explainability.model.Feature) LinkedList(java.util.LinkedList)

Example 13 with PredictionInput

use of org.kie.kogito.explainability.model.PredictionInput in project kogito-apps by kiegroup.

the class DataUtils method readCSV.

/**
 * Read a CSV file into a {@link DataDistribution} object.
 * The file is parsed as RFC 4180 CSV and its first record is treated as the header: header names
 * become feature names, and every following record becomes one {@link PredictionInput}.
 *
 * @param file the path to the CSV file
 * @param schema an ordered list of {@link Type}s as the 'schema', used to determine
 *        the {@link Type} of each feature / column
 * @return the parsed CSV as a {@link DataDistribution}
 * @throws IOException when failing at reading the CSV file
 * @throws MalformedInputException if any record in CSV has different size with respect to the specified schema
 */
public static DataDistribution readCSV(Path file, List<Type> schema) throws IOException {
    List<PredictionInput> inputs = new ArrayList<>();
    try (BufferedReader reader = Files.newBufferedReader(file)) {
        Iterable<CSVRecord> records = CSVFormat.RFC4180.withFirstRecordAsHeader().parse(reader);
        for (CSVRecord record : records) {
            int size = record.size();
            // every record must provide exactly one value per schema entry
            if (schema.size() != size) {
                throw new MalformedInputException(size);
            }
            // look the header names up once per record rather than once per cell
            List<String> headerNames = record.getParser().getHeaderNames();
            List<Feature> features = new ArrayList<>(size);
            for (int i = 0; i < size; i++) {
                String s = record.get(i);
                Type type = schema.get(i);
                features.add(new Feature(headerNames.get(i), type, new Value(s)));
            }
            inputs.add(new PredictionInput(features));
        }
    }
    return new PredictionInputsDataDistribution(inputs);
}
Also used : PredictionInput(org.kie.kogito.explainability.model.PredictionInput) ArrayList(java.util.ArrayList) Feature(org.kie.kogito.explainability.model.Feature) Type(org.kie.kogito.explainability.model.Type) BufferedReader(java.io.BufferedReader) Value(org.kie.kogito.explainability.model.Value) MalformedInputException(java.nio.charset.MalformedInputException) CSVRecord(org.apache.commons.csv.CSVRecord) PredictionInputsDataDistribution(org.kie.kogito.explainability.model.PredictionInputsDataDistribution)

Example 14 with PredictionInput

use of org.kie.kogito.explainability.model.PredictionInput in project kogito-apps by kiegroup.

the class ExplainabilityMetrics method getLocalSaliencyRecall.

/**
 * Evaluate the recall of a local saliency explainer on a given model.
 * Get the predictions having outputs with the highest score for the given decision and pair them with predictions
 * whose outputs have the lowest score for the same decision.
 * Get the top k (most important) features (according to the saliency) for the most important outputs and
 * "paste" them on each paired input corresponding to an output with low score (for the target decision).
 * Perform prediction on the "masked" input, if the output on the masked input is equal to the output for the
 * input the mask features were taken from, that's considered a true positive, otherwise it's a false negative.
 * see Section 3.2.1 of https://openreview.net/attachment?id=B1xBAA4FwH&name=original_pdf
 *
 * @param outputName decision to evaluate recall for
 * @param predictionProvider the prediction provider to test
 * @param localExplainer the explainer to evaluate
 * @param dataDistribution the data distribution used to obtain inputs for evaluation
 * @param k the no. of features to extract
 * @param chunkSize the size of the chunk of predictions to use for evaluation
 * @return the saliency recall, or {@link Double#NaN} when no masked prediction could be compared
 * @throws InterruptedException if interrupted while waiting for an explanation or a prediction
 * @throws ExecutionException if an explanation or a prediction completes exceptionally
 * @throws TimeoutException if an explanation or a prediction does not complete within the configured timeout
 */
public static double getLocalSaliencyRecall(String outputName, PredictionProvider predictionProvider, LocalExplainer<Map<String, Saliency>> localExplainer, DataDistribution dataDistribution, int k, int chunkSize) throws InterruptedException, ExecutionException, TimeoutException {
    // get all samples from the data distribution
    List<Prediction> sorted = DataUtils.getScoreSortedPredictions(outputName, predictionProvider, dataDistribution);
    // get the top and bottom 'chunkSize' predictions
    List<Prediction> topChunk = new ArrayList<>(sorted.subList(0, chunkSize));
    List<Prediction> bottomChunk = new ArrayList<>(sorted.subList(sorted.size() - chunkSize, sorted.size()));
    double truePositives = 0;
    double falseNegatives = 0;
    // index of the bottom prediction paired with the top prediction being processed
    int currentChunk = 0;
    // for each top prediction, paste its k most important features onto the paired bottom
    // input, then feed the model with this masked input and check the output is equal to the top scored one.
    for (Prediction prediction : topChunk) {
        Optional<Output> optionalOutput = prediction.getOutput().getByName(outputName);
        if (optionalOutput.isPresent()) {
            Output output = optionalOutput.get();
            Map<String, Saliency> stringSaliencyMap = localExplainer.explainAsync(prediction, predictionProvider).get(Config.DEFAULT_ASYNC_TIMEOUT, Config.DEFAULT_ASYNC_TIMEUNIT);
            if (stringSaliencyMap.containsKey(outputName)) {
                Saliency saliency = stringSaliencyMap.get(outputName);
                // k features with the highest importance score, in descending order
                List<FeatureImportance> topFeatures = saliency.getPerFeatureImportance().stream().sorted((f1, f2) -> Double.compare(f2.getScore(), f1.getScore())).limit(k).collect(Collectors.toList());
                PredictionInput input = bottomChunk.get(currentChunk).getInput();
                PredictionInput maskedInput = maskInput(topFeatures, input);
                List<PredictionOutput> predictionOutputList = predictionProvider.predictAsync(List.of(maskedInput)).get(Config.DEFAULT_ASYNC_TIMEOUT, Config.DEFAULT_ASYNC_TIMEUNIT);
                if (!predictionOutputList.isEmpty()) {
                    PredictionOutput predictionOutput = predictionOutputList.get(0);
                    Optional<Output> optionalNewOutput = predictionOutput.getByName(outputName);
                    if (optionalNewOutput.isPresent()) {
                        // compare against the output of the masked input (not the original one,
                        // which would make the comparison trivially true)
                        Output newOutput = optionalNewOutput.get();
                        if (output.getValue().equals(newOutput.getValue())) {
                            truePositives++;
                        } else {
                            falseNegatives++;
                        }
                    }
                }
                currentChunk++;
            }
        }
    }
    if ((truePositives + falseNegatives) > 0) {
        return truePositives / (truePositives + falseNegatives);
    } else {
        // if topChunk is empty or the target output (by name) is not an output of the model.
        return Double.NaN;
    }
}
Also used : PredictionInput(org.kie.kogito.explainability.model.PredictionInput) Prediction(org.kie.kogito.explainability.model.Prediction) ArrayList(java.util.ArrayList) Saliency(org.kie.kogito.explainability.model.Saliency) FeatureImportance(org.kie.kogito.explainability.model.FeatureImportance) PredictionOutput(org.kie.kogito.explainability.model.PredictionOutput) PredictionOutput(org.kie.kogito.explainability.model.PredictionOutput) Output(org.kie.kogito.explainability.model.Output)

Example 15 with PredictionInput

use of org.kie.kogito.explainability.model.PredictionInput in project kogito-apps by kiegroup.

the class ExplainabilityMetrics method getLocalSaliencyPrecision.

/**
 * Evaluate the precision of a local saliency explainer on a given model.
 * Predictions whose outputs have the lowest score for the given decision are paired with predictions
 * whose outputs have the highest score for the same decision.
 * For each low scored prediction, the k least important features (according to the saliency) are
 * "pasted" onto the paired high scored input and the model is asked to predict on this "masked" input.
 * If the output value is unchanged with respect to the high scored prediction that's counted as a
 * true positive, otherwise as a false positive.
 * see Section 3.2.1 of https://openreview.net/attachment?id=B1xBAA4FwH&name=original_pdf
 *
 * @param outputName decision to evaluate precision for
 * @param predictionProvider the prediction provider to test
 * @param localExplainer the explainer to evaluate
 * @param dataDistribution the data distribution used to obtain inputs for evaluation
 * @param k the no. of features to extract
 * @param chunkSize the size of the chunk of predictions to use for evaluation
 * @return the saliency precision, or {@link Double#NaN} when no masked prediction could be compared
 */
public static double getLocalSaliencyPrecision(String outputName, PredictionProvider predictionProvider, LocalExplainer<Map<String, Saliency>> localExplainer, DataDistribution dataDistribution, int k, int chunkSize) throws InterruptedException, ExecutionException, TimeoutException {
    List<Prediction> ranked = DataUtils.getScoreSortedPredictions(outputName, predictionProvider, dataDistribution);
    // first and last 'chunkSize' predictions of the score sorted list
    int total = ranked.size();
    List<Prediction> topChunk = new ArrayList<>(ranked.subList(0, chunkSize));
    List<Prediction> bottomChunk = new ArrayList<>(ranked.subList(total - chunkSize, total));
    double truePositives = 0;
    double falsePositives = 0;
    // index of the top prediction paired with the bottom prediction being explained
    int pairIndex = 0;
    for (Prediction lowScored : bottomChunk) {
        Map<String, Saliency> saliencies = localExplainer.explainAsync(lowScored, predictionProvider).get(Config.DEFAULT_ASYNC_TIMEOUT, Config.DEFAULT_ASYNC_TIMEUNIT);
        if (!saliencies.containsKey(outputName)) {
            // no saliency for the target decision: skip without consuming a top prediction
            continue;
        }
        // k features with the lowest importance score, in ascending order
        List<FeatureImportance> leastImportant = saliencies.get(outputName).getPerFeatureImportance().stream()
                .sorted((left, right) -> Double.compare(left.getScore(), right.getScore()))
                .limit(k)
                .collect(Collectors.toList());
        Prediction highScored = topChunk.get(pairIndex);
        // the pairing advances for every explained prediction, whatever the outcome below
        pairIndex++;
        PredictionInput masked = maskInput(leastImportant, highScored.getInput());
        List<PredictionOutput> maskedOutputs = predictionProvider.predictAsync(List.of(masked)).get(Config.DEFAULT_ASYNC_TIMEOUT, Config.DEFAULT_ASYNC_TIMEUNIT);
        if (maskedOutputs.isEmpty()) {
            continue;
        }
        Optional<Output> maskedOutput = maskedOutputs.get(0).getByName(outputName);
        if (!maskedOutput.isPresent()) {
            continue;
        }
        Output afterMasking = maskedOutput.get();
        Optional<Output> referenceOutput = highScored.getOutput().getByName(outputName);
        if (!referenceOutput.isPresent()) {
            continue;
        }
        boolean unchanged = referenceOutput.get().getValue().equals(afterMasking.getValue());
        if (unchanged) {
            truePositives++;
        } else {
            falsePositives++;
        }
    }
    double compared = truePositives + falsePositives;
    // NaN if bottomChunk is empty or the target output (by name) is not an output of the model.
    return compared > 0 ? truePositives / compared : Double.NaN;
}
Also used : PredictionInput(org.kie.kogito.explainability.model.PredictionInput) Prediction(org.kie.kogito.explainability.model.Prediction) ArrayList(java.util.ArrayList) Saliency(org.kie.kogito.explainability.model.Saliency) FeatureImportance(org.kie.kogito.explainability.model.FeatureImportance) PredictionOutput(org.kie.kogito.explainability.model.PredictionOutput) PredictionOutput(org.kie.kogito.explainability.model.PredictionOutput) Output(org.kie.kogito.explainability.model.Output)

Aggregations

PredictionInput (org.kie.kogito.explainability.model.PredictionInput)187 PredictionOutput (org.kie.kogito.explainability.model.PredictionOutput)143 PredictionProvider (org.kie.kogito.explainability.model.PredictionProvider)135 Prediction (org.kie.kogito.explainability.model.Prediction)126 Feature (org.kie.kogito.explainability.model.Feature)109 Test (org.junit.jupiter.api.Test)107 ArrayList (java.util.ArrayList)97 SimplePrediction (org.kie.kogito.explainability.model.SimplePrediction)95 Random (java.util.Random)86 PerturbationContext (org.kie.kogito.explainability.model.PerturbationContext)67 ParameterizedTest (org.junit.jupiter.params.ParameterizedTest)60 Output (org.kie.kogito.explainability.model.Output)55 LimeConfig (org.kie.kogito.explainability.local.lime.LimeConfig)54 LinkedList (java.util.LinkedList)53 LimeExplainer (org.kie.kogito.explainability.local.lime.LimeExplainer)52 Value (org.kie.kogito.explainability.model.Value)52 Saliency (org.kie.kogito.explainability.model.Saliency)50 List (java.util.List)39 LimeConfigOptimizer (org.kie.kogito.explainability.local.lime.optim.LimeConfigOptimizer)33 Type (org.kie.kogito.explainability.model.Type)31