use of org.kie.kogito.explainability.model.FeatureDistribution in project kogito-apps by kiegroup.
the class DataUtilsTest method testBootstrap.
/**
 * Bootstraps feature distributions from a single generic feature distribution
 * holding four random numeric values, then checks that the bootstrapped
 * distribution registered under the mocked feature's name exposes 10 samples.
 */
@Test
void testBootstrap() {
    PerturbationContext perturbationContext = new PerturbationContext(random, 1);

    // seed the source distribution with four random numeric values
    List<Value> seedValues = new ArrayList<>();
    while (seedValues.size() < 4) {
        seedValues.add(Type.NUMBER.randomValue(perturbationContext));
    }

    Feature numericFeature = TestUtils.getMockedNumericFeature();
    FeatureDistribution sourceDistribution = new GenericFeatureDistribution(numericFeature, seedValues);
    DataDistribution dataDistribution = new IndependentFeaturesDataDistribution(List.of(sourceDistribution));

    Map<String, FeatureDistribution> bootstrapped =
            DataUtils.boostrapFeatureDistributions(dataDistribution, perturbationContext, 10, 1, 500, new HashMap<>());
    assertThat(bootstrapped).isNotNull().isNotEmpty();

    // the bootstrapped distribution is looked up by the feature's name
    FeatureDistribution bootstrappedDistribution = bootstrapped.get(numericFeature.getName());
    assertThat(bootstrappedDistribution).isNotNull();

    List<Value> samples = bootstrappedDistribution.getAllSamples();
    assertThat(samples).isNotNull().hasSize(10);
}
use of org.kie.kogito.explainability.model.FeatureDistribution in project kogito-apps by kiegroup.
the class DataUtilsTest method testRandomDistributionGeneration.
/**
 * Generates a random data distribution (arguments 10 and 10) and checks that
 * the distribution, its list of feature distributions, and every element of
 * that list are non-null.
 */
@Test
void testRandomDistributionGeneration() {
    DataDistribution generated = DataUtils.generateRandomDataDistribution(10, 10, random);
    assertNotNull(generated);
    assertNotNull(generated.asFeatureDistributions());

    // every single feature distribution must be present as well
    List<FeatureDistribution> perFeature = generated.asFeatureDistributions();
    perFeature.forEach(distribution -> assertNotNull(distribution));
}
use of org.kie.kogito.explainability.model.FeatureDistribution in project kogito-apps by kiegroup.
the class DataUtils method generateRandomDataDistribution.
/**
 * Build a data distribution made of independent, randomly generated numeric
 * feature distributions.
 * <p>
 * For each feature, two doubles are drawn from {@code random} and handed to
 * {@code generateData} together with {@code distributionSize} to produce the
 * samples. The features are named {@code f_0}, {@code f_1}, ... and carry
 * {@code Double.NaN} as their own value.
 *
 * @param noOfFeatures number of features
 * @param distributionSize number of samples for each feature
 * @param random source of randomness used for the generation parameters and the samples
 * @return a data distribution
 */
public static DataDistribution generateRandomDataDistribution(int noOfFeatures, int distributionSize, Random random) {
    List<FeatureDistribution> distributions = new LinkedList<>();
    int featureIndex = 0;
    while (featureIndex < noOfFeatures) {
        // draw the two generation parameters in the same order they are passed on
        double firstParameter = random.nextDouble();
        double secondParameter = random.nextDouble();
        double[] samples = generateData(firstParameter, secondParameter, distributionSize, random);

        Feature placeholderFeature = FeatureFactory.newNumericalFeature("f_" + featureIndex, Double.NaN);
        distributions.add(new NumericFeatureDistribution(placeholderFeature, samples));
        featureIndex++;
    }
    return new IndependentFeaturesDataDistribution(distributions);
}
use of org.kie.kogito.explainability.model.FeatureDistribution in project kogito-apps by kiegroup.
the class PartialDependencePlotExplainer method explainFromDataDistribution.
/**
 * Build partial dependence graphs from a data distribution: one graph for each
 * combination of feature distribution and output index.
 * <p>
 * The elapsed time is logged at debug level.
 *
 * @param model the prediction provider handed to {@code getPartialDependenceGraph}
 * @param outputSize number of outputs; a graph is built for every index in {@code [0, outputSize)}
 * @param dataDistribution source of the training data and of the per-feature samples
 * @return the graphs, grouped by feature (in the order of
 *         {@code dataDistribution.asFeatureDistributions()}) and, within a feature, by output index
 * @throws InterruptedException declared for the calls made while building a graph
 * @throws ExecutionException declared for the calls made while building a graph
 * @throws TimeoutException declared for the calls made while building a graph
 */
private List<PartialDependenceGraph> explainFromDataDistribution(PredictionProvider model, int outputSize, DataDistribution dataDistribution) throws InterruptedException, ExecutionException, TimeoutException {
    long start = System.currentTimeMillis();
    List<PartialDependenceGraph> pdps = new ArrayList<>();
    List<FeatureDistribution> featureDistributions = dataDistribution.asFeatureDistributions();
    // the series length does not change while explaining, read it once
    int seriesLength = config.getSeriesLength();
    // fetch entire data distributions for all features
    List<PredictionInput> trainingData = dataDistribution.sample(seriesLength);
    // Single ordering: by numeric value first, falling back to the string form when
    // the numeric values compare as equal (which includes the NaN vs NaN case).
    // This is the same order the former "sort by string, then stable-sort by number"
    // pipeline produced, with one sort pass and one comparator instance.
    Comparator<Value> byNumberThenString = Comparator.comparingDouble(Value::asNumber).thenComparing(Value::asString);
    // create a PDP for each feature
    for (FeatureDistribution featureDistribution : featureDistributions) {
        // generate (further) samples for the feature under analysis
        // TBD: maybe just reuse trainingData
        List<Value> xsValues = featureDistribution.sample(seriesLength).stream()
                .sorted(byNumberThenString)
                .distinct()
                .collect(Collectors.toList());
        // transform sampled Values into Features
        List<Feature> featureXSvalues = xsValues.stream()
                .map(v -> FeatureFactory.copyOf(featureDistribution.getFeature(), v))
                .collect(Collectors.toList());
        // create a PDP for each feature and each output
        for (int outputIndex = 0; outputIndex < outputSize; outputIndex++) {
            PartialDependenceGraph partialDependenceGraph = getPartialDependenceGraph(model, trainingData, xsValues, featureXSvalues, outputIndex);
            pdps.add(partialDependenceGraph);
        }
    }
    long end = System.currentTimeMillis();
    LOGGER.debug("explanation time: {}ms", (end - start));
    return pdps;
}
use of org.kie.kogito.explainability.model.FeatureDistribution in project kogito-apps by kiegroup.
the class LimeExplainer method getPerturbedInputs.
/**
 * Create {@code executionConfig.getNoOfSamples()} perturbed copies of the given features.
 * <p>
 * When the configured data distribution is not empty, feature distributions are
 * bootstrapped from it first (optionally together with high score numeric feature
 * zones) and passed to the perturbation; otherwise an empty map is passed.
 *
 * @param features the features to perturb
 * @param executionConfig supplies the number of samples, the data distribution, the
 *        perturbation context, the bootstrap input limit and the high score zones switch
 * @param predictionProvider used only when high score feature zones are enabled
 * @return one prediction input per generated perturbation
 */
private List<PredictionInput> getPerturbedInputs(List<Feature> features, LimeConfig executionConfig, PredictionProvider predictionProvider) {
    final int sampleCount = executionConfig.getNoOfSamples();
    final DataDistribution distribution = executionConfig.getDataDistribution();
    final PerturbationContext context = executionConfig.getPerturbationContext();

    final Map<String, FeatureDistribution> distributionsByFeature;
    if (distribution.isEmpty()) {
        // nothing to bootstrap from
        distributionsByFeature = new HashMap<>();
    } else {
        int bootstrapLimit = executionConfig.getBoostrapInputs();
        Map<String, HighScoreNumericFeatureZones> zonesByFeature = executionConfig.isHighScoreFeatureZones()
                ? HighScoreNumericFeatureZonesProvider.getHighScoreFeatureZones(distribution, predictionProvider, features, bootstrapLimit)
                : new HashMap<>();
        // generate feature distributions, if possible
        distributionsByFeature = DataUtils.boostrapFeatureDistributions(
                distribution, context, 2 * sampleCount, 1, Math.min(sampleCount, bootstrapLimit), zonesByFeature);
    }

    List<PredictionInput> perturbed = new ArrayList<>();
    for (int generated = 0; generated < sampleCount; generated++) {
        perturbed.add(new PredictionInput(DataUtils.perturbFeatures(features, context, distributionsByFeature)));
    }
    return perturbed;
}
Aggregations