use of org.kie.kogito.explainability.model.FeatureDistribution in project kogito-apps by kiegroup.
the class CounterfactualEntityFactoryTest method testDurationFactory.
/**
 * Checks which counterfactual entity the factory builds for a duration feature (with and without a
 * domain) and exercises the {@code DurationEntity.from} overloads used below.
 */
@Test
void testDurationFactory() {
    final String featureName = "duration-feature";
    final Duration oneDay = Duration.ofDays(1);

    // Without a domain the factory is expected to produce a fixed entity.
    final Feature unboundedFeature = FeatureFactory.newDurationFeature(featureName, oneDay);
    final CounterfactualEntity fixedEntity = CounterfactualEntityFactory.from(unboundedFeature);
    assertTrue(fixedEntity instanceof FixedDurationEntity);
    assertEquals(Type.DURATION, fixedEntity.asFeature().getType());

    // With a domain the factory is expected to produce a (non-fixed) duration entity.
    final FeatureDomain domain = DurationFeatureDomain.create(0, 60, ChronoUnit.SECONDS);
    final Feature feature = FeatureFactory.newDurationFeature(featureName, oneDay, domain);
    final CounterfactualEntity variableEntity = CounterfactualEntityFactory.from(feature);
    assertTrue(variableEntity instanceof DurationEntity);
    assertEquals(Type.DURATION, variableEntity.asFeature().getType());
    assertFalse(variableEntity.isConstrained());

    final Duration lowerBound = Duration.ZERO;
    final Duration upperBound = Duration.ofDays(2);

    // Explicit bounds only.
    // NOTE(review): the range is presumably expressed in seconds (2 days = 172 800 s) - confirm.
    final CounterfactualEntity boundedEntity = DurationEntity.from(feature, lowerBound, upperBound);
    assertEquals(0, boundedEntity.distance());
    assertTrue(((DurationEntity) boundedEntity).getValueRange().contains(1e5));
    assertFalse(((DurationEntity) boundedEntity).getValueRange().contains(2e5));
    assertFalse(boundedEntity.isConstrained());

    // Explicit bounds plus the boolean flag set to false.
    final CounterfactualEntity flaggedEntity = DurationEntity.from(feature, lowerBound, upperBound, false);
    assertEquals(0, flaggedEntity.distance());
    assertFalse(flaggedEntity.isConstrained());

    // Explicit bounds plus a feature distribution built from ten random doubles.
    final double[] samples = new Random().doubles(10).toArray();
    final FeatureDistribution distribution = new NumericFeatureDistribution(feature, samples);
    final CounterfactualEntity distributedEntity = DurationEntity.from(feature, lowerBound, upperBound, distribution);
    assertEquals(0, distributedEntity.distance());
    assertFalse(distributedEntity.isConstrained());
}
use of org.kie.kogito.explainability.model.FeatureDistribution in project kogito-apps by kiegroup.
the class CounterfactualExplainerTest method testCounterfactualConstrainedMatchScaled.
/**
 * Runs a counterfactual search over four numerical features against a sum-threshold model and checks
 * that the features created without a domain (index 0 and 3) are left unchanged while the sum of all
 * resulting feature values falls within {@code [center - epsilon, center + epsilon]}.
 *
 * @param seed seed forwarded to the counterfactual search
 */
@ParameterizedTest
@ValueSource(ints = { 0, 1, 2 })
void testCounterfactualConstrainedMatchScaled(int seed) throws ExecutionException, InterruptedException, TimeoutException {
    final List<Output> goal = List.of(new Output("inside", Type.BOOLEAN, new Value(true), 0.0d));
    List<Feature> features = new LinkedList<>();
    // NOTE(review): featureDistributions is populated below but never handed to the search in this
    // method - confirm whether it should be wired into runCounterfactualSearch or dropped.
    List<FeatureDistribution> featureDistributions = new LinkedList<>();

    // f-num1: no domain
    final Feature fnum1 = FeatureFactory.newNumericalFeature("f-num1", 100.0);
    features.add(fnum1);
    featureDistributions.add(new NumericFeatureDistribution(fnum1, (new NormalDistribution(500, 1.1)).sample(1000)));

    // f-num2: domain [0, 1000]
    final Feature fnum2 = FeatureFactory.newNumericalFeature("f-num2", 100.0, NumericalFeatureDomain.create(0.0, 1000.0));
    features.add(fnum2);
    featureDistributions.add(new NumericFeatureDistribution(fnum2, (new NormalDistribution(430.0, 1.7)).sample(1000)));

    // f-num3: domain [0, 1000]
    final Feature fnum3 = FeatureFactory.newNumericalFeature("f-num3", 100.0, NumericalFeatureDomain.create(0.0, 1000.0));
    features.add(fnum3);
    featureDistributions.add(new NumericFeatureDistribution(fnum3, (new NormalDistribution(470.0, 2.9)).sample(1000)));

    // f-num4: no domain
    final Feature fnum4 = FeatureFactory.newNumericalFeature("f-num4", 100.0);
    features.add(fnum4);
    featureDistributions.add(new NumericFeatureDistribution(fnum4, (new NormalDistribution(2390.0, 0.3)).sample(1000)));

    final double center = 500.0;
    final double epsilon = 10.0;
    final CounterfactualResult result = runCounterfactualSearch((long) seed, goal, features, TestUtils.getSumThresholdModel(center, epsilon), DEFAULT_GOAL_THRESHOLD);

    final List<CounterfactualEntity> counterfactualEntities = result.getEntities();
    double totalSum = 0;
    for (CounterfactualEntity entity : counterfactualEntities) {
        totalSum += entity.asFeature().getValue().asNumber();
        logger.debug("Entity: {}", entity);
    }

    // the two features created without a domain must not have been altered by the search
    assertFalse(counterfactualEntities.get(0).isChanged());
    assertFalse(counterfactualEntities.get(3).isChanged());
    // the sum of the counterfactual values must land inside the model's acceptance band
    assertTrue(totalSum <= center + epsilon);
    assertTrue(totalSum >= center - epsilon);
    assertTrue(result.isValid());
}
use of org.kie.kogito.explainability.model.FeatureDistribution in project kogito-apps by kiegroup.
the class DataUtils method boostrapFeatureDistributions.
/**
 * Generate feature distributions from an existing (possibly small) {@link DataDistribution} for each numeric
 * {@link Feature}. Features whose type is not {@code Type.NUMBER} are skipped and do not appear in the result.
 * <p>
 * For each numeric feature, {@code draws} bootstrap samples of size {@code sampleSize} are taken (with
 * replacement) from the feature's existing samples. The mean, variance, minimum and maximum of every draw are
 * averaged across draws; the final standard deviation is the square root of the averaged variance. Data points
 * are then sampled via {@link #generateData(double, double, int, Random)}, clamped to the averaged
 * {@code [min, max]} interval and, when high score zones are available for the feature, optionally filtered
 * by them.
 *
 * @param dataDistribution data distribution to take feature values from
 * @param perturbationContext perturbation context, source of the random generator used for sampling
 * @param featureDistributionSize desired size of generated feature distributions
 * @param draws number of times sampling from feature values is performed
 * @param sampleSize size of each sample draw
 * @param numericFeatureZonesMap high feature score zones, keyed by feature name
 * @return a map feature name -> generated feature distribution
 */
public static Map<String, FeatureDistribution> boostrapFeatureDistributions(DataDistribution dataDistribution, PerturbationContext perturbationContext, int featureDistributionSize, int draws, int sampleSize, Map<String, HighScoreNumericFeatureZones> numericFeatureZonesMap) {
    Map<String, FeatureDistribution> featureDistributions = new HashMap<>();
    for (FeatureDistribution featureDistribution : dataDistribution.asFeatureDistributions()) {
        Feature feature = featureDistribution.getFeature();
        if (Type.NUMBER.equals(feature.getType())) {
            List<Value> values = featureDistribution.getAllSamples();
            double[] means = new double[draws];
            double[] variances = new double[draws];
            double[] mins = new double[draws];
            double[] maxs = new double[draws];
            for (int i = 0; i < draws; i++) {
                List<Value> sampledValues = DataUtils.sampleWithReplacement(values, sampleSize, perturbationContext.getRandom());
                double[] data = sampledValues.stream().mapToDouble(Value::asNumber).toArray();
                double mean = DataUtils.getMean(data);
                // variances (not standard deviations) are accumulated so they can be averaged across draws
                double variance = Math.pow(DataUtils.getStdDev(data, mean), 2);
                // Double.MIN_VALUE is the smallest *positive* double; the lowest finite value is -Double.MAX_VALUE
                double min = Arrays.stream(data).min().orElse(-Double.MAX_VALUE);
                double max = Arrays.stream(data).max().orElse(Double.MAX_VALUE);
                means[i] = mean;
                variances[i] = variance;
                mins[i] = min;
                maxs[i] = max;
            }
            double finalMean = DataUtils.getMean(means);
            double finalStdDev = Math.sqrt(DataUtils.getMean(variances));
            double finalMin = DataUtils.getMean(mins);
            double finalMax = DataUtils.getMean(maxs);
            double[] doubles = DataUtils.generateData(finalMean, finalStdDev, featureDistributionSize, perturbationContext.getRandom());
            // clamp every generated point into the bootstrapped [finalMin, finalMax] interval
            double[] boundedData = Arrays.stream(doubles).map(d -> Math.min(Math.max(d, finalMin), finalMax)).toArray();
            HighScoreNumericFeatureZones highScoreNumericFeatureZones = numericFeatureZonesMap.get(feature.getName());
            double[] finaldata;
            if (highScoreNumericFeatureZones != null) {
                double[] filteredData = DoubleStream.of(boundedData).filter(highScoreNumericFeatureZones::test).toArray();
                // only use the filtered data when more than half (integer division) of the requested
                // number of points survives the filter; otherwise fall back to the unfiltered data
                if (filteredData.length > featureDistributionSize / 2) {
                    finaldata = filteredData;
                } else {
                    finaldata = boundedData;
                }
            } else {
                finaldata = boundedData;
            }
            NumericFeatureDistribution numericFeatureDistribution = new NumericFeatureDistribution(feature, finaldata);
            featureDistributions.put(feature.getName(), numericFeatureDistribution);
        }
    }
    return featureDistributions;
}
use of org.kie.kogito.explainability.model.FeatureDistribution in project kogito-apps by kiegroup.
the class HighScoreNumericFeatureZonesProviderTest method testEmptyData.
/**
 * With an empty feature list and a data distribution built from no feature distributions, the
 * provider must return a non-null map of size zero.
 */
@Test
void testEmptyData() {
    // empty inputs: no feature distributions and no features
    List<FeatureDistribution> noDistributions = new ArrayList<>();
    DataDistribution emptyDistribution = new IndependentFeaturesDataDistribution(noDistributions);
    List<Feature> noFeatures = new ArrayList<>();
    PredictionProvider model = TestUtils.getSumThresholdModel(0.1, 0.1);

    Map<String, HighScoreNumericFeatureZones> zones =
            HighScoreNumericFeatureZonesProvider.getHighScoreFeatureZones(emptyDistribution, model, noFeatures, 10);

    assertThat(zones).isNotNull();
    assertThat(zones.size()).isZero();
}
use of org.kie.kogito.explainability.model.FeatureDistribution in project kogito-apps by kiegroup.
the class LimeExplainerTest method testWithDataDistribution.
/**
 * Builds a data distribution of four numerical features (four random samples each), configures a
 * LIME explainer with it and checks that a saliency is produced for the "inside" decision.
 */
@Test
void testWithDataDistribution() throws InterruptedException, ExecutionException, TimeoutException {
    final int featureCount = 4;
    final int samplesPerFeature = 4;

    Random random = new Random();
    PerturbationContext perturbationContext = new PerturbationContext(4L, random, 1);

    List<FeatureDistribution> distributions = new ArrayList<>();
    List<Feature> features = new ArrayList<>();
    for (int featureIndex = 0; featureIndex < featureCount; featureIndex++) {
        // the feature to explain carries NaN; its distribution carries the random samples
        Feature feature = FeatureFactory.newNumericalFeature("f-" + featureIndex, Double.NaN);
        features.add(feature);

        List<Value> samples = new ArrayList<>();
        for (int sampleIndex = 0; sampleIndex < samplesPerFeature; sampleIndex++) {
            samples.add(Type.NUMBER.randomValue(perturbationContext));
        }
        distributions.add(new GenericFeatureDistribution(feature, samples));
    }

    DataDistribution dataDistribution = new IndependentFeaturesDataDistribution(distributions);
    LimeConfig limeConfig = new LimeConfig()
            .withDataDistribution(dataDistribution)
            .withPerturbationContext(perturbationContext)
            .withSamples(10);
    LimeExplainer limeExplainer = new LimeExplainer(limeConfig);

    PredictionInput input = new PredictionInput(features);
    // both model parameters are drawn from the same generator, first argument first
    double firstParameter = random.nextDouble();
    double secondParameter = random.nextDouble();
    PredictionProvider model = TestUtils.getSumThresholdModel(firstParameter, secondParameter);

    PredictionOutput output = model.predictAsync(List.of(input))
            .get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit())
            .get(0);
    Prediction prediction = new SimplePrediction(input, output);

    Map<String, Saliency> saliencyMap = limeExplainer.explainAsync(prediction, model)
            .get(Config.INSTANCE.getAsyncTimeout(), Config.INSTANCE.getAsyncTimeUnit());
    assertThat(saliencyMap).isNotNull();

    Saliency saliency = saliencyMap.get("inside");
    assertThat(saliency).isNotNull();
}
Aggregations