
Example 1 with DescriptiveStats

Use of org.tribuo.evaluation.DescriptiveStats in project tribuo by Oracle.

From class EvaluationAggregationTests, method summarizeF1AcrossDatasets.

@Test
public void summarizeF1AcrossDatasets() {
    Pair<Dataset<Label>, Dataset<Label>> pair = LabelledDataGenerator.denseTrainTest(-0.3);
    Model<Label> model = DummyClassifierTrainer.createMostFrequentTrainer().train(pair.getA());
    List<Dataset<Label>> datasets = Arrays.asList(
            LabelledDataGenerator.denseTrainTest(-1.0).getB(),
            LabelledDataGenerator.denseTrainTest(-0.5).getB(),
            LabelledDataGenerator.denseTrainTest(-0.1).getB());
    // Summarize macro-averaged F1 across all three test datasets in one call.
    LabelMetric macroF1Metric = LabelMetrics.F1.forTarget(MetricTarget.macroAverageTarget());
    DescriptiveStats summary = EvaluationAggregator.summarize(macroF1Metric, model, datasets);
    // Compute the same statistics by hand: evaluate each dataset separately...
    List<Double> macroF1 = new ArrayList<>();
    for (Dataset<Label> d : datasets) {
        LabelEvaluation eval = factory.getEvaluator().evaluate(model, d);
        macroF1.add(eval.macroAveragedF1());
    }
    // ...and collect the per-dataset scores into a DescriptiveStats.
    DescriptiveStats otherSummary = new DescriptiveStats(macroF1);
    assertEquals(summary, otherSummary);
}
Also used: Dataset (org.tribuo.Dataset), DescriptiveStats (org.tribuo.evaluation.DescriptiveStats), Label (org.tribuo.classification.Label), ArrayList (java.util.ArrayList), Test (org.junit.jupiter.api.Test)
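
A DescriptiveStats returned by the aggregator can also be inspected directly rather than compared against another one. A minimal sketch, reusing the summary from the test above and only the two accessors that appear in Example 3 below (getMean() and getStandardDeviation()):

// Report the spread of macro F1 across the three datasets.
System.out.printf("macro F1: mean=%.5f, std=%.5f%n",
        summary.getMean(), summary.getStandardDeviation());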

Example 2 with DescriptiveStats

Use of org.tribuo.evaluation.DescriptiveStats in project tribuo by Oracle.

From class EvaluationAggregationTests, method summarizeF1AcrossDatasets_v2.

@Test
public void summarizeF1AcrossDatasets_v2() {
    Pair<Dataset<Label>, Dataset<Label>> pair = LabelledDataGenerator.denseTrainTest(-0.3);
    Model<Label> model = DummyClassifierTrainer.createMostFrequentTrainer().train(pair.getA());
    List<Dataset<Label>> datasets = Arrays.asList(
            LabelledDataGenerator.denseTrainTest(-1.0).getB(),
            LabelledDataGenerator.denseTrainTest(-0.5).getB(),
            LabelledDataGenerator.denseTrainTest(-0.1).getB());
    Evaluator<Label, LabelEvaluation> evaluator = factory.getEvaluator();
    // Summarize every metric the evaluator computes: one DescriptiveStats per metric ID.
    Map<MetricID<Label>, DescriptiveStats> summaries = EvaluationAggregator.summarize(evaluator, model, datasets);
    MetricID<Label> macroF1 = LabelMetrics.F1.forTarget(MetricTarget.macroAverageTarget()).getID();
    DescriptiveStats summary = summaries.get(macroF1);
    // Can also do this: evaluate each dataset first, then summarize the evaluations.
    List<LabelEvaluation> evals = datasets.stream()
            .map(dataset -> evaluator.evaluate(model, dataset))
            .collect(Collectors.toList());
    Map<MetricID<Label>, DescriptiveStats> summaries2 = EvaluationAggregator.summarize(evals);
    assertEquals(summaries, summaries2);
}
Also used: MetricTarget (org.tribuo.evaluation.metrics.MetricTarget), Arrays (java.util.Arrays), Evaluator (org.tribuo.evaluation.Evaluator), Prediction (org.tribuo.Prediction), Model (org.tribuo.Model), EvaluationAggregator (org.tribuo.evaluation.EvaluationAggregator), Pair (com.oracle.labs.mlrg.olcut.util.Pair), Collectors (java.util.stream.Collectors), MetricID (org.tribuo.evaluation.metrics.MetricID), System.out (java.lang.System.out), ArrayList (java.util.ArrayList), Dataset (org.tribuo.Dataset), Test (org.junit.jupiter.api.Test), Trainer (org.tribuo.Trainer), DummyClassifierTrainer (org.tribuo.classification.baseline.DummyClassifierTrainer), List (java.util.List), LabelFactory (org.tribuo.classification.LabelFactory), Map (java.util.Map), DescriptiveStats (org.tribuo.evaluation.DescriptiveStats), LabelledDataGenerator (org.tribuo.classification.example.LabelledDataGenerator), Assertions.assertEquals (org.junit.jupiter.api.Assertions.assertEquals), Comparator (java.util.Comparator), Label (org.tribuo.classification.Label), CrossValidation (org.tribuo.evaluation.CrossValidation)
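
The whole map can also be dumped for a quick overview, mirroring the per-fold report in Example 3 below. A minimal sketch, reusing the summaries map from the test above:

// Print mean and standard deviation for every metric the evaluator computed.
for (Map.Entry<MetricID<Label>, DescriptiveStats> e : summaries.entrySet()) {
    System.out.printf("%-30s %.5f (%.5f)%n",
            e.getKey(), e.getValue().getMean(), e.getValue().getStandardDeviation());
}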

Example 3 with DescriptiveStats

Use of org.tribuo.evaluation.DescriptiveStats in project tribuo by Oracle.

From class ConfigurableTrainTest, method main.

/**
 * @param args the command line arguments
 * @param <T> The {@link Output} subclass.
 */
@SuppressWarnings("unchecked")
public static <T extends Output<T>> void main(String[] args) {
    // Use the labs format logging.
    LabsLogFormatter.setAllLogFormatters();
    ConfigurableTrainTestOptions o = new ConfigurableTrainTestOptions();
    ConfigurationManager cm;
    try {
        cm = new ConfigurationManager(args, o);
    } catch (UsageException e) {
        logger.info(e.getMessage());
        return;
    }
    if (o.general.trainingPath == null || o.general.testingPath == null || o.outputFactory == null) {
        logger.info(cm.usage());
        System.exit(1);
    }
    Pair<Dataset<T>, Dataset<T>> data = null;
    try {
        data = o.general.load((OutputFactory<T>) o.outputFactory);
    } catch (IOException e) {
        logger.log(Level.SEVERE, "Failed to load data", e);
        System.exit(1);
    }
    Dataset<T> train = data.getA();
    Dataset<T> test = data.getB();
    if (o.trainer == null) {
        logger.warning("No trainer supplied");
        logger.info(cm.usage());
        System.exit(1);
    }
    if (o.transformationMap != null) {
        o.trainer = new TransformTrainer<>(o.trainer, o.transformationMap);
    }
    logger.info("Trainer is " + o.trainer.getProvenance().toString());
    logger.info("Outputs are " + train.getOutputInfo().toReadableString());
    logger.info("Number of features: " + train.getFeatureMap().size());
    final long trainStart = System.currentTimeMillis();
    Model<T> model = ((Trainer<T>) o.trainer).train(train);
    final long trainStop = System.currentTimeMillis();
    logger.info("Finished training classifier " + Util.formatDuration(trainStart, trainStop));
    Evaluator<T, ? extends Evaluation<T>> evaluator = train.getOutputFactory().getEvaluator();
    final long testStart = System.currentTimeMillis();
    Evaluation<T> evaluation = evaluator.evaluate(model, test);
    final long testStop = System.currentTimeMillis();
    logger.info("Finished evaluating model " + Util.formatDuration(testStart, testStop));
    System.out.println(evaluation.toString());
    if (o.general.outputPath != null) {
        try {
            o.general.saveModel(model);
        } catch (IOException e) {
            logger.log(Level.SEVERE, "Error writing model", e);
        }
    }
    if (o.crossValidation) {
        if (o.numFolds > 1) {
            logger.info("Running " + o.numFolds + " fold cross-validation");
            CrossValidation<T, ? extends Evaluation<T>> cv = new CrossValidation<>((Trainer<T>) o.trainer, train, evaluator, o.numFolds, o.general.seed);
            List<? extends Pair<? extends Evaluation<T>, Model<T>>> evaluations = cv.evaluate();
            List<Evaluation<T>> evals = evaluations.stream().map(Pair::getA).collect(Collectors.toList());
            // Summarize each metric across all of the folds.
            Map<MetricID<T>, DescriptiveStats> summary = EvaluationAggregator.summarize(evals);
            // Sort the metric IDs by metric name (the second element of the MetricID pair) for stable output.
            List<MetricID<T>> keys = new ArrayList<>(summary.keySet()).stream().sorted(Comparator.comparing(Pair::getB)).collect(Collectors.toList());
            System.out.println("Summary across the folds:");
            for (MetricID<T> key : keys) {
                DescriptiveStats stats = summary.get(key);
                System.out.printf("%-10s  %.5f (%.5f)%n", key, stats.getMean(), stats.getStandardDeviation());
            }
        } else {
            logger.warning("The number of cross-validation folds must be greater than 1, found " + o.numFolds);
        }
    }
}
Also used: UsageException (com.oracle.labs.mlrg.olcut.config.UsageException), TransformTrainer (org.tribuo.transform.TransformTrainer), Trainer (org.tribuo.Trainer), MetricID (org.tribuo.evaluation.metrics.MetricID), DescriptiveStats (org.tribuo.evaluation.DescriptiveStats), ConfigurationManager (com.oracle.labs.mlrg.olcut.config.ConfigurationManager), Pair (com.oracle.labs.mlrg.olcut.util.Pair), Evaluation (org.tribuo.evaluation.Evaluation), Dataset (org.tribuo.Dataset), IOException (java.io.IOException), Model (org.tribuo.Model), CrossValidation (org.tribuo.evaluation.CrossValidation), OutputFactory (org.tribuo.OutputFactory)
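
Stripped of the CLI plumbing, the cross-validation block above reduces to a few lines. A minimal sketch for a Label task, reusing the fixtures from the other examples (the trainer choice, fold count, and seed here are arbitrary placeholders, not part of the original program):

// Hypothetical standalone version of the cross-validation summary above.
LabelFactory factory = new LabelFactory();
Dataset<Label> train = LabelledDataGenerator.denseTrainTest().getA();
Trainer<Label> trainer = DummyClassifierTrainer.createMostFrequentTrainer();
Evaluator<Label, LabelEvaluation> evaluator = factory.getEvaluator();
// 5-fold cross-validation with a fixed seed, as in the CLI version.
CrossValidation<Label, LabelEvaluation> cv = new CrossValidation<>(trainer, train, evaluator, 5, 1L);
// Keep the evaluation from each (evaluation, model) pair...
List<LabelEvaluation> evals = cv.evaluate().stream().map(Pair::getA).collect(Collectors.toList());
// ...and aggregate: one DescriptiveStats per metric across the folds.
Map<MetricID<Label>, DescriptiveStats> summary = EvaluationAggregator.summarize(evals);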

Example 4 with DescriptiveStats

Use of org.tribuo.evaluation.DescriptiveStats in project tribuo by Oracle.

From class EvaluationAggregationTests, method macroPrec.

/**
 * Use EvaluationAggregator to summarize model outputs across several metrics (for example, macro-averaged precision can be computed this way).
 */
@Test
public void macroPrec() {
    Pair<Dataset<Label>, Dataset<Label>> pair = LabelledDataGenerator.denseTrainTest();
    Model<Label> model = DummyClassifierTrainer.createUniformTrainer(1L).train(pair.getA());
    // One precision metric per label in the domain.
    List<LabelMetric> metrics = Arrays.asList(
            LabelMetrics.PRECISION.forTarget(new MetricTarget<>(factory.generateOutput("Foo"))),
            LabelMetrics.PRECISION.forTarget(new MetricTarget<>(factory.generateOutput("Bar"))),
            LabelMetrics.PRECISION.forTarget(new MetricTarget<>(factory.generateOutput("Baz"))),
            LabelMetrics.PRECISION.forTarget(new MetricTarget<>(factory.generateOutput("Quux"))));
    List<Prediction<Label>> predictions = model.predict(pair.getB());
    // The mean of the per-label precisions is the macro-averaged precision.
    DescriptiveStats summary = EvaluationAggregator.summarize(metrics, model, predictions);
    double macroPrecision = summary.getMean();
    // Alternatively, let the evaluator compute it directly...
    LabelEvaluation evaluation = factory.getEvaluator().evaluate(model, predictions, pair.getB().getProvenance());
    // ...the two values should be the same.
    assertEquals(evaluation.macroAveragedPrecision(), macroPrecision);
}
Also used: MetricTarget (org.tribuo.evaluation.metrics.MetricTarget), Dataset (org.tribuo.Dataset), DescriptiveStats (org.tribuo.evaluation.DescriptiveStats), Prediction (org.tribuo.Prediction), Label (org.tribuo.classification.Label), Test (org.junit.jupiter.api.Test)
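
The same aggregation trick extends to other macro-averaged metrics. A sketch of the recall analogue, continuing the test above; it assumes LabelMetrics.RECALL and LabelEvaluation.macroAveragedRecall() exist alongside the precision variants actually shown here:

// Hypothetical macro recall via aggregation, mirroring macroPrec.
List<LabelMetric> recalls = Arrays.asList(
        LabelMetrics.RECALL.forTarget(new MetricTarget<>(factory.generateOutput("Foo"))),
        LabelMetrics.RECALL.forTarget(new MetricTarget<>(factory.generateOutput("Bar"))),
        LabelMetrics.RECALL.forTarget(new MetricTarget<>(factory.generateOutput("Baz"))),
        LabelMetrics.RECALL.forTarget(new MetricTarget<>(factory.generateOutput("Quux"))));
DescriptiveStats recallSummary = EvaluationAggregator.summarize(recalls, model, predictions);
// The mean of the per-label recalls should equal the macro-averaged recall.
assertEquals(evaluation.macroAveragedRecall(), recallSummary.getMean());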

Example 5 with DescriptiveStats

Use of org.tribuo.evaluation.DescriptiveStats in project tribuo by Oracle.

From class EvaluationAggregationTests, method summarizeF1AcrossModels.

public static void summarizeF1AcrossModels() {
    Pair<Dataset<Label>, Dataset<Label>> pair = LabelledDataGenerator.denseTrainTest();
    // Three dummy models that differ only in their RNG seed.
    List<Model<Label>> models = Arrays.asList(
            DummyClassifierTrainer.createUniformTrainer(1L).train(pair.getA()),
            DummyClassifierTrainer.createUniformTrainer(2L).train(pair.getA()),
            DummyClassifierTrainer.createUniformTrainer(3L).train(pair.getA()));
    // A summary for the single label 'Foo' would look like this:
    // MetricTarget<Label> target = new MetricTarget<>(factory.generateOutput("Foo"));
    // LabelMetric metric = LabelMetrics.F1.forTarget(target);
    // DescriptiveStats summary = EvaluationAggregator.summarize(metric, models, pair.getB());
    // out.println("\n\nLabel 'Foo' across models:\n" + summary);
    //
    // Summary for macro F1 across the three models:
    LabelMetric macroF1 = LabelMetrics.F1.forTarget(MetricTarget.macroAverageTarget());
    DescriptiveStats summary = EvaluationAggregator.summarize(macroF1, models, pair.getB());
    out.println("\nMacro F1 across models:\n" + summary);
}
Also used: Dataset (org.tribuo.Dataset), DescriptiveStats (org.tribuo.evaluation.DescriptiveStats), Model (org.tribuo.Model)
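
To pick the best of the three models rather than just summarizing them, the per-model scores can be compared directly. A minimal sketch using only calls that appear elsewhere in these examples (the factory field is assumed to match the other tests):

// Evaluate each model on the shared test set and keep the best macro F1.
Evaluator<Label, LabelEvaluation> evaluator = factory.getEvaluator();
int bestIdx = -1;
double bestF1 = Double.NEGATIVE_INFINITY;
for (int i = 0; i < models.size(); i++) {
    double f1 = evaluator.evaluate(models.get(i), pair.getB()).macroAveragedF1();
    if (f1 > bestF1) {
        bestF1 = f1;
        bestIdx = i;
    }
}
out.println("Best model: index " + bestIdx + ", macro F1 = " + bestF1);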

Aggregations

Dataset (org.tribuo.Dataset): 7 usages
DescriptiveStats (org.tribuo.evaluation.DescriptiveStats): 7 usages
Label (org.tribuo.classification.Label): 5 usages
Model (org.tribuo.Model): 4 usages
MetricID (org.tribuo.evaluation.metrics.MetricID): 4 usages
Pair (com.oracle.labs.mlrg.olcut.util.Pair): 3 usages
Test (org.junit.jupiter.api.Test): 3 usages
CrossValidation (org.tribuo.evaluation.CrossValidation): 3 usages
ArrayList (java.util.ArrayList): 2 usages
Prediction (org.tribuo.Prediction): 2 usages
Trainer (org.tribuo.Trainer): 2 usages
MetricTarget (org.tribuo.evaluation.metrics.MetricTarget): 2 usages
ConfigurationManager (com.oracle.labs.mlrg.olcut.config.ConfigurationManager): 1 usage
UsageException (com.oracle.labs.mlrg.olcut.config.UsageException): 1 usage
IOException (java.io.IOException): 1 usage
System.out (java.lang.System.out): 1 usage
Arrays (java.util.Arrays): 1 usage
Comparator (java.util.Comparator): 1 usage
List (java.util.List): 1 usage
Map (java.util.Map): 1 usage