Search in sources :

Example 1 with MAP

use of edu.neu.ccs.pyramid.eval.MAP in project pyramid by cheng-li.

the class CBMGB method reportGeneral.

/**
 * Prints predictor-independent metrics for {@code cbm} on {@code dataSet} and writes
 * per-instance reports into the {@code <name>_predictions} directory under {@code output.dir}.
 *
 * <p>In order, this method:
 * <ol>
 *   <li>prints label-averaged MAP, instance-averaged MAP and global AP truncated at 30;</li>
 *   <li>writes one line per data point to {@code label_probabilities.txt}, produced by
 *       {@code CBMInspector.topLabels} with the configured {@code report.labelProbThreshold};</li>
 *   <li>reads the comma-separated label indices in {@code unobserved_labels.txt};</li>
 *   <li>computes the log assignment probability of every ground-truth label set, writes all of
 *       them to {@code ground_truth_log_likelihood.txt}, and prints their average over the
 *       instances for which {@code containsNovelClass} is false.</li>
 * </ol>
 *
 * <p>If no instance is left after that filter (empty data set, or every instance carries an
 * unobserved label) the average is reported as {@code NaN} instead of failing.
 *
 * @param config  source of the {@code output.dir} and {@code report.labelProbThreshold} settings
 * @param cbm     model being evaluated
 * @param dataSet data set the metrics are computed on
 * @param name    data set name; prefixes the predictions directory and appears in messages.
 *                The note about unobserved labels is printed only when it equals {@code "test"}
 * @throws Exception propagated from the evaluation calls and the file reads/writes
 */
private static void reportGeneral(Config config, CBM cbm, MultiLabelClfDataSet dataSet, String name) throws Exception {
    System.out.println("============================================================");
    System.out.println("computing other predictor-independent metrics");
    System.out.println("label averaged MAP");
    System.out.println(MAP.map(cbm, dataSet));
    System.out.println("instance averaged MAP");
    System.out.println(MAP.instanceMAP(cbm, dataSet));
    System.out.println("global AP truncated at 30");
    System.out.println(AveragePrecision.globalAveragePrecisionTruncated(cbm, dataSet, 30));

    String output = config.getString("output.dir");
    File labelProbFile = Paths.get(output, name + "_predictions", "label_probabilities.txt").toFile();
    double labelProbThreshold = config.getDouble("report.labelProbThreshold");
    int numDataPoints = dataSet.getNumDataPoints();
    try (BufferedWriter br = new BufferedWriter(new FileWriter(labelProbFile))) {
        for (int i = 0; i < numDataPoints; i++) {
            br.write(CBMInspector.topLabels(cbm, dataSet.getRow(i), labelProbThreshold));
            br.newLine();
        }
    }
    System.out.println("individual label probabilities are saved to " + labelProbFile.getAbsolutePath());

    // Blank entries are dropped so that an empty file yields an empty list.
    List<Integer> unobservedLabels = Arrays.stream(FileUtils.readFileToString(new File(output, "unobserved_labels.txt")).split(","))
            .map(String::trim)
            .filter(s -> !s.isEmpty())
            .map(Integer::parseInt)
            .collect(Collectors.toList());

    // Here we do not use approximation
    double[] logLikelihoods = IntStream.range(0, numDataPoints)
            .parallel()
            .mapToDouble(i -> cbm.predictLogAssignmentProb(dataSet.getRow(i), dataSet.getMultiLabels()[i]))
            .toArray();

    // average() is empty when every instance is filtered out; report NaN rather than throwing
    // NoSuchElementException from getAsDouble() after the report files are half written.
    double average = IntStream.range(0, numDataPoints)
            .filter(i -> !containsNovelClass(dataSet.getMultiLabels()[i], unobservedLabels))
            .mapToDouble(i -> logLikelihoods[i])
            .average()
            .orElse(Double.NaN);

    File logLikelihoodFile = Paths.get(output, name + "_predictions", "ground_truth_log_likelihood.txt").toFile();
    FileUtils.writeStringToFile(logLikelihoodFile, PrintUtil.toMutipleLines(logLikelihoods));
    System.out.println("individual log likelihood of the " + name + " ground truth label set is written to " + logLikelihoodFile.getAbsolutePath());
    System.out.println("average log likelihood of the " + name + " ground truth label sets = " + average);
    if (!unobservedLabels.isEmpty() && name.equals("test")) {
        System.out.println("This is computed by ignoring test instances with new labels unobserved during training");
        System.out.println("The following labels do not actually appear in the training set and therefore cannot be learned:");
        System.out.println(ListUtil.toSimpleString(unobservedLabels));
    }
}
Also used : IntStream(java.util.stream.IntStream) java.util(java.util) MultiLabel(edu.neu.ccs.pyramid.dataset.MultiLabel) AveragePrecision(edu.neu.ccs.pyramid.eval.AveragePrecision) Pair(edu.neu.ccs.pyramid.util.Pair) Config(edu.neu.ccs.pyramid.configuration.Config) ListUtil(edu.neu.ccs.pyramid.util.ListUtil) TRECFormat(edu.neu.ccs.pyramid.dataset.TRECFormat) EarlyStopper(edu.neu.ccs.pyramid.optimization.EarlyStopper) BufferedWriter(java.io.BufferedWriter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) MAP(edu.neu.ccs.pyramid.eval.MAP) FileWriter(java.io.FileWriter) MultiLabelClassifier(edu.neu.ccs.pyramid.multilabel_classification.MultiLabelClassifier) FileUtils(org.apache.commons.io.FileUtils) StopWatch(org.apache.commons.lang3.time.StopWatch) Collectors(java.util.stream.Collectors) File(java.io.File) MultiLabelClfDataSet(edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet) MLMeasures(edu.neu.ccs.pyramid.eval.MLMeasures) Serialization(edu.neu.ccs.pyramid.util.Serialization) PrintUtil(edu.neu.ccs.pyramid.util.PrintUtil) Paths(java.nio.file.Paths) DataSetUtil(edu.neu.ccs.pyramid.dataset.DataSetUtil) edu.neu.ccs.pyramid.multilabel_classification.cbm(edu.neu.ccs.pyramid.multilabel_classification.cbm) LogLikelihood(edu.neu.ccs.pyramid.eval.LogLikelihood) FileWriter(java.io.FileWriter) File(java.io.File) BufferedWriter(java.io.BufferedWriter)

Example 2 with MAP

use of edu.neu.ccs.pyramid.eval.MAP in project pyramid by cheng-li.

the class CBMLR method reportGeneral.

/**
 * Reports metrics of {@code cbm} on {@code dataSet} that do not depend on a particular predictor,
 * printing summaries to standard output and saving per-instance details under
 * {@code <output.dir>/<name>_predictions}.
 *
 * <p>Printed: label-averaged MAP, instance-averaged MAP, global AP truncated at 30, and the
 * average ground-truth log likelihood. Written: {@code label_probabilities.txt} (one
 * {@code CBMInspector.topLabels} line per data point) and
 * {@code ground_truth_log_likelihood.txt} (one log likelihood per data point).
 *
 * <p>The average skips instances for which {@code containsNovelClass} returns true against the
 * label indices listed in {@code <output.dir>/unobserved_labels.txt}. When nothing remains to
 * average, {@code NaN} is reported rather than an exception being thrown.
 *
 * @param config  provides {@code output.dir} and {@code report.labelProbThreshold}
 * @param cbm     the model to evaluate
 * @param dataSet the data set to evaluate on
 * @param name    data set name used in the output directory name and in the printed messages;
 *                the explanation about unobserved labels is printed only for {@code "test"}
 * @throws Exception propagated from the evaluation calls and the file reads/writes
 */
private static void reportGeneral(Config config, CBM cbm, MultiLabelClfDataSet dataSet, String name) throws Exception {
    System.out.println("============================================================");
    System.out.println("computing other predictor-independent metrics");
    System.out.println("label averaged MAP");
    System.out.println(MAP.map(cbm, dataSet));
    System.out.println("instance averaged MAP");
    System.out.println(MAP.instanceMAP(cbm, dataSet));
    System.out.println("global AP truncated at 30");
    System.out.println(AveragePrecision.globalAveragePrecisionTruncated(cbm, dataSet, 30));

    String output = config.getString("output.dir");
    File labelProbFile = Paths.get(output, name + "_predictions", "label_probabilities.txt").toFile();
    double labelProbThreshold = config.getDouble("report.labelProbThreshold");
    int numDataPoints = dataSet.getNumDataPoints();
    try (BufferedWriter br = new BufferedWriter(new FileWriter(labelProbFile))) {
        for (int i = 0; i < numDataPoints; i++) {
            br.write(CBMInspector.topLabels(cbm, dataSet.getRow(i), labelProbThreshold));
            br.newLine();
        }
    }
    System.out.println("individual label probabilities are saved to " + labelProbFile.getAbsolutePath());

    // Comma-separated label indices; empty tokens are ignored.
    String unobservedText = FileUtils.readFileToString(new File(output, "unobserved_labels.txt"));
    List<Integer> unobservedLabels = Arrays.stream(unobservedText.split(","))
            .map(String::trim)
            .filter(s -> !s.isEmpty())
            .map(Integer::parseInt)
            .collect(Collectors.toList());

    // Here we do not use approximation
    double[] logLikelihoods = IntStream.range(0, numDataPoints)
            .parallel()
            .mapToDouble(i -> cbm.predictLogAssignmentProb(dataSet.getRow(i), dataSet.getMultiLabels()[i]))
            .toArray();

    // The filter can remove every instance; orElse avoids the NoSuchElementException that
    // getAsDouble() would raise on an empty average.
    double average = IntStream.range(0, numDataPoints)
            .filter(i -> !containsNovelClass(dataSet.getMultiLabels()[i], unobservedLabels))
            .mapToDouble(i -> logLikelihoods[i])
            .average()
            .orElse(Double.NaN);

    File logLikelihoodFile = Paths.get(output, name + "_predictions", "ground_truth_log_likelihood.txt").toFile();
    FileUtils.writeStringToFile(logLikelihoodFile, PrintUtil.toMutipleLines(logLikelihoods));
    System.out.println("individual log likelihood of the " + name + " ground truth label set is written to " + logLikelihoodFile.getAbsolutePath());
    System.out.println("average log likelihood of the " + name + " ground truth label sets = " + average);
    if (!unobservedLabels.isEmpty() && name.equals("test")) {
        System.out.println("This is computed by ignoring test instances with new labels unobserved during training");
        System.out.println("The following labels do not actually appear in the training set and therefore cannot be learned:");
        System.out.println(ListUtil.toSimpleString(unobservedLabels));
    }
}
Also used : IntStream(java.util.stream.IntStream) java.util(java.util) LogisticRegression(edu.neu.ccs.pyramid.classification.logistic_regression.LogisticRegression) AveragePrecision(edu.neu.ccs.pyramid.eval.AveragePrecision) Pair(edu.neu.ccs.pyramid.util.Pair) Config(edu.neu.ccs.pyramid.configuration.Config) ListUtil(edu.neu.ccs.pyramid.util.ListUtil) EarlyStopper(edu.neu.ccs.pyramid.optimization.EarlyStopper) BufferedWriter(java.io.BufferedWriter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) MAP(edu.neu.ccs.pyramid.eval.MAP) FileWriter(java.io.FileWriter) MultiLabelClassifier(edu.neu.ccs.pyramid.multilabel_classification.MultiLabelClassifier) FileUtils(org.apache.commons.io.FileUtils) StopWatch(org.apache.commons.lang3.time.StopWatch) FeatureList(edu.neu.ccs.pyramid.feature.FeatureList) Collectors(java.util.stream.Collectors) File(java.io.File) MLMeasures(edu.neu.ccs.pyramid.eval.MLMeasures) Feature(edu.neu.ccs.pyramid.feature.Feature) Serialization(edu.neu.ccs.pyramid.util.Serialization) PrintUtil(edu.neu.ccs.pyramid.util.PrintUtil) Paths(java.nio.file.Paths) edu.neu.ccs.pyramid.dataset(edu.neu.ccs.pyramid.dataset) TopFeatures(edu.neu.ccs.pyramid.feature.TopFeatures) LogisticRegressionInspector(edu.neu.ccs.pyramid.classification.logistic_regression.LogisticRegressionInspector) edu.neu.ccs.pyramid.multilabel_classification.cbm(edu.neu.ccs.pyramid.multilabel_classification.cbm) LogLikelihood(edu.neu.ccs.pyramid.eval.LogLikelihood) FileWriter(java.io.FileWriter) File(java.io.File) BufferedWriter(java.io.BufferedWriter)

Example 3 with MAP

use of edu.neu.ccs.pyramid.eval.MAP in project pyramid by cheng-li.

the class CBMEN method reportGeneral.

/**
 * Prints predictor-independent metrics for {@code cbm} on {@code dataSet} and saves
 * per-instance reports under {@code <output.dir>/<name>_predictions}.
 *
 * <p>Only label-averaged MAP is printed here; the instance-averaged MAP and truncated global AP
 * calls are currently commented out (see the TODO in the body). The method then writes
 * {@code label_probabilities.txt} (one {@code CBMInspector.topLabels} line per data point) and
 * {@code ground_truth_log_likelihood.txt}, and prints the average ground-truth log likelihood
 * over instances for which {@code containsNovelClass} is false with respect to the labels
 * listed in {@code <output.dir>/unobserved_labels.txt}.
 *
 * <p>If every instance is excluded (or the data set is empty) the average is reported as
 * {@code NaN} instead of throwing.
 *
 * @param config  provides {@code output.dir} and {@code report.labelProbThreshold}
 * @param cbm     the model to evaluate
 * @param dataSet the data set to evaluate on
 * @param name    data set name used in the output directory name and in printed messages; the
 *                unobserved-label explanation is printed only when it equals {@code "test"}
 * @throws Exception propagated from the evaluation calls and the file reads/writes
 */
private static void reportGeneral(Config config, CBM cbm, MultiLabelClfDataSet dataSet, String name) throws Exception {
    System.out.println("============================================================");
    System.out.println("computing other predictor-independent metrics");
    System.out.println("label averaged MAP");
    System.out.println(MAP.map(cbm, dataSet));
    // TODO: re-enable the remaining metrics below
    // System.out.println("instance averaged MAP");
    // System.out.println(MAP.instanceMAP(cbm, dataSet));
    // System.out.println("global AP truncated at 30");
    // System.out.println(AveragePrecision.globalAveragePrecisionTruncated(cbm, dataSet, 30));

    String output = config.getString("output.dir");
    File labelProbFile = Paths.get(output, name + "_predictions", "label_probabilities.txt").toFile();
    double labelProbThreshold = config.getDouble("report.labelProbThreshold");
    int numDataPoints = dataSet.getNumDataPoints();
    try (BufferedWriter br = new BufferedWriter(new FileWriter(labelProbFile))) {
        for (int i = 0; i < numDataPoints; i++) {
            br.write(CBMInspector.topLabels(cbm, dataSet.getRow(i), labelProbThreshold));
            br.newLine();
        }
    }
    System.out.println("individual label probabilities are saved to " + labelProbFile.getAbsolutePath());

    // Comma-separated label indices; blank tokens (e.g. from an empty file) are skipped.
    List<Integer> unobservedLabels = Arrays.stream(FileUtils.readFileToString(new File(output, "unobserved_labels.txt")).split(","))
            .map(String::trim)
            .filter(s -> !s.isEmpty())
            .map(Integer::parseInt)
            .collect(Collectors.toList());

    // Here we do not use approximation
    double[] logLikelihoods = IntStream.range(0, numDataPoints)
            .parallel()
            .mapToDouble(i -> cbm.predictLogAssignmentProb(dataSet.getRow(i), dataSet.getMultiLabels()[i]))
            .toArray();

    // An empty average (nothing survives the filter) becomes NaN; getAsDouble() would throw
    // NoSuchElementException here.
    double average = IntStream.range(0, numDataPoints)
            .filter(i -> !containsNovelClass(dataSet.getMultiLabels()[i], unobservedLabels))
            .mapToDouble(i -> logLikelihoods[i])
            .average()
            .orElse(Double.NaN);

    File logLikelihoodFile = Paths.get(output, name + "_predictions", "ground_truth_log_likelihood.txt").toFile();
    FileUtils.writeStringToFile(logLikelihoodFile, PrintUtil.toMutipleLines(logLikelihoods));
    System.out.println("individual log likelihood of the " + name + " ground truth label set is written to " + logLikelihoodFile.getAbsolutePath());
    System.out.println("average log likelihood of the " + name + " ground truth label sets = " + average);
    if (!unobservedLabels.isEmpty() && name.equals("test")) {
        System.out.println("This is computed by ignoring test instances with new labels unobserved during training");
        System.out.println("The following labels do not actually appear in the training set and therefore cannot be learned:");
        System.out.println(ListUtil.toSimpleString(unobservedLabels));
    }
}
Also used : IntStream(java.util.stream.IntStream) java.util(java.util) LogisticRegression(edu.neu.ccs.pyramid.classification.logistic_regression.LogisticRegression) Pair(edu.neu.ccs.pyramid.util.Pair) Config(edu.neu.ccs.pyramid.configuration.Config) ListUtil(edu.neu.ccs.pyramid.util.ListUtil) EarlyStopper(edu.neu.ccs.pyramid.optimization.EarlyStopper) BufferedWriter(java.io.BufferedWriter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) MAP(edu.neu.ccs.pyramid.eval.MAP) FileWriter(java.io.FileWriter) MultiLabelClassifier(edu.neu.ccs.pyramid.multilabel_classification.MultiLabelClassifier) FileUtils(org.apache.commons.io.FileUtils) StopWatch(org.apache.commons.lang3.time.StopWatch) FeatureList(edu.neu.ccs.pyramid.feature.FeatureList) Collectors(java.util.stream.Collectors) File(java.io.File) MLMeasures(edu.neu.ccs.pyramid.eval.MLMeasures) Feature(edu.neu.ccs.pyramid.feature.Feature) Serialization(edu.neu.ccs.pyramid.util.Serialization) PrintUtil(edu.neu.ccs.pyramid.util.PrintUtil) Paths(java.nio.file.Paths) edu.neu.ccs.pyramid.dataset(edu.neu.ccs.pyramid.dataset) TopFeatures(edu.neu.ccs.pyramid.feature.TopFeatures) LogisticRegressionInspector(edu.neu.ccs.pyramid.classification.logistic_regression.LogisticRegressionInspector) edu.neu.ccs.pyramid.multilabel_classification.cbm(edu.neu.ccs.pyramid.multilabel_classification.cbm) FileWriter(java.io.FileWriter) File(java.io.File) BufferedWriter(java.io.BufferedWriter)

Aggregations

ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)3 Config (edu.neu.ccs.pyramid.configuration.Config)3 MAP (edu.neu.ccs.pyramid.eval.MAP)3 MLMeasures (edu.neu.ccs.pyramid.eval.MLMeasures)3 MultiLabelClassifier (edu.neu.ccs.pyramid.multilabel_classification.MultiLabelClassifier)3 edu.neu.ccs.pyramid.multilabel_classification.cbm (edu.neu.ccs.pyramid.multilabel_classification.cbm)3 EarlyStopper (edu.neu.ccs.pyramid.optimization.EarlyStopper)3 ListUtil (edu.neu.ccs.pyramid.util.ListUtil)3 Pair (edu.neu.ccs.pyramid.util.Pair)3 PrintUtil (edu.neu.ccs.pyramid.util.PrintUtil)3 Serialization (edu.neu.ccs.pyramid.util.Serialization)3 BufferedWriter (java.io.BufferedWriter)3 File (java.io.File)3 FileWriter (java.io.FileWriter)3 Paths (java.nio.file.Paths)3 java.util (java.util)3 Collectors (java.util.stream.Collectors)3 IntStream (java.util.stream.IntStream)3 FileUtils (org.apache.commons.io.FileUtils)3 StopWatch (org.apache.commons.lang3.time.StopWatch)3