Search in sources :

Example 81 with Config

use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.

the class ClusterLabels method main.

/**
 * Entry point: builds a {@link Config} from the single command-line argument, prints it,
 * then hands it to {@code fitModel} and {@code plot} in that order.
 *
 * @param args must contain exactly one element, which is passed to the {@code Config} constructor
 *             (the error message calls it a properties file)
 * @throws IllegalArgumentException if the number of arguments is not exactly one
 * @throws Exception anything propagated from configuration loading, {@code fitModel} or {@code plot}
 */
public static void main(String[] args) throws Exception {
    final boolean exactlyOneArgument = args.length == 1;
    if (!exactlyOneArgument) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }

    final Config settings = new Config(args[0]);
    // Echo the loaded configuration before doing any work.
    System.out.println(settings);

    fitModel(settings);
    plot(settings);
}
Also used : Config(edu.neu.ccs.pyramid.configuration.Config)

Example 82 with Config

use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.

the class DataSampler method main.

/**
 * Entry point: loads the multi-label data set named by {@code input.dataSet}, samples a subset of
 * its data point indices via {@code Sampling.sampleByPercentage} using the configured
 * {@code percentage} and {@code randomSeed}, and saves the sampled subset to {@code output.dataSet}.
 *
 * @param args must contain exactly one element, which is passed to the {@code Config} constructor
 *             (the error message calls it a properties file)
 * @throws IllegalArgumentException if the number of arguments is not exactly one
 * @throws Exception anything propagated from loading, sampling or saving
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }
    Config config = new Config(args[0]);

    String inputPath = config.getString("input.dataSet");
    MultiLabelClfDataSet fullData = TRECFormat.loadMultiLabelClfDataSet(inputPath, DataSetType.ML_CLF_SPARSE, true);

    // Candidate indices 0 .. numDataPoints-1, boxed because the sampler works on a List<Integer>.
    int numDataPoints = fullData.getNumDataPoints();
    List<Integer> candidateIndices = IntStream.range(0, numDataPoints)
            .boxed()
            .collect(Collectors.toList());

    int randomSeed = config.getInt("randomSeed");
    List<Integer> sampledIndices = Sampling.sampleByPercentage(candidateIndices, config.getDouble("percentage"), randomSeed);

    MultiLabelClfDataSet sampledData = DataSetUtil.sampleData(fullData, sampledIndices);
    TRECFormat.save(sampledData, config.getString("output.dataSet"));
}
Also used : Config(edu.neu.ccs.pyramid.configuration.Config) MultiLabelClfDataSet(edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet)

Example 83 with Config

use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.

the class EMLevelEval method main.

/**
 * Entry point: compares regression predictions against the test labels before and after mapping
 * each prediction onto one of the distinct label values seen in the training data.
 *
 * <p>Steps, in order: load the training set ({@code input.trainData}) and collect its distinct
 * labels in ascending order; load the test set ({@code input.testData}) and the predictions
 * ({@code input.prediction}); apply {@code round(prediction, levels)} to every prediction; print
 * RMSE before and after rounding, the fraction of test points whose rounded prediction equals the
 * true label, and finally the per-level reports produced by {@code truthToPred} and
 * {@code predToTruth}.
 *
 * @param args must contain exactly one element, which is passed to the {@code Config} constructor
 *             (the error message calls it a properties file)
 * @throws IllegalArgumentException if the number of arguments is not exactly one
 * @throws Exception anything propagated from loading data or from the helper methods
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }
    Config config = new Config(args[0]);
    System.out.println(config);

    // Distinct training labels, ascending; these are the levels handed to round(...).
    RegDataSet trainSet = TRECFormat.loadRegDataSet(config.getString("input.trainData"), DataSetType.REG_SPARSE, true);
    List<Double> levels = Arrays.stream(trainSet.getLabels())
            .distinct()
            .sorted()
            .boxed()
            .collect(Collectors.toList());

    RegDataSet test = TRECFormat.loadRegDataSet(config.getString("input.testData"), DataSetType.REG_SPARSE, true);
    double[] doubleTruth = test.getLabels();
    double[] doublePred = loadPrediction(config.getString("input.prediction"));
    double[] roundedPred = Arrays.stream(doublePred).map(d -> round(d, levels)).toArray();

    System.out.println("before rounding");
    System.out.println("rmse = " + RMSE.rmse(doubleTruth, doublePred));
    System.out.println("after rounding");
    System.out.println("rmse = " + RMSE.rmse(doubleTruth, roundedPred));

    // Exact comparison: a test point counts as correct only when the rounded prediction
    // is numerically equal to its true label.
    int numTestPoints = test.getNumDataPoints();
    long numCorrect = 0;
    for (int i = 0; i < numTestPoints; i++) {
        if (doubleTruth[i] == roundedPred[i]) {
            numCorrect++;
        }
    }
    System.out.println("accuracy = " + numCorrect / (double) numTestPoints);

    System.out.println("the distribution of predicted label for a given true label");
    for (double trueLevel : levels) {
        System.out.println("for true label " + trueLevel);
        truthToPred(test.getLabels(), roundedPred, trueLevel, levels);
    }

    System.out.println("=============================");
    System.out.println("the distribution of true label for a given predicted label");
    for (double predictedLevel : levels) {
        System.out.println("for predicted label " + predictedLevel);
        predToTruth(test.getLabels(), roundedPred, predictedLevel, levels);
    }
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) Set(java.util.Set) FileUtils(org.apache.commons.io.FileUtils) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) File(java.io.File) HashSet(java.util.HashSet) List(java.util.List) edu.neu.ccs.pyramid.dataset(edu.neu.ccs.pyramid.dataset) Accuracy(edu.neu.ccs.pyramid.eval.Accuracy) RMSE(edu.neu.ccs.pyramid.eval.RMSE) Config(edu.neu.ccs.pyramid.configuration.Config) Config(edu.neu.ccs.pyramid.configuration.Config) HashSet(java.util.HashSet)

Example 84 with Config

use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.

the class CBMEN method reportHammingPrediction.

/**
 * Predicts a label set for every row of {@code dataSet} with a {@link MarginalPredictor} built on
 * {@code cbm}, prints the prediction time and the resulting {@link MLMeasures}, and writes three
 * files into {@code <output.dir>/<name>_predictions/instance_hamming_loss_optimal/}:
 * <ul>
 *   <li>{@code performance.txt} - the string form of the measures;</li>
 *   <li>{@code predictions.txt} - one line per data point, {@code <prediction>:<probability>},
 *       where the probability comes from {@code cbm.predictAssignmentProb};</li>
 *   <li>{@code individual_performance.json} - {@code mlMeasures.getMacroAverage()} written with
 *       an {@link ObjectMapper}.</li>
 * </ul>
 *
 * @param config  supplies {@code output.dir} and {@code predict.piThreshold}
 * @param cbm     model used both for prediction and for scoring the predicted sets
 * @param dataSet data to predict on; its multi-labels serve as ground truth for the measures
 * @param name    label for this data set, used in log messages and in the output directory name
 * @throws Exception anything propagated from prediction or from file writing
 */
private static void reportHammingPrediction(Config config, CBM cbm, MultiLabelClfDataSet dataSet, String name) throws Exception {
    final String banner = "============================================================";
    System.out.println(banner);
    System.out.println("Making predictions on " + name + " set with the instance Hamming loss optimal predictor");
    String output = config.getString("output.dir");

    MarginalPredictor marginalPredictor = new MarginalPredictor(cbm);
    marginalPredictor.setPiThreshold(config.getDouble("predict.piThreshold"));

    // Time only the prediction call itself.
    StopWatch timer = new StopWatch();
    timer.start();
    MultiLabel[] predictions = marginalPredictor.predict(dataSet);
    System.out.println("time spent on prediction = " + timer);

    MLMeasures mlMeasures = new MLMeasures(dataSet.getNumClasses(), dataSet.getMultiLabels(), predictions);
    System.out.println(name + " performance with the instance Hamming loss optimal predictor");
    System.out.println(mlMeasures);

    // All three report files live in the same directory.
    File reportDir = Paths.get(output, name + "_predictions", "instance_hamming_loss_optimal").toFile();

    File performanceFile = new File(reportDir, "performance.txt");
    FileUtils.writeStringToFile(performanceFile, mlMeasures.toString());
    System.out.println(name + " performance is saved to " + performanceFile.toString());

    // Here we do not use approximation
    double[] setProbs = IntStream.range(0, predictions.length)
            .parallel()
            .mapToDouble(i -> cbm.predictAssignmentProb(dataSet.getRow(i), predictions[i]))
            .toArray();

    File predictionFile = new File(reportDir, "predictions.txt");
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(predictionFile))) {
        int numDataPoints = dataSet.getNumDataPoints();
        for (int row = 0; row < numDataPoints; row++) {
            writer.write(predictions[row].toString() + ":" + setProbs[row]);
            writer.newLine();
        }
    }
    System.out.println("predicted sets and their probabilities are saved to " + predictionFile.getAbsolutePath());

    File individualPerformanceFile = new File(reportDir, "individual_performance.json");
    new ObjectMapper().writeValue(individualPerformanceFile, mlMeasures.getMacroAverage());

    System.out.println(banner);
}
Also used : IntStream(java.util.stream.IntStream) java.util(java.util) LogisticRegression(edu.neu.ccs.pyramid.classification.logistic_regression.LogisticRegression) Pair(edu.neu.ccs.pyramid.util.Pair) Config(edu.neu.ccs.pyramid.configuration.Config) ListUtil(edu.neu.ccs.pyramid.util.ListUtil) EarlyStopper(edu.neu.ccs.pyramid.optimization.EarlyStopper) BufferedWriter(java.io.BufferedWriter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) MAP(edu.neu.ccs.pyramid.eval.MAP) FileWriter(java.io.FileWriter) MultiLabelClassifier(edu.neu.ccs.pyramid.multilabel_classification.MultiLabelClassifier) FileUtils(org.apache.commons.io.FileUtils) StopWatch(org.apache.commons.lang3.time.StopWatch) FeatureList(edu.neu.ccs.pyramid.feature.FeatureList) Collectors(java.util.stream.Collectors) File(java.io.File) MLMeasures(edu.neu.ccs.pyramid.eval.MLMeasures) Feature(edu.neu.ccs.pyramid.feature.Feature) Serialization(edu.neu.ccs.pyramid.util.Serialization) PrintUtil(edu.neu.ccs.pyramid.util.PrintUtil) Paths(java.nio.file.Paths) edu.neu.ccs.pyramid.dataset(edu.neu.ccs.pyramid.dataset) TopFeatures(edu.neu.ccs.pyramid.feature.TopFeatures) LogisticRegressionInspector(edu.neu.ccs.pyramid.classification.logistic_regression.LogisticRegressionInspector) edu.neu.ccs.pyramid.multilabel_classification.cbm(edu.neu.ccs.pyramid.multilabel_classification.cbm) FileWriter(java.io.FileWriter) StopWatch(org.apache.commons.lang3.time.StopWatch) BufferedWriter(java.io.BufferedWriter) MLMeasures(edu.neu.ccs.pyramid.eval.MLMeasures) File(java.io.File) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)

Example 85 with Config

use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.

the class CBMGB method reportAccPrediction.

/**
 * Predicts a label set for every row of {@code dataSet} with an {@link AccPredictor} built on
 * {@code cbm}, prints the resulting {@link MLMeasures}, and writes three files into
 * {@code <output.dir>/<name>_predictions/instance_accuracy_optimal/}:
 * <ul>
 *   <li>{@code performance.txt} - the string form of the measures;</li>
 *   <li>{@code predictions.txt} - one line per data point, {@code <prediction>:<probability>},
 *       where the probability comes from {@code cbm.predictAssignmentProb};</li>
 *   <li>{@code individual_performance.json} - {@code mlMeasures.getMacroAverage()} written with
 *       an {@link ObjectMapper}.</li>
 * </ul>
 *
 * @param config  supplies {@code output.dir} and {@code predict.piThreshold}; the latter is
 *                passed to {@code setComponentContributionThreshold}
 * @param cbm     model used both for prediction and for scoring the predicted sets
 * @param dataSet data to predict on; its multi-labels serve as ground truth for the measures
 * @param name    label for this data set, used in log messages and in the output directory name
 * @throws Exception anything propagated from prediction or from file writing
 */
private static void reportAccPrediction(Config config, CBM cbm, MultiLabelClfDataSet dataSet, String name) throws Exception {
    final String banner = "============================================================";
    System.out.println(banner);
    System.out.println("Making predictions on " + name + " set with the instance set accuracy optimal predictor");
    String output = config.getString("output.dir");

    AccPredictor accPredictor = new AccPredictor(cbm);
    accPredictor.setComponentContributionThreshold(config.getDouble("predict.piThreshold"));
    MultiLabel[] predictions = accPredictor.predict(dataSet);

    MLMeasures mlMeasures = new MLMeasures(dataSet.getNumClasses(), dataSet.getMultiLabels(), predictions);
    System.out.println(name + " performance with the instance set accuracy optimal predictor");
    System.out.println(mlMeasures);

    // All three report files live in the same directory.
    File reportDir = Paths.get(output, name + "_predictions", "instance_accuracy_optimal").toFile();

    File performanceFile = new File(reportDir, "performance.txt");
    FileUtils.writeStringToFile(performanceFile, mlMeasures.toString());
    System.out.println(name + " performance is saved to " + performanceFile.toString());

    // Here we do not use approximation
    double[] setProbs = IntStream.range(0, predictions.length)
            .parallel()
            .mapToDouble(i -> cbm.predictAssignmentProb(dataSet.getRow(i), predictions[i]))
            .toArray();

    File predictionFile = new File(reportDir, "predictions.txt");
    try (BufferedWriter writer = new BufferedWriter(new FileWriter(predictionFile))) {
        int numDataPoints = dataSet.getNumDataPoints();
        for (int row = 0; row < numDataPoints; row++) {
            writer.write(predictions[row].toString() + ":" + setProbs[row]);
            writer.newLine();
        }
    }
    System.out.println("predicted sets and their probabilities are saved to " + predictionFile.getAbsolutePath());

    File individualPerformanceFile = new File(reportDir, "individual_performance.json");
    new ObjectMapper().writeValue(individualPerformanceFile, mlMeasures.getMacroAverage());

    System.out.println(banner);
}
Also used : IntStream(java.util.stream.IntStream) java.util(java.util) MultiLabel(edu.neu.ccs.pyramid.dataset.MultiLabel) AveragePrecision(edu.neu.ccs.pyramid.eval.AveragePrecision) Pair(edu.neu.ccs.pyramid.util.Pair) Config(edu.neu.ccs.pyramid.configuration.Config) ListUtil(edu.neu.ccs.pyramid.util.ListUtil) TRECFormat(edu.neu.ccs.pyramid.dataset.TRECFormat) EarlyStopper(edu.neu.ccs.pyramid.optimization.EarlyStopper) BufferedWriter(java.io.BufferedWriter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) MAP(edu.neu.ccs.pyramid.eval.MAP) FileWriter(java.io.FileWriter) MultiLabelClassifier(edu.neu.ccs.pyramid.multilabel_classification.MultiLabelClassifier) FileUtils(org.apache.commons.io.FileUtils) StopWatch(org.apache.commons.lang3.time.StopWatch) Collectors(java.util.stream.Collectors) File(java.io.File) MultiLabelClfDataSet(edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet) MLMeasures(edu.neu.ccs.pyramid.eval.MLMeasures) Serialization(edu.neu.ccs.pyramid.util.Serialization) PrintUtil(edu.neu.ccs.pyramid.util.PrintUtil) Paths(java.nio.file.Paths) DataSetUtil(edu.neu.ccs.pyramid.dataset.DataSetUtil) edu.neu.ccs.pyramid.multilabel_classification.cbm(edu.neu.ccs.pyramid.multilabel_classification.cbm) LogLikelihood(edu.neu.ccs.pyramid.eval.LogLikelihood) MultiLabel(edu.neu.ccs.pyramid.dataset.MultiLabel) FileWriter(java.io.FileWriter) BufferedWriter(java.io.BufferedWriter) MLMeasures(edu.neu.ccs.pyramid.eval.MLMeasures) File(java.io.File) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)

Aggregations

Config (edu.neu.ccs.pyramid.configuration.Config)119 File (java.io.File)68 Collectors (java.util.stream.Collectors)40 FileUtils (org.apache.commons.io.FileUtils)40 Paths (java.nio.file.Paths)39 IntStream (java.util.stream.IntStream)37 Pair (edu.neu.ccs.pyramid.util.Pair)36 Serialization (edu.neu.ccs.pyramid.util.Serialization)35 StopWatch (org.apache.commons.lang3.time.StopWatch)34 MLMeasures (edu.neu.ccs.pyramid.eval.MLMeasures)33 BufferedWriter (java.io.BufferedWriter)32 FileWriter (java.io.FileWriter)32 java.util (java.util)32 edu.neu.ccs.pyramid.dataset (edu.neu.ccs.pyramid.dataset)31 MultiLabelClassifier (edu.neu.ccs.pyramid.multilabel_classification.MultiLabelClassifier)29 EarlyStopper (edu.neu.ccs.pyramid.optimization.EarlyStopper)28 PrintUtil (edu.neu.ccs.pyramid.util.PrintUtil)26 edu.neu.ccs.pyramid.multilabel_classification.cbm (edu.neu.ccs.pyramid.multilabel_classification.cbm)25 ListUtil (edu.neu.ccs.pyramid.util.ListUtil)25 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)22