Example use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li:
the main method of the class ClusterLabels.
/**
 * Entry point: loads the configuration from the given properties file,
 * echoes it to stdout, then fits the clustering model and plots the result.
 *
 * @param args exactly one argument: the path to a properties file
 * @throws Exception if configuration loading, fitting, or plotting fails
 */
public static void main(String[] args) throws Exception {
    // A single properties-file path is required; anything else is a usage error.
    if (args.length != 1) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }
    final Config configuration = new Config(args[0]);
    System.out.println(configuration);
    fitModel(configuration);
    plot(configuration);
}
Example use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li:
the main method of the class DataSampler.
/**
 * Entry point: draws a random subset of a multi-label classification dataset
 * by percentage and saves the subset in TREC format.
 *
 * @param args exactly one argument: the path to a properties file
 * @throws Exception if loading, sampling, or saving fails
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }
    Config config = new Config(args[0]);
    MultiLabelClfDataSet fullSet = TRECFormat.loadMultiLabelClfDataSet(
            config.getString("input.dataSet"), DataSetType.ML_CLF_SPARSE, true);
    int seed = config.getInt("randomSeed");
    double percentage = config.getDouble("percentage");
    // Every data point index in the loaded set is a sampling candidate.
    List<Integer> candidates = IntStream.range(0, fullSet.getNumDataPoints())
            .boxed()
            .collect(Collectors.toList());
    List<Integer> sampled = Sampling.sampleByPercentage(candidates, percentage, seed);
    MultiLabelClfDataSet subset = DataSetUtil.sampleData(fullSet, sampled);
    TRECFormat.save(subset, config.getString("output.dataSet"));
}
Example use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li:
the main method of the class EMLevelEval.
/**
 * Entry point: evaluates regression predictions before and after snapping each
 * prediction to the nearest label level observed in the training data, then
 * prints per-level distributions of predicted vs. true labels.
 *
 * @param args exactly one argument: the path to a properties file
 * @throws Exception if configuration, data, or prediction loading fails
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }
    Config config = new Config(args[0]);
    System.out.println(config);
    RegDataSet train = TRECFormat.loadRegDataSet(config.getString("input.trainData"), DataSetType.REG_SPARSE, true);
    // Collect the distinct label values ("levels") seen in training, sorted ascending.
    Set<Double> distinctLabels = new HashSet<>();
    for (double label : train.getLabels()) {
        distinctLabels.add(label);
    }
    List<Double> levels = distinctLabels.stream().sorted().collect(Collectors.toList());
    RegDataSet test = TRECFormat.loadRegDataSet(config.getString("input.testData"), DataSetType.REG_SPARSE, true);
    double[] truth = test.getLabels();
    double[] rawPred = loadPrediction(config.getString("input.prediction"));
    // Snap every raw prediction to its closest training level.
    double[] snappedPred = Arrays.stream(rawPred).map(p -> round(p, levels)).toArray();
    System.out.println("before rounding");
    System.out.println("rmse = " + RMSE.rmse(truth, rawPred));
    System.out.println("after rounding");
    System.out.println("rmse = " + RMSE.rmse(truth, snappedPred));
    // Exact equality is meaningful here: both sides are drawn from the same
    // finite set of level values after rounding.
    long numExact = IntStream.range(0, test.getNumDataPoints())
            .filter(i -> truth[i] == snappedPred[i])
            .count();
    System.out.println("accuracy = " + numExact / (double) test.getNumDataPoints());
    System.out.println("the distribution of predicted label for a given true label");
    for (double level : levels) {
        System.out.println("for true label " + level);
        truthToPred(test.getLabels(), snappedPred, level, levels);
    }
    System.out.println("=============================");
    System.out.println("the distribution of true label for a given predicted label");
    for (double level : levels) {
        System.out.println("for predicted label " + level);
        predToTruth(test.getLabels(), snappedPred, level, levels);
    }
}
Example use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li:
the reportHammingPrediction method of the class CBMEN.
/**
 * Evaluates the instance Hamming loss optimal (marginal) predictor on the
 * given dataset, prints the results, and writes three artifacts under
 * {output.dir}/{name}_predictions/instance_hamming_loss_optimal/:
 * performance.txt, predictions.txt (one "set:probability" line per instance),
 * and individual_performance.json (macro-averaged per-label measures).
 *
 * @param config  configuration; reads "output.dir" and "predict.piThreshold"
 * @param cbm     trained CBM used for prediction and probability estimation
 * @param dataSet dataset to evaluate on
 * @param name    dataset name used in console messages and output paths
 * @throws Exception if any output file cannot be written
 */
private static void reportHammingPrediction(Config config, CBM cbm, MultiLabelClfDataSet dataSet, String name) throws Exception {
System.out.println("============================================================");
System.out.println("Making predictions on " + name + " set with the instance Hamming loss optimal predictor");
String output = config.getString("output.dir");
// All artifacts of this predictor share one directory; build the path once.
File reportDir = Paths.get(output, name + "_predictions", "instance_hamming_loss_optimal").toFile();
MarginalPredictor marginalPredictor = new MarginalPredictor(cbm);
marginalPredictor.setPiThreshold(config.getDouble("predict.piThreshold"));
StopWatch stopWatch = new StopWatch();
stopWatch.start();
MultiLabel[] predictions = marginalPredictor.predict(dataSet);
System.out.println("time spent on prediction = " + stopWatch);
MLMeasures mlMeasures = new MLMeasures(dataSet.getNumClasses(), dataSet.getMultiLabels(), predictions);
System.out.println(name + " performance with the instance Hamming loss optimal predictor");
System.out.println(mlMeasures);
File performanceFile = new File(reportDir, "performance.txt");
// NOTE(review): this overload uses the platform default charset — consider
// the charset-taking overload; left unchanged to avoid new imports here.
FileUtils.writeStringToFile(performanceFile, mlMeasures.toString());
System.out.println(name + " performance is saved to " + performanceFile.toString());
// Here we do not use approximation: exact joint probability of each predicted set.
double[] setProbs = IntStream.range(0, predictions.length).parallel().mapToDouble(i -> cbm.predictAssignmentProb(dataSet.getRow(i), predictions[i])).toArray();
File predictionFile = new File(reportDir, "predictions.txt");
try (BufferedWriter br = new BufferedWriter(new FileWriter(predictionFile))) {
    for (int i = 0; i < dataSet.getNumDataPoints(); i++) {
        // One line per instance: predictedSet:probability
        br.write(predictions[i].toString());
        br.write(":");
        br.write("" + setProbs[i]);
        br.newLine();
    }
}
System.out.println("predicted sets and their probabilities are saved to " + predictionFile.getAbsolutePath());
// The original guarded this with a local flag that was always true; the dead
// conditional is removed, preserving behavior.
ObjectMapper objectMapper = new ObjectMapper();
objectMapper.writeValue(new File(reportDir, "individual_performance.json"), mlMeasures.getMacroAverage());
System.out.println("============================================================");
}
Example use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li:
the reportAccPrediction method of the class CBMGB.
/**
 * Evaluates the instance set accuracy optimal predictor on the given dataset,
 * prints the results, and writes three artifacts under
 * {output.dir}/{name}_predictions/instance_accuracy_optimal/:
 * performance.txt, predictions.txt (one "set:probability" line per instance),
 * and individual_performance.json (macro-averaged per-label measures).
 *
 * @param config  configuration; reads "output.dir" and "predict.piThreshold"
 * @param cbm     trained CBM used for prediction and probability estimation
 * @param dataSet dataset to evaluate on
 * @param name    dataset name used in console messages and output paths
 * @throws Exception if any output file cannot be written
 */
private static void reportAccPrediction(Config config, CBM cbm, MultiLabelClfDataSet dataSet, String name) throws Exception {
System.out.println("============================================================");
System.out.println("Making predictions on " + name + " set with the instance set accuracy optimal predictor");
String output = config.getString("output.dir");
// All artifacts of this predictor share one directory; build the path once.
File reportDir = Paths.get(output, name + "_predictions", "instance_accuracy_optimal").toFile();
AccPredictor accPredictor = new AccPredictor(cbm);
accPredictor.setComponentContributionThreshold(config.getDouble("predict.piThreshold"));
MultiLabel[] predictions = accPredictor.predict(dataSet);
MLMeasures mlMeasures = new MLMeasures(dataSet.getNumClasses(), dataSet.getMultiLabels(), predictions);
System.out.println(name + " performance with the instance set accuracy optimal predictor");
System.out.println(mlMeasures);
File performanceFile = new File(reportDir, "performance.txt");
// NOTE(review): this overload uses the platform default charset — consider
// the charset-taking overload; left unchanged to avoid new imports here.
FileUtils.writeStringToFile(performanceFile, mlMeasures.toString());
System.out.println(name + " performance is saved to " + performanceFile.toString());
// Here we do not use approximation: exact joint probability of each predicted set.
double[] setProbs = IntStream.range(0, predictions.length).parallel().mapToDouble(i -> cbm.predictAssignmentProb(dataSet.getRow(i), predictions[i])).toArray();
File predictionFile = new File(reportDir, "predictions.txt");
try (BufferedWriter br = new BufferedWriter(new FileWriter(predictionFile))) {
    for (int i = 0; i < dataSet.getNumDataPoints(); i++) {
        // One line per instance: predictedSet:probability
        br.write(predictions[i].toString());
        br.write(":");
        br.write("" + setProbs[i]);
        br.newLine();
    }
}
System.out.println("predicted sets and their probabilities are saved to " + predictionFile.getAbsolutePath());
// The original guarded this with a local flag that was always true; the dead
// conditional is removed, preserving behavior.
ObjectMapper objectMapper = new ObjectMapper();
objectMapper.writeValue(new File(reportDir, "individual_performance.json"), mlMeasures.getMacroAverage());
System.out.println("============================================================");
}
Aggregations