Search in sources :

Example 21 with Config

use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.

the class TRECFormat method writeConfigFile.

/**
 * Stores the summary configuration of a regression data set next to its TREC data.
 *
 * <p>The number of data points, the number of features and the missing-value flag
 * are read from {@code dataSet} and stored in a file named
 * {@code TREC_CONFIG_FILE_NAME} resolved against {@code trecFile}.
 *
 * <p>Any exception raised while storing is printed via {@code printStackTrace()}
 * and is not propagated to the caller.
 *
 * @param dataSet  the regression data set whose statistics are recorded
 * @param trecFile the parent location under which the config file is created
 */
private static void writeConfigFile(RegDataSet dataSet, File trecFile) {
    final Config settings = new Config();
    settings.setInt(TREC_CONFIG_NUM_DATA_POINTS, dataSet.getNumDataPoints());
    settings.setInt(TREC_CONFIG_NUM_FEATURES, dataSet.getNumFeatures());
    settings.setBoolean(TREC_CONFIG_MISSING_VALUE, dataSet.hasMissingValue());

    final File target = new File(trecFile, TREC_CONFIG_FILE_NAME);
    try {
        settings.store(target);
    } catch (Exception storeFailure) {
        // Best effort: report the failure but let the caller continue.
        storeFailure.printStackTrace();
    }
}
Also used : Config(edu.neu.ccs.pyramid.configuration.Config)

Example 22 with Config

use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.

the class TRECFormat method writeConfigFile.

/**
 * Stores the summary configuration of a classification data set next to its TREC data.
 *
 * <p>The number of data points, features and classes, plus the missing-value flag,
 * are read from {@code dataSet} and stored in a file named
 * {@code TREC_CONFIG_FILE_NAME} resolved against {@code trecFile}.
 *
 * <p>Any exception raised while storing is printed via {@code printStackTrace()}
 * and is not propagated to the caller.
 *
 * @param dataSet  the classification data set whose statistics are recorded
 * @param trecFile the parent location under which the config file is created
 */
private static void writeConfigFile(ClfDataSet dataSet, File trecFile) {
    final Config settings = new Config();
    settings.setInt(TREC_CONFIG_NUM_DATA_POINTS, dataSet.getNumDataPoints());
    settings.setInt(TREC_CONFIG_NUM_FEATURES, dataSet.getNumFeatures());
    settings.setInt(TREC_CONFIG_NUM_CLASSES, dataSet.getNumClasses());
    settings.setBoolean(TREC_CONFIG_MISSING_VALUE, dataSet.hasMissingValue());

    final File target = new File(trecFile, TREC_CONFIG_FILE_NAME);
    try {
        settings.store(target);
    } catch (Exception storeFailure) {
        // Best effort: report the failure but let the caller continue.
        storeFailure.printStackTrace();
    }
}
Also used : Config(edu.neu.ccs.pyramid.configuration.Config)

Example 23 with Config

use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.

the class GBClassifier method main.

/**
 * Command-line entry point: loads a properties file and runs the stages it enables.
 *
 * <p>Exactly one argument is expected, the path handed to the {@code Config}
 * constructor. The loaded configuration is echoed to standard output, then
 * {@code train} runs if the {@code "train"} flag is set, followed by {@code test}
 * if the {@code "test"} flag is set.
 *
 * @param args a single element: the properties file to load
 * @throws IllegalArgumentException if the number of arguments is not exactly one
 * @throws Exception                anything raised while loading the config, training or testing
 */
public static void main(String[] args) throws Exception {
    final boolean hasSingleArgument = args.length == 1;
    if (!hasSingleArgument) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }

    final String propertiesPath = args[0];
    final Config settings = new Config(propertiesPath);
    System.out.println(settings);

    // The "test" flag is read only after training has finished, as in the original flow.
    final boolean shouldTrain = settings.getBoolean("train");
    if (shouldTrain) {
        train(settings);
    }
    final boolean shouldTest = settings.getBoolean("test");
    if (shouldTest) {
        test(settings);
    }
}
Also used : RegTreeConfig(edu.neu.ccs.pyramid.regression.regression_tree.RegTreeConfig) Config(edu.neu.ccs.pyramid.configuration.Config)

Example 24 with Config

use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.

the class LibSvm2Trec method main.

/**
 * Command-line entry point: loads a properties file and translates the data set it describes.
 *
 * <p>The first argument is the path handed to the {@code Config} constructor. The
 * loaded configuration is echoed to standard output, and the {@code "dataSetType"}
 * property selects which translation routine runs:
 * {@code "clf"}, {@code "reg"} or {@code "mlclf"}.
 *
 * @param args the first element is the properties file to load
 * @throws IllegalArgumentException if no argument is given, or if
 *                                  {@code dataSetType} is not one of the supported values
 * @throws Exception                anything raised while loading the config or translating
 */
public static void main(String[] args) throws Exception {
    // Fail with a usage message instead of an ArrayIndexOutOfBoundsException on args[0].
    if (args.length == 0) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }
    Config config = new Config(args[0]);
    System.out.println(config);
    String dataType = config.getString("dataSetType");
    switch (dataType) {
        case "clf":
            translateClfData(config);
            break;
        case "reg":
            translateRegData(config);
            break;
        case "mlclf":
            translateMLClfData(config);
            break;
        default:
            // Previously an unrecognised type fell through silently and nothing was translated.
            throw new IllegalArgumentException("dataSetType can be clf, reg or mlclf; found: " + dataType);
    }
}
Also used : Config(edu.neu.ccs.pyramid.configuration.Config)

Example 25 with Config

use of edu.neu.ccs.pyramid.configuration.Config in project pyramid by cheng-li.

the class LinearRegElasticNet method main.

/**
 * Command-line entry point: trains an elastic-net linear regression and reports the results.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>load the properties file named by the single argument and echo it;</li>
 *   <li>create the {@code output.folder} directory;</li>
 *   <li>load the train and test sets named by {@code input.trainSet} and
 *       {@code input.testSet}, as dense or sparse according to
 *       {@code featureMatrix.sparsity};</li>
 *   <li>optimise with the {@code regularization} and {@code l1Ratio} settings,
 *       printing RMSE on both sets before and after training;</li>
 *   <li>write the non-zero weights to {@code features_sorted_by_indices.txt} and
 *       {@code features_sorted_by_weights.txt}, and the test predictions to
 *       {@code test_predictions.txt}, all inside the output folder.</li>
 * </ol>
 *
 * @param args a single element: the properties file to load
 * @throws IllegalArgumentException if the number of arguments is not exactly one, or if
 *                                  {@code featureMatrix.sparsity} is neither dense nor sparse
 * @throws Exception                anything raised while loading, training or writing
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }
    Config config = new Config(args[0]);
    System.out.println(config);
    String output = config.getString("output.folder");
    new File(output).mkdirs();
    String sparsity = config.getString("featureMatrix.sparsity").toLowerCase();
    DataSetType dataSetType;
    switch (sparsity) {
        case "dense":
            dataSetType = DataSetType.REG_DENSE;
            break;
        case "sparse":
            dataSetType = DataSetType.REG_SPARSE;
            break;
        default:
            throw new IllegalArgumentException("featureMatrix.sparsity can be either dense or sparse");
    }
    RegDataSet trainSet = TRECFormat.loadRegDataSet(config.getString("input.trainSet"), dataSetType, true);
    RegDataSet testSet = TRECFormat.loadRegDataSet(config.getString("input.testSet"), dataSetType, true);
    LinearRegression linearRegression = new LinearRegression(trainSet.getNumFeatures());
    ElasticNetLinearRegOptimizer optimizer = new ElasticNetLinearRegOptimizer(linearRegression, trainSet);
    optimizer.setRegularization(config.getDouble("regularization"));
    optimizer.setL1Ratio(config.getDouble("l1Ratio"));
    System.out.println("before training");
    System.out.println("training set RMSE = " + RMSE.rmse(linearRegression, trainSet));
    System.out.println("test set RMSE = " + RMSE.rmse(linearRegression, testSet));
    System.out.println("start training");
    StopWatch stopWatch = new StopWatch();
    stopWatch.start();
    optimizer.optimize();
    System.out.println("training done");
    System.out.println("time spent on training = " + stopWatch);
    System.out.println("after training");
    System.out.println("training set RMSE = " + RMSE.rmse(linearRegression, trainSet));
    System.out.println("test set RMSE = " + RMSE.rmse(linearRegression, testSet));
    System.out.println("number of non-zeros weights in linear regression (not including bias) = " + linearRegression.getWeights().getWeightsWithoutBias().getNumNonZeroElements());

    // Collect (feature index, weight) for every non-zero weight, excluding the bias.
    List<Pair<Integer, Double>> sorted = new ArrayList<>();
    for (Vector.Element element : linearRegression.getWeights().getWeightsWithoutBias().nonZeroes()) {
        sorted.add(new Pair<>(element.index(), element.get()));
    }

    Comparator<Pair<Integer, Double>> comparatorByIndex = Comparator.comparing(pair -> pair.getFirst());
    sorted = sorted.stream().sorted(comparatorByIndex).collect(Collectors.toList());
    File byIndexFile = new File(output, "features_sorted_by_indices.txt");
    writeFeatureWeights(sorted, trainSet, byIndexFile);
    System.out.println("all selected features (sorted by indices) are saved to " + byIndexFile.getAbsolutePath());

    // Re-sort the index-ordered list by descending absolute weight; the sort is stable,
    // so features with equal absolute weight stay in index order.
    Comparator<Pair<Integer, Double>> comparator = Comparator.comparing(pair -> Math.abs(pair.getSecond()));
    sorted = sorted.stream().sorted(comparator.reversed()).collect(Collectors.toList());
    File byWeightFile = new File(output, "features_sorted_by_weights.txt");
    writeFeatureWeights(sorted, trainSet, byWeightFile);
    System.out.println("all selected features (sorted by absolute weights) are saved to " + byWeightFile.getAbsolutePath());

    File reportFile = new File(output, "test_predictions.txt");
    report(linearRegression, testSet, reportFile);
    System.out.println("predictions on the test set are written to " + reportFile.getAbsolutePath());
}

/**
 * Writes one {@code index(name):weight} line per entry to {@code target}, in list order.
 *
 * @param weights (feature index, weight) pairs, already in the desired order
 * @param dataSet the data set whose feature list supplies the name for each index
 * @param target  the file to write
 * @throws Exception if writing the file fails
 */
private static void writeFeatureWeights(List<Pair<Integer, Double>> weights, RegDataSet dataSet, File target) throws Exception {
    StringBuilder sb = new StringBuilder();
    for (Pair<Integer, Double> pair : weights) {
        int index = pair.getFirst();
        sb.append(index).append("(").append(dataSet.getFeatureList().get(index).getName()).append(")").append(":").append(pair.getSecond()).append("\n");
    }
    FileUtils.writeStringToFile(target, sb.toString());
}
Also used : DataSetType(edu.neu.ccs.pyramid.dataset.DataSetType) Config(edu.neu.ccs.pyramid.configuration.Config) ArrayList(java.util.ArrayList) StopWatch(org.apache.commons.lang3.time.StopWatch) ElasticNetLinearRegOptimizer(edu.neu.ccs.pyramid.regression.linear_regression.ElasticNetLinearRegOptimizer) RegDataSet(edu.neu.ccs.pyramid.dataset.RegDataSet) File(java.io.File) LinearRegression(edu.neu.ccs.pyramid.regression.linear_regression.LinearRegression) Vector(org.apache.mahout.math.Vector) Pair(edu.neu.ccs.pyramid.util.Pair)

Aggregations

Config (edu.neu.ccs.pyramid.configuration.Config): 59; File (java.io.File): 35; Collectors (java.util.stream.Collectors): 18; FileUtils (org.apache.commons.io.FileUtils): 18; StopWatch (org.apache.commons.lang3.time.StopWatch): 18; Serialization (edu.neu.ccs.pyramid.util.Serialization): 17; BufferedWriter (java.io.BufferedWriter): 17; FileWriter (java.io.FileWriter): 17; Paths (java.nio.file.Paths): 17; IntStream (java.util.stream.IntStream): 16; Pair (edu.neu.ccs.pyramid.util.Pair): 15; edu.neu.ccs.pyramid.dataset (edu.neu.ccs.pyramid.dataset): 14; EarlyStopper (edu.neu.ccs.pyramid.optimization.EarlyStopper): 14; java.util (java.util): 14; MLMeasures (edu.neu.ccs.pyramid.eval.MLMeasures): 13; PrintUtil (edu.neu.ccs.pyramid.util.PrintUtil): 13; MultiLabelClassifier (edu.neu.ccs.pyramid.multilabel_classification.MultiLabelClassifier): 12; edu.neu.ccs.pyramid.multilabel_classification.cbm (edu.neu.ccs.pyramid.multilabel_classification.cbm): 12; ListUtil (edu.neu.ccs.pyramid.util.ListUtil): 12; IOException (java.io.IOException): 8