Search in sources :

Example 1 with RegDataSet

use of edu.neu.ccs.pyramid.dataset.RegDataSet in project pyramid by cheng-li.

the class RegressionSynthesizer method univarNormal.

/**
 * Builds a dense, single-feature regression data set with {@code numDataPoints} rows.
 *
 * <p>For every row the feature value is obtained from
 * {@code Sampling.doubleUniform(-1, 1)}; the label is the density of a
 * {@code NormalDistribution(0, 1)} at that feature value plus one draw from
 * {@code noise}.
 *
 * @return the populated data set
 */
public RegDataSet univarNormal() {
    NormalDistribution standardNormal = new NormalDistribution(0, 1);
    RegDataSet synthetic = RegDataSetBuilder.getBuilder()
            .numDataPoints(numDataPoints)
            .numFeatures(1)
            .dense(true)
            .missingValue(false)
            .build();
    for (int row = 0; row < numDataPoints; row++) {
        double x = Sampling.doubleUniform(-1, 1);
        // Density is evaluated before the noise draw, matching the sampling order callers rely on.
        double y = standardNormal.density(x) + noise.sample();
        synthetic.setFeatureValue(row, 0, x);
        synthetic.setLabel(row, y);
    }
    return synthetic;
}
Also used : NormalDistribution(org.apache.commons.math3.distribution.NormalDistribution) RegDataSet(edu.neu.ccs.pyramid.dataset.RegDataSet)

Example 2 with RegDataSet

use of edu.neu.ccs.pyramid.dataset.RegDataSet in project pyramid by cheng-li.

the class RulesTest method test1.

/**
 * Fits a small regression tree on the slice-location data set, builds one rule of
 * each kind (two tree rules, a constant rule and a linear rule) and writes the
 * list as JSON to {@code decision.json} under {@code TMP}.
 *
 * @throws Exception propagated from data loading, tree training or JSON writing
 */
static void test1() throws Exception {
    int numLeaves = 4;
    RegDataSet dataSet = StandardFormat.loadRegDataSet("/Users/chengli/Datasets/slice_location/standard/featureList.txt", "/Users/chengli/Datasets/slice_location/standard/labels.txt", ",", DataSetType.REG_DENSE, false);
    System.out.println(dataSet.isDense());

    RegTreeConfig regTreeConfig = new RegTreeConfig();
    regTreeConfig.setMaxNumLeaves(numLeaves);
    regTreeConfig.setMinDataPerLeaf(5);
    regTreeConfig.setNumSplitIntervals(100);
    RegressionTree regressionTree = RegTreeTrainer.fit(regTreeConfig, dataSet);

    // Rules derived from the fitted tree for two specific rows, plus two tree-independent rules.
    List<Rule> rules = new ArrayList<>();
    rules.add(new TreeRule(regressionTree, dataSet.getRow(100)));
    rules.add(new TreeRule(regressionTree, dataSet.getRow(1)));
    rules.add(new ConstantRule(0.8));
    rules.add(new LinearRule());

    ObjectMapper mapper = new ObjectMapper();
    mapper.writeValue(new File(TMP, "decision.json"), rules);
}
Also used : RegTreeConfig(edu.neu.ccs.pyramid.regression.regression_tree.RegTreeConfig) TreeRule(edu.neu.ccs.pyramid.regression.regression_tree.TreeRule) RegressionTree(edu.neu.ccs.pyramid.regression.regression_tree.RegressionTree) ArrayList(java.util.ArrayList) StopWatch(org.apache.commons.lang3.time.StopWatch) RegDataSet(edu.neu.ccs.pyramid.dataset.RegDataSet) File(java.io.File) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper)

Example 3 with RegDataSet

use of edu.neu.ccs.pyramid.dataset.RegDataSet in project pyramid by cheng-li.

the class GBRegressor method test.

/**
 * Evaluates a previously serialized {@code LSBoost} model on the configured test set.
 *
 * <p>Reads the model from the file {@code model} inside the folder given by
 * {@code output.folder}, loads the test data named by {@code input.testData}
 * using the matrix type from {@code input.matrixType}, prints the test RMSE and
 * writes a prediction report to {@code test_predictions.txt} in the output folder.
 *
 * @param config configuration supplying the keys listed above
 * @throws IllegalArgumentException if {@code input.matrixType} is neither
 *         {@code dense} nor {@code sparse}
 * @throws Exception propagated from deserialization, data loading or reporting
 */
private static void test(Config config) throws Exception {
    String outputFolder = config.getString("output.folder");
    LSBoost model = (LSBoost) Serialization.deserialize(new File(outputFolder, "model"));

    String matrixType = config.getString("input.matrixType");
    final DataSetType dataSetType;
    if (matrixType.equals("dense")) {
        dataSetType = DataSetType.REG_DENSE;
    } else if (matrixType.equals("sparse")) {
        dataSetType = DataSetType.REG_SPARSE;
    } else {
        throw new IllegalArgumentException("input.matrixType should be dense or sparse");
    }

    String testDataPath = config.getString("input.testData");
    RegDataSet evaluationSet = TRECFormat.loadRegDataSet(testDataPath, dataSetType, true);
    double testRmse = RMSE.rmse(model, evaluationSet);
    System.out.println("test RMSE = " + testRmse);

    File predictionsFile = new File(outputFolder, "test_predictions.txt");
    report(model, evaluationSet, predictionsFile);
    System.out.println("predictions on the test set are written to " + predictionsFile.getAbsolutePath());
}
Also used : DataSetType(edu.neu.ccs.pyramid.dataset.DataSetType) RegDataSet(edu.neu.ccs.pyramid.dataset.RegDataSet) File(java.io.File) LSBoost(edu.neu.ccs.pyramid.regression.least_squares_boost.LSBoost)

Example 4 with RegDataSet

use of edu.neu.ccs.pyramid.dataset.RegDataSet in project pyramid by cheng-li.

the class LinearRegElasticNet method main.

/**
 * Trains an elastic-net regularized linear regression from a properties file and
 * reports the result.
 *
 * <p>The single argument is the path of a properties file. The run prints RMSE on
 * the training and test sets before and after optimization, then writes three
 * files into {@code output.folder}: the non-zero weights sorted by feature index,
 * the same weights sorted by descending absolute value, and the test predictions.
 *
 * @param args exactly one element: the properties file path
 * @throws IllegalArgumentException if the argument count is not one, or if
 *         {@code featureMatrix.sparsity} is neither {@code dense} nor {@code sparse}
 * @throws Exception propagated from configuration loading, data loading,
 *         optimization or file writing
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }
    Config config = new Config(args[0]);
    System.out.println(config);
    String output = config.getString("output.folder");
    new File(output).mkdirs();
    String sparsity = config.getString("featureMatrix.sparsity").toLowerCase();
    DataSetType dataSetType = null;
    switch(sparsity) {
        case "dense":
            dataSetType = DataSetType.REG_DENSE;
            break;
        case "sparse":
            dataSetType = DataSetType.REG_SPARSE;
            break;
        default:
            throw new IllegalArgumentException("featureMatrix.sparsity can be either dense or sparse");
    }
    RegDataSet trainSet = TRECFormat.loadRegDataSet(config.getString("input.trainSet"), dataSetType, true);
    RegDataSet testSet = TRECFormat.loadRegDataSet(config.getString("input.testSet"), dataSetType, true);
    LinearRegression linearRegression = new LinearRegression(trainSet.getNumFeatures());
    ElasticNetLinearRegOptimizer optimizer = new ElasticNetLinearRegOptimizer(linearRegression, trainSet);
    optimizer.setRegularization(config.getDouble("regularization"));
    optimizer.setL1Ratio(config.getDouble("l1Ratio"));
    System.out.println("before training");
    System.out.println("training set RMSE = " + RMSE.rmse(linearRegression, trainSet));
    System.out.println("test set RMSE = " + RMSE.rmse(linearRegression, testSet));
    System.out.println("start training");
    StopWatch stopWatch = new StopWatch();
    stopWatch.start();
    optimizer.optimize();
    System.out.println("training done");
    System.out.println("time spent on training = " + stopWatch);
    System.out.println("after training");
    System.out.println("training set RMSE = " + RMSE.rmse(linearRegression, trainSet));
    System.out.println("test set RMSE = " + RMSE.rmse(linearRegression, testSet));
    System.out.println("number of non-zeros weights in linear regression (not including bias) = " + linearRegression.getWeights().getWeightsWithoutBias().getNumNonZeroElements());

    // Collect (feature index, weight) for every non-zero weight, excluding the bias.
    List<Pair<Integer, Double>> selected = new ArrayList<>();
    for (Vector.Element element : linearRegression.getWeights().getWeightsWithoutBias().nonZeroes()) {
        selected.add(new Pair<>(element.index(), element.get()));
    }

    Comparator<Pair<Integer, Double>> comparatorByIndex = Comparator.comparing(pair -> pair.getFirst());
    selected.sort(comparatorByIndex);
    File byIndexFile = new File(output, "features_sorted_by_indices.txt");
    FileUtils.writeStringToFile(byIndexFile, formatSelectedFeatures(selected, trainSet));
    System.out.println("all selected features (sorted by indices) are saved to " + byIndexFile.getAbsolutePath());

    // The sort is stable, so weights with equal magnitude keep their index order from the pass above.
    Comparator<Pair<Integer, Double>> comparator = Comparator.comparing(pair -> Math.abs(pair.getSecond()));
    selected.sort(comparator.reversed());
    File byWeightFile = new File(output, "features_sorted_by_weights.txt");
    FileUtils.writeStringToFile(byWeightFile, formatSelectedFeatures(selected, trainSet));
    System.out.println("all selected features (sorted by absolute weights) are saved to " + byWeightFile.getAbsolutePath());

    File reportFile = new File(output, "test_predictions.txt");
    report(linearRegression, testSet, reportFile);
    System.out.println("predictions on the test set are written to " + reportFile.getAbsolutePath());
}

/**
 * Renders one line per selected feature in the form {@code index(name):weight},
 * in the order of the given list.
 *
 * @param features (feature index, weight) pairs to render
 * @param dataSet data set whose feature list supplies the feature names
 * @return the rendered lines, each terminated by {@code \n}
 */
private static String formatSelectedFeatures(List<Pair<Integer, Double>> features, RegDataSet dataSet) {
    StringBuilder sb = new StringBuilder();
    for (Pair<Integer, Double> pair : features) {
        int index = pair.getFirst();
        sb.append(index).append("(").append(dataSet.getFeatureList().get(index).getName()).append(")").append(":").append(pair.getSecond()).append("\n");
    }
    return sb.toString();
}
Also used : DataSetType(edu.neu.ccs.pyramid.dataset.DataSetType) Config(edu.neu.ccs.pyramid.configuration.Config) ArrayList(java.util.ArrayList) StopWatch(org.apache.commons.lang3.time.StopWatch) ElasticNetLinearRegOptimizer(edu.neu.ccs.pyramid.regression.linear_regression.ElasticNetLinearRegOptimizer) RegDataSet(edu.neu.ccs.pyramid.dataset.RegDataSet) File(java.io.File) LinearRegression(edu.neu.ccs.pyramid.regression.linear_regression.LinearRegression) Vector(org.apache.mahout.math.Vector) Pair(edu.neu.ccs.pyramid.util.Pair)

Example 5 with RegDataSet

use of edu.neu.ccs.pyramid.dataset.RegDataSet in project pyramid by cheng-li.

the class RegressionSynthesizerTest method test2.

//    private static void test1() throws Exception{
////        RegDataSet trainSet = RegressionSynthesizer.univarStep();
////        RegDataSet testSet = RegressionSynthesizer.univarStep();
//
////        RegDataSet trainSet = RegressionSynthesizer.univarSine();
////        RegDataSet testSet = RegressionSynthesizer.univarSine();
//
////        RegDataSet trainSet = RegressionSynthesizer.univarLine();
////        RegDataSet testSet = RegressionSynthesizer.univarLine();
//
////        RegDataSet trainSet = RegressionSynthesizer.univarQuadratic();
////        RegDataSet testSet = RegressionSynthesizer.univarQuadratic();
//
//        RegressionSynthesizer regressionSynthesizer = RegressionSynthesizer.getBuilder().build();
//
//        RegDataSet trainSet = regressionSynthesizer.univarExp();
//        RegDataSet testSet = regressionSynthesizer.univarExp();
//
//        TRECFormat.save(trainSet,new File(TMP,"train.trec"));
//        TRECFormat.save(testSet,new File(TMP,"test.trec"));
//
//        int[] activeFeatures = IntStream.range(0, trainSet.getNumFeatures()).toArray();
//        int[] activeDataPoints = IntStream.range(0, trainSet.getNumDataPoints()).toArray();
//        RegTreeConfig regTreeConfig = new RegTreeConfig();
//
//
//        regTreeConfig.setMaxNumLeaves(2);
//        regTreeConfig.setMinDataPerLeaf(5);
//
//
//        regTreeConfig.setNumSplitIntervals(1000);
//        RegressionTree tree = RegTreeTrainer.fit(regTreeConfig,trainSet);
//        System.out.println(tree.toString());
//
//
//        System.out.println("hard rt");
//        System.out.println("training mse = "+ MSE.mse(tree,trainSet));
//        System.out.println("test mse = "+ MSE.mse(tree,testSet));
//
//        String hardTrainPrediction = Arrays.toString(tree.predict(trainSet)).replace("[","").replace("]","");
//        FileUtils.writeStringToFile(new File(TMP,"hardTrainPrediction"),hardTrainPrediction);
//        FileUtils.writeStringToFile(new File(TMP,"hardTrainMSE"),""+MSE.mse(tree,trainSet));
//
//
//        String hardTestPrediction = Arrays.toString(tree.predict(testSet)).replace("[","").replace("]","");
//        FileUtils.writeStringToFile(new File(TMP,"hardTestPrediction"),hardTestPrediction);
//        FileUtils.writeStringToFile(new File(TMP,"hardTestMSE"),""+MSE.mse(tree,testSet));
//
//        SoftRegStumpTrainer trainer = SoftRegStumpTrainer.getBuilder()
//                .setDataSet(trainSet)
//                .setLabels(trainSet.getLabels())
//                .setFeatureType(SoftRegStumpTrainer.FeatureType.FOLLOW_HARD_TREE_FEATURE)
//                .setLossType(SoftRegStumpTrainer.LossType.SquaredLossOfExpectation)
//                .build();
//
//        Optimizer optimizer = trainer.getOptimizer();
//        optimizer.getTerminator().setMode(Terminator.Mode.STANDARD);
//        optimizer.getTerminator().setMaxIteration(10000);
//
//        SoftRegStump softRegStump = trainer.train();
//        System.out.println("prob rt");
//        System.out.println("training mse = "+ MSE.mse(softRegStump,trainSet));
//        System.out.println("test mse = "+ MSE.mse(softRegStump,testSet));
//        System.out.println(softRegStump.toString());
//
//
//        String softTrainPrediction = Arrays.toString(softRegStump.predict(trainSet)).replace("[","").replace("]","");
//        FileUtils.writeStringToFile(new File(TMP,"softTrainPrediction"),softTrainPrediction);
//        FileUtils.writeStringToFile(new File(TMP,"softTrainMSE"),""+MSE.mse(softRegStump,trainSet));
//
//
//        String softTestPrediction = Arrays.toString(softRegStump.predict(testSet)).replace("[","").replace("]","");
//        FileUtils.writeStringToFile(new File(TMP,"softTestPrediction"),softTestPrediction);
//        FileUtils.writeStringToFile(new File(TMP,"softTestMSE"),""+MSE.mse(softRegStump,testSet));
//
//        StringBuilder sb = new StringBuilder();
//        sb.append(((Sigmoid) softRegStump.getGatingFunction()).getWeights().get(0));
//        sb.append(",");
//        sb.append(((Sigmoid) softRegStump.getGatingFunction()).getBias());
//        sb.append(",");
//        sb.append(softRegStump.getLeftOutput());
//        sb.append(",");
//        sb.append(softRegStump.getRightOutput());
//
//        FileUtils.writeStringToFile(new File(TMP,"curve"),sb.toString());
//
//    }
/**
 * Generates a training set and a test set of 100 points each with
 * {@code univarExp()} and saves them under {@code TMP} as {@code train.trec}
 * and {@code test.trec}.
 */
private static void test2() {
    RegressionSynthesizer synthesizer = RegressionSynthesizer.getBuilder()
            .setNumDataPoints(100)
            .build();
    // NOTE(review): earlier drafts of this test referenced univarStep, univarSine,
    // univarLine and univarQuadratic as alternative generators - confirm they
    // still exist before swapping one in.
    RegDataSet training = synthesizer.univarExp();
    RegDataSet heldOut = synthesizer.univarExp();
    File trainingFile = new File(TMP, "train.trec");
    TRECFormat.save(training, trainingFile);
    File heldOutFile = new File(TMP, "test.trec");
    TRECFormat.save(heldOut, heldOutFile);
}
Also used : RegDataSet(edu.neu.ccs.pyramid.dataset.RegDataSet) File(java.io.File)

Aggregations

RegDataSet (edu.neu.ccs.pyramid.dataset.RegDataSet): 21, File (java.io.File): 9, DataSetType (edu.neu.ccs.pyramid.dataset.DataSetType): 4, RegTreeConfig (edu.neu.ccs.pyramid.regression.regression_tree.RegTreeConfig): 3, NormalDistribution (org.apache.commons.math3.distribution.NormalDistribution): 3, Vector (org.apache.mahout.math.Vector): 3, Config (edu.neu.ccs.pyramid.configuration.Config): 2, LSBoost (edu.neu.ccs.pyramid.regression.least_squares_boost.LSBoost): 2, RegTreeFactory (edu.neu.ccs.pyramid.regression.regression_tree.RegTreeFactory): 2, Pair (edu.neu.ccs.pyramid.util.Pair): 2, ArrayList (java.util.ArrayList): 2, StopWatch (org.apache.commons.lang3.time.StopWatch): 2, ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 1, StandardFormat (edu.neu.ccs.pyramid.dataset.StandardFormat): 1, RMSE (edu.neu.ccs.pyramid.eval.RMSE): 1, LSBoostOptimizer (edu.neu.ccs.pyramid.regression.least_squares_boost.LSBoostOptimizer): 1, ElasticNetLinearRegOptimizer (edu.neu.ccs.pyramid.regression.linear_regression.ElasticNetLinearRegOptimizer): 1, LinearRegression (edu.neu.ccs.pyramid.regression.linear_regression.LinearRegression): 1, RegressionTree (edu.neu.ccs.pyramid.regression.regression_tree.RegressionTree): 1, TreeRule (edu.neu.ccs.pyramid.regression.regression_tree.TreeRule): 1