Usage example of edu.neu.ccs.pyramid.dataset.RegDataSet in the pyramid project by cheng-li:
class RegressionSynthesizer, method univarNormal().
/**
 * Synthesizes a one-feature regression data set: each feature value x is drawn
 * uniformly from [-1, 1] and its label is the standard normal density at x
 * plus an additive noise sample.
 *
 * @return a dense RegDataSet with {@code numDataPoints} rows and one feature
 */
public RegDataSet univarNormal() {
    NormalDistribution standardNormal = new NormalDistribution(0, 1);
    RegDataSet dataSet = RegDataSetBuilder.getBuilder()
            .numDataPoints(numDataPoints)
            .numFeatures(1)
            .dense(true)
            .missingValue(false)
            .build();
    for (int row = 0; row < numDataPoints; row++) {
        double x = Sampling.doubleUniform(-1, 1);
        // density first, then noise — same evaluation order as the original
        double y = standardNormal.density(x) + noise.sample();
        dataSet.setFeatureValue(row, 0, x);
        dataSet.setLabel(row, y);
    }
    return dataSet;
}
Usage example of edu.neu.ccs.pyramid.dataset.RegDataSet in the pyramid project by cheng-li:
class RulesTest, method test1().
/**
 * Trains a small regression tree on the slice_location data set, builds a mixed
 * list of rules (two tree rules, a constant rule, and a linear rule), and
 * serializes the list to {@code decision.json} with Jackson.
 *
 * <p>Fix: removed dead locals — {@code activeFeatures} and
 * {@code activeDataPoints} were computed but never used, and a
 * {@code StopWatch} was started but never read.
 *
 * @throws Exception if the data set cannot be loaded or the JSON cannot be written
 */
static void test1() throws Exception {
    int numLeaves = 4;
    // NOTE(review): hard-coded absolute paths — this only runs on the original author's machine
    RegDataSet dataSet = StandardFormat.loadRegDataSet("/Users/chengli/Datasets/slice_location/standard/featureList.txt", "/Users/chengli/Datasets/slice_location/standard/labels.txt", ",", DataSetType.REG_DENSE, false);
    System.out.println(dataSet.isDense());
    RegTreeConfig regTreeConfig = new RegTreeConfig();
    regTreeConfig.setMaxNumLeaves(numLeaves);
    regTreeConfig.setMinDataPerLeaf(5);
    regTreeConfig.setNumSplitIntervals(100);
    RegressionTree regressionTree = RegTreeTrainer.fit(regTreeConfig, dataSet);
    // rules extracted from concrete data points plus two synthetic rules
    TreeRule rule1 = new TreeRule(regressionTree, dataSet.getRow(100));
    TreeRule rule2 = new TreeRule(regressionTree, dataSet.getRow(1));
    ConstantRule rule3 = new ConstantRule(0.8);
    Rule rule4 = new LinearRule();
    List<Rule> rules = new ArrayList<>();
    rules.add(rule1);
    rules.add(rule2);
    rules.add(rule3);
    rules.add(rule4);
    ObjectMapper mapper = new ObjectMapper();
    mapper.writeValue(new File(TMP, "decision.json"), rules);
}
Usage example of edu.neu.ccs.pyramid.dataset.RegDataSet in the pyramid project by cheng-li:
class GBRegressor, method test().
/**
 * Loads a serialized LSBoost model from the output folder, evaluates RMSE on
 * the test set named in the config, and writes per-instance predictions to
 * {@code test_predictions.txt}.
 *
 * @param config must define output.folder, input.matrixType (dense|sparse), input.testData
 * @throws Exception if the model or test data cannot be read
 */
private static void test(Config config) throws Exception {
    String outputFolder = config.getString("output.folder");
    LSBoost boosting = (LSBoost) Serialization.deserialize(new File(outputFolder, "model"));
    String matrixType = config.getString("input.matrixType");
    DataSetType dataSetType;
    if (matrixType.equals("dense")) {
        dataSetType = DataSetType.REG_DENSE;
    } else if (matrixType.equals("sparse")) {
        dataSetType = DataSetType.REG_SPARSE;
    } else {
        throw new IllegalArgumentException("input.matrixType should be dense or sparse");
    }
    RegDataSet testSet = TRECFormat.loadRegDataSet(config.getString("input.testData"), dataSetType, true);
    System.out.println("test RMSE = " + RMSE.rmse(boosting, testSet));
    File reportFile = new File(outputFolder, "test_predictions.txt");
    report(boosting, testSet, reportFile);
    System.out.println("predictions on the test set are written to " + reportFile.getAbsolutePath());
}
Usage example of edu.neu.ccs.pyramid.dataset.RegDataSet in the pyramid project by cheng-li:
class LinearRegElasticNet, method main().
/**
 * Trains an elastic-net regularized linear regression from a properties file,
 * reports RMSE before and after training, and writes the selected (non-zero)
 * features to two report files — sorted by index and by absolute weight —
 * plus per-instance test predictions.
 *
 * <p>Fix: the feature-report writing code was duplicated verbatim for the two
 * sort orders; it is now extracted into {@link #writeFeatureReport}.
 *
 * @param args exactly one argument: path to the properties file
 * @throws Exception on configuration, I/O, or training failure
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }
    Config config = new Config(args[0]);
    System.out.println(config);
    String output = config.getString("output.folder");
    new File(output).mkdirs();
    String sparsity = config.getString("featureMatrix.sparsity").toLowerCase();
    DataSetType dataSetType = null;
    switch(sparsity) {
        case "dense":
            dataSetType = DataSetType.REG_DENSE;
            break;
        case "sparse":
            dataSetType = DataSetType.REG_SPARSE;
            break;
        default:
            throw new IllegalArgumentException("featureMatrix.sparsity can be either dense or sparse");
    }
    RegDataSet trainSet = TRECFormat.loadRegDataSet(config.getString("input.trainSet"), dataSetType, true);
    RegDataSet testSet = TRECFormat.loadRegDataSet(config.getString("input.testSet"), dataSetType, true);
    LinearRegression linearRegression = new LinearRegression(trainSet.getNumFeatures());
    ElasticNetLinearRegOptimizer optimizer = new ElasticNetLinearRegOptimizer(linearRegression, trainSet);
    optimizer.setRegularization(config.getDouble("regularization"));
    optimizer.setL1Ratio(config.getDouble("l1Ratio"));
    // baseline RMSE with untrained (zero-weight) model for comparison
    System.out.println("before training");
    System.out.println("training set RMSE = " + RMSE.rmse(linearRegression, trainSet));
    System.out.println("test set RMSE = " + RMSE.rmse(linearRegression, testSet));
    System.out.println("start training");
    StopWatch stopWatch = new StopWatch();
    stopWatch.start();
    optimizer.optimize();
    System.out.println("training done");
    System.out.println("time spent on training = " + stopWatch);
    System.out.println("after training");
    System.out.println("training set RMSE = " + RMSE.rmse(linearRegression, trainSet));
    System.out.println("test set RMSE = " + RMSE.rmse(linearRegression, testSet));
    System.out.println("number of non-zeros weights in linear regression (not including bias) = " + linearRegression.getWeights().getWeightsWithoutBias().getNumNonZeroElements());
    // collect (featureIndex, weight) pairs for all surviving features
    List<Pair<Integer, Double>> sorted = new ArrayList<>();
    for (Vector.Element element : linearRegression.getWeights().getWeightsWithoutBias().nonZeroes()) {
        sorted.add(new Pair<>(element.index(), element.get()));
    }
    Comparator<Pair<Integer, Double>> comparatorByIndex = Comparator.comparing(pair -> pair.getFirst());
    sorted = sorted.stream().sorted(comparatorByIndex).collect(Collectors.toList());
    File byIndexFile = new File(output, "features_sorted_by_indices.txt");
    writeFeatureReport(sorted, trainSet, byIndexFile);
    System.out.println("all selected features (sorted by indices) are saved to " + byIndexFile.getAbsolutePath());
    Comparator<Pair<Integer, Double>> comparator = Comparator.comparing(pair -> Math.abs(pair.getSecond()));
    sorted = sorted.stream().sorted(comparator.reversed()).collect(Collectors.toList());
    File byWeightFile = new File(output, "features_sorted_by_weights.txt");
    writeFeatureReport(sorted, trainSet, byWeightFile);
    System.out.println("all selected features (sorted by absolute weights) are saved to " + byWeightFile.getAbsolutePath());
    File reportFile = new File(output, "test_predictions.txt");
    report(linearRegression, testSet, reportFile);
    System.out.println("predictions on the test set are written to " + reportFile.getAbsolutePath());
}

/**
 * Writes one line per pair in the form {@code index(featureName):weight} to the
 * given file, in the order the pairs appear in the list.
 *
 * @param pairs    (featureIndex, weight) pairs, already sorted by the caller
 * @param trainSet used to resolve feature names from indices
 * @param file     destination file (overwritten)
 * @throws Exception if the file cannot be written
 */
private static void writeFeatureReport(List<Pair<Integer, Double>> pairs, RegDataSet trainSet, File file) throws Exception {
    StringBuilder sb = new StringBuilder();
    for (Pair<Integer, Double> pair : pairs) {
        int index = pair.getFirst();
        sb.append(index).append("(").append(trainSet.getFeatureList().get(index).getName()).append(")").append(":").append(pair.getSecond()).append("\n");
    }
    FileUtils.writeStringToFile(file, sb.toString());
}
Usage example of edu.neu.ccs.pyramid.dataset.RegDataSet in the pyramid project by cheng-li:
class RegressionSynthesizerTest, method test2().
// private static void test1() throws Exception{
//// RegDataSet trainSet = RegressionSynthesizer.univarStep();
//// RegDataSet testSet = RegressionSynthesizer.univarStep();
//
//// RegDataSet trainSet = RegressionSynthesizer.univarSine();
//// RegDataSet testSet = RegressionSynthesizer.univarSine();
//
//// RegDataSet trainSet = RegressionSynthesizer.univarLine();
//// RegDataSet testSet = RegressionSynthesizer.univarLine();
//
//// RegDataSet trainSet = RegressionSynthesizer.univarQuadratic();
//// RegDataSet testSet = RegressionSynthesizer.univarQuadratic();
//
// RegressionSynthesizer regressionSynthesizer = RegressionSynthesizer.getBuilder().build();
//
// RegDataSet trainSet = regressionSynthesizer.univarExp();
// RegDataSet testSet = regressionSynthesizer.univarExp();
//
// TRECFormat.save(trainSet,new File(TMP,"train.trec"));
// TRECFormat.save(testSet,new File(TMP,"test.trec"));
//
// int[] activeFeatures = IntStream.range(0, trainSet.getNumFeatures()).toArray();
// int[] activeDataPoints = IntStream.range(0, trainSet.getNumDataPoints()).toArray();
// RegTreeConfig regTreeConfig = new RegTreeConfig();
//
//
// regTreeConfig.setMaxNumLeaves(2);
// regTreeConfig.setMinDataPerLeaf(5);
//
//
// regTreeConfig.setNumSplitIntervals(1000);
// RegressionTree tree = RegTreeTrainer.fit(regTreeConfig,trainSet);
// System.out.println(tree.toString());
//
//
// System.out.println("hard rt");
// System.out.println("training mse = "+ MSE.mse(tree,trainSet));
// System.out.println("test mse = "+ MSE.mse(tree,testSet));
//
// String hardTrainPrediction = Arrays.toString(tree.predict(trainSet)).replace("[","").replace("]","");
// FileUtils.writeStringToFile(new File(TMP,"hardTrainPrediction"),hardTrainPrediction);
// FileUtils.writeStringToFile(new File(TMP,"hardTrainMSE"),""+MSE.mse(tree,trainSet));
//
//
// String hardTestPrediction = Arrays.toString(tree.predict(testSet)).replace("[","").replace("]","");
// FileUtils.writeStringToFile(new File(TMP,"hardTestPrediction"),hardTestPrediction);
// FileUtils.writeStringToFile(new File(TMP,"hardTestMSE"),""+MSE.mse(tree,testSet));
//
// SoftRegStumpTrainer trainer = SoftRegStumpTrainer.getBuilder()
// .setDataSet(trainSet)
// .setLabels(trainSet.getLabels())
// .setFeatureType(SoftRegStumpTrainer.FeatureType.FOLLOW_HARD_TREE_FEATURE)
// .setLossType(SoftRegStumpTrainer.LossType.SquaredLossOfExpectation)
// .build();
//
// Optimizer optimizer = trainer.getOptimizer();
// optimizer.getTerminator().setMode(Terminator.Mode.STANDARD);
// optimizer.getTerminator().setMaxIteration(10000);
//
// SoftRegStump softRegStump = trainer.train();
// System.out.println("prob rt");
// System.out.println("training mse = "+ MSE.mse(softRegStump,trainSet));
// System.out.println("test mse = "+ MSE.mse(softRegStump,testSet));
// System.out.println(softRegStump.toString());
//
//
// String softTrainPrediction = Arrays.toString(softRegStump.predict(trainSet)).replace("[","").replace("]","");
// FileUtils.writeStringToFile(new File(TMP,"softTrainPrediction"),softTrainPrediction);
// FileUtils.writeStringToFile(new File(TMP,"softTrainMSE"),""+MSE.mse(softRegStump,trainSet));
//
//
// String softTestPrediction = Arrays.toString(softRegStump.predict(testSet)).replace("[","").replace("]","");
// FileUtils.writeStringToFile(new File(TMP,"softTestPrediction"),softTestPrediction);
// FileUtils.writeStringToFile(new File(TMP,"softTestMSE"),""+MSE.mse(softRegStump,testSet));
//
// StringBuilder sb = new StringBuilder();
// sb.append(((Sigmoid) softRegStump.getGatingFunction()).getWeights().get(0));
// sb.append(",");
// sb.append(((Sigmoid) softRegStump.getGatingFunction()).getBias());
// sb.append(",");
// sb.append(softRegStump.getLeftOutput());
// sb.append(",");
// sb.append(softRegStump.getRightOutput());
//
// FileUtils.writeStringToFile(new File(TMP,"curve"),sb.toString());
//
// }
/**
 * Generates matching train/test sets of 100 points from the exponential
 * univariate synthesizer and saves both in TREC format under TMP.
 * (Other univariate targets — step, sine, line, quadratic — could be
 * substituted for {@code univarExp}.)
 */
private static void test2() {
    RegressionSynthesizer synthesizer = RegressionSynthesizer.getBuilder()
            .setNumDataPoints(100)
            .build();
    RegDataSet train = synthesizer.univarExp();
    RegDataSet test = synthesizer.univarExp();
    TRECFormat.save(train, new File(TMP, "train.trec"));
    TRECFormat.save(test, new File(TMP, "test.trec"));
}
Aggregations