Use of edu.neu.ccs.pyramid.dataset.ClfDataSet in the project pyramid by cheng-li.
From the class ClassificationSynthesizer, method multivarLine.
/**
 * Builds a dense, two-class data set whose label depends on the sum of each row's features.
 *
 * <p>Every one of the {@code numDataPoints} rows receives {@code numFeatures} values drawn from
 * {@code Sampling.doubleUniform(0, 1)}. One draw from {@code noise} is added to the row's feature
 * sum, and the row is labelled 1 when that noisy sum is at least {@code numFeatures / 2.0},
 * otherwise 0.
 *
 * @return the newly generated data set
 */
public ClfDataSet multivarLine() {
    ClfDataSet dataSet = ClfDataSetBuilder.getBuilder()
            .numDataPoints(numDataPoints)
            .numFeatures(numFeatures)
            .numClasses(2)
            .dense(true)
            .missingValue(false)
            .build();
    // The decision threshold does not change between rows, so compute it once.
    final double threshold = numFeatures / 2.0;
    for (int i = 0; i < numDataPoints; i++) {
        // Accumulate the sum while generating the values instead of fetching the row
        // again for every feature in a second pass.
        double sum = 0;
        for (int j = 0; j < numFeatures; j++) {
            double featureValue = Sampling.doubleUniform(0, 1);
            dataSet.setFeatureValue(i, j, featureValue);
            sum += featureValue;
        }
        // The noise is drawn after all feature values of the row, one draw per row.
        sum += noise.sample();
        dataSet.setLabel(i, sum >= threshold ? 1 : 0);
    }
    return dataSet;
}
Use of edu.neu.ccs.pyramid.dataset.ClfDataSet in the project pyramid by cheng-li.
From the class L2BoostTest, method loadTest.
/**
 * Loads the spam test file as a sparse classification data set, deserializes the {@code L2Boost}
 * model stored as {@code "boost"} under {@code TMP}, and prints the model's accuracy on that data.
 *
 * @throws Exception propagated from loading the data set or deserializing the model
 */
static void loadTest() throws Exception {
    File testFile = new File(DATASETS, "/spam/trec_data/test.trec");
    File modelFile = new File(TMP, "boost");

    ClfDataSet testData = TRECFormat.loadClfDataSet(testFile, DataSetType.CLF_SPARSE, true);
    L2Boost model = (L2Boost) Serialization.deserialize(modelFile);

    double testAccuracy = Accuracy.accuracy(model, testData);
    System.out.println("accuracy=" + testAccuracy);
}
Use of edu.neu.ccs.pyramid.dataset.ClfDataSet in the project pyramid by cheng-li.
From the class ClassificationSynthesizerTest, method test1.
/**
 * Synthesizes a train and a test set (1000 points, 2 features, noise SD 1e-8) with
 * {@code multivarLine()}, saves both under {@code TMP/line1}, trains a {@code LogisticRegression}
 * with a {@code RidgeLogisticTrainer} (Gaussian prior variance 1) on the training set, and prints
 * the training accuracy, the test accuracy and the weights for class 0.
 */
private static void test1() {
    ClassificationSynthesizer synthesizer = ClassificationSynthesizer.getBuilder()
            .setNumDataPoints(1000)
            .setNumFeatures(2)
            .setNoiseSD(1e-8)
            .build();

    // Generation order matters for the random draws: training set first, then test set.
    ClfDataSet train = synthesizer.multivarLine();
    ClfDataSet test = synthesizer.multivarLine();

    File trainFile = new File(TMP, "line1/train.trec");
    File testFile = new File(TMP, "line1/test.trec");
    TRECFormat.save(train, trainFile);
    TRECFormat.save(test, testFile);

    RidgeLogisticTrainer ridgeTrainer = RidgeLogisticTrainer.getBuilder()
            .setGaussianPriorVariance(1)
            .build();
    LogisticRegression model = ridgeTrainer.train(train);

    System.out.println(Accuracy.accuracy(model, train));
    System.out.println(Accuracy.accuracy(model, test));
    System.out.println(model.getWeights().getWeightsForClass(0));
}
Use of edu.neu.ccs.pyramid.dataset.ClfDataSet in the project pyramid by cheng-li.
From the class ClassificationSynthesizerTest, method test2.
/**
 * Synthesizes a train and a test set (100 points, 2 features, noise SD 1e-8) with
 * {@code multivarLine()}, saves both under {@code TMP/line2}, trains a {@code LogisticRegression}
 * with a {@code RidgeLogisticTrainer} (Gaussian prior variance 1) on the training set, and prints
 * the training accuracy, the test accuracy and the weights for class 0.
 */
private static void test2() {
    ClassificationSynthesizer synthesizer = ClassificationSynthesizer.getBuilder()
            .setNumDataPoints(100)
            .setNumFeatures(2)
            .setNoiseSD(1e-8)
            .build();

    // Generation order matters for the random draws: training set first, then test set.
    ClfDataSet train = synthesizer.multivarLine();
    ClfDataSet test = synthesizer.multivarLine();

    File trainFile = new File(TMP, "line2/train.trec");
    File testFile = new File(TMP, "line2/test.trec");
    TRECFormat.save(train, trainFile);
    TRECFormat.save(test, testFile);

    RidgeLogisticTrainer ridgeTrainer = RidgeLogisticTrainer.getBuilder()
            .setGaussianPriorVariance(1)
            .build();
    LogisticRegression model = ridgeTrainer.train(train);

    System.out.println(Accuracy.accuracy(model, train));
    System.out.println(Accuracy.accuracy(model, test));
    System.out.println(model.getWeights().getWeightsForClass(0));
}
Use of edu.neu.ccs.pyramid.dataset.ClfDataSet in the project pyramid by cheng-li.
From the class ClassificationSynthesizerTest, method test3.
/**
 * Synthesizes a train and a test set (1000 points, 2 features, noise SD 0.1) with
 * {@code multivarLine()}, saves both under {@code TMP/line3}, trains a {@code LogisticRegression}
 * with a {@code RidgeLogisticTrainer} (Gaussian prior variance 1) on the training set, and prints
 * the training accuracy, the test accuracy and the weights for class 0.
 */
private static void test3() {
    ClassificationSynthesizer synthesizer = ClassificationSynthesizer.getBuilder()
            .setNumDataPoints(1000)
            .setNumFeatures(2)
            .setNoiseSD(0.1)
            .build();

    // Generation order matters for the random draws: training set first, then test set.
    ClfDataSet train = synthesizer.multivarLine();
    ClfDataSet test = synthesizer.multivarLine();

    File trainFile = new File(TMP, "line3/train.trec");
    File testFile = new File(TMP, "line3/test.trec");
    TRECFormat.save(train, trainFile);
    TRECFormat.save(test, testFile);

    RidgeLogisticTrainer ridgeTrainer = RidgeLogisticTrainer.getBuilder()
            .setGaussianPriorVariance(1)
            .build();
    LogisticRegression model = ridgeTrainer.train(train);

    System.out.println(Accuracy.accuracy(model, train));
    System.out.println(Accuracy.accuracy(model, test));
    System.out.println(model.getWeights().getWeightsForClass(0));
}
Aggregations