Use of edu.neu.ccs.pyramid.dataset.MultiLabel in project pyramid by cheng-li.
Class MultiLabelSynthesizer, method crfArgmaxDrop.
/**
 * Creates a synthetic multi-label data set with 1000 data points, 10 features
 * and 4 classes, labelled by a hand-weighted CRF and then thinned by random
 * label dropping.
 *
 * <p>Feature values are drawn with {@code Sampling.doubleUniform(-1, 1)}. A
 * {@code CMLCRF} over the label combinations returned by
 * {@code Enumerator.enumerate(numClass)} gets explicit weights on features 0
 * and 1 for every class; weights of the remaining features are not set here.
 * Each data point starts from a copy of the {@code SubsetAccPredictor}
 * prediction, and class {@code l} is removed from it whenever a
 * {@code Math.random()} draw exceeds {@code alphas[l]}.
 *
 * @return the populated data set
 */
public static MultiLabelClfDataSet crfArgmaxDrop() {
    final int numData = 1000;
    final int numClass = 4;
    final int numFeature = 10;

    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    List<MultiLabel> support = Enumerator.enumerate(numClass);
    CMLCRF cmlcrf = new CMLCRF(numClass, numFeature, support);

    // classWeights[k][j] is the weight written for class k on feature j.
    final int[][] classWeights = {
            { 0, 10 },
            { 10, 10 },
            { 10, 0 },
            { -10, -10 }
    };
    for (int k = 0; k < classWeights.length; k++) {
        for (int j = 0; j < classWeights[k].length; j++) {
            cmlcrf.getWeights().getWeightsWithoutBiasForClass(k).set(j, classWeights[k][j]);
        }
    }

    // Fill the feature matrix row by row with uniform draws.
    for (int row = 0; row < numData; row++) {
        for (int col = 0; col < numFeature; col++) {
            dataSet.setFeatureValue(row, col, Sampling.doubleUniform(-1, 1));
        }
    }

    SubsetAccPredictor predictor = new SubsetAccPredictor(cmlcrf);

    // Per-class thresholds: a label of class c is dropped when the draw is above alphas[c].
    // Math.random() is always below 1, so class 0 is never dropped.
    double[] alphas = { 1, 0.9, 0.8, 0.7 };

    for (int row = 0; row < numData; row++) {
        // Work on a copy because the label is modified below.
        MultiLabel label = predictor.predict(dataSet.getRow(row)).copy();
        for (int cls = 0; cls < numClass; cls++) {
            boolean aboveThreshold = Math.random() > alphas[cls];
            if (aboveThreshold && label.matchClass(cls)) {
                label.removeLabel(cls);
            }
        }
        dataSet.setLabels(row, label);
    }
    return dataSet;
}
Use of edu.neu.ccs.pyramid.dataset.MultiLabel in project pyramid by cheng-li.
Class MultiLabelSynthesizer, method flipTwo.
/**
 * Generates a synthetic multi-label data set from random linear scorers and
 * then perturbs the labels by flipping.
 *
 * <p>Every per-class weight and every feature value is drawn with
 * {@code Sampling.doubleUniform(-1, 1)}. Data point {@code i} initially gets
 * label {@code k} whenever the dot product of class {@code k}'s weight vector
 * with row {@code i} is non-negative. Afterwards, for each data point one class
 * drawn with {@code Sampling.intUniform(0, numClass - 1)} is flipped (removed
 * if present, added if absent). When that class is 0 and more than one class
 * exists, a second class drawn with {@code Sampling.intUniform(1, numClass - 1)}
 * is flipped as well.
 *
 * @param numData    number of data points to generate
 * @param numFeature number of features per data point
 * @param numClass   number of classes
 * @return the populated data set with flipped labels
 */
public static MultiLabelClfDataSet flipTwo(int numData, int numFeature, int numClass) {
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    // generate weights: one dense vector per class
    Vector[] weights = new Vector[numClass];
    for (int k = 0; k < numClass; k++) {
        Vector vector = new DenseVector(numFeature);
        for (int j = 0; j < numFeature; j++) {
            vector.set(j, Sampling.doubleUniform(-1, 1));
        }
        weights[k] = vector;
    }

    // generate features
    for (int i = 0; i < numData; i++) {
        for (int j = 0; j < numFeature; j++) {
            dataSet.setFeatureValue(i, j, Sampling.doubleUniform(-1, 1));
        }
    }

    // assign labels: class k is on when its linear score is non-negative
    for (int i = 0; i < numData; i++) {
        for (int k = 0; k < numClass; k++) {
            double dot = weights[k].dot(dataSet.getRow(i));
            if (dot >= 0) {
                dataSet.addLabel(i, k);
            }
        }
    }

    // flip
    for (int i = 0; i < numData; i++) {
        int toChange = Sampling.intUniform(0, numClass - 1);
        // The label is modified in place and never written back, so this relies on
        // getMultiLabels() exposing the data set's own label objects.
        MultiLabel label = dataSet.getMultiLabels()[i];
        if (label.matchClass(toChange)) {
            label.removeLabel(toChange);
        } else {
            label.addLabel(toChange);
        }
        // The second flip draws from classes 1..numClass-1. With a single class that
        // range is empty, so the extra flip is skipped rather than sampled.
        if (toChange == 0 && numClass > 1) {
            int another = Sampling.intUniform(1, numClass - 1);
            if (label.matchClass(another)) {
                label.removeLabel(another);
            } else {
                label.addLabel(another);
            }
        }
    }
    return dataSet;
}
Use of edu.neu.ccs.pyramid.dataset.MultiLabel in project pyramid by cheng-li.
Class MultiLabelSynthesizer, method crfSample.
/**
 * Creates a synthetic multi-label data set with 10000 data points, 2 features
 * and 4 classes whose labels come from a {@code SamplingPredictor} built on a
 * hand-weighted CRF.
 *
 * <p>Feature values are drawn with {@code Sampling.doubleUniform(-1, 1)}. The
 * {@code CMLCRF} is defined over the label combinations returned by
 * {@code Enumerator.enumerate(numClass)} and receives explicit weights for both
 * features of every class. Each data point is assigned whatever
 * {@code SamplingPredictor.predict} returns for its feature row.
 *
 * @return the populated data set
 */
public static MultiLabelClfDataSet crfSample() {
    final int numData = 10000;
    final int numClass = 4;
    final int numFeature = 2;

    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    List<MultiLabel> support = Enumerator.enumerate(numClass);
    CMLCRF cmlcrf = new CMLCRF(numClass, numFeature, support);

    // classWeights[k][j] is the weight written for class k on feature j.
    final int[][] classWeights = {
            { 0, 10 },
            { 10, 10 },
            { 10, 0 },
            { -10, -10 }
    };
    for (int k = 0; k < classWeights.length; k++) {
        for (int j = 0; j < classWeights[k].length; j++) {
            cmlcrf.getWeights().getWeightsWithoutBiasForClass(k).set(j, classWeights[k][j]);
        }
    }

    // Fill the feature matrix row by row with uniform draws.
    for (int row = 0; row < numData; row++) {
        for (int col = 0; col < numFeature; col++) {
            dataSet.setFeatureValue(row, col, Sampling.doubleUniform(-1, 1));
        }
    }

    SamplingPredictor samplingPredictor = new SamplingPredictor(cmlcrf);

    // Label every data point with the predictor's output for its row.
    for (int row = 0; row < numData; row++) {
        dataSet.setLabels(row, samplingPredictor.predict(dataSet.getRow(row)));
    }
    return dataSet;
}
Use of edu.neu.ccs.pyramid.dataset.MultiLabel in project pyramid by cheng-li.
Class MultiLabelSynthesizer, method crfArgmaxHide.
/**
 * Creates a synthetic multi-label data set with 10000 data points, 2 features
 * and 4 classes, labelled by a hand-weighted CRF, and then blanks out feature 0.
 *
 * <p>Feature values are drawn with {@code Sampling.doubleUniform(-1, 1)}. The
 * {@code CMLCRF} is defined over the label combinations returned by
 * {@code Enumerator.enumerate(numClass)} and receives explicit weights for both
 * features of every class. Labels are the {@code SubsetAccPredictor}
 * predictions computed while feature 0 is still populated; afterwards feature 0
 * of every data point is overwritten with 0.
 *
 * @return the populated data set with feature 0 zeroed
 */
public static MultiLabelClfDataSet crfArgmaxHide() {
    final int numData = 10000;
    final int numClass = 4;
    final int numFeature = 2;

    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    List<MultiLabel> support = Enumerator.enumerate(numClass);
    CMLCRF cmlcrf = new CMLCRF(numClass, numFeature, support);

    // classWeights[k][j] is the weight written for class k on feature j.
    final int[][] classWeights = {
            { 0, 10 },
            { 10, 10 },
            { 10, 0 },
            { -10, -10 }
    };
    for (int k = 0; k < classWeights.length; k++) {
        for (int j = 0; j < classWeights[k].length; j++) {
            cmlcrf.getWeights().getWeightsWithoutBiasForClass(k).set(j, classWeights[k][j]);
        }
    }

    // Fill the feature matrix row by row with uniform draws.
    for (int row = 0; row < numData; row++) {
        for (int col = 0; col < numFeature; col++) {
            dataSet.setFeatureValue(row, col, Sampling.doubleUniform(-1, 1));
        }
    }

    SubsetAccPredictor predictor = new SubsetAccPredictor(cmlcrf);

    // Label every data point from the full (not yet hidden) feature row.
    for (int row = 0; row < numData; row++) {
        dataSet.setLabels(row, predictor.predict(dataSet.getRow(row)));
    }

    // Hide feature 0 only after labelling is complete.
    for (int row = 0; row < numData; row++) {
        dataSet.setFeatureValue(row, 0, 0);
    }
    return dataSet;
}
Use of edu.neu.ccs.pyramid.dataset.MultiLabel in project pyramid by cheng-li.
Class CMLCRFTest, method test6.
/**
 * Trains a {@code CMLCRF} on the "medical" train split with LBFGS and reports
 * progress after every iteration.
 *
 * <p>Both splits are loaded from {@code DATASETS} in sparse multi-label format.
 * For each of 50 optimizer iterations the method prints the iteration index,
 * the current {@code CRFLoss} value, and accuracy and overlap on the train and
 * test sets.
 *
 * @throws Exception if loading, optimizing or predicting fails
 */
private static void test6() throws Exception {
    File trainDir = new File(DATASETS, "medical/train");
    MultiLabelClfDataSet dataSet =
            TRECFormat.loadMultiLabelClfDataSet(trainDir, DataSetType.ML_CLF_SPARSE, true);
    File testDir = new File(DATASETS, "medical/test");
    MultiLabelClfDataSet testSet =
            TRECFormat.loadMultiLabelClfDataSet(testDir, DataSetType.ML_CLF_SPARSE, true);

    CMLCRF cmlcrf = new CMLCRF(dataSet);
    // NOTE(review): the meaning of the third constructor argument (1) is not visible here — confirm.
    CRFLoss crfLoss = new CRFLoss(cmlcrf, dataSet, 1);
    LBFGS optimizer = new LBFGS(crfLoss);

    final int numIterations = 50;
    for (int iter = 0; iter < numIterations; iter++) {
        System.out.println("iter: " + iter);
        optimizer.iterate();
        System.out.println(crfLoss.getValue());

        // Predict on both splits before printing any metric.
        MultiLabel[] predTrain = cmlcrf.predict(dataSet);
        MultiLabel[] predTest = cmlcrf.predict(testSet);

        System.out.print("\tTrain acc: " + Accuracy.accuracy(dataSet.getMultiLabels(), predTrain));
        System.out.print("\tTrain overlap " + Overlap.overlap(dataSet.getMultiLabels(), predTrain));
        System.out.print("\tTest acc: " + Accuracy.accuracy(testSet.getMultiLabels(), predTest));
        System.out.println("\tTest overlap " + Overlap.overlap(testSet.getMultiLabels(), predTest));
    }
}
Aggregations