use of edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet in project pyramid by cheng-li.
the class MultiLabelSynthesizer method independentNoise.
/**
 * Builds a synthetic data set of 10000 points with 2 features and 4 labels,
 * where each label is decided by a linear score perturbed with its own
 * independent noise term.
 *
 * <p>Per-label weight vectors:
 * <ul>
 *   <li>y0: w = (0, 1)</li>
 *   <li>y1: w = (1, 1)</li>
 *   <li>y2: w = (1, 0)</li>
 *   <li>y3: w = (1, -1)</li>
 * </ul>
 *
 * <p>Every feature value is drawn with {@code Sampling.doubleUniform(-1, 1)}.
 * Label k is attached to a point when {@code w_k . x + noise_k >= 0}, where
 * noise_k is sampled from a dedicated {@code NormalDistribution(0, 0.1)}.
 * The number of (point, label) pairs whose noisy score has the opposite sign
 * of the noise-free score is printed to standard output.
 *
 * @return the generated data set
 */
public static MultiLabelClfDataSet independentNoise() {
    final int numData = 10000;
    final int numClass = 4;
    final int numFeature = 2;
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    // Row k holds the coefficients of label k for feature 0 and feature 1.
    final double[][] coefficients = {
        {0, 1},
        {1, 1},
        {1, 0},
        {1, -1}
    };
    Vector[] weights = new Vector[numClass];
    for (int label = 0; label < numClass; label++) {
        Vector w = new DenseVector(numFeature);
        for (int feature = 0; feature < numFeature; feature++) {
            w.set(feature, coefficients[label][feature]);
        }
        weights[label] = w;
    }

    // Fill the feature matrix, row by row.
    for (int row = 0; row < numData; row++) {
        for (int feature = 0; feature < numFeature; feature++) {
            dataSet.setFeatureValue(row, feature, Sampling.doubleUniform(-1, 1));
        }
    }

    // Each label gets a separate noise source so the perturbations are independent.
    NormalDistribution[] noises = new NormalDistribution[numClass];
    for (int label = 0; label < numClass; label++) {
        noises[label] = new NormalDistribution(0, 0.1);
    }

    // Label every point and count how often the noise changed the sign of the score.
    int numFlipped = 0;
    for (int row = 0; row < numData; row++) {
        Vector point = dataSet.getRow(row);
        for (int label = 0; label < numClass; label++) {
            double cleanScore = weights[label].dot(point);
            double noisyScore = cleanScore + noises[label].sample();
            if (noisyScore >= 0) {
                dataSet.addLabel(row, label);
            }
            if (cleanScore * noisyScore < 0) {
                numFlipped++;
            }
        }
    }
    System.out.println("number of flipped = " + numFlipped);
    return dataSet;
}
use of edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet in project pyramid by cheng-li.
the class MultiLabelSynthesizer method crfArgmaxDrop.
/**
 * Builds a synthetic data set of 1000 points with 10 features and 4 labels.
 * Labels are predicted by a hand-weighted {@code CMLCRF} through a
 * {@code SubsetAccPredictor}, after which some of the predicted labels are
 * randomly removed.
 *
 * <p>Only the weights of features 0 and 1 are assigned here:
 * class 0 = (0, 10), class 1 = (10, 10), class 2 = (10, 0),
 * class 3 = (-10, -10). Every feature value is drawn with
 * {@code Sampling.doubleUniform(-1, 1)}.
 *
 * <p>A predicted label {@code l} is removed when {@code Math.random()} exceeds
 * the threshold for {@code l} (1, 0.9, 0.8, 0.7). Because {@code Math.random()}
 * is always below 1.0, class 0 is never dropped.
 *
 * @return the generated data set
 */
public static MultiLabelClfDataSet crfArgmaxDrop() {
    final int numData = 1000;
    final int numClass = 4;
    final int numFeature = 10;
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    List<MultiLabel> support = Enumerator.enumerate(numClass);
    CMLCRF cmlcrf = new CMLCRF(numClass, numFeature, support);

    // Row c holds the weights of class c for feature 0 and feature 1.
    final double[][] classWeights = {
        {0, 10},
        {10, 10},
        {10, 0},
        {-10, -10}
    };
    for (int c = 0; c < classWeights.length; c++) {
        for (int j = 0; j < classWeights[c].length; j++) {
            cmlcrf.getWeights().getWeightsWithoutBiasForClass(c).set(j, classWeights[c][j]);
        }
    }

    // Fill the feature matrix, row by row.
    for (int row = 0; row < numData; row++) {
        for (int feature = 0; feature < numFeature; feature++) {
            dataSet.setFeatureValue(row, feature, Sampling.doubleUniform(-1, 1));
        }
    }

    SubsetAccPredictor predictor = new SubsetAccPredictor(cmlcrf);
    // Per-class thresholds: a label is dropped when the random draw exceeds its entry.
    final double[] alphas = {1, 0.9, 0.8, 0.7};

    for (int row = 0; row < numData; row++) {
        // Work on a copy so the predictor's own result object is not modified.
        MultiLabel label = predictor.predict(dataSet.getRow(row)).copy();
        for (int c = 0; c < numClass; c++) {
            // Draw first: exactly one random number is consumed per (point, class).
            boolean aboveThreshold = Math.random() > alphas[c];
            if (aboveThreshold && label.matchClass(c)) {
                label.removeLabel(c);
            }
        }
        dataSet.setLabels(row, label);
    }
    return dataSet;
}
use of edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet in project pyramid by cheng-li.
the class MultiLabelSynthesizer method flipTwo.
/**
 * Generates a synthetic multi-label data set from random linear scorers and
 * then toggles one or two labels on every data point.
 *
 * <p>Weights and feature values are all drawn with
 * {@code Sampling.doubleUniform(-1, 1)}. Label k is attached to a point when
 * the dot product of its weight vector with the point is non-negative.
 * Afterwards, for every point, one label index drawn with
 * {@code Sampling.intUniform(0, numClass - 1)} is toggled; if that index is 0,
 * a second index drawn with {@code Sampling.intUniform(1, numClass - 1)} is
 * toggled as well.
 *
 * <p>NOTE(review): with {@code numClass < 2} the second draw is requested with
 * inverted bounds; what happens then depends on {@code Sampling.intUniform},
 * which is not visible here -- confirm before calling with fewer than 2 classes.
 *
 * @param numData    number of data points to generate
 * @param numFeature number of features per data point
 * @param numClass   number of labels
 * @return the generated data set with flipped labels
 */
public static MultiLabelClfDataSet flipTwo(int numData, int numFeature, int numClass) {
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    // One random weight vector per label.
    Vector[] weights = new Vector[numClass];
    for (int label = 0; label < numClass; label++) {
        Vector w = new DenseVector(numFeature);
        for (int feature = 0; feature < numFeature; feature++) {
            w.set(feature, Sampling.doubleUniform(-1, 1));
        }
        weights[label] = w;
    }

    // Fill the feature matrix, row by row.
    for (int row = 0; row < numData; row++) {
        for (int feature = 0; feature < numFeature; feature++) {
            dataSet.setFeatureValue(row, feature, Sampling.doubleUniform(-1, 1));
        }
    }

    // Noise-free labels: a label is on when its linear score is non-negative.
    for (int row = 0; row < numData; row++) {
        Vector point = dataSet.getRow(row);
        for (int label = 0; label < numClass; label++) {
            if (weights[label].dot(point) >= 0) {
                dataSet.addLabel(row, label);
            }
        }
    }

    // Toggle one label per point, plus a second one whenever label 0 was picked.
    for (int row = 0; row < numData; row++) {
        int first = Sampling.intUniform(0, numClass - 1);
        MultiLabel labels = dataSet.getMultiLabels()[row];
        if (!labels.matchClass(first)) {
            labels.addLabel(first);
        } else {
            labels.removeLabel(first);
        }
        if (first != 0) {
            continue;
        }
        int second = Sampling.intUniform(1, numClass - 1);
        if (!labels.matchClass(second)) {
            labels.addLabel(second);
        } else {
            labels.removeLabel(second);
        }
    }
    return dataSet;
}
use of edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet in project pyramid by cheng-li.
the class MultiLabelSynthesizer method crfSample.
/**
 * Builds a synthetic data set of 10000 points with 2 features and 4 labels,
 * where the label set of each point is produced by a {@code SamplingPredictor}
 * wrapped around a hand-weighted {@code CMLCRF}.
 *
 * <p>Class weights for (feature 0, feature 1): class 0 = (0, 10),
 * class 1 = (10, 10), class 2 = (10, 0), class 3 = (-10, -10). Every feature
 * value is drawn with {@code Sampling.doubleUniform(-1, 1)}.
 *
 * @return the generated data set
 */
public static MultiLabelClfDataSet crfSample() {
    final int numData = 10000;
    final int numClass = 4;
    final int numFeature = 2;
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    List<MultiLabel> support = Enumerator.enumerate(numClass);
    CMLCRF cmlcrf = new CMLCRF(numClass, numFeature, support);

    // Row c holds the weights of class c for feature 0 and feature 1.
    final double[][] classWeights = {
        {0, 10},
        {10, 10},
        {10, 0},
        {-10, -10}
    };
    for (int c = 0; c < classWeights.length; c++) {
        for (int j = 0; j < classWeights[c].length; j++) {
            cmlcrf.getWeights().getWeightsWithoutBiasForClass(c).set(j, classWeights[c][j]);
        }
    }

    // Fill the feature matrix, row by row.
    for (int row = 0; row < numData; row++) {
        for (int feature = 0; feature < numFeature; feature++) {
            dataSet.setFeatureValue(row, feature, Sampling.doubleUniform(-1, 1));
        }
    }

    // Let the predictor produce the label set of every point.
    SamplingPredictor samplingPredictor = new SamplingPredictor(cmlcrf);
    for (int row = 0; row < numData; row++) {
        dataSet.setLabels(row, samplingPredictor.predict(dataSet.getRow(row)));
    }
    return dataSet;
}
use of edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet in project pyramid by cheng-li.
the class MultiLabelSynthesizer method crfArgmaxHide.
/**
 * Builds a synthetic data set of 10000 points with 2 features and 4 labels,
 * labelled by a {@code SubsetAccPredictor} wrapped around a hand-weighted
 * {@code CMLCRF}, and then hides feature 0.
 *
 * <p>Class weights for (feature 0, feature 1): class 0 = (0, 10),
 * class 1 = (10, 10), class 2 = (10, 0), class 3 = (-10, -10). Every feature
 * value is drawn with {@code Sampling.doubleUniform(-1, 1)}. Labels are
 * computed while both features are still present; afterwards feature 0 is
 * overwritten with 0 for every point, so the returned data set no longer
 * carries the values that feature had during labelling.
 *
 * @return the generated data set with feature 0 zeroed out
 */
public static MultiLabelClfDataSet crfArgmaxHide() {
    final int numData = 10000;
    final int numClass = 4;
    final int numFeature = 2;
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();

    List<MultiLabel> support = Enumerator.enumerate(numClass);
    CMLCRF cmlcrf = new CMLCRF(numClass, numFeature, support);

    // Row c holds the weights of class c for feature 0 and feature 1.
    final double[][] classWeights = {
        {0, 10},
        {10, 10},
        {10, 0},
        {-10, -10}
    };
    for (int c = 0; c < classWeights.length; c++) {
        for (int j = 0; j < classWeights[c].length; j++) {
            cmlcrf.getWeights().getWeightsWithoutBiasForClass(c).set(j, classWeights[c][j]);
        }
    }

    // Fill the feature matrix, row by row.
    for (int row = 0; row < numData; row++) {
        for (int feature = 0; feature < numFeature; feature++) {
            dataSet.setFeatureValue(row, feature, Sampling.doubleUniform(-1, 1));
        }
    }

    // Label every point from its full feature vector.
    SubsetAccPredictor predictor = new SubsetAccPredictor(cmlcrf);
    for (int row = 0; row < numData; row++) {
        dataSet.setLabels(row, predictor.predict(dataSet.getRow(row)));
    }

    // Only after labelling is complete, blank out feature 0 for all points.
    for (int row = 0; row < numData; row++) {
        dataSet.setFeatureValue(row, 0, 0);
    }
    return dataSet;
}
Aggregations