Use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li: class IMLGradientBoostingTest, method test5.
/**
 * Sanity check for IMLGradientBoosting probability prediction: a two-label
 * booster with constant per-label scores (+1 for label 0, -1 for label 1),
 * constrained to the assignment set {{0}, {1}}.  Prints the probability of
 * the assignment {0} both without and with the assignment constraint.
 */
private static void test5() {
    IMLGradientBoosting boosting = new IMLGradientBoosting(2);
    // Constant regressors: label 0 always scores +1, label 1 always scores -1,
    // independent of the input vector.
    boosting.addRegressor(new ConstantRegressor(1), 0);
    boosting.addRegressor(new ConstantRegressor(-1), 1);
    Vector vector = new DenseVector(2);
    MultiLabel label1 = new MultiLabel().addLabel(0);
    MultiLabel label2 = new MultiLabel().addLabel(1);
    // Legal assignments are restricted to the two singleton label sets;
    // the empty set and {0,1} are deliberately excluded.
    List<MultiLabel> assignments = new ArrayList<>();
    assignments.add(label1);
    assignments.add(label2);
    boosting.setAssignments(assignments);
    System.out.println(boosting.predictAssignmentProbWithoutConstraint(vector, label1));
    System.out.println(boosting.predictAssignmentProbWithConstraint(vector, label1));
}
Use of org.apache.mahout.math.DenseVector in project elephant-bird by twitter: class VectorWritableConverter, method convertSparseVectorDataToVector.
/**
 * Converts sparse vector data held in a Pig tuple into a Mahout Vector.
 *
 * The tuple either carries (cardinality, bag-of-entries) or just
 * (bag-of-entries); in the latter case this converter's own cardinality must
 * have been configured.  An explicitly configured cardinality always wins
 * over the one embedded in the data.
 *
 * @param value input tuple; each bag entry is an (index, number) tuple
 * @return dense or sparse vector per the converter's settings
 * @throws IOException if tuple fields cannot be read
 */
private Vector convertSparseVectorDataToVector(Tuple value) throws IOException {
    // Work out the output vector size and locate the bag of entries.
    int size = 0;
    DataBag entries = null;
    if (value.size() == 2) {
        // Size comes from the data itself...
        size = (Integer) value.get(0);
        if (cardinality != null) {
            // ...unless this converter instance was configured with one.
            size = cardinality;
        }
        entries = (DataBag) value.get(1);
    } else {
        // No size in the data; the converter must supply it.
        Preconditions.checkNotNull(cardinality, "Cardinality is undefined");
        size = cardinality;
        entries = (DataBag) value.get(0);
    }
    // Sparse input may still be materialized densely if requested.
    // TODO(Andy Schlaikjer): Test for OOM before it happens
    Vector result = dense
            ? new DenseVector(size)
            : new RandomAccessSparseVector(size); // cheaper to populate than sequential
    // Copy every in-range (index, value) pair into the vector; out-of-range
    // indices are counted and skipped rather than failing the job.
    for (Tuple entry : entries) {
        validateSparseVectorEntryData(entry);
        int index = (Integer) entry.get(0);
        if (index < 0 || index >= size) {
            counterHelper.incrCounter(Counter.INDEX_OUT_OF_BOUNDS, 1);
            continue;
        }
        double number = ((Number) entry.get(1)).doubleValue();
        result.setQuick(index, number);
    }
    // Optionally repack into a sequential-access sparse representation.
    if (sequential) {
        result = new SequentialAccessSparseVector(result);
    }
    return result;
}
Use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li: class RegTreeTrainer, method splitNode.
/**
 * Split a splittable leaf node into a left and a right child, and update the
 * tree's node/leaf bookkeeping in place.
 *
 * The split feature and threshold are assumed to have been chosen already and
 * stored on the leaf.  Data points whose feature value is missing (NaN) are
 * routed down BOTH branches probabilistically, weighted by the leaf's
 * left/right probabilities; all other points go entirely left (value <=
 * threshold) or entirely right.
 *
 * @param tree the tree being grown; its numNodes counter, leaves and allNodes collections are mutated
 * @param leafToSplit the leaf to split; must carry feature index, threshold and per-point probabilities
 * @param regTreeConfig training configuration (parallel flag, max number of leaves)
 * @param dataSet the training data
 * @param labels per-data-point targets forwarded to updateNode for the new children
 */
private static void splitNode(RegressionTree tree, Node leafToSplit, RegTreeConfig regTreeConfig, DataSet dataSet, double[] labels) {
int numDataPoints = dataSet.getNumDataPoints();
/**
 * split this leaf node on the feature/threshold previously stored on it
 */
int featureIndex = leafToSplit.getFeatureIndex();
double threshold = leafToSplit.getThreshold();
// densify the feature column up front so the per-element get(i) calls
// in the loop below are cheap even for sparse data
Vector inputVector = dataSet.getColumn(featureIndex);
Vector columnVector;
if (inputVector.isDense()) {
columnVector = inputVector;
} else {
columnVector = new DenseVector(inputVector);
}
/**
 * create children; ids are taken from the tree-wide running node counter
 */
Node leftChild = new Node();
leftChild.setId(tree.numNodes);
tree.numNodes += 1;
Node rightChild = new Node();
rightChild.setId(tree.numNodes);
tree.numNodes += 1;
// each data point's membership probability in the parent is redistributed
// to the children below
double[] parentProbs = leafToSplit.getProbs();
double[] leftProbs = new double[numDataPoints];
double[] rightProbs = new double[numDataPoints];
IntStream intStream = IntStream.range(0, numDataPoints);
if (regTreeConfig.isParallel()) {
intStream = intStream.parallel();
}
// safe to run in parallel: each iteration writes only its own index i
intStream.forEach(i -> {
double featureValue = columnVector.get(i);
if (Double.isNaN(featureValue)) {
// go to both branches probabilistically
leftProbs[i] = parentProbs[i] * leafToSplit.getLeftProb();
rightProbs[i] = parentProbs[i] * leafToSplit.getRightProb();
} else {
// <= go left, > go right
if (featureValue <= threshold) {
leftProbs[i] = parentProbs[i];
rightProbs[i] = 0;
} else {
leftProbs[i] = 0;
rightProbs[i] = parentProbs[i];
}
}
});
leftChild.setProbs(leftProbs);
rightChild.setProbs(rightProbs);
// when this split produces the final pair of leaves (tree already has
// maxNumLeaves - 1 leaves), they will never be split again, so the full
// update (presumably the search for their own best split) is skipped
int maxNumLeaves = regTreeConfig.getMaxNumLeaves();
if (tree.leaves.size() != maxNumLeaves - 1) {
updateNode(leftChild, regTreeConfig, dataSet, labels);
updateNode(rightChild, regTreeConfig, dataSet, labels);
}
/**
 * link left and right child to the parent
 */
leafToSplit.setLeftChild(leftChild);
leafToSplit.setRightChild(rightChild);
/**
 * update leaves: the parent is an interior node now, the children are leaves;
 * the parent's probs are released since only leaves need them
 */
leafToSplit.setLeaf(false);
leafToSplit.clearProbs();
tree.leaves.remove(leafToSplit);
leftChild.setLeaf(true);
rightChild.setLeaf(true);
tree.leaves.add(leftChild);
tree.leaves.add(rightChild);
tree.allNodes.add(leftChild);
tree.allNodes.add(rightChild);
}
Use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li: class MultiLabelSynthesizer, method independentNoise.
/**
 * Synthesizes a 10000-point, 2-feature, 4-class multi-label data set whose
 * labels are linear classifiers with independent Gaussian noise:
 * y0: w=(0,1), y1: w=(1,1), y2: w=(1,0), y3: w=(1,-1).
 * A label fires when its noisy score is non-negative; the number of points
 * whose label decision was flipped by the noise is printed.
 * @return the generated data set
 */
public static MultiLabelClfDataSet independentNoise() {
    int numData = 10000;
    int numClass = 4;
    int numFeature = 2;
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();
    // Fixed per-class weight rows for y0..y3.
    double[][] weightTable = {
            {0, 1},
            {1, 1},
            {1, 0},
            {1, -1}
    };
    Vector[] weights = new Vector[numClass];
    for (int k = 0; k < numClass; k++) {
        Vector w = new DenseVector(numFeature);
        for (int j = 0; j < numFeature; j++) {
            w.set(j, weightTable[k][j]);
        }
        weights[k] = w;
    }
    // Features drawn uniformly from [-1, 1].
    for (int i = 0; i < numData; i++) {
        for (int j = 0; j < numFeature; j++) {
            dataSet.setFeatureValue(i, j, Sampling.doubleUniform(-1, 1));
        }
    }
    // Independent N(0, 0.1) noise source per class.
    NormalDistribution[] noises = new NormalDistribution[numClass];
    for (int k = 0; k < numClass; k++) {
        noises[k] = new NormalDistribution(0, 0.1);
    }
    // Assign labels from the noisy scores, counting sign flips caused by noise.
    int numFlipped = 0;
    for (int i = 0; i < numData; i++) {
        for (int k = 0; k < numClass; k++) {
            double dot = weights[k].dot(dataSet.getRow(i));
            double score = dot + noises[k].sample();
            if (score >= 0) {
                dataSet.addLabel(i, k);
            }
            if (dot * score < 0) {
                numFlipped += 1;
            }
        }
    }
    System.out.println("number of flipped = " + numFlipped);
    return dataSet;
}
Use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li: class MultiLabelSynthesizer, method flipTwo.
/**
 * Synthesizes a multi-label data set from random linear classifiers, then
 * corrupts every instance by toggling one randomly chosen label; whenever
 * label 0 is the one toggled, a second label (drawn from classes 1..numClass-1)
 * is toggled as well.
 *
 * @param numData number of data points
 * @param numFeature number of features
 * @param numClass number of label classes
 * @return the generated, label-corrupted data set
 */
public static MultiLabelClfDataSet flipTwo(int numData, int numFeature, int numClass) {
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder()
            .numFeatures(numFeature)
            .numClasses(numClass)
            .numDataPoints(numData)
            .build();
    // One random linear scoring function per class, weights uniform in [-1, 1].
    Vector[] classWeights = new Vector[numClass];
    for (int k = 0; k < numClass; k++) {
        Vector w = new DenseVector(numFeature);
        for (int j = 0; j < numFeature; j++) {
            w.set(j, Sampling.doubleUniform(-1, 1));
        }
        classWeights[k] = w;
    }
    // Features drawn uniformly from [-1, 1].
    for (int i = 0; i < numData; i++) {
        for (int j = 0; j < numFeature; j++) {
            dataSet.setFeatureValue(i, j, Sampling.doubleUniform(-1, 1));
        }
    }
    // Clean labels: class k fires when its linear score is non-negative.
    for (int i = 0; i < numData; i++) {
        for (int k = 0; k < numClass; k++) {
            if (classWeights[k].dot(dataSet.getRow(i)) >= 0) {
                dataSet.addLabel(i, k);
            }
        }
    }
    // Corrupt each instance: toggle one random label, and a second one when
    // the first toggled label was class 0.
    for (int i = 0; i < numData; i++) {
        int first = Sampling.intUniform(0, numClass - 1);
        MultiLabel labels = dataSet.getMultiLabels()[i];
        if (labels.matchClass(first)) {
            labels.removeLabel(first);
        } else {
            labels.addLabel(first);
        }
        if (first == 0) {
            int second = Sampling.intUniform(1, numClass - 1);
            if (labels.matchClass(second)) {
                labels.removeLabel(second);
            } else {
                labels.addLabel(second);
            }
        }
    }
    return dataSet;
}
Aggregations