
Example 1 with DenseVector

Use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li.

Class IMLGradientBoostingTest, method test5:

private static void test5() {
    IMLGradientBoosting boosting = new IMLGradientBoosting(2);
    boosting.addRegressor(new ConstantRegressor(1), 0);
    boosting.addRegressor(new ConstantRegressor(-1), 1);
    Vector vector = new DenseVector(2);
    MultiLabel label1 = new MultiLabel().addLabel(0);
    MultiLabel label2 = new MultiLabel().addLabel(1);
    MultiLabel label3 = new MultiLabel();
    MultiLabel label4 = new MultiLabel().addLabel(0).addLabel(1);
    List<MultiLabel> assignments = new ArrayList<>();
    assignments.add(label1);
    assignments.add(label2);
    //        assignments.add(label3);
    //        assignments.add(label4);
    boosting.setAssignments(assignments);
    System.out.println(boosting.predictAssignmentProbWithoutConstraint(vector, label1));
    System.out.println(boosting.predictAssignmentProbWithConstraint(vector, label1));
    //        for (MultiLabel multiLabel: boosting.getAssignments()){
    //            System.out.println("multilabel = "+multiLabel);
    //            System.out.println("prob = "+boosting.predictAssignmentProbWithConstraint(vector,multiLabel));
    //        }
}
Also used : ConstantRegressor (edu.neu.ccs.pyramid.regression.ConstantRegressor), Vector (org.apache.mahout.math.Vector), DenseVector (org.apache.mahout.math.DenseVector)
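For reference, here is a minimal standalone sketch of the DenseVector operations this test relies on. The class name and the values written into the vector are illustrative assumptions, not part of the pyramid test; the point is that a newly constructed DenseVector is zero-filled, which is why test5 can pass it to the model unmodified.

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;

public class DenseVectorBasics {
    public static void main(String[] args) {
        // a new DenseVector of a given cardinality starts out as all zeros
        Vector vector = new DenseVector(2);
        // write and read individual components (illustrative values)
        vector.set(0, 0.5);
        System.out.println(vector.get(0)); // 0.5
        System.out.println(vector.get(1)); // 0.0
        System.out.println(vector.size()); // 2
    }
}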

Example 2 with DenseVector

Use of org.apache.mahout.math.DenseVector in project elephant-bird by twitter.

Class VectorWritableConverter, method convertSparseVectorDataToVector:

private Vector convertSparseVectorDataToVector(Tuple value) throws IOException {
    Vector v;
    // determine output vector size and fetch bag containing entries from input
    int size = 0;
    DataBag entries = null;
    if (value.size() == 2) {
        // cardinality defined by input
        size = (Integer) value.get(0);
        if (cardinality != null) {
            // cardinality defined by VectorWritableConverter instance
            size = cardinality;
        }
        entries = (DataBag) value.get(1);
    } else {
        Preconditions.checkNotNull(cardinality, "Cardinality is undefined");
        size = cardinality;
        entries = (DataBag) value.get(0);
    }
    // create vector, allowing conversion of sparse input vector data to dense output vector
    if (dense) {
        // TODO(Andy Schlaikjer): Test for OOM before it happens
        v = new DenseVector(size);
    } else {
        // more efficient to build sparse vector with this impl
        v = new RandomAccessSparseVector(size);
    }
    // populate vector
    for (Tuple entry : entries) {
        validateSparseVectorEntryData(entry);
        int i = (Integer) entry.get(0);
        // check index bounds
        if (i < 0 || i >= size) {
            counterHelper.incrCounter(Counter.INDEX_OUT_OF_BOUNDS, 1);
            continue;
        }
        double n = ((Number) entry.get(1)).doubleValue();
        v.setQuick(i, n);
    }
    // convert to (sparse) sequential vector if requested
    if (sequential) {
        v = new SequentialAccessSparseVector(v);
    }
    return v;
}
Also used : DataBag (org.apache.pig.data.DataBag), Tuple (org.apache.pig.data.Tuple), Vector (org.apache.mahout.math.Vector), DenseVector (org.apache.mahout.math.DenseVector), RandomAccessSparseVector (org.apache.mahout.math.RandomAccessSparseVector), SequentialAccessSparseVector (org.apache.mahout.math.SequentialAccessSparseVector)
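Stripped of the Pig plumbing, the same construction pattern can be sketched directly against the Mahout API. The class name, flags, indices, and values below are made-up illustrations of the converter's dense/sequential options, not elephant-bird code.

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.SequentialAccessSparseVector;
import org.apache.mahout.math.Vector;

public class SparseVectorSketch {
    public static void main(String[] args) {
        int cardinality = 10;
        boolean dense = false;      // stands in for the converter's dense flag
        boolean sequential = true;  // stands in for the converter's sequential flag
        // choose the backing implementation, as the converter does
        Vector v = dense ? new DenseVector(cardinality) : new RandomAccessSparseVector(cardinality);
        // populate from (index, value) entries; setQuick writes without bounds checks
        int[] indices = { 1, 4, 7 };
        double[] values = { 0.5, -2.0, 3.25 };
        for (int k = 0; k < indices.length; k++) {
            v.setQuick(indices[k], values[k]);
        }
        // repack into a sequential-access sparse vector when sequential iteration is wanted
        if (sequential) {
            v = new SequentialAccessSparseVector(v);
        }
        System.out.println(v);
    }
}

As in the converter, RandomAccessSparseVector is the cheaper structure to build incrementally, while SequentialAccessSparseVector is better suited to later sequential reads.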

Example 3 with DenseVector

Use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li.

Class RegTreeTrainer, method splitNode:

/**
 * Split a splittable leaf node into two children.
 * @param tree the regression tree being grown
 * @param leafToSplit the leaf node to split
 * @param regTreeConfig the tree training configuration
 * @param dataSet the training data set
 * @param labels the regression targets for the data points
 */
private static void splitNode(RegressionTree tree, Node leafToSplit, RegTreeConfig regTreeConfig, DataSet dataSet, double[] labels) {
    int numDataPoints = dataSet.getNumDataPoints();
    /**
     * split this leaf node
     */
    int featureIndex = leafToSplit.getFeatureIndex();
    double threshold = leafToSplit.getThreshold();
    Vector inputVector = dataSet.getColumn(featureIndex);
    Vector columnVector;
    if (inputVector.isDense()) {
        columnVector = inputVector;
    } else {
        columnVector = new DenseVector(inputVector);
    }
    /**
     * create children
     */
    Node leftChild = new Node();
    leftChild.setId(tree.numNodes);
    tree.numNodes += 1;
    Node rightChild = new Node();
    rightChild.setId(tree.numNodes);
    tree.numNodes += 1;
    double[] parentProbs = leafToSplit.getProbs();
    double[] leftProbs = new double[numDataPoints];
    double[] rightProbs = new double[numDataPoints];
    IntStream intStream = IntStream.range(0, numDataPoints);
    if (regTreeConfig.isParallel()) {
        intStream = intStream.parallel();
    }
    intStream.forEach(i -> {
        double featureValue = columnVector.get(i);
        if (Double.isNaN(featureValue)) {
            // go to both branches probabilistically
            leftProbs[i] = parentProbs[i] * leafToSplit.getLeftProb();
            rightProbs[i] = parentProbs[i] * leafToSplit.getRightProb();
        } else {
            // <= go left, > go right
            if (featureValue <= threshold) {
                leftProbs[i] = parentProbs[i];
                rightProbs[i] = 0;
            } else {
                leftProbs[i] = 0;
                rightProbs[i] = parentProbs[i];
            }
        }
    });
    leftChild.setProbs(leftProbs);
    rightChild.setProbs(rightProbs);
    // the last two leaves need not be fully updated,
    // since we will not split them further
    int maxNumLeaves = regTreeConfig.getMaxNumLeaves();
    if (tree.leaves.size() != maxNumLeaves - 1) {
        updateNode(leftChild, regTreeConfig, dataSet, labels);
        updateNode(rightChild, regTreeConfig, dataSet, labels);
    }
    /**
     * link left and right child to the parent
     */
    leafToSplit.setLeftChild(leftChild);
    leafToSplit.setRightChild(rightChild);
    /**
     * update leaves, remove the parent, and add children
     */
    leafToSplit.setLeaf(false);
    leafToSplit.clearProbs();
    tree.leaves.remove(leafToSplit);
    leftChild.setLeaf(true);
    rightChild.setLeaf(true);
    tree.leaves.add(leftChild);
    tree.leaves.add(rightChild);
    tree.allNodes.add(leftChild);
    tree.allNodes.add(rightChild);
}
Also used : IntStream (java.util.stream.IntStream), Vector (org.apache.mahout.math.Vector), DenseVector (org.apache.mahout.math.DenseVector)
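The only DenseVector-specific step here is densifying a sparse column so the per-row get(i) calls in the split loop become plain array reads. A minimal sketch of that idea, with a hypothetical helper name and illustrative values:

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;

public class DensifyColumnSketch {
    // copy a column into a DenseVector only when it is not dense already
    static Vector densify(Vector column) {
        return column.isDense() ? column : new DenseVector(column);
    }

    public static void main(String[] args) {
        Vector sparseColumn = new RandomAccessSparseVector(5);
        sparseColumn.setQuick(2, 1.5);
        Vector dense = densify(sparseColumn);
        System.out.println(dense.isDense()); // true
        System.out.println(dense.get(2));    // 1.5
    }
}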

Example 4 with DenseVector

Use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li.

Class MultiLabelSynthesizer, method independentNoise:

/**
 * Synthesizes a multi-label data set with independent Gaussian noise on each label score.
 * Label weight vectors:
 * y0: w=(0,1)
 * y1: w=(1,1)
 * y2: w=(1,0)
 * y3: w=(1,-1)
 * @return the synthesized multi-label classification data set
 */
public static MultiLabelClfDataSet independentNoise() {
    int numData = 10000;
    int numClass = 4;
    int numFeature = 2;
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder().numFeatures(numFeature).numClasses(numClass).numDataPoints(numData).build();
    // generate weights
    Vector[] weights = new Vector[numClass];
    for (int k = 0; k < numClass; k++) {
        Vector vector = new DenseVector(numFeature);
        weights[k] = vector;
    }
    weights[0].set(0, 0);
    weights[0].set(1, 1);
    weights[1].set(0, 1);
    weights[1].set(1, 1);
    weights[2].set(0, 1);
    weights[2].set(1, 0);
    weights[3].set(0, 1);
    weights[3].set(1, -1);
    // generate features
    for (int i = 0; i < numData; i++) {
        for (int j = 0; j < numFeature; j++) {
            dataSet.setFeatureValue(i, j, Sampling.doubleUniform(-1, 1));
        }
    }
    NormalDistribution[] noises = new NormalDistribution[4];
    noises[0] = new NormalDistribution(0, 0.1);
    noises[1] = new NormalDistribution(0, 0.1);
    noises[2] = new NormalDistribution(0, 0.1);
    noises[3] = new NormalDistribution(0, 0.1);
    // assign labels
    int numFlipped = 0;
    for (int i = 0; i < numData; i++) {
        for (int k = 0; k < numClass; k++) {
            double dot = weights[k].dot(dataSet.getRow(i));
            double score = dot + noises[k].sample();
            if (score >= 0) {
                dataSet.addLabel(i, k);
            }
            if (dot * score < 0) {
                numFlipped += 1;
            }
        }
    }
    System.out.println("number of flipped = " + numFlipped);
    return dataSet;
}
Also used : NormalDistribution (org.apache.commons.math3.distribution.NormalDistribution), MultivariateNormalDistribution (org.apache.commons.math3.distribution.MultivariateNormalDistribution), MultiLabelClfDataSet (edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet), Vector (org.apache.mahout.math.Vector), DenseVector (org.apache.mahout.math.DenseVector)
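The label assignment above is a sign test on a noisy linear score: label k is added when w_k.x plus Gaussian noise is non-negative, and a flip is counted when the noise changes the sign of the clean score. A minimal sketch of that rule in isolation, with made-up weights and features:

import org.apache.commons.math3.distribution.NormalDistribution;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;

public class NoisyLabelSketch {
    public static void main(String[] args) {
        // illustrative values: w = (1, -1), x = (0.3, 0.8)
        Vector w = new DenseVector(new double[] { 1, -1 });
        Vector x = new DenseVector(new double[] { 0.3, 0.8 });
        NormalDistribution noise = new NormalDistribution(0, 0.1);

        double dot = w.dot(x);               // clean score, here -0.5
        double score = dot + noise.sample(); // noisy score
        boolean labeled = score >= 0;        // label assigned iff the noisy score is non-negative
        boolean flipped = dot * score < 0;   // noise changed the sign of the clean score
        System.out.println("labeled=" + labeled + ", flipped=" + flipped);
    }
}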

Example 5 with DenseVector

Use of org.apache.mahout.math.DenseVector in project pyramid by cheng-li.

Class MultiLabelSynthesizer, method flipTwo:

public static MultiLabelClfDataSet flipTwo(int numData, int numFeature, int numClass) {
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder().numFeatures(numFeature).numClasses(numClass).numDataPoints(numData).build();
    // generate weights
    Vector[] weights = new Vector[numClass];
    for (int k = 0; k < numClass; k++) {
        Vector vector = new DenseVector(numFeature);
        for (int j = 0; j < numFeature; j++) {
            vector.set(j, Sampling.doubleUniform(-1, 1));
        }
        weights[k] = vector;
    }
    // generate features
    for (int i = 0; i < numData; i++) {
        for (int j = 0; j < numFeature; j++) {
            dataSet.setFeatureValue(i, j, Sampling.doubleUniform(-1, 1));
        }
    }
    // assign labels
    for (int i = 0; i < numData; i++) {
        for (int k = 0; k < numClass; k++) {
            double dot = weights[k].dot(dataSet.getRow(i));
            if (dot >= 0) {
                dataSet.addLabel(i, k);
            }
        }
    }
    // flip
    for (int i = 0; i < numData; i++) {
        int toChange = Sampling.intUniform(0, numClass - 1);
        MultiLabel label = dataSet.getMultiLabels()[i];
        if (label.matchClass(toChange)) {
            label.removeLabel(toChange);
        } else {
            label.addLabel(toChange);
        }
        if (toChange == 0) {
            int another = Sampling.intUniform(1, numClass - 1);
            if (label.matchClass(another)) {
                label.removeLabel(another);
            } else {
                label.addLabel(another);
            }
        }
    }
    return dataSet;
}
Also used : MultiLabel (edu.neu.ccs.pyramid.dataset.MultiLabel), MultiLabelClfDataSet (edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet), Vector (org.apache.mahout.math.Vector), DenseVector (org.apache.mahout.math.DenseVector)
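The flip step simply toggles membership of a randomly chosen class (and, when class 0 is chosen, of a second random class). The toggle can be sketched with a plain Set<Integer> standing in for MultiLabel; the set-based stand-in and class name are assumptions for illustration, not the pyramid API.

import java.util.HashSet;
import java.util.Set;

public class FlipSketch {
    // toggle class k: remove it if present, add it otherwise,
    // mirroring the matchClass / removeLabel / addLabel pattern above
    static void toggle(Set<Integer> labels, int k) {
        if (!labels.remove(k)) {
            labels.add(k);
        }
    }

    public static void main(String[] args) {
        Set<Integer> labels = new HashSet<>(Set.of(0, 2));
        toggle(labels, 0); // 0 was present, so it is removed -> {2}
        toggle(labels, 1); // 1 was absent, so it is added -> {1, 2}
        System.out.println(labels);
    }
}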

Aggregations

DenseVector (org.apache.mahout.math.DenseVector): 79 usages
Vector (org.apache.mahout.math.Vector): 73 usages
MultiLabel (edu.neu.ccs.pyramid.dataset.MultiLabel): 9 usages
RandomAccessSparseVector (org.apache.mahout.math.RandomAccessSparseVector): 8 usages
MultiLabelClfDataSet (edu.neu.ccs.pyramid.dataset.MultiLabelClfDataSet): 7 usages
SequentialAccessSparseVector (org.apache.mahout.math.SequentialAccessSparseVector): 6 usages
Pair (edu.neu.ccs.pyramid.util.Pair): 4 usages
List (java.util.List): 3 usages
IntStream (java.util.stream.IntStream): 3 usages
EnumeratedIntegerDistribution (org.apache.commons.math3.distribution.EnumeratedIntegerDistribution): 3 usages
LogisticRegression (edu.neu.ccs.pyramid.classification.logistic_regression.LogisticRegression): 2 usages
DataSet (edu.neu.ccs.pyramid.dataset.DataSet): 2 usages
EmpiricalCDF (edu.neu.ccs.pyramid.util.EmpiricalCDF): 2 usages
IntegerDistribution (org.apache.commons.math3.distribution.IntegerDistribution): 2 usages
MultivariateNormalDistribution (org.apache.commons.math3.distribution.MultivariateNormalDistribution): 2 usages
Classifier (edu.neu.ccs.pyramid.classification.Classifier): 1 usage
Weights (edu.neu.ccs.pyramid.classification.logistic_regression.Weights): 1 usage
RegDataSet (edu.neu.ccs.pyramid.dataset.RegDataSet): 1 usage
ConstantRegressor (edu.neu.ccs.pyramid.regression.ConstantRegressor): 1 usage
BernoulliDistribution (edu.neu.ccs.pyramid.util.BernoulliDistribution): 1 usage