Search in sources :

Example 1 with FeatureMeta

Use of com.tencent.angel.ml.GBDT.algo.FeatureMeta in the Angel project by Tencent.

From the class RegTDataStore, method init.

/**
 * Loads a data block into this store's per-row arrays and collects per-feature value bounds.
 *
 * <p>For every sample the feature vector is stored as an {@code IntFloatVector} (an
 * {@code IntDoubleVector} is converted to a sparse float vector first), the label is
 * binarized (any value other than 1 becomes 0), and the prediction, weight and base weight
 * are initialized to 0, 1 and 1 respectively. While scanning, the smallest and largest value
 * seen for each feature id is tracked and finally wrapped in a {@code FeatureMeta}.
 *
 * <p>Both bounds start at 0.0, so the reported minimum is never above 0 and the reported
 * maximum is never below 0.
 *
 * @param dataSet the samples to load; its read index is reset before scanning
 * @throws IOException if a sample's feature vector is neither an {@code IntFloatVector} nor an
 *     {@code IntDoubleVector}; presumably also propagated from reading {@code dataSet}
 */
public void init(DataBlock<LabeledData> dataSet) throws IOException {
    numRow = dataSet.size();
    numCol = param.numFeature;
    numNonzero = param.numNonzero;
    instances = new IntFloatVector[numRow];
    labels = new float[numRow];
    preds = new float[numRow];
    weights = new float[numRow];
    baseWeights = new float[numRow];
    // Min and max of each feature. Java zero-initializes the arrays, so both bounds start at
    // 0.0. The maximum must NOT start at Float.MAX_VALUE: no float value can exceed it, which
    // would make the max update below unreachable.
    double[] minFeatures = new double[numCol];
    double[] maxFeatures = new double[numCol];
    dataSet.resetReadIndex();
    for (int idx = 0; idx < numRow; idx++) {
        LabeledData data = dataSet.read();
        // Scoped to the iteration so an unsupported sample can never silently reuse the
        // previous sample's features.
        IntFloatVector x;
        if (data.getX() instanceof IntFloatVector) {
            x = (IntFloatVector) data.getX();
        } else if (data.getX() instanceof IntDoubleVector) {
            IntDoubleVector doubleVector = (IntDoubleVector) data.getX();
            x = VFactory.sparseFloatVector((int) data.getX().dim(), doubleVector.getStorage().getIndices(), Maths.double2Float(doubleVector.getStorage().getValues()));
        } else {
            Object unsupported = data.getX();
            throw new IOException("Unsupported feature vector type at row " + idx + ": " + (unsupported == null ? "null" : unsupported.getClass().getName()));
        }
        // Binarize the label: only an exact 1 counts as positive.
        double y = data.getY();
        if (y != 1) {
            y = 0;
        }
        int[] indices = x.getStorage().getIndices();
        float[] values = x.getStorage().getValues();
        for (int i = 0; i < indices.length; i++) {
            int fid = indices[i];
            double fvalue = values[i];
            if (fvalue > maxFeatures[fid]) {
                maxFeatures[fid] = fvalue;
            }
            if (fvalue < minFeatures[fid]) {
                minFeatures[fid] = fvalue;
            }
        }
        instances[idx] = x;
        labels[idx] = (float) y;
        preds[idx] = 0.0f;
        weights[idx] = 1.0f;
        baseWeights[idx] = 1.0f;
    }
    featureMeta = new FeatureMeta(numCol, Maths.double2Float(minFeatures), Maths.double2Float(maxFeatures));
}
Also used : LabeledData(com.tencent.angel.ml.feature.LabeledData) FeatureMeta(com.tencent.angel.ml.GBDT.algo.FeatureMeta) IntFloatVector(com.tencent.angel.ml.math2.vector.IntFloatVector) IntDoubleVector(com.tencent.angel.ml.math2.vector.IntDoubleVector)

Aggregations

FeatureMeta (com.tencent.angel.ml.GBDT.algo.FeatureMeta)1 LabeledData (com.tencent.angel.ml.feature.LabeledData)1 IntDoubleVector (com.tencent.angel.ml.math2.vector.IntDoubleVector)1 IntFloatVector (com.tencent.angel.ml.math2.vector.IntFloatVector)1