Example use of com.tencent.angel.ml.GBDT.algo.FeatureMeta in the Angel project by Tencent.
Class RegTDataStore, method init.
/**
 * Loads every sample of {@code dataSet} into this store and records the observed value range of
 * each feature.
 *
 * <p>For each row the feature vector is stored in {@code instances} (an {@code IntDoubleVector}
 * is converted to a sparse {@code IntFloatVector} first), the label is binarized (any label other
 * than {@code 1} becomes {@code 0}), the prediction is reset to {@code 0} and both the weight and
 * the base weight are set to {@code 1}. Finally {@code featureMeta} is rebuilt from
 * {@code numCol} and the collected per-feature minimum and maximum.
 *
 * @param dataSet the samples to load; its read index is reset before reading
 * @throws IOException if a row carries a feature vector that is neither an
 *     {@code IntFloatVector} nor an {@code IntDoubleVector}
 */
public void init(DataBlock<LabeledData> dataSet) throws IOException {
  numRow = dataSet.size();
  numCol = param.numFeature;
  numNonzero = param.numNonzero;

  instances = new IntFloatVector[numRow];
  labels = new float[numRow];
  preds = new float[numRow];
  weights = new float[numRow];
  baseWeights = new float[numRow];

  // Per-feature min and max. Both start at 0.0, which is what a freshly allocated double[]
  // already holds, so no explicit fill is needed. The maximum must NOT start at
  // Float.MAX_VALUE: no stored value can exceed it, so the maximum would never be updated.
  // NOTE(review): 0.0 mirrors the baseline the minimum has always used (presumably the implicit
  // zero of entries absent from a sparse row) -- confirm this is the intended floor for the max.
  double[] minFeatures = new double[numCol];
  double[] maxFeatures = new double[numCol];

  dataSet.resetReadIndex();
  for (int idx = 0; idx < numRow; idx++) {
    LabeledData data = dataSet.read();

    // Normalize the feature vector to IntFloatVector.
    Object rawX = data.getX();
    IntFloatVector x;
    if (rawX instanceof IntFloatVector) {
      x = (IntFloatVector) rawX;
    } else if (rawX instanceof IntDoubleVector) {
      IntDoubleVector doubleX = (IntDoubleVector) rawX;
      x = VFactory.sparseFloatVector(
          (int) doubleX.dim(),
          doubleX.getStorage().getIndices(),
          Maths.double2Float(doubleX.getStorage().getValues()));
    } else {
      // Fail loudly instead of silently reusing the previous row's vector (or hitting a
      // NullPointerException on the first row).
      throw new IOException("Unsupported feature vector type at row " + idx + ": "
          + (rawX == null ? "null" : rawX.getClass().getName()));
    }

    // Widen/narrow the running range with every value stored in this row.
    int[] indices = x.getStorage().getIndices();
    float[] values = x.getStorage().getValues();
    for (int i = 0; i < indices.length; i++) {
      int fid = indices[i];
      double fvalue = values[i];
      if (fvalue > maxFeatures[fid]) {
        maxFeatures[fid] = fvalue;
      }
      if (fvalue < minFeatures[fid]) {
        minFeatures[fid] = fvalue;
      }
    }

    // Binarize the label: only an exact 1 is kept, everything else becomes 0.
    double y = data.getY();
    instances[idx] = x;
    labels[idx] = (y == 1) ? 1.0f : 0.0f;
    preds[idx] = 0.0f;
    weights[idx] = 1.0f;
    baseWeights[idx] = 1.0f;
  }

  featureMeta = new FeatureMeta(numCol, Maths.double2Float(minFeatures),
      Maths.double2Float(maxFeatures));
}
Aggregations