use of com.tencent.angel.ml.math2.vector.IntDoubleVector in project angel by Tencent.
the class RegTDataStore method init.
/**
 * Loads every instance of {@code dataSet} into this store.
 *
 * <p>For each row the feature vector is kept as an {@link IntFloatVector} (an
 * {@code IntDoubleVector} is converted to a sparse float vector), the label is binarised
 * (anything other than 1 becomes 0), and the prediction, weight and base weight are reset to
 * 0, 1 and 1 respectively. While reading, the per-feature minimum and maximum of the stored
 * values are tracked and finally published through {@code featureMeta}.
 *
 * @param dataSet the labeled instances to load; its read index is reset before reading
 * @throws IOException if a row carries a feature vector that is neither an
 *     {@code IntFloatVector} nor an {@code IntDoubleVector}, or if reading from the data
 *     block fails (presumably; {@code read()} is declared elsewhere)
 */
public void init(DataBlock<LabeledData> dataSet) throws IOException {
  numRow = dataSet.size();
  numCol = param.numFeature;
  numNonzero = param.numNonzero;
  instances = new IntFloatVector[numRow];
  labels = new float[numRow];
  preds = new float[numRow];
  weights = new float[numRow];
  baseWeights = new float[numRow];

  // Min and max of each feature. Both bounds start at 0.0 (the default content of a new
  // double[]), i.e. the value of an entry that is not stored. The maximum used to start at
  // Float.MAX_VALUE, which no feature value can exceed, so it was never updated.
  double[] minFeatures = new double[numCol];
  double[] maxFeatures = new double[numCol];

  dataSet.resetReadIndex();
  for (int idx = 0; idx < numRow; idx++) {
    LabeledData data = dataSet.read();

    // Resolve the feature vector per row so an unsupported type can never fall through to
    // the vector of the previous row (or to null on the first row).
    IntFloatVector x;
    if (data.getX() instanceof IntFloatVector) {
      x = (IntFloatVector) data.getX();
    } else if (data.getX() instanceof IntDoubleVector) {
      IntDoubleVector dx = (IntDoubleVector) data.getX();
      x = VFactory.sparseFloatVector((int) data.getX().dim(), dx.getStorage().getIndices(),
          Maths.double2Float(dx.getStorage().getValues()));
    } else {
      throw new IOException("Unsupported feature vector type at row " + idx + ": "
          + (data.getX() == null ? "null" : data.getX().getClass().getName()));
    }

    // Binarise the label: only an exact 1 counts as positive.
    double y = data.getY();
    if (y != 1) {
      y = 0;
    }

    // Widen the per-feature range with every stored entry of this row.
    int[] indices = x.getStorage().getIndices();
    float[] values = x.getStorage().getValues();
    for (int i = 0; i < indices.length; i++) {
      int fid = indices[i];
      double fvalue = values[i];
      if (fvalue > maxFeatures[fid]) {
        maxFeatures[fid] = fvalue;
      }
      if (fvalue < minFeatures[fid]) {
        minFeatures[fid] = fvalue;
      }
    }

    instances[idx] = x;
    labels[idx] = (float) y;
    preds[idx] = 0.0f;
    weights[idx] = 1.0f;
    baseWeights[idx] = 1.0f;
  }

  featureMeta = new FeatureMeta(numCol, Maths.double2Float(minFeatures), Maths.double2Float(maxFeatures));
}
use of com.tencent.angel.ml.math2.vector.IntDoubleVector in project angel by Tencent.
the class GradHistHelper method buildHistogram.
/**
 * Builds the gradient/hessian histogram for the instances of one tree node.
 *
 * <p>The result is a dense vector of length {@code featureNum * 2 * splitNum}, where
 * {@code featureNum} is the length of the controller's sampled feature set. For the feature at
 * position {@code fPos} in that set, the {@code splitNum} gradient bins start at
 * {@code 2 * splitNum * fPos} and the {@code splitNum} hessian bins follow directly after.
 *
 * <p>The histogram is built in a sparse-aware way: each stored entry adds the instance's
 * grad/hess to the bin of its value and subtracts it from the bin that contains 0.0f;
 * afterwards the grad/hess totals of all instances are added to that zero bin of every
 * feature, so instances that do not store a feature end up counted in its zero bin.
 *
 * @param insStart first position (inclusive) of the node's span in {@code controller.instancePos}
 * @param insEnd last position (inclusive) of the node's span in {@code controller.instancePos}
 * @return the concatenated per-feature gradient and hessian histogram
 */
public IntDoubleVector buildHistogram(int insStart, int insEnd) {
  // 1. new feature's histogram (grad + hess)
  // size: sampled_featureNum * (2 * splitNum)
  // in other words, concatenate each feature's histogram
  int featureNum = this.controller.fSet.length;
  int splitNum = this.controller.param.numSplit;
  IntDoubleVector histogram = new IntDoubleVector(featureNum * 2 * splitNum, new IntDoubleDenseVectorStorage(new double[featureNum * 2 * splitNum]));
  // 2. get the span of this node
  int nodeStart = insStart;
  // inclusive
  int nodeEnd = insEnd;
  LOG.debug(String.format("Build histogram of node[%d]: size[%d] instance span [%d - %d]", this.nid, histogram.getDim(), nodeStart, nodeEnd));
  // ------ 3. using sparse-aware method to build histogram ---
  // first add grads of all instances to the zero bin of all features, then loop the non-zero entries of all the instances
  float gradSum = 0.0f;
  float hessSum = 0.0f;
  long parseInstanceTime = 0;
  long startTime = System.currentTimeMillis();
  for (int idx = nodeStart; idx <= nodeEnd; idx++) {
    // 3.1. get the instance index
    int insIdx = this.controller.instancePos[idx];
    // 3.2. get the grad and hess of the instance
    GradPair gradPair = this.controller.gradPairs[insIdx];
    // 3.3. add to the sum
    gradSum += gradPair.getGrad();
    hessSum += gradPair.getHess();
    IntFloatVector instance = this.controller.trainDataStore.instances[insIdx];
    int numNnz = instance.getStorage().getIndices().length;
    long tmpTime = System.currentTimeMillis();
    int[] indices = instance.getStorage().getIndices();
    float[] values = instance.getStorage().getValues();
    parseInstanceTime += System.currentTimeMillis() - tmpTime;
    // 3.4. loop the non-zero entries
    for (int i = 0; i < numNnz; i++) {
      int fid = indices[i];
      // 3.4.1. get feature value
      float fv = values[i];
      // 3.4.2. current feature's position in the sampled feature set
      int fPos = this.controller.fPos[fid];
      if (fPos == -1) {
        // feature is not part of the sampled set
        continue;
      }
      // 3.4.3. find the position of feature value in a histogram
      // the search area in the sketch is [fid * #splitNum, (fid+1) * #splitNum - 1]
      int start = fid * splitNum;
      // inclusive; categorical features only use as many slots as they have categories
      int end;
      if (this.controller.cateFeatNum.containsKey(fid)) {
        end = start + this.controller.cateFeatNum.get(fid) - 1;
      } else {
        end = start + splitNum - 1;
      }
      int fValueIdx = findFvaluePlace(this.controller.sketches, fv, start, end);
      assert fValueIdx >= 0 && fValueIdx < splitNum;
      int gradIdx = 2 * splitNum * fPos + fValueIdx;
      int hessIdx = gradIdx + splitNum;
      // 3.4.4. add the grad and hess to the corresponding bin
      histogram.set(gradIdx, histogram.get(gradIdx) + gradPair.getGrad());
      histogram.set(hessIdx, histogram.get(hessIdx) + gradPair.getHess());
      // 3.4.5. add the reverse to the bin that contains 0.0f
      int fZeroValueIdx = findFvaluePlace(this.controller.sketches, 0.0f, start, end);
      assert fZeroValueIdx >= 0 && fZeroValueIdx < splitNum;
      int gradZeroIdx = 2 * splitNum * fPos + fZeroValueIdx;
      int hessZeroIdx = gradZeroIdx + splitNum;
      double curGrad = histogram.get(gradZeroIdx);
      double curHess = histogram.get(hessZeroIdx);
      histogram.set(gradZeroIdx, curGrad - gradPair.getGrad());
      histogram.set(hessZeroIdx, curHess - gradPair.getHess());
    }
  }
  // 4. add the grad and hess sum to the zero bin of all features
  // NOTE(review): this loop only visits feature ids below fSet.length; if fSet can hold
  // larger ids (feature sampling), those features never receive the sums - confirm.
  for (int fid = 0; fid < featureNum; fid++) {
    int fPos = findFidPlace(this.controller.fSet, fid);
    if (fPos == -1) {
      continue;
    }
    // The sketch window is addressed by feature id, exactly as in step 3.4.3, so that the
    // zero bin found here is the same bin step 3.4.5 subtracted from. (It used to be derived
    // from fPos, which selects another feature's sketch whenever fPos != fid.)
    int start = fid * splitNum;
    int end;
    if (this.controller.cateFeatNum.containsKey(fid)) {
      end = start + this.controller.cateFeatNum.get(fid) - 1;
    } else {
      end = start + splitNum - 1;
    }
    int fZeroValueIdx = findFvaluePlace(this.controller.sketches, 0.0f, start, end);
    int gradZeroIdx = 2 * splitNum * fPos + fZeroValueIdx;
    int hessZeroIdx = 2 * splitNum * fPos + fZeroValueIdx + splitNum;
    histogram.set(gradZeroIdx, histogram.get(gradZeroIdx) + gradSum);
    histogram.set(hessZeroIdx, histogram.get(hessZeroIdx) + hessSum);
  }
  LOG.debug(String.format("Build histogram cost %d ms, parse instance cost %d ms", System.currentTimeMillis() - startTime, parseInstanceTime));
  return histogram;
}
use of com.tencent.angel.ml.math2.vector.IntDoubleVector in project angel by Tencent.
the class MixedBinaryInNonZAExecutor method apply.
/**
 * Applies {@code op} between the partitioned vector {@code v1} and the flat vector {@code v2}
 * and stores the outcome back into {@code v1}.
 *
 * <p>Result storages are obtained from {@code StorageSwitch.applyComp}. With a dense
 * {@code v2}, every slot of every partition is computed, using {@code op.apply(0, v2[i])}
 * where the partition stores nothing. With a sparse or sorted {@code v2}, only the indices
 * stored in {@code v2} are computed and written into the result storage of the partition
 * that owns them.
 *
 * @param v1 the composite vector; its partitions are replaced by the results
 * @param v2 the right-hand operand, addressed by global index
 * @param op the binary operator to apply
 * @return {@code v1}, now holding the result partitions
 */
private static Vector apply(CompIntDoubleVector v1, IntDoubleVector v2, Binary op) {
  IntDoubleVector[] parts = v1.getPartitions();
  Storage[] resParts = StorageSwitch.applyComp(v1, v2, op);
  if (v2.isDense()) {
    double[] v2Values = v2.getStorage().getValues();
    // base is the global index of the current partition's first slot
    int base = 0, k = 0;
    for (IntDoubleVector part : parts) {
      IntDoubleVectorStorage resPart = (IntDoubleVectorStorage) resParts[k];
      double[] newValues = resPart.getValues();
      if (part.isDense()) {
        double[] partValue = part.getStorage().getValues();
        for (int i = 0; i < partValue.length; i++) {
          int idx = i + base;
          newValues[i] = op.apply(partValue[i], v2Values[idx]);
        }
      } else if (part.isSparse()) {
        if (part.size() < Constant.denseLoopThreshold * part.getDim()) {
          // few stored entries: fill every slot as "0 op v2", then overwrite the stored ones
          for (int i = 0; i < part.getDim(); i++) {
            resPart.set(i, op.apply(0, v2Values[i + base]));
          }
          ObjectIterator<Int2DoubleMap.Entry> iter = part.getStorage().entryIterator();
          while (iter.hasNext()) {
            Int2DoubleMap.Entry entry = iter.next();
            int idx = entry.getIntKey();
            resPart.set(idx, op.apply(entry.getDoubleValue(), v2Values[idx + base]));
          }
        } else {
          // many stored entries: one pass, probing the partition per slot
          for (int i = 0; i < newValues.length; i++) {
            if (part.getStorage().hasKey(i)) {
              resPart.set(i, op.apply(part.get(i), v2Values[i + base]));
            } else {
              resPart.set(i, op.apply(0, v2Values[i + base]));
            }
          }
        }
      } else {
        // sorted
        if (op.isKeepStorage()) {
          int dim = part.getDim();
          int[] resIndices = resPart.getIndices();
          double[] resValues = resPart.getValues();
          int[] partIndices = part.getStorage().getIndices();
          double[] partValues = part.getStorage().getValues();
          for (int i = 0; i < dim; i++) {
            resIndices[i] = i;
            // v2 is addressed globally, so the partition offset must be added
            // (previously v2Values[i], which read partition 0's slice for every partition)
            resValues[i] = op.apply(0, v2Values[i + base]);
          }
          // bound by this partition's entry count, not the whole composite vector's
          // (previously v1.size(), which can run past the end of partIndices)
          int size = part.size();
          for (int i = 0; i < size; i++) {
            int idx = partIndices[i];
            resValues[idx] = op.apply(partValues[i], v2Values[idx + base]);
          }
        } else {
          if (part.size() < Constant.denseLoopThreshold * part.getDim()) {
            int[] partIndices = part.getStorage().getIndices();
            double[] partValues = part.getStorage().getValues();
            for (int i = 0; i < part.getDim(); i++) {
              newValues[i] = op.apply(0, v2Values[i + base]);
            }
            int size = part.size();
            for (int i = 0; i < size; i++) {
              int idx = partIndices[i];
              newValues[idx] = op.apply(partValues[i], v2Values[idx + base]);
            }
          } else {
            IntDoubleVectorStorage partStorage = part.getStorage();
            for (int i = 0; i < newValues.length; i++) {
              if (partStorage.hasKey(i)) {
                newValues[i] = op.apply(partStorage.get(i), v2Values[i + base]);
              } else {
                newValues[i] = op.apply(0, v2Values[i + base]);
              }
            }
          }
        }
      }
      base += part.getDim();
      k++;
    }
  } else if (v2.isSparse()) {
    if (!op.isKeepStorage()) {
      // sorted partitions get a sparse result storage seeded with their current content
      for (int i = 0; i < parts.length; i++) {
        if (parts[i].getStorage() instanceof IntDoubleSortedVectorStorage) {
          resParts[i] = new IntDoubleSparseVectorStorage(parts[i].getDim(), parts[i].getStorage().getIndices(), parts[i].getStorage().getValues());
        }
      }
    }
    // partition width, rounded up; maps a global index to (partition, local index)
    int subDim = (v1.getDim() + v1.getNumPartitions() - 1) / v1.getNumPartitions();
    ObjectIterator<Int2DoubleMap.Entry> iter = v2.getStorage().entryIterator();
    while (iter.hasNext()) {
      Int2DoubleMap.Entry entry = iter.next();
      int gidx = entry.getIntKey();
      int pidx = gidx / subDim;
      int subidx = gidx % subDim;
      ((IntDoubleVectorStorage) resParts[pidx]).set(subidx, op.apply(parts[pidx].get(subidx), entry.getDoubleValue()));
    }
  } else {
    // sorted
    if (!op.isKeepStorage()) {
      for (int i = 0; i < parts.length; i++) {
        if (parts[i].getStorage() instanceof IntDoubleSortedVectorStorage) {
          resParts[i] = new IntDoubleSparseVectorStorage(parts[i].getDim(), parts[i].getStorage().getIndices(), parts[i].getStorage().getValues());
        }
      }
    }
    int subDim = (v1.getDim() + v1.getNumPartitions() - 1) / v1.getNumPartitions();
    int[] v2Indices = v2.getStorage().getIndices();
    double[] v2Values = v2.getStorage().getValues();
    for (int i = 0; i < v2Indices.length; i++) {
      int gidx = v2Indices[i];
      int pidx = gidx / subDim;
      int subidx = gidx % subDim;
      ((IntDoubleVectorStorage) resParts[pidx]).set(subidx, op.apply(parts[pidx].get(subidx), v2Values[i]));
    }
  }
  // wrap each result storage in a vector carrying the original partition's metadata
  IntDoubleVector[] res = new IntDoubleVector[parts.length];
  int i = 0;
  for (IntDoubleVector part : parts) {
    res[i] = new IntDoubleVector(part.getMatrixId(), part.getRowId(), part.getClock(), part.getDim(), (IntDoubleVectorStorage) resParts[i]);
    i++;
  }
  v1.setPartitions(res);
  return v1;
}
use of com.tencent.angel.ml.math2.vector.IntDoubleVector in project angel by Tencent.
the class MixedBinaryInZAExecutor method apply.
/**
 * Applies {@code op} between the partitioned vector {@code v1} and the flat int vector
 * {@code v2} and stores the outcome back into {@code v1}.
 *
 * <p>Result storages are obtained from {@code StorageSwitch.applyComp}. Only indices stored
 * in {@code v1} are ever written here. With a dense {@code v2} each stored entry of each
 * partition is combined with the matching {@code v2} value. With a sparse or sorted
 * {@code v2} the smaller side drives the loop: if {@code v1} has more entries, the entries of
 * {@code v2} are walked and looked up in the owning partition; otherwise the partitions are
 * walked and looked up in {@code v2}.
 *
 * @param v1 the composite vector; its partitions are replaced by the results
 * @param v2 the right-hand operand, addressed by global index
 * @param op the binary operator to apply
 * @return {@code v1}, now holding the result partitions
 */
private static Vector apply(CompIntDoubleVector v1, IntIntVector v2, Binary op) {
  IntDoubleVector[] parts = v1.getPartitions();
  Storage[] resParts = StorageSwitch.applyComp(v1, v2, op);
  if (v2.isDense()) {
    // base is the global index of the current partition's first slot
    int base = 0;
    int[] v2Values = v2.getStorage().getValues();
    for (int i = 0; i < parts.length; i++) {
      IntDoubleVector part = parts[i];
      IntDoubleVectorStorage resPart = (IntDoubleVectorStorage) resParts[i];
      if (part.isDense()) {
        double[] resPartValues = resPart.getValues();
        double[] partValues = part.getStorage().getValues();
        for (int j = 0; j < partValues.length; j++) {
          resPartValues[j] = op.apply(partValues[j], v2Values[base + j]);
        }
      } else if (part.isSparse()) {
        ObjectIterator<Int2DoubleMap.Entry> iter = part.getStorage().entryIterator();
        while (iter.hasNext()) {
          Int2DoubleMap.Entry entry = iter.next();
          int idx = entry.getIntKey();
          resPart.set(idx, op.apply(entry.getDoubleValue(), v2Values[idx + base]));
        }
      } else {
        // sorted
        if (op.isKeepStorage()) {
          int[] resPartIndices = resPart.getIndices();
          double[] resPartValues = resPart.getValues();
          int[] partIndices = part.getStorage().getIndices();
          double[] partValues = part.getStorage().getValues();
          for (int j = 0; j < partIndices.length; j++) {
            int idx = partIndices[j];
            resPartIndices[j] = idx;
            resPartValues[j] = op.apply(partValues[j], v2Values[idx + base]);
          }
        } else {
          int[] partIndices = part.getStorage().getIndices();
          double[] partValues = part.getStorage().getValues();
          for (int j = 0; j < partIndices.length; j++) {
            int idx = partIndices[j];
            resPart.set(idx, op.apply(partValues[j], v2Values[idx + base]));
          }
        }
      }
      base += part.getDim();
    }
  } else if (v2.isSparse()) {
    if (v1.size() > v2.size()) {
      // v2 is the smaller side: walk its entries and look each one up in its partition
      int subDim = (v1.getDim() + v1.getNumPartitions() - 1) / v1.getNumPartitions();
      ObjectIterator<Int2IntMap.Entry> iter = v2.getStorage().entryIterator();
      while (iter.hasNext()) {
        Int2IntMap.Entry entry = iter.next();
        int idx = entry.getIntKey();
        int pidx = idx / subDim;
        int subidx = idx % subDim;
        if (parts[pidx].hasKey(subidx)) {
          ((IntDoubleVectorStorage) resParts[pidx]).set(subidx, op.apply(parts[pidx].get(subidx), entry.getIntValue()));
        }
      }
    } else {
      applyByProbingV2(parts, resParts, v2, op);
    }
  } else {
    // sorted
    if (v1.size() > v2.size()) {
      int subDim = (v1.getDim() + v1.getNumPartitions() - 1) / v1.getNumPartitions();
      int[] v2Indices = v2.getStorage().getIndices();
      int[] v2Values = v2.getStorage().getValues();
      for (int i = 0; i < v2Indices.length; i++) {
        int idx = v2Indices[i];
        int pidx = idx / subDim;
        int subidx = idx % subDim;
        if (parts[pidx].hasKey(subidx)) {
          ((IntDoubleVectorStorage) resParts[pidx]).set(subidx, op.apply(parts[pidx].get(subidx), v2Values[i]));
        }
      }
    } else {
      applyByProbingV2(parts, resParts, v2, op);
    }
  }
  // wrap each result storage in a vector carrying the original partition's metadata
  IntDoubleVector[] res = new IntDoubleVector[parts.length];
  int i = 0;
  for (IntDoubleVector part : parts) {
    res[i] = new IntDoubleVector(part.getMatrixId(), part.getRowId(), part.getClock(), part.getDim(), (IntDoubleVectorStorage) resParts[i]);
    i++;
  }
  v1.setPartitions(res);
  return v1;
}

/**
 * Walks the stored entries of every partition and, for each one whose global index is also
 * present in {@code v2}, writes {@code op(partValue, v2Value)} into the partition's result
 * storage. Entries without a counterpart in {@code v2} are not written. Shared by the sparse
 * and the sorted {@code v2} cases, which use the same key-lookup access to {@code v2}.
 */
private static void applyByProbingV2(IntDoubleVector[] parts, Storage[] resParts, IntIntVector v2, Binary op) {
  // base is the global index of the current partition's first slot
  int base = 0;
  for (int i = 0; i < parts.length; i++) {
    IntDoubleVector part = parts[i];
    IntDoubleVectorStorage resPart = (IntDoubleVectorStorage) resParts[i];
    if (part.isDense()) {
      double[] partValues = part.getStorage().getValues();
      double[] resPartValues = resPart.getValues();
      for (int j = 0; j < partValues.length; j++) {
        if (v2.hasKey(j + base)) {
          resPartValues[j] = op.apply(partValues[j], v2.get(j + base));
        }
      }
    } else if (part.isSparse()) {
      ObjectIterator<Int2DoubleMap.Entry> piter = part.getStorage().entryIterator();
      while (piter.hasNext()) {
        Int2DoubleMap.Entry entry = piter.next();
        int idx = entry.getIntKey();
        if (v2.hasKey(idx + base)) {
          resPart.set(idx, op.apply(entry.getDoubleValue(), v2.get(idx + base)));
        }
      }
    } else {
      // sorted
      if (op.isKeepStorage()) {
        int[] partIndices = part.getStorage().getIndices();
        double[] partValues = part.getStorage().getValues();
        int[] resPartIndices = resPart.getIndices();
        double[] resPartValues = resPart.getValues();
        for (int j = 0; j < partIndices.length; j++) {
          int idx = partIndices[j];
          if (v2.hasKey(idx + base)) {
            resPartIndices[j] = idx;
            resPartValues[j] = op.apply(partValues[j], v2.get(idx + base));
          }
        }
      } else {
        int[] partIndices = part.getStorage().getIndices();
        double[] partValues = part.getStorage().getValues();
        for (int j = 0; j < partIndices.length; j++) {
          int idx = partIndices[j];
          if (v2.hasKey(idx + base)) {
            resPart.set(idx, op.apply(partValues[j], v2.get(idx + base)));
          }
        }
      }
    }
    base += part.getDim();
  }
}
use of com.tencent.angel.ml.math2.vector.IntDoubleVector in project angel by Tencent.
the class MixedBinaryInZAExecutor method apply.
/**
 * Applies {@code op} between the partitioned vector {@code v1} and the dummy vector
 * {@code v2} and stores the outcome back into {@code v1}.
 *
 * <p>Only indices present on both sides are written into the result storages obtained from
 * {@code StorageSwitch.applyComp}. If {@code v1} holds more entries than {@code v2}, the
 * indices of {@code v2} are walked and combined with the constant 1; otherwise the stored
 * entries of each partition are walked and combined with {@code v2.get(globalIndex)}.
 *
 * @param v1 the composite vector; its partitions are replaced by the results
 * @param v2 the right-hand operand, addressed by global index
 * @param op the binary operator to apply
 * @return {@code v1}, now holding the result partitions
 */
private static Vector apply(CompIntDoubleVector v1, IntDummyVector v2, Binary op) {
  IntDoubleVector[] partitions = v1.getPartitions();
  Storage[] results = StorageSwitch.applyComp(v1, v2, op);

  if (v1.size() > v2.size()) {
    // v2 is the smaller side: resolve each of its indices to (partition, local index)
    int partWidth = (v1.getDim() + v1.getNumPartitions() - 1) / v1.getNumPartitions();
    for (int globalIdx : v2.getIndices()) {
      int p = globalIdx / partWidth;
      int localIdx = globalIdx % partWidth;
      IntDoubleVector owner = partitions[p];
      if (!owner.hasKey(localIdx)) {
        continue;
      }
      ((IntDoubleVectorStorage) results[p]).set(localIdx, op.apply(owner.get(localIdx), 1));
    }
  } else {
    // v1 is the smaller side: walk its partitions; offset is the global index of slot 0
    int offset = 0;
    for (int p = 0; p < partitions.length; p++) {
      IntDoubleVector src = partitions[p];
      IntDoubleVectorStorage dst = (IntDoubleVectorStorage) results[p];

      if (src.isDense()) {
        double[] srcValues = src.getStorage().getValues();
        double[] dstValues = dst.getValues();
        for (int slot = 0; slot < srcValues.length; slot++) {
          int globalIdx = slot + offset;
          if (v2.hasKey(globalIdx)) {
            dstValues[slot] = op.apply(srcValues[slot], v2.get(globalIdx));
          }
        }
      } else if (src.isSparse()) {
        for (ObjectIterator<Int2DoubleMap.Entry> it = src.getStorage().entryIterator(); it.hasNext();) {
          Int2DoubleMap.Entry stored = it.next();
          int localIdx = stored.getIntKey();
          if (v2.hasKey(localIdx + offset)) {
            dst.set(localIdx, op.apply(stored.getDoubleValue(), v2.get(localIdx + offset)));
          }
        }
      } else {
        // sorted storage: parallel index/value arrays
        int[] srcIndices = src.getStorage().getIndices();
        double[] srcValues = src.getStorage().getValues();
        if (op.isKeepStorage()) {
          // result keeps the sorted layout, so write by array position
          int[] dstIndices = dst.getIndices();
          double[] dstValues = dst.getValues();
          for (int pos = 0; pos < srcIndices.length; pos++) {
            int localIdx = srcIndices[pos];
            if (v2.hasKey(localIdx + offset)) {
              dstIndices[pos] = localIdx;
              dstValues[pos] = op.apply(srcValues[pos], v2.get(localIdx + offset));
            }
          }
        } else {
          for (int pos = 0; pos < srcIndices.length; pos++) {
            int localIdx = srcIndices[pos];
            if (v2.hasKey(localIdx + offset)) {
              dst.set(localIdx, op.apply(srcValues[pos], v2.get(localIdx + offset)));
            }
          }
        }
      }
      offset += src.getDim();
    }
  }

  // wrap each result storage in a vector carrying the original partition's metadata
  IntDoubleVector[] rebuilt = new IntDoubleVector[partitions.length];
  for (int p = 0; p < partitions.length; p++) {
    IntDoubleVector old = partitions[p];
    rebuilt[p] = new IntDoubleVector(old.getMatrixId(), old.getRowId(), old.getClock(), old.getDim(), (IntDoubleVectorStorage) results[p]);
  }
  v1.setPartitions(rebuilt);
  return v1;
}
Aggregations