use of com.tencent.angel.ml.math2.vector.IntIntVector in project angel by Tencent.
the class GBDTController method sampleFeature.
/**
 * Samples the feature columns used by the current tree and pushes them to the PS model named
 * {@code param.sampledFeaturesName}.
 *
 * <p>The sampling itself runs only on the task whose index is 0, and only when
 * {@code param.colSample} is below 1. Every task, whether it sampled or not, finishes by calling
 * {@code clockAllMatrix} with the set of matrix names it touched (empty for non-sampling tasks).
 *
 * @throws Exception if a call on the PS model or {@code clockAllMatrix} fails
 */
public void sampleFeature() throws Exception {
  LOG.info("------Sample feature------");
  PSModel featSample = model.getPSModel(this.param.sampledFeaturesName);
  Set<String> needFlushMatrixSet = new HashSet<String>(1);

  // Column sampling is enabled when the ratio is below 1; only task 0 draws the sample.
  boolean shouldSample = this.param.colSample < 1 && taskContext.getTaskIndex() == 0;
  if (shouldSample) {
    long startTime = System.currentTimeMillis();

    // push sampled feature set to the current tree
    int[] sampledCols = this.trainDataStore.featureMeta.sampleCol(this.param.colSample);
    IntIntDenseVectorStorage sampledStorage = new IntIntDenseVectorStorage(sampledCols);
    IntIntVector sampleFeatureVector = new IntIntVector(sampledCols.length, sampledStorage);
    featSample.increment(currentTree, sampleFeatureVector);
    needFlushMatrixSet.add(this.param.sampledFeaturesName);

    long elapsed = System.currentTimeMillis() - startTime;
    LOG.info(String.format("Sample feature cost: %d ms", elapsed));
  }

  clockAllMatrix(needFlushMatrixSet, true);
}
use of com.tencent.angel.ml.math2.vector.IntIntVector in project angel by Tencent.
the class GBDTController method findSplit.
/**
 * Finds the best split for every active tree node this task is responsible for and pushes the
 * resulting split feature, split value and split gain to the corresponding PS models.
 *
 * <p>Steps:
 * <ol>
 *   <li>Active nodes ({@code activeNode[nid] == 1}) are assigned to tasks round-robin by task
 *       index.</li>
 *   <li>For each assigned node, the gradient histogram model
 *       {@code param.gradHistNamePrefix + nid} is queried. In server-split mode the PS returns a
 *       {@code SplitEntry} directly; otherwise row 0 of the histogram is pulled and the split is
 *       computed locally with {@code GradHistHelper}.</li>
 *   <li>The histogram model is zeroed, and the per-node results are written into row
 *       {@code currentTree} of the split feature / value / gain models.</li>
 * </ol>
 *
 * <p>The server-split flag is read once from the task configuration and used both for choosing
 * how to pull and for choosing how to process the result, so the two decisions cannot disagree.
 *
 * @throws Exception if any PS operation or {@code clockAllMatrix} fails
 */
public void findSplit() throws Exception {
  LOG.info("------Find split------");
  long startTime = System.currentTimeMillis();
  // 1. find responsible tree node, using RR scheme
  List<Integer> responsibleTNode = new ArrayList<>();
  int activeTNodeNum = 0;
  for (int nid = 0; nid < this.activeNode.length; nid++) {
    int isActive = this.activeNode[nid];
    if (isActive == 1) {
      if (this.taskContext.getTaskIndex() == activeTNodeNum) {
        responsibleTNode.add(nid);
      }
      // wrap the counter so active nodes are dealt out to tasks in turn
      if (++activeTNodeNum >= taskContext.getTotalTaskNum()) {
        activeTNodeNum = 0;
      }
    }
  }
  int[] tNodeId = Maths.intList2Arr(responsibleTNode);
  LOG.info(String.format("Task[%d] responsible tree node: %s", this.taskContext.getTaskId().getIndex(), responsibleTNode.toString()));
  // 2. pull gradient histogram
  // the updated indices of the parameter on PS
  int[] updatedIndices = new int[tNodeId.length];
  // the updated split features
  int[] updatedSplitFid = new int[tNodeId.length];
  // the updated split value
  double[] updatedSplitFvalue = new double[tNodeId.length];
  // the updated split gain
  double[] updatedSplitGain = new double[tNodeId.length];
  // Single source of truth for the split mode: it decides both what is pulled (a SplitEntry or a
  // raw histogram) and how the pulled data is processed below.
  boolean isServerSplit = taskContext.getConf().getBoolean(MLConf.ML_GBDT_SERVER_SPLIT(), MLConf.DEFAULT_ML_GBDT_SERVER_SPLIT());
  for (int i = 0; i < tNodeId.length; i++) {
    int nid = tNodeId[i];
    LOG.debug(String.format("Task[%d] find best split of tree node: %d", this.taskContext.getTaskIndex(), nid));
    // 2.1. get the name of this node's gradient histogram on PS
    String gradHistName = this.param.gradHistNamePrefix + nid;
    // 2.2. pull the histogram
    long pullStartTime = System.currentTimeMillis();
    PSModel histMat = model.getPSModel(gradHistName);
    IntDoubleVector histogram = null;
    SplitEntry splitEntry = null;
    if (isServerSplit) {
      int matrixId = histMat.getMatrixId();
      GBDTGradHistGetRowFunc func = new GBDTGradHistGetRowFunc(new HistAggrParam(matrixId, 0, param.numSplit, param.minChildWeight, param.regAlpha, param.regLambda));
      splitEntry = ((GBDTGradHistGetRowResult) histMat.get(func)).getSplitEntry();
    } else {
      histogram = (IntDoubleVector) histMat.getRow(0);
      LOG.debug("Get grad histogram without server split mode, histogram size" + histogram.getDim());
    }
    LOG.info(String.format("Pull histogram from PS cost %d ms", System.currentTimeMillis() - pullStartTime));
    GradHistHelper histHelper = new GradHistHelper(this, nid);
    // 2.3. find best split result of this tree node
    if (isServerSplit) {
      // 2.3.1 using server split
      if (splitEntry.getFid() != -1) {
        // The returned fid indexes the sampled feature set and the fvalue is a split index;
        // map both back to the true feature id and the sketch value.
        int trueSplitFid = this.fSet[splitEntry.getFid()];
        int splitIdx = (int) splitEntry.getFvalue();
        float trueSplitValue = this.sketches[trueSplitFid * this.param.numSplit + splitIdx];
        LOG.info(String.format("Best split of node[%d]: feature[%d], value[%f], " + "true feature[%d], true value[%f], losschg[%f]", nid, splitEntry.getFid(), splitEntry.getFvalue(), trueSplitFid, trueSplitValue, splitEntry.getLossChg()));
        splitEntry.setFid(trueSplitFid);
        splitEntry.setFvalue(trueSplitValue);
      }
      // update the grad stats of the root node on PS, only called once by leader worker
      if (nid == 0) {
        GradStats rootStats = new GradStats(splitEntry.leftGradStat);
        rootStats.add(splitEntry.rightGradStat);
        this.updateNodeGradStats(nid, rootStats);
      }
      // update the grad stats of children node
      if (splitEntry.fid != -1) {
        // update the left child
        this.updateNodeGradStats(2 * nid + 1, splitEntry.leftGradStat);
        // update the right child
        this.updateNodeGradStats(2 * nid + 2, splitEntry.rightGradStat);
      }
    } else {
      // 2.3.2 otherwise, the returned histogram contains the gradient info
      splitEntry = histHelper.findBestSplit(histogram);
      LOG.info(String.format("Best split of node[%d]: feature[%d], value[%f], losschg[%f]", nid, splitEntry.getFid(), splitEntry.getFvalue(), splitEntry.getLossChg()));
    }
    // 2.3.3 the updated split result (tree node/feature/value/gain) on PS,
    // recorded identically for both split modes
    updatedIndices[i] = nid;
    updatedSplitFid[i] = splitEntry.fid;
    updatedSplitFvalue[i] = splitEntry.fvalue;
    updatedSplitGain[i] = splitEntry.lossChg;
    // 2.3.4 reset this tree node's gradient histogram to 0
    histMat.zero();
  }
  // 3. push split feature to PS
  IntIntVector splitFeatureVector = new IntIntVector(this.activeNode.length, new IntIntDenseVectorStorage(this.activeNode.length));
  // 4. push split value to PS
  IntDoubleVector splitValueVector = new IntDoubleVector(this.activeNode.length, new IntDoubleDenseVectorStorage(this.activeNode.length));
  // 5. push split gain to PS
  IntDoubleVector splitGainVector = new IntDoubleVector(this.activeNode.length, new IntDoubleDenseVectorStorage(this.activeNode.length));
  for (int i = 0; i < updatedIndices.length; i++) {
    splitFeatureVector.set(updatedIndices[i], updatedSplitFid[i]);
    splitValueVector.set(updatedIndices[i], updatedSplitFvalue[i]);
    splitGainVector.set(updatedIndices[i], updatedSplitGain[i]);
  }
  PSModel splitFeat = model.getPSModel(this.param.splitFeaturesName);
  splitFeat.increment(this.currentTree, splitFeatureVector);
  PSModel splitValue = model.getPSModel(this.param.splitValuesName);
  splitValue.increment(this.currentTree, splitValueVector);
  PSModel splitGain = model.getPSModel(this.param.splitGainsName);
  splitGain.increment(this.currentTree, splitGainVector);
  // 6. set phase to AFTER_SPLIT
  // this.phase = GBDTPhase.AFTER_SPLIT;
  LOG.info(String.format("Find split cost: %d ms", System.currentTimeMillis() - startTime));
  // clock
  Set<String> needFlushMatrixSet = new HashSet<String>(4);
  needFlushMatrixSet.add(this.param.splitFeaturesName);
  needFlushMatrixSet.add(this.param.splitValuesName);
  needFlushMatrixSet.add(this.param.splitGainsName);
  needFlushMatrixSet.add(this.param.nodeGradStatsName);
  clockAllMatrix(needFlushMatrixSet, true);
}
use of com.tencent.angel.ml.math2.vector.IntIntVector in project angel by Tencent.
the class MixedBinaryOutNonZAExecutor method apply.
/**
 * Applies {@code op} element-wise between the composite long vector {@code v1} and the int
 * vector {@code v2} and returns the result as a new {@code CompIntLongVector}; the partitions
 * held by {@code v1} are not replaced.
 *
 * <p>Result storages come from {@code StorageSwitch.applyComp}. When {@code v2} is dense, every
 * position of every partition is computed, pairing local index {@code i} of a partition with
 * {@code v2[i + offset]}, where {@code offset} is the summed dimension of the preceding
 * partitions. When {@code v2} is sparse or sorted, only the positions present in {@code v2} are
 * written.
 *
 * @param v1 left operand, split into partitions
 * @param v2 right operand, indexed globally
 * @param op binary operator to apply
 * @return a new composite vector carrying v1's matrix id, row id, clock, dim and sub-dim
 */
private static Vector apply(CompIntLongVector v1, IntIntVector v2, Binary op) {
  IntLongVector[] parts = v1.getPartitions();
  Storage[] resParts = StorageSwitch.applyComp(v1, v2, op);

  if (v2.isDense()) {
    int[] rhs = v2.getStorage().getValues();
    // offset = global index of the first element of the partition being processed
    int offset = 0;
    for (int p = 0; p < parts.length; p++) {
      IntLongVector part = parts[p];
      IntLongVectorStorage out = (IntLongVectorStorage) resParts[p];
      long[] outValues = out.getValues();

      if (part.isDense()) {
        long[] lhs = part.getStorage().getValues();
        for (int i = 0; i < lhs.length; i++) {
          outValues[i] = op.apply(lhs[i], rhs[offset + i]);
        }
      } else if (part.isSparse()) {
        boolean fewEntries = part.size() < Constant.denseLoopThreshold * part.getDim();
        if (fewEntries) {
          // fill every slot as if the left side were 0, then overwrite the stored entries
          for (int i = 0; i < part.getDim(); i++) {
            out.set(i, op.apply(0, rhs[i + offset]));
          }
          ObjectIterator<Int2LongMap.Entry> it = part.getStorage().entryIterator();
          while (it.hasNext()) {
            Int2LongMap.Entry e = it.next();
            int local = e.getIntKey();
            out.set(local, op.apply(e.getLongValue(), rhs[local + offset]));
          }
        } else {
          // many entries: a single pass with a key lookup per slot
          for (int i = 0; i < outValues.length; i++) {
            if (part.getStorage().hasKey(i)) {
              out.set(i, op.apply(part.get(i), rhs[i + offset]));
            } else {
              out.set(i, op.apply(0, rhs[i + offset]));
            }
          }
        }
      } else if (op.isKeepStorage()) {
        // sorted partition, result keeps sorted storage: write index and value arrays directly
        int dim = part.getDim();
        int[] outIndices = out.getIndices();
        long[] outSorted = out.getValues();
        int[] srcIndices = part.getStorage().getIndices();
        long[] srcValues = part.getStorage().getValues();
        for (int i = 0; i < dim; i++) {
          outIndices[i] = i;
          outSorted[i] = op.apply(0, rhs[i + offset]);
        }
        int stored = part.size();
        for (int i = 0; i < stored; i++) {
          int local = srcIndices[i];
          outSorted[local] = op.apply(srcValues[i], rhs[local + offset]);
        }
      } else if (part.size() < Constant.denseLoopThreshold * part.getDim()) {
        // sorted partition with few entries: default-fill, then overwrite stored entries
        int[] srcIndices = part.getStorage().getIndices();
        long[] srcValues = part.getStorage().getValues();
        for (int i = 0; i < part.getDim(); i++) {
          outValues[i] = op.apply(0, rhs[i + offset]);
        }
        int stored = part.size();
        for (int i = 0; i < stored; i++) {
          int local = srcIndices[i];
          outValues[local] = op.apply(srcValues[i], rhs[local + offset]);
        }
      } else {
        // sorted partition with many entries: key lookup per slot
        IntLongVectorStorage src = part.getStorage();
        for (int i = 0; i < outValues.length; i++) {
          if (src.hasKey(i)) {
            outValues[i] = op.apply(src.get(i), rhs[i + offset]);
          } else {
            outValues[i] = op.apply(0, rhs[i + offset]);
          }
        }
      }

      offset += part.getDim();
    }
  } else {
    boolean rhsSparse = v2.isSparse();

    // When storage need not be kept, sorted partitions are re-wrapped as sparse storage built
    // from their index/value arrays before the per-entry updates below.
    if (!op.isKeepStorage()) {
      for (int p = 0; p < parts.length; p++) {
        if (parts[p].getStorage() instanceof IntLongSortedVectorStorage) {
          resParts[p] = new IntLongSparseVectorStorage(parts[p].getDim(), parts[p].getStorage().getIndices(), parts[p].getStorage().getValues());
        }
      }
    }

    // ceil(dim / numPartitions): used to map a global index to (partition, local index)
    int subDim = (v1.getDim() + v1.getNumPartitions() - 1) / v1.getNumPartitions();

    if (rhsSparse) {
      ObjectIterator<Int2IntMap.Entry> it = v2.getStorage().entryIterator();
      while (it.hasNext()) {
        Int2IntMap.Entry e = it.next();
        int global = e.getIntKey();
        int partIdx = global / subDim;
        int local = global % subDim;
        ((IntLongVectorStorage) resParts[partIdx]).set(local, op.apply(parts[partIdx].get(local), e.getIntValue()));
      }
    } else {
      // sorted
      int[] rhsIndices = v2.getStorage().getIndices();
      int[] rhsValues = v2.getStorage().getValues();
      for (int j = 0; j < rhsIndices.length; j++) {
        int global = rhsIndices[j];
        int partIdx = global / subDim;
        int local = global % subDim;
        ((IntLongVectorStorage) resParts[partIdx]).set(local, op.apply(parts[partIdx].get(local), rhsValues[j]));
      }
    }
  }

  // Wrap each result storage in a partition vector that copies the source partition's metadata.
  IntLongVector[] res = new IntLongVector[parts.length];
  for (int p = 0; p < parts.length; p++) {
    IntLongVector src = parts[p];
    res[p] = new IntLongVector(src.getMatrixId(), src.getRowId(), src.getClock(), src.getDim(), (IntLongVectorStorage) resParts[p]);
  }
  return new CompIntLongVector(v1.getMatrixId(), v1.getRowId(), v1.getClock(), v1.getDim(), res, v1.getSubDim());
}
use of com.tencent.angel.ml.math2.vector.IntIntVector in project angel by Tencent.
the class MixedBinaryInNonZAExecutor method apply.
/**
 * Applies {@code op} element-wise between the composite float vector {@code v1} and the int
 * vector {@code v2}, in place: the computed partitions are installed on {@code v1} via
 * {@code setPartitions} and {@code v1} itself is returned.
 *
 * <p>Result storages come from {@code StorageSwitch.applyComp}. When {@code v2} is dense, every
 * position of every partition is computed, pairing local index {@code i} of a partition with
 * {@code v2[i + base]}, where {@code base} is the summed dimension of the preceding partitions.
 * When {@code v2} is sparse or sorted, only the positions present in {@code v2} are written.
 *
 * @param v1 left operand, split into partitions; receives the result
 * @param v2 right operand, indexed globally
 * @param op binary operator to apply
 * @return {@code v1}, with its partitions replaced by the result
 */
private static Vector apply(CompIntFloatVector v1, IntIntVector v2, Binary op) {
  IntFloatVector[] parts = v1.getPartitions();
  Storage[] resParts = StorageSwitch.applyComp(v1, v2, op);
  if (v2.isDense()) {
    int[] v2Values = v2.getStorage().getValues();
    // base = global index of the first element of the current partition; k = partition index
    int base = 0, k = 0;
    for (IntFloatVector part : parts) {
      IntFloatVectorStorage resPart = (IntFloatVectorStorage) resParts[k];
      float[] newValues = resPart.getValues();
      if (part.isDense()) {
        float[] partValue = part.getStorage().getValues();
        for (int i = 0; i < partValue.length; i++) {
          int idx = i + base;
          newValues[i] = op.apply(partValue[i], v2Values[idx]);
        }
      } else if (part.isSparse()) {
        if (part.size() < Constant.denseLoopThreshold * part.getDim()) {
          // few stored entries: fill every slot as if the left side were 0, then overwrite
          // the slots that actually have a stored value
          for (int i = 0; i < part.getDim(); i++) {
            resPart.set(i, op.apply(0, v2Values[i + base]));
          }
          ObjectIterator<Int2FloatMap.Entry> iter = part.getStorage().entryIterator();
          while (iter.hasNext()) {
            Int2FloatMap.Entry entry = iter.next();
            int idx = entry.getIntKey();
            resPart.set(idx, op.apply(entry.getFloatValue(), v2Values[idx + base]));
          }
        } else {
          // many stored entries: single pass with a key lookup per slot
          for (int i = 0; i < newValues.length; i++) {
            if (part.getStorage().hasKey(i)) {
              resPart.set(i, op.apply(part.get(i), v2Values[i + base]));
            } else {
              resPart.set(i, op.apply(0, v2Values[i + base]));
            }
          }
        }
      } else {
        // sorted
        if (op.isKeepStorage()) {
          int dim = part.getDim();
          int[] resIndices = resPart.getIndices();
          float[] resValues = resPart.getValues();
          int[] partIndices = part.getStorage().getIndices();
          float[] partValues = part.getStorage().getValues();
          for (int i = 0; i < dim; i++) {
            resIndices[i] = i;
            // v2 is indexed globally, so the partition-local index must be shifted by base
            resValues[i] = op.apply(0, v2Values[i + base]);
          }
          // number of entries stored in THIS partition, not in the whole composite vector
          int size = part.size();
          for (int i = 0; i < size; i++) {
            int idx = partIndices[i];
            resValues[idx] = op.apply(partValues[i], v2Values[idx + base]);
          }
        } else {
          if (part.size() < Constant.denseLoopThreshold * part.getDim()) {
            int[] partIndices = part.getStorage().getIndices();
            float[] partValues = part.getStorage().getValues();
            for (int i = 0; i < part.getDim(); i++) {
              newValues[i] = op.apply(0, v2Values[i + base]);
            }
            int size = part.size();
            for (int i = 0; i < size; i++) {
              int idx = partIndices[i];
              newValues[idx] = op.apply(partValues[i], v2Values[idx + base]);
            }
          } else {
            IntFloatVectorStorage partStorage = part.getStorage();
            for (int i = 0; i < newValues.length; i++) {
              if (partStorage.hasKey(i)) {
                newValues[i] = op.apply(partStorage.get(i), v2Values[i + base]);
              } else {
                newValues[i] = op.apply(0, v2Values[i + base]);
              }
            }
          }
        }
      }
      base += part.getDim();
      k++;
    }
  } else if (v2.isSparse()) {
    // When storage need not be kept, sorted partitions are re-wrapped as sparse storage built
    // from their index/value arrays before the per-entry updates below.
    if (!op.isKeepStorage()) {
      for (int i = 0; i < parts.length; i++) {
        if (parts[i].getStorage() instanceof IntFloatSortedVectorStorage) {
          resParts[i] = new IntFloatSparseVectorStorage(parts[i].getDim(), parts[i].getStorage().getIndices(), parts[i].getStorage().getValues());
        }
      }
    }
    // ceil(dim / numPartitions): maps a global index to (partition, local index)
    int subDim = (v1.getDim() + v1.getNumPartitions() - 1) / v1.getNumPartitions();
    ObjectIterator<Int2IntMap.Entry> iter = v2.getStorage().entryIterator();
    while (iter.hasNext()) {
      Int2IntMap.Entry entry = iter.next();
      int gidx = entry.getIntKey();
      int pidx = (int) (gidx / subDim);
      int subidx = gidx % subDim;
      ((IntFloatVectorStorage) resParts[pidx]).set(subidx, op.apply(parts[pidx].get(subidx), entry.getIntValue()));
    }
  } else {
    // sorted
    if (!op.isKeepStorage()) {
      for (int i = 0; i < parts.length; i++) {
        if (parts[i].getStorage() instanceof IntFloatSortedVectorStorage) {
          resParts[i] = new IntFloatSparseVectorStorage(parts[i].getDim(), parts[i].getStorage().getIndices(), parts[i].getStorage().getValues());
        }
      }
    }
    int subDim = (v1.getDim() + v1.getNumPartitions() - 1) / v1.getNumPartitions();
    int[] v2Indices = v2.getStorage().getIndices();
    int[] v2Values = v2.getStorage().getValues();
    for (int i = 0; i < v2Indices.length; i++) {
      int gidx = v2Indices[i];
      int pidx = (int) (gidx / subDim);
      int subidx = gidx % subDim;
      ((IntFloatVectorStorage) resParts[pidx]).set(subidx, op.apply(parts[pidx].get(subidx), v2Values[i]));
    }
  }
  // Wrap each result storage in a partition vector that copies the source partition's metadata.
  IntFloatVector[] res = new IntFloatVector[parts.length];
  int i = 0;
  for (IntFloatVector part : parts) {
    res[i] = new IntFloatVector(part.getMatrixId(), part.getRowId(), part.getClock(), part.getDim(), (IntFloatVectorStorage) resParts[i]);
    i++;
  }
  v1.setPartitions(res);
  return v1;
}
use of com.tencent.angel.ml.math2.vector.IntIntVector in project angel by Tencent.
the class MixedBinaryInNonZAExecutor method apply.
/**
 * Applies {@code op} element-wise between the composite long vector {@code v1} and the int
 * vector {@code v2}, in place: the computed partitions are installed on {@code v1} via
 * {@code setPartitions} and {@code v1} itself is returned.
 *
 * <p>Result storages come from {@code StorageSwitch.applyComp}. When {@code v2} is dense, every
 * position of every partition is computed, pairing local index {@code i} of a partition with
 * {@code v2[i + base]}, where {@code base} is the summed dimension of the preceding partitions.
 * When {@code v2} is sparse or sorted, only the positions present in {@code v2} are written.
 *
 * @param v1 left operand, split into partitions; receives the result
 * @param v2 right operand, indexed globally
 * @param op binary operator to apply
 * @return {@code v1}, with its partitions replaced by the result
 */
private static Vector apply(CompIntLongVector v1, IntIntVector v2, Binary op) {
  IntLongVector[] parts = v1.getPartitions();
  Storage[] resParts = StorageSwitch.applyComp(v1, v2, op);
  if (v2.isDense()) {
    int[] v2Values = v2.getStorage().getValues();
    // base = global index of the first element of the current partition; k = partition index
    int base = 0, k = 0;
    for (IntLongVector part : parts) {
      IntLongVectorStorage resPart = (IntLongVectorStorage) resParts[k];
      long[] newValues = resPart.getValues();
      if (part.isDense()) {
        long[] partValue = part.getStorage().getValues();
        for (int i = 0; i < partValue.length; i++) {
          int idx = i + base;
          newValues[i] = op.apply(partValue[i], v2Values[idx]);
        }
      } else if (part.isSparse()) {
        if (part.size() < Constant.denseLoopThreshold * part.getDim()) {
          // few stored entries: fill every slot as if the left side were 0, then overwrite
          // the slots that actually have a stored value
          for (int i = 0; i < part.getDim(); i++) {
            resPart.set(i, op.apply(0, v2Values[i + base]));
          }
          ObjectIterator<Int2LongMap.Entry> iter = part.getStorage().entryIterator();
          while (iter.hasNext()) {
            Int2LongMap.Entry entry = iter.next();
            int idx = entry.getIntKey();
            resPart.set(idx, op.apply(entry.getLongValue(), v2Values[idx + base]));
          }
        } else {
          // many stored entries: single pass with a key lookup per slot
          for (int i = 0; i < newValues.length; i++) {
            if (part.getStorage().hasKey(i)) {
              resPart.set(i, op.apply(part.get(i), v2Values[i + base]));
            } else {
              resPart.set(i, op.apply(0, v2Values[i + base]));
            }
          }
        }
      } else {
        // sorted
        if (op.isKeepStorage()) {
          int dim = part.getDim();
          int[] resIndices = resPart.getIndices();
          long[] resValues = resPart.getValues();
          int[] partIndices = part.getStorage().getIndices();
          long[] partValues = part.getStorage().getValues();
          for (int i = 0; i < dim; i++) {
            resIndices[i] = i;
            // v2 is indexed globally, so the partition-local index must be shifted by base
            resValues[i] = op.apply(0, v2Values[i + base]);
          }
          // number of entries stored in THIS partition, not in the whole composite vector
          int size = part.size();
          for (int i = 0; i < size; i++) {
            int idx = partIndices[i];
            resValues[idx] = op.apply(partValues[i], v2Values[idx + base]);
          }
        } else {
          if (part.size() < Constant.denseLoopThreshold * part.getDim()) {
            int[] partIndices = part.getStorage().getIndices();
            long[] partValues = part.getStorage().getValues();
            for (int i = 0; i < part.getDim(); i++) {
              newValues[i] = op.apply(0, v2Values[i + base]);
            }
            int size = part.size();
            for (int i = 0; i < size; i++) {
              int idx = partIndices[i];
              newValues[idx] = op.apply(partValues[i], v2Values[idx + base]);
            }
          } else {
            IntLongVectorStorage partStorage = part.getStorage();
            for (int i = 0; i < newValues.length; i++) {
              if (partStorage.hasKey(i)) {
                newValues[i] = op.apply(partStorage.get(i), v2Values[i + base]);
              } else {
                newValues[i] = op.apply(0, v2Values[i + base]);
              }
            }
          }
        }
      }
      base += part.getDim();
      k++;
    }
  } else if (v2.isSparse()) {
    // When storage need not be kept, sorted partitions are re-wrapped as sparse storage built
    // from their index/value arrays before the per-entry updates below.
    if (!op.isKeepStorage()) {
      for (int i = 0; i < parts.length; i++) {
        if (parts[i].getStorage() instanceof IntLongSortedVectorStorage) {
          resParts[i] = new IntLongSparseVectorStorage(parts[i].getDim(), parts[i].getStorage().getIndices(), parts[i].getStorage().getValues());
        }
      }
    }
    // ceil(dim / numPartitions): maps a global index to (partition, local index)
    int subDim = (v1.getDim() + v1.getNumPartitions() - 1) / v1.getNumPartitions();
    ObjectIterator<Int2IntMap.Entry> iter = v2.getStorage().entryIterator();
    while (iter.hasNext()) {
      Int2IntMap.Entry entry = iter.next();
      int gidx = entry.getIntKey();
      int pidx = (int) (gidx / subDim);
      int subidx = gidx % subDim;
      ((IntLongVectorStorage) resParts[pidx]).set(subidx, op.apply(parts[pidx].get(subidx), entry.getIntValue()));
    }
  } else {
    // sorted
    if (!op.isKeepStorage()) {
      for (int i = 0; i < parts.length; i++) {
        if (parts[i].getStorage() instanceof IntLongSortedVectorStorage) {
          resParts[i] = new IntLongSparseVectorStorage(parts[i].getDim(), parts[i].getStorage().getIndices(), parts[i].getStorage().getValues());
        }
      }
    }
    int subDim = (v1.getDim() + v1.getNumPartitions() - 1) / v1.getNumPartitions();
    int[] v2Indices = v2.getStorage().getIndices();
    int[] v2Values = v2.getStorage().getValues();
    for (int i = 0; i < v2Indices.length; i++) {
      int gidx = v2Indices[i];
      int pidx = (int) (gidx / subDim);
      int subidx = gidx % subDim;
      ((IntLongVectorStorage) resParts[pidx]).set(subidx, op.apply(parts[pidx].get(subidx), v2Values[i]));
    }
  }
  // Wrap each result storage in a partition vector that copies the source partition's metadata.
  IntLongVector[] res = new IntLongVector[parts.length];
  int i = 0;
  for (IntLongVector part : parts) {
    res[i] = new IntLongVector(part.getMatrixId(), part.getRowId(), part.getClock(), part.getDim(), (IntLongVectorStorage) resParts[i]);
    i++;
  }
  v1.setPartitions(res);
  return v1;
}
Aggregations