Use of com.alibaba.alink.common.linalg.SparseVector in project Alink by Alibaba.
Class FastDistanceTest, method initSparseData.
/**
 * Builds {@code len} sparse test vectors of dimension {@code size}.
 *
 * <p>The i-th vector carries two random values, stored at indices {@code i % size} and
 * {@code (i + 1) % size}. The generator is unseeded, so the values differ between runs.
 *
 * @param len  number of vectors to create.
 * @param size dimension passed to every vector; also the modulus for the indices.
 * @return the generated vectors, in creation order.
 */
private static List<Vector> initSparseData(int len, int size) {
    final Random rng = new Random();
    final List<Vector> vectors = new ArrayList<>();
    int row = 0;
    while (row < len) {
        int[] positions = { row % size, (row + 1) % size };
        // Draw in the same order the values are stored.
        double first = rng.nextDouble();
        double second = rng.nextDouble();
        vectors.add(new SparseVector(size, positions, new double[] { first, second }));
        row++;
    }
    return vectors;
}
Use of com.alibaba.alink.common.linalg.SparseVector in project Alink by Alibaba.
Class FastDistanceTest, method initSparseDataWithoutSize.
/**
 * Builds {@code len} sparse test vectors whose size argument is {@code -1}.
 *
 * <p>The i-th vector carries two random values, stored at indices {@code i % size} and
 * {@code (i + 1) % size}; {@code size} is used only as the modulus for those indices.
 * NOTE(review): -1 presumably marks the vector size as undetermined -- confirm against
 * SparseVector.
 *
 * @param len  number of vectors to create.
 * @param size modulus used to derive the two indices of each vector.
 * @return the generated vectors, in creation order.
 */
private static List<Vector> initSparseDataWithoutSize(int len, int size) {
    final Random rng = new Random();
    final List<Vector> vectors = new ArrayList<>();
    int row = 0;
    while (row < len) {
        int[] positions = { row % size, (row + 1) % size };
        // Draw in the same order the values are stored.
        double first = rng.nextDouble();
        double second = rng.nextDouble();
        vectors.add(new SparseVector(-1, positions, new double[] { first, second }));
        row++;
    }
    return vectors;
}
Use of com.alibaba.alink.common.linalg.SparseVector in project Alink by Alibaba.
Class FastDistanceTest, method testSparseVectorTupleInput.
/**
 * Prepares vector data from a (sparse vector, row) tuple with {@code EuclideanDistance}
 * and checks the result against the same vector and an equal row.
 */
@Test
public void testSparseVectorTupleInput() {
    EuclideanDistance distance = new EuclideanDistance();

    // Sparse vector of dimension 10 with value 1.0 at positions 1 and 2.
    int[] positions = { 1, 2 };
    double[] weights = { 1.0, 1.0 };
    Vector vec = new SparseVector(10, positions, weights);

    FastDistanceVectorData vectorData = distance.prepareVectorData(
        Tuple2.of(vec, Row.of(0, "a")));

    // The expected row is built separately rather than sharing the input instance.
    assertVectorInput(vectorData, vec, Row.of(0, "a"));
}
Use of com.alibaba.alink.common.linalg.SparseVector in project Alink by Alibaba.
Class SparseVectorSummarizer, method visit.
/**
 * Updates the running statistics with one vector.
 *
 * <p>A {@code DenseVector} is first wrapped as a {@code SparseVector} whose indices are
 * {@code 0 .. size-1} and whose values are the dense data array; any other input is cast
 * to {@code SparseVector}. The method then increments {@code count}, raises {@code colNum}
 * to at least the vector size, feeds each stored (index, value) pair into the per-index
 * {@code VectorStatCol} in {@code cols}, and, when {@code calculateOuterProduct} is set,
 * adds the vector's outer product to {@code outerProduct}.
 *
 * @param vec the vector to accumulate.
 * @return this summarizer.
 */
@Override
public BaseVectorSummarizer visit(Vector vec) {
    final SparseVector sv;
    if (!(vec instanceof DenseVector)) {
        sv = (SparseVector) vec;
    } else {
        DenseVector dense = (DenseVector) vec;
        int dim = dense.size();
        int[] positions = new int[dim];
        for (int p = 0; p < dim; p++) {
            positions[p] = p;
        }
        sv = new SparseVector(dim, positions, dense.getData());
    }

    count++;
    this.colNum = Math.max(this.colNum, sv.size());

    // Nothing stored in the vector: only count and colNum change.
    if (sv.numberOfValues() == 0) {
        return this;
    }

    // Per-index statistics.
    for (VectorIterator iter = sv.iterator(); iter.hasNext(); iter.next()) {
        int index = iter.getIndex();
        double value = iter.getValue();
        if (!cols.containsKey(index)) {
            VectorStatCol fresh = new VectorStatCol();
            fresh.visit(value);
            cols.put(index, fresh);
        } else {
            cols.get(index).visit(value);
        }
    }

    if (!calculateOuterProduct) {
        return this;
    }

    int[] idx = sv.getIndices();
    double[] vals = sv.getValues();

    // Last stored index + 1 is used as the required matrix dimension.
    // NOTE(review): this relies on the indices being in ascending order -- confirm.
    int size = idx[idx.length - 1] + 1;
    if (outerProduct == null) {
        outerProduct = DenseMatrix.zeros(size, size);
    } else if (size > outerProduct.numRows()) {
        // Replace the accumulator with the result of combining a larger zero matrix
        // and the previous accumulator.
        outerProduct = VectorSummarizerUtil.plusEqual(DenseMatrix.zeros(size, size), outerProduct);
    }

    for (int a = 0; a < idx.length; a++) {
        double left = vals[a];
        int rowIdx = idx[a];
        for (int b = 0; b < idx.length; b++) {
            outerProduct.add(rowIdx, idx[b], left * vals[b]);
        }
    }
    return this;
}
Use of com.alibaba.alink.common.linalg.SparseVector in project Alink by Alibaba.
Class MultilayerPerceptronTrainBatchOp, method getTrainingSamples.
/**
 * Converts the input rows into (label index, scaled dense feature vector) pairs.
 *
 * <p>Each row's label is looked up in the broadcast "labels" set to obtain its index, and
 * the features are divided component-wise by the broadcast "maxAbs" vector (zero entries of
 * which are replaced by 1.0 first). Features come from the vector column when
 * {@code vectorColName} is non-blank, otherwise from the numeric columns in
 * {@code featureColNames}.
 *
 * @param data            input operator providing the rows.
 * @param labels          (index, label) pairs, broadcast under the name "labels".
 * @param maxAbs          scaling vector, broadcast under the name "maxAbs".
 * @param featureColNames feature columns, used only when no vector column is given.
 * @param vectorColName   vector column name; blank or null selects the feature columns.
 * @param labelColName    label column name.
 * @param vecSize         size set on sparse input vectors before they are densified.
 * @return data set of (label index as double, scaled features); the vector is null when the
 *         row's vector field resolves to null.
 */
private static DataSet<Tuple2<Double, DenseVector>> getTrainingSamples(BatchOperator data, DataSet<Tuple2<Long, Object>> labels, DataSet<DenseVector> maxAbs, final String[] featureColNames, final String vectorColName, final String labelColName, final int vecSize) {
    final boolean isVectorInput = !StringUtils.isNullOrWhitespaceOnly(vectorColName);
    final int vectorColIdx;
    final int[] featureColIdx;
    if (isVectorInput) {
        vectorColIdx = TableUtil.findColIndexWithAssertAndHint(data.getColNames(), vectorColName);
        featureColIdx = null;
    } else {
        vectorColIdx = -1;
        featureColIdx = TableUtil.findColIndicesWithAssertAndHint(data.getSchema(), featureColNames);
    }
    final int labelColIdx = TableUtil.findColIndexWithAssertAndHint(data.getColNames(), labelColName);
    DataSet<Row> dataRows = data.getDataSet();

    return dataRows.map(new RichMapFunction<Row, Tuple2<Double, DenseVector>>() {
        private static final long serialVersionUID = -2883936655064900395L;
        transient Map<Comparable, Long> label2index;
        // Shadows the enclosing method's maxAbs parameter; holds the broadcast vector.
        private DenseVector maxAbs;

        /** Loads the label lookup table and the scaling vector from the broadcast sets. */
        @Override
        public void open(Configuration parameters) throws Exception {
            List<Tuple2<Long, Object>> bcLabels = getRuntimeContext().getBroadcastVariable("labels");
            this.label2index = new HashMap<>();
            for (Tuple2<Long, Object> entry : bcLabels) {
                Long index = entry.f0;
                Comparable label = (Comparable) entry.f1;
                this.label2index.put(label, index);
            }

            maxAbs = (DenseVector) getRuntimeContext().getBroadcastVariable("maxAbs").get(0);
            // Avoid dividing by zero later: a zero scale is replaced by 1.0 (in place).
            int dim = maxAbs.size();
            for (int k = 0; k < dim; ++k) {
                if (0 == maxAbs.get(k)) {
                    maxAbs.set(k, 1.0);
                }
            }
        }

        /** Maps one row to its label index and scaled dense feature vector. */
        @Override
        public Tuple2<Double, DenseVector> map(Row value) throws Exception {
            Comparable label = (Comparable) value.getField(labelColIdx);
            Long labelIdx = this.label2index.get(label);
            if (labelIdx == null) {
                throw new RuntimeException("unknown label: " + label);
            }

            if (!isVectorInput) {
                // Table input: assemble the features from the individual numeric columns.
                int n = featureColIdx.length;
                DenseVector features = new DenseVector(n);
                for (int c = 0; c < n; c++) {
                    Number cell = (Number) value.getField(featureColIdx[c]);
                    features.set(c, cell.doubleValue() / maxAbs.get(c));
                }
                return Tuple2.of(labelIdx.doubleValue(), features);
            }

            Vector vec = VectorUtil.getVector(value.getField(vectorColIdx));
            if (vec == null) {
                return new Tuple2<>(labelIdx.doubleValue(), null);
            }

            DenseVector finalVec;
            if (!(vec instanceof DenseVector)) {
                // Sparse input: fix its size, densify, then scale only the stored positions.
                SparseVector sparse = (SparseVector) vec;
                sparse.setSize(vecSize);
                finalVec = sparse.toDenseVector();
                for (int pos : sparse.getIndices()) {
                    finalVec.set(pos, finalVec.get(pos) / maxAbs.get(pos));
                }
            } else {
                // Dense input is scaled in place over the first maxAbs.size() components.
                finalVec = (DenseVector) vec;
                for (int k = 0; k < maxAbs.size(); ++k) {
                    finalVec.set(k, finalVec.get(k) / maxAbs.get(k));
                }
            }
            return new Tuple2<>(labelIdx.doubleValue(), finalVec);
        }
    }).withBroadcastSet(labels, "labels").withBroadcastSet(maxAbs, "maxAbs");
}
Aggregations