Search in sources :

Example 1 with LSHModelData

Use of com.alibaba.alink.operator.common.similarity.modeldata.LSHModelData in the Alink project by Alibaba.

Method loadModelData of the class LSHModelDataConverter.

/**
 * Rebuilds an {@link LSHModelData} from the serialized model rows.
 *
 * <p>Each row is inspected in order: a row with a non-null {@code BUCKETS_INDEX} field is
 * decoded from JSON into an (Integer, List&lt;Object&gt;) pair and stored in the index map;
 * otherwise, a row with a non-null {@code DATA_IDNEX} field is decoded into an
 * (Object, String) pair whose string part is parsed into a {@link Vector}. Rows where both
 * fields are null are ignored. Later rows overwrite earlier rows that share a key.
 *
 * <p>The hashing scheme is chosen from {@code meta}: {@link MinHashLSH} when the configured
 * metric equals JACCARD, {@link BucketRandomProjectionLSH} for any other metric.
 *
 * @param list the model rows to decode
 * @return the model data assembled from the index map, the vector map and the LSH instance
 */
@Override
public LSHModelData loadModelData(List<Row> list) {
    // NOTE(review): presumably bucket id -> sample ids, and sample id -> vector; confirm
    // against the serializing side.
    Map<Integer, List<Object>> bucketIndex = new HashMap<>();
    Map<Object, Vector> vectorsById = new HashMap<>();

    for (Row modelRow : list) {
        Object bucketJson = modelRow.getField(BUCKETS_INDEX);
        if (bucketJson != null) {
            Tuple2<Integer, List<Object>> bucketEntry = JsonConverter.fromJson(
                (String) bucketJson,
                new TypeReference<Tuple2<Integer, List<Object>>>() {
                }.getType());
            bucketIndex.put(bucketEntry.f0, bucketEntry.f1);
            continue;
        }

        Object dataJson = modelRow.getField(DATA_IDNEX);
        if (dataJson == null) {
            // Neither field is populated: nothing to load from this row.
            continue;
        }
        Tuple2<Object, String> dataEntry = JsonConverter.fromJson(
            (String) dataJson,
            new TypeReference<Tuple2<Object, String>>() {
            }.getType());
        vectorsById.put(dataEntry.f0, VectorUtil.getVector(dataEntry.f1));
    }

    boolean jaccardMetric = meta.get(VectorApproxNearestNeighborTrainParams.METRIC)
        .equals(VectorApproxNearestNeighborTrainParams.Metric.JACCARD);
    if (jaccardMetric) {
        BaseLSH minHash = new MinHashLSH(
            meta.get(MinHashLSH.RAND_COEFFICIENTS_A),
            meta.get(MinHashLSH.RAND_COEFFICIENTS_B));
        return new LSHModelData(bucketIndex, vectorsById, minHash);
    }

    BaseLSH randomProjection = new BucketRandomProjectionLSH(
        meta.get(BucketRandomProjectionLSH.RAND_VECTORS),
        meta.get(BucketRandomProjectionLSH.RAND_NUMBER),
        meta.get(BucketRandomProjectionLSH.PROJECTION_WIDTH));
    return new LSHModelData(bucketIndex, vectorsById, randomProjection);
}
Also used : HashMap(java.util.HashMap) LSHModelData(com.alibaba.alink.operator.common.similarity.modeldata.LSHModelData) BaseLSH(com.alibaba.alink.operator.common.similarity.lsh.BaseLSH) MinHashLSH(com.alibaba.alink.operator.common.similarity.lsh.MinHashLSH) Tuple2(org.apache.flink.api.java.tuple.Tuple2) BucketRandomProjectionLSH(com.alibaba.alink.operator.common.similarity.lsh.BucketRandomProjectionLSH) ArrayList(java.util.ArrayList) List(java.util.List) Row(org.apache.flink.types.Row) TypeReference(org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.type.TypeReference) Vector(com.alibaba.alink.common.linalg.Vector)

Aggregations

Vector (com.alibaba.alink.common.linalg.Vector): 1; BaseLSH (com.alibaba.alink.operator.common.similarity.lsh.BaseLSH): 1; BucketRandomProjectionLSH (com.alibaba.alink.operator.common.similarity.lsh.BucketRandomProjectionLSH): 1; MinHashLSH (com.alibaba.alink.operator.common.similarity.lsh.MinHashLSH): 1; LSHModelData (com.alibaba.alink.operator.common.similarity.modeldata.LSHModelData): 1; ArrayList (java.util.ArrayList): 1; HashMap (java.util.HashMap): 1; List (java.util.List): 1; Tuple2 (org.apache.flink.api.java.tuple.Tuple2): 1; TypeReference (org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.type.TypeReference): 1; Row (org.apache.flink.types.Row): 1