Search in sources :

Example 41 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class NaiveBayesModelInfo method getCategoryFeatureInfo.

/**
 * Builds, for every categorical feature, the share that each label holds of every feature value.
 *
 * <p>The serialized string-indexer model is loaded to translate token indices back into tokens.
 * For each categorical feature the weights stored in {@code featureInfo} are summed per token index
 * over all labels, and each label's weight is then divided by that sum. As a side effect, the set of
 * looked-up tokens of each categorical feature is stored in {@code cateFeatureValue}.
 *
 * @return a map keyed by feature name, then by label, then by feature value, holding the proportion;
 *         an empty map when the indexer model has no meta or no selected-columns entry
 */
public HashMap<Object, HashMap<Object, HashMap<Object, Double>>> getCategoryFeatureInfo() {
    MultiStringIndexerModelData indexerModel =
        new MultiStringIndexerModelDataConverter().load(stringIndexerModelSerialized);
    boolean hasSelectedCols =
        indexerModel.meta != null && indexerModel.meta.contains(HasSelectedCols.SELECTED_COLS);
    if (!hasSelectedCols) {
        return new HashMap<>(0);
    }

    // label -> feature name -> feature value -> proportion
    HashMap<Object, HashMap<Object, HashMap<Object, Double>>> byLabel = new HashMap<>(labelSize);
    String[] categoricalCols = indexerModel.meta.get(HasSelectedCols.SELECTED_COLS);
    int columnCount = categoricalCols.length;

    // One index -> token lookup table per categorical column.
    HashMap<Long, String>[] indexToToken = new HashMap[columnCount];
    for (int col = 0; col < columnCount; col++) {
        int expectedTokens = (int) indexerModel.getNumberOfTokensOfColumn(categoricalCols[col]);
        indexToToken[col] = new HashMap<>(expectedTokens);
    }
    for (Tuple3<Integer, String, Long> entry : indexerModel.tokenAndIndex) {
        indexToToken[entry.f0].put(entry.f2, entry.f1);
    }

    // Position of the current feature among the categorical columns.
    int categoricalPos = 0;
    for (int f = 0; f < featureSize; f++) {
        if (!isCategorical[f]) {
            continue;
        }
        String name = featureNames[f];
        HashSet<Object> seenTokens = new HashSet<>();
        long tokenCount = indexerModel.getNumberOfTokensOfColumn(categoricalCols[categoricalPos]);
        double[] totals = new double[Math.toIntExact(tokenCount)];

        // First pass: per-token sum over all labels.
        for (int l = 0; l < labelSize; l++) {
            SparseVector weights = featureInfo[l][f];
            int[] idx = weights.getIndices();
            double[] vals = weights.getValues();
            for (int k = 0; k < idx.length; k++) {
                totals[idx[k]] += vals[k];
            }
        }

        // Second pass: each label's weight divided by the per-token sum.
        HashMap<Long, String> tokens = indexToToken[categoricalPos];
        for (int l = 0; l < labelSize; l++) {
            SparseVector weights = featureInfo[l][f];
            int[] idx = weights.getIndices();
            double[] vals = weights.getValues();
            HashMap<Object, HashMap<Object, Double>> perFeature =
                byLabel.computeIfAbsent(labels[l], unused -> new HashMap<>());
            HashMap<Object, Double> proportions = new HashMap<>();
            for (int k = 0; k < idx.length; k++) {
                Object token = tokens.get((long) idx[k]);
                seenTokens.add(token);
                proportions.put(token, vals[k] / totals[idx[k]]);
            }
            perFeature.put(name, proportions);
        }
        categoricalPos++;
        cateFeatureValue.put(name, seenTokens);
    }

    // Re-key the result from label -> feature to feature -> label.
    HashMap<Object, HashMap<Object, HashMap<Object, Double>>> byFeature = new HashMap<>(featureSize);
    for (int f = 0; f < featureSize; f++) {
        if (!isCategorical[f]) {
            continue;
        }
        String name = featureNames[f];
        HashMap<Object, HashMap<Object, Double>> labelMap = new HashMap<>(labelSize);
        for (Object label : labels) {
            labelMap.put(label, byLabel.get(label).get(name));
        }
        byFeature.put(name, labelMap);
    }
    return byFeature;
}
Also used : MultiStringIndexerModelDataConverter(com.alibaba.alink.operator.common.dataproc.MultiStringIndexerModelDataConverter) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) SparseVector(com.alibaba.alink.common.linalg.SparseVector) MultiStringIndexerModelData(com.alibaba.alink.operator.common.dataproc.MultiStringIndexerModelData) HashSet(java.util.HashSet)

Example 42 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class FmRecommKernel method recommendUsersPerItem.

/**
 * Recommends users for the given item.
 *
 * <p>Looks up the item's feature vector in {@code itemFeatures} and delegates to {@code recommend}
 * against {@code userFeatures}. When {@code excludeKnown} is set, the entry of
 * {@code historyItemUsers} for this item is passed as the exclusion set; otherwise {@code null} is passed.
 *
 * @param itemId the id of the item to recommend users for
 * @return whatever {@code recommend} returns for this item
 */
@Override
public MTable recommendUsersPerItem(Object itemId) {
    SparseVector itemVector = itemFeatures.get(itemId);
    Set<Object> knownUsers = excludeKnown ? historyItemUsers.get(itemId) : null;
    return recommend(userColName, itemVector, knownUsers, userFeatures, true);
}
Also used : SparseVector(com.alibaba.alink.common.linalg.SparseVector)

Example 43 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class FmRecommKernel method rate.

/**
 * Scores a (user, item) pair.
 *
 * @param ids array whose element 0 is the user id and element 1 is the item id
 * @return the score of the combined user and item feature vectors, or {@code null} when either
 *         the user or the item has no feature vector
 */
@Override
Double rate(Object[] ids) {
    Object user = ids[0];
    Object item = ids[1];
    SparseVector userVector = userFeatures.get(user);
    SparseVector itemVector = itemFeatures.get(item);
    // Unknown user or item: nothing to score.
    if (userVector == null || itemVector == null) {
        return null;
    }
    return getScore(combine(userVector, itemVector));
}
Also used : SparseVector(com.alibaba.alink.common.linalg.SparseVector)

Example 44 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class FmRecommKernel method recommendItemsPerUser.

/**
 * Recommends items for the given user.
 *
 * <p>Looks up the user's feature vector in {@code userFeatures} and delegates to {@code recommend}
 * against {@code itemFeatures}. When {@code excludeKnown} is set, the entry of
 * {@code historyUserItems} for this user is passed as the exclusion set; otherwise {@code null} is passed.
 *
 * @param userId the id of the user to recommend items for
 * @return whatever {@code recommend} returns for this user
 */
@Override
public MTable recommendItemsPerUser(Object userId) {
    SparseVector userVector = userFeatures.get(userId);
    Set<Object> knownItems = excludeKnown ? historyUserItems.get(userId) : null;
    return recommend(itemColName, userVector, knownItems, itemFeatures, false);
}
Also used : SparseVector(com.alibaba.alink.common.linalg.SparseVector)

Example 45 with SparseVector

use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.

the class ItemCfRecommKernel method recommendUsers.

/**
 * Recommends users for an item from an item-similarity model.
 *
 * <p>For every index in the item's similarity vector, each (object, rate) pair found in
 * {@code model.userRateList} at that index contributes {@code rate * similarity} to the object's
 * accumulated score. Each accumulated score is divided by the size of the item's user set and
 * offered to a queue through {@code updateQueue} with the limit {@code topN}.
 *
 * @param itemId       the id of the item to recommend users for
 * @param model        the item-similarity model data
 * @param topN         the limit passed to {@code updateQueue}
 * @param excludeKnown whether to skip objects already contained in the item's user set
 * @param objectName   the object column name passed to {@code serializeQueue}
 * @param objType      the object type passed to {@code serializeQueue}
 * @return the serialized queue, or {@code null} when {@code itemId} is not in {@code model.itemMap}
 */
static MTable recommendUsers(Object itemId, ItemCfRecommData model, int topN, boolean excludeKnown, String objectName, TypeInformation<?> objType) {
    PriorityQueue<RecommItemTopKResult> topK = new PriorityQueue<>(Comparator.comparing(o -> o.similarity));
    Integer itemIndex = model.itemMap.get(itemId);
    if (itemIndex == null) {
        return null;
    }
    SparseVector similarities = model.itemSimilarities.get(itemIndex);
    Set<Object> knownUsers = model.itemUsers.get(itemIndex);
    int[] similarIdx = similarities.getIndices();
    double[] similarVal = similarities.getValues();

    // Accumulate rate * similarity per candidate object.
    Map<Object, Double> scores = new HashMap<>();
    for (int i = 0; i < similarIdx.length; i++) {
        if (model.userRateList[similarIdx[i]] == null) {
            continue;
        }
        for (Tuple2<Object, Double> rating : model.userRateList[similarIdx[i]]) {
            scores.merge(rating.f0, rating.f1 * similarVal[i], Double::sum);
        }
    }

    double head = 0;
    for (Map.Entry<Object, Double> candidate : scores.entrySet()) {
        boolean keep = !excludeKnown || !knownUsers.contains(candidate.getKey());
        if (keep) {
            head = updateQueue(topK, topN, candidate.getValue() / knownUsers.size(), candidate.getKey(), head);
        }
    }
    return serializeQueue(topK, KObjectUtil.SCORE_NAME, objectName, objType);
}
Also used : Arrays(java.util.Arrays) Tuple2(org.apache.flink.api.java.tuple.Tuple2) PriorityQueue(java.util.PriorityQueue) Set(java.util.Set) TableSchema(org.apache.flink.table.api.TableSchema) HashMap(java.util.HashMap) BaseItemsPerUserRecommParams(com.alibaba.alink.params.recommendation.BaseItemsPerUserRecommParams) BaseSimilarItemsRecommParams(com.alibaba.alink.params.recommendation.BaseSimilarItemsRecommParams) Serializable(java.io.Serializable) ArrayList(java.util.ArrayList) List(java.util.List) MTable(com.alibaba.alink.common.MTable) Map(java.util.Map) Row(org.apache.flink.types.Row) Queue(java.util.Queue) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) SparseVector(com.alibaba.alink.common.linalg.SparseVector) Comparator(java.util.Comparator) Params(org.apache.flink.ml.api.misc.param.Params) FlinkTypeConverter(com.alibaba.alink.operator.common.io.types.FlinkTypeConverter) Collections(java.util.Collections) HashMap(java.util.HashMap) PriorityQueue(java.util.PriorityQueue) SparseVector(com.alibaba.alink.common.linalg.SparseVector) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

SparseVector (com.alibaba.alink.common.linalg.SparseVector)125 Test (org.junit.Test)63 DenseVector (com.alibaba.alink.common.linalg.DenseVector)60 Params (org.apache.flink.ml.api.misc.param.Params)45 Row (org.apache.flink.types.Row)45 Vector (com.alibaba.alink.common.linalg.Vector)40 TableSchema (org.apache.flink.table.api.TableSchema)27 ArrayList (java.util.ArrayList)21 TypeInformation (org.apache.flink.api.common.typeinfo.TypeInformation)15 HashMap (java.util.HashMap)12 Tuple2 (org.apache.flink.api.java.tuple.Tuple2)12 List (java.util.List)11 DenseMatrix (com.alibaba.alink.common.linalg.DenseMatrix)10 MTable (com.alibaba.alink.common.MTable)7 BaseVectorSummary (com.alibaba.alink.operator.common.statistics.basicstatistic.BaseVectorSummary)6 CollectSinkStreamOp (com.alibaba.alink.operator.stream.sink.CollectSinkStreamOp)6 Map (java.util.Map)6 MemSourceBatchOp (com.alibaba.alink.operator.batch.source.MemSourceBatchOp)5 VectorAssemblerParams (com.alibaba.alink.params.dataproc.vector.VectorAssemblerParams)5 OneHotPredictParams (com.alibaba.alink.params.feature.OneHotPredictParams)5