use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.
the class NaiveBayesModelInfo method getCategoryFeatureInfo.
/**
 * Builds, for every categorical feature, the share that each label holds of each feature value.
 *
 * <p>The returned map is keyed by feature name, then by label, then by feature value. The
 * innermost number is the entry stored in {@code featureInfo} for that label and value,
 * divided by the sum of the same entry over all labels.
 *
 * <p>Side effect: the set of values seen for each categorical feature is written to
 * {@code cateFeatureValue} under the feature's name.
 *
 * @return the nested proportion map; an empty map when the loaded string indexer model has
 *         no meta or its meta does not contain the selected columns.
 */
public HashMap<Object, HashMap<Object, HashMap<Object, Double>>> getCategoryFeatureInfo() {
    MultiStringIndexerModelData model = new MultiStringIndexerModelDataConverter().load(stringIndexerModelSerialized);
    boolean hasSelectedCols = model.meta != null && model.meta.contains(HasSelectedCols.SELECTED_COLS);
    if (!hasSelectedCols) {
        return new HashMap<>(0);
    }

    String[] cateCols = model.meta.get(HasSelectedCols.SELECTED_COLS);

    // One index -> token lookup table per categorical column.
    HashMap<Long, String>[] indexToToken = new HashMap[cateCols.length];
    for (int col = 0; col < indexToToken.length; col++) {
        int capacity = (int) model.getNumberOfTokensOfColumn(cateCols[col]);
        indexToToken[col] = new HashMap<>(capacity);
    }
    for (Tuple3<Integer, String, Long> entry : model.tokenAndIndex) {
        indexToToken[entry.f0].put(entry.f2, entry.f1);
    }

    // Phase 1: label -> feature name -> feature value -> proportion.
    HashMap<Object, HashMap<Object, HashMap<Object, Double>>> perLabel = new HashMap<>(labelSize);
    int catePos = 0;
    for (int f = 0; f < featureSize; f++) {
        if (!isCategorical[f]) {
            continue;
        }
        String name = featureNames[f];
        double[] totals = new double[Math.toIntExact(model.getNumberOfTokensOfColumn(cateCols[catePos]))];
        HashMap<Long, String> tokens = indexToToken[catePos];

        // Sum every token's entry across all labels; this is the denominator below.
        for (int l = 0; l < labelSize; l++) {
            SparseVector vec = featureInfo[l][f];
            int[] idx = vec.getIndices();
            double[] vals = vec.getValues();
            for (int k = 0; k < idx.length; k++) {
                totals[idx[k]] += vals[k];
            }
        }

        HashSet<Object> seenValues = new HashSet<>();
        for (int l = 0; l < labelSize; l++) {
            SparseVector vec = featureInfo[l][f];
            int[] idx = vec.getIndices();
            double[] vals = vec.getValues();

            HashMap<Object, HashMap<Object, Double>> byFeature = perLabel.get(labels[l]);
            if (byFeature == null) {
                byFeature = new HashMap<>();
                perLabel.put(labels[l], byFeature);
            }

            HashMap<Object, Double> proportions = new HashMap<>();
            for (int k = 0; k < idx.length; k++) {
                Object token = tokens.get((long) idx[k]);
                seenValues.add(token);
                proportions.put(token, vals[k] / totals[idx[k]]);
            }
            byFeature.put(name, proportions);
        }

        catePos++;
        cateFeatureValue.put(name, seenValues);
    }

    // Phase 2: re-key the result as feature name -> label -> feature value -> proportion.
    HashMap<Object, HashMap<Object, HashMap<Object, Double>>> result = new HashMap<>(featureSize);
    for (int f = 0; f < featureSize; f++) {
        if (!isCategorical[f]) {
            continue;
        }
        String name = featureNames[f];
        HashMap<Object, HashMap<Object, Double>> byLabel = new HashMap<>(labelSize);
        for (Object label : labels) {
            byLabel.put(label, perLabel.get(label).get(name));
        }
        result.put(name, byLabel);
    }
    return result;
}
use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.
the class FmRecommKernel method recommendUsersPerItem.
/**
 * Recommends users for the given item.
 *
 * <p>Looks up the item's feature vector in {@code itemFeatures} and delegates to
 * {@code recommend} against {@code userFeatures}. When {@code excludeKnown} is set, the
 * entry of {@code historyItemUsers} for this item is passed as the exclusion set;
 * otherwise {@code null} is passed.
 *
 * @param itemId identifier of the item to recommend users for.
 * @return whatever {@code recommend} produces for this item.
 */
@Override
public MTable recommendUsersPerItem(Object itemId) {
    SparseVector itemVector = itemFeatures.get(itemId);
    Set<Object> knownUsers = excludeKnown ? historyItemUsers.get(itemId) : null;
    return recommend(userColName, itemVector, knownUsers, userFeatures, true);
}
use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.
the class FmRecommKernel method rate.
/**
 * Scores a (user, item) pair.
 *
 * @param ids array whose first element is the user id and second element is the item id.
 * @return the score of the combined user and item feature vectors, or {@code null} when
 *         either id has no feature vector (unknown user or item).
 */
@Override
Double rate(Object[] ids) {
    SparseVector userVector = userFeatures.get(ids[0]);
    SparseVector itemVector = itemFeatures.get(ids[1]);
    if (userVector == null || itemVector == null) {
        /* unknown user or item */
        return null;
    }
    return getScore(combine(userVector, itemVector));
}
use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.
the class FmRecommKernel method recommendItemsPerUser.
/**
 * Recommends items for the given user.
 *
 * <p>Looks up the user's feature vector in {@code userFeatures} and delegates to
 * {@code recommend} against {@code itemFeatures}. When {@code excludeKnown} is set, the
 * entry of {@code historyUserItems} for this user is passed as the exclusion set;
 * otherwise {@code null} is passed.
 *
 * @param userId identifier of the user to recommend items for.
 * @return whatever {@code recommend} produces for this user.
 */
@Override
public MTable recommendItemsPerUser(Object userId) {
    SparseVector userVector = userFeatures.get(userId);
    Set<Object> knownItems = excludeKnown ? historyUserItems.get(userId) : null;
    return recommend(itemColName, userVector, knownItems, itemFeatures, false);
}
use of com.alibaba.alink.common.linalg.SparseVector in project Alink by alibaba.
the class ItemCfRecommKernel method recommendUsers.
/**
 * Recommends users for an item from item-similarity data.
 *
 * <p>Each entry of the item's similarity vector indexes {@code model.userRateList}; every
 * (user, rate) pair found there adds {@code rate * similarity} to that user's accumulated
 * score. Each accumulated score is divided by the size of the item's user set and offered
 * to a queue via {@code updateQueue} with {@code topN}.
 *
 * @param itemId       identifier of the item to recommend users for.
 * @param model        model data holding the item index map, similarities, item users and
 *                     per-item user rates.
 * @param topN         passed through to {@code updateQueue}.
 * @param excludeKnown when true, users contained in the item's user set are skipped.
 * @param objectName   passed through to {@code serializeQueue}.
 * @param objType      passed through to {@code serializeQueue}.
 * @return the serialized queue, or {@code null} when {@code itemId} is not in
 *         {@code model.itemMap}.
 */
static MTable recommendUsers(Object itemId, ItemCfRecommData model, int topN, boolean excludeKnown, String objectName, TypeInformation<?> objType) {
    Integer itemIndex = model.itemMap.get(itemId);
    if (itemIndex == null) {
        return null;
    }

    SparseVector similarity = model.itemSimilarities.get(itemIndex);
    Set<Object> knownUsers = model.itemUsers.get(itemIndex);
    int[] neighbours = similarity.getIndices();
    double[] weights = similarity.getValues();

    // Accumulate a similarity-weighted rate per candidate user.
    Map<Object, Double> scores = new HashMap<>();
    for (int i = 0; i < neighbours.length; i++) {
        if (model.userRateList[neighbours[i]] == null) {
            continue;
        }
        for (Tuple2<Object, Double> userRate : model.userRateList[neighbours[i]]) {
            scores.merge(userRate.f0, userRate.f1 * weights[i], Double::sum);
        }
    }

    PriorityQueue<RecommItemTopKResult> queue = new PriorityQueue<>(Comparator.comparing(o -> o.similarity));
    double head = 0;
    for (Map.Entry<Object, Double> candidate : scores.entrySet()) {
        // The user set is only consulted when known users must be excluded.
        if (!excludeKnown || !knownUsers.contains(candidate.getKey())) {
            head = updateQueue(queue, topN, candidate.getValue() / knownUsers.size(), candidate.getKey(), head);
        }
    }
    return serializeQueue(queue, KObjectUtil.SCORE_NAME, objectName, objType);
}
Aggregations