Search in sources :

Example 6 with Feature

use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.

the class DataSetUtil method concatenateByColumn.

/**
 * Builds a new classification data set whose columns are the columns of
 * {@code dataSet1} followed by the columns of {@code dataSet2}.
 *
 * <p>The two inputs are assumed to describe the same data points with different
 * feature sets. The number of classes, the dense and missing-value flags, the
 * labels, the label translator and the id translator are all taken from
 * {@code dataSet1}; only feature values and features are read from {@code dataSet2}.
 *
 * @param dataSet1 data set providing the leading columns and all row-level metadata
 * @param dataSet2 data set providing the trailing columns
 * @return a new data set with {@code dataSet1.getNumFeatures() + dataSet2.getNumFeatures()} columns
 * @throws IllegalArgumentException if the two data sets do not have the same number of data points
 */
public static ClfDataSet concatenateByColumn(ClfDataSet dataSet1, ClfDataSet dataSet2) {
    int numDataPoints = dataSet1.getNumDataPoints();
    // Rows are matched by position, so a size mismatch would either write out of range
    // or silently leave part of the result empty; reject it up front.
    if (dataSet2.getNumDataPoints() != numDataPoints) {
        throw new IllegalArgumentException("cannot concatenate by column: dataSet1 has " + numDataPoints
                + " data points but dataSet2 has " + dataSet2.getNumDataPoints());
    }
    int numFeatures = dataSet1.getNumFeatures() + dataSet2.getNumFeatures();
    ClfDataSet dataSet = ClfDataSetBuilder.getBuilder()
            .numDataPoints(numDataPoints)
            .numFeatures(numFeatures)
            .numClasses(dataSet1.getNumClasses())
            .dense(dataSet1.isDense())
            .missingValue(dataSet1.hasMissingValue())
            .build();

    ClfDataSet[] sources = {dataSet1, dataSet2};

    // Copy feature values column by column; featureIndex keeps counting across both
    // sources so dataSet2's columns land right after dataSet1's.
    int featureIndex = 0;
    for (ClfDataSet source : sources) {
        int numSourceFeatures = source.getNumFeatures();
        for (int j = 0; j < numSourceFeatures; j++) {
            Vector column = source.getColumn(j);
            for (Vector.Element element : column.nonZeroes()) {
                dataSet.setFeatureValue(element.index(), featureIndex, element.get());
            }
            featureIndex += 1;
        }
    }

    int[] labels = dataSet1.getLabels();
    for (int i = 0; i < numDataPoints; i++) {
        dataSet.setLabel(i, labels[i]);
    }

    // Feature list follows the same order as the columns: dataSet1's features, then dataSet2's.
    // NOTE(review): the Feature instances themselves are passed on, not copied - confirm that
    // sharing them with the input data sets is acceptable.
    FeatureList featureList = new FeatureList();
    for (ClfDataSet source : sources) {
        for (Feature feature : source.getFeatureList().getAll()) {
            featureList.add(feature);
        }
    }
    dataSet.setFeatureList(featureList);
    dataSet.setLabelTranslator(dataSet1.getLabelTranslator());
    dataSet.setIdTranslator(dataSet1.getIdTranslator());
    return dataSet;
}
Also used : FeatureList(edu.neu.ccs.pyramid.feature.FeatureList) Vector(org.apache.mahout.math.Vector) Feature(edu.neu.ccs.pyramid.feature.Feature)

Example 7 with Feature

use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.

the class DataSetUtil method sampleFeatures.

/**
 * Creates a regression data set that contains only the selected columns of {@code dataSet}.
 *
 * <p>Column {@code j} of the result holds the values of column {@code columnsToKeep.get(j)}
 * of the input. Labels and the id translator are carried over from the input, and the
 * result's feature list contains the corresponding features, re-indexed from 0.
 *
 * <p>NOTE(review): {@code setIndex} is called on the Feature instances obtained from the
 * input's feature list, so the input's features appear to be re-indexed as a side effect -
 * confirm whether callers rely on the input staying untouched.
 *
 * @param dataSet       source data set
 * @param columnsToKeep indices of the source columns to keep, in the desired output order
 * @return the trimmed data set
 */
public static RegDataSet sampleFeatures(RegDataSet dataSet, List<Integer> columnsToKeep) {
    RegDataSet trimmed = RegDataSetBuilder.getBuilder()
            .numDataPoints(dataSet.getNumDataPoints())
            .numFeatures(columnsToKeep.size())
            .missingValue(dataSet.hasMissingValue())
            .dense(dataSet.isDense())
            .build();

    // copy the selected columns, one output column per kept source column
    for (int newColumn = 0; newColumn < trimmed.getNumFeatures(); newColumn++) {
        int sourceColumn = columnsToKeep.get(newColumn);
        for (Vector.Element entry : dataSet.getColumn(sourceColumn).nonZeroes()) {
            trimmed.setFeatureValue(entry.index(), newColumn, entry.get());
        }
    }

    // copy labels
    double[] sourceLabels = dataSet.getLabels();
    for (int row = 0; row < trimmed.getNumDataPoints(); row++) {
        trimmed.setLabel(row, sourceLabels[row]);
    }

    trimmed.setIdTranslator(dataSet.getIdTranslator());

    // pick the kept features in output order and renumber them to match their new columns
    List<Feature> allFeatures = dataSet.getFeatureList().getAll();
    List<Feature> keptFeatures = columnsToKeep.stream()
            .map(column -> allFeatures.get(column))
            .collect(Collectors.toList());
    int nextIndex = 0;
    for (Feature kept : keptFeatures) {
        kept.setIndex(nextIndex);
        nextIndex++;
    }
    trimmed.setFeatureList(new FeatureList(keptFeatures));
    return trimmed;
}
Also used : FeatureList(edu.neu.ccs.pyramid.feature.FeatureList) Vector(org.apache.mahout.math.Vector) Feature(edu.neu.ccs.pyramid.feature.Feature)

Example 8 with Feature

use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.

the class LSBoostInspector method topFeatures.

/**
 * Ranks features by their total importance over the regression trees of an LSBoost model.
 *
 * <p>Only regressors of ensemble 0 that are {@link RegressionTree}s are considered. For each
 * tree the per-feature importance reported by {@code RegTreeInspector.featureImportance} is
 * added up, and the features are returned in order of decreasing total.
 *
 * @param boosting the boosted model to inspect
 * @return a {@code TopFeatures} holding the features sorted by descending total contribution
 */
public static TopFeatures topFeatures(LSBoost boosting) {
    Map<Feature, Double> totalContributions = new HashMap<>();
    List<Regressor> regressors = boosting.getEnsemble(0).getRegressors();
    for (Regressor regressor : regressors) {
        // non-tree regressors carry no feature importance and are skipped
        if (!(regressor instanceof RegressionTree)) {
            continue;
        }
        Map<Feature, Double> contributions = RegTreeInspector.featureImportance((RegressionTree) regressor);
        for (Map.Entry<Feature, Double> entry : contributions.entrySet()) {
            totalContributions.merge(entry.getKey(), entry.getValue(), Double::sum);
        }
    }
    Comparator<Map.Entry<Feature, Double>> byContribution = Map.Entry.comparingByValue();
    List<Feature> ranked = totalContributions.entrySet().stream()
            .sorted(byContribution.reversed())
            .map(Map.Entry::getKey)
            .collect(Collectors.toList());
    TopFeatures topFeatures = new TopFeatures();
    topFeatures.setTopFeatures(ranked);
    return topFeatures;
}
Also used : RegressionTree(edu.neu.ccs.pyramid.regression.regression_tree.RegressionTree) List(java.util.List) Feature(edu.neu.ccs.pyramid.feature.Feature) LabelTranslator(edu.neu.ccs.pyramid.dataset.LabelTranslator) Regressor(edu.neu.ccs.pyramid.regression.Regressor) Map(java.util.Map) TopFeatures(edu.neu.ccs.pyramid.feature.TopFeatures) HashMap(java.util.HashMap) RegTreeInspector(edu.neu.ccs.pyramid.regression.regression_tree.RegTreeInspector) LKBoost(edu.neu.ccs.pyramid.classification.lkboost.LKBoost) Comparator(java.util.Comparator) Collectors(java.util.stream.Collectors) HashMap(java.util.HashMap) RegressionTree(edu.neu.ccs.pyramid.regression.regression_tree.RegressionTree) TopFeatures(edu.neu.ccs.pyramid.feature.TopFeatures) Feature(edu.neu.ccs.pyramid.feature.Feature) Regressor(edu.neu.ccs.pyramid.regression.Regressor) Map(java.util.Map) HashMap(java.util.HashMap)

Example 9 with Feature

use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.

the class LKBInspector method topFeatures.

/**
 * Ranks features for one class by their total importance across several LKBoost models.
 *
 * <p>For every model, the regressors of the ensemble for {@code classIndex} that are
 * {@link RegressionTree}s contribute their per-feature importance; the importances are
 * summed per feature over all trees of all models. The class name is looked up through
 * the label translator of the first model in the list.
 *
 * @param lkBoosts ensemble of lktbs
 * @param classIndex index of the class whose ensembles are inspected
 * @return a {@code TopFeatures} with the features sorted by descending total contribution,
 *         together with the class index and its external class name
 */
public static TopFeatures topFeatures(List<LKBoost> lkBoosts, int classIndex) {
    Map<Feature, Double> importanceSums = new HashMap<>();
    for (LKBoost model : lkBoosts) {
        for (Regressor candidate : model.getEnsemble(classIndex).getRegressors()) {
            // only trees are inspected; any other kind of regressor is ignored
            if (!(candidate instanceof RegressionTree)) {
                continue;
            }
            RegTreeInspector.featureImportance((RegressionTree) candidate)
                    .forEach((feature, importance) -> importanceSums.merge(feature, importance, Double::sum));
        }
    }

    List<Feature> ranked = importanceSums.entrySet().stream()
            .sorted(Comparator.comparing((Map.Entry<Feature, Double> scored) -> scored.getValue()).reversed())
            .map(scored -> scored.getKey())
            .collect(Collectors.toList());

    TopFeatures result = new TopFeatures();
    result.setTopFeatures(ranked);
    result.setClassIndex(classIndex);
    result.setClassName(lkBoosts.get(0).getLabelTranslator().toExtLabel(classIndex));
    return result;
}
Also used : edu.neu.ccs.pyramid.regression(edu.neu.ccs.pyramid.regression) ClassProbability(edu.neu.ccs.pyramid.classification.ClassProbability) java.util(java.util) IdTranslator(edu.neu.ccs.pyramid.dataset.IdTranslator) ClfDataSet(edu.neu.ccs.pyramid.dataset.ClfDataSet) RegTreeInspector(edu.neu.ccs.pyramid.regression.regression_tree.RegTreeInspector) Collectors(java.util.stream.Collectors) RegressionTree(edu.neu.ccs.pyramid.regression.regression_tree.RegressionTree) PredictionAnalysis(edu.neu.ccs.pyramid.classification.PredictionAnalysis) TreeRule(edu.neu.ccs.pyramid.regression.regression_tree.TreeRule) Feature(edu.neu.ccs.pyramid.feature.Feature) LabelTranslator(edu.neu.ccs.pyramid.dataset.LabelTranslator) Vector(org.apache.mahout.math.Vector) TopFeatures(edu.neu.ccs.pyramid.feature.TopFeatures) RegressionTree(edu.neu.ccs.pyramid.regression.regression_tree.RegressionTree) TopFeatures(edu.neu.ccs.pyramid.feature.TopFeatures) Feature(edu.neu.ccs.pyramid.feature.Feature) LabelTranslator(edu.neu.ccs.pyramid.dataset.LabelTranslator)

Example 10 with Feature

use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.

the class LKBInspector method topFeatures.

//todo: consider newton step and learning rate
/**
 * Ranks features for one class by their total importance in a single LKBoost model.
 *
 * <p>Only trees are considered: regressors of the ensemble for {@code classIndex} that are
 * not {@link RegressionTree}s are skipped. The per-feature importance of every tree is
 * summed, and features are ordered by decreasing sum.
 *
 * @param boosting the boosted model to inspect
 * @param classIndex index of the class whose ensemble is inspected
 * @return a {@code TopFeatures} with the ranked features, the class index and the external
 *         class name obtained from the model's label translator
 */
public static TopFeatures topFeatures(LKBoost boosting, int classIndex) {
    List<RegressionTree> treesForClass = boosting.getEnsemble(classIndex).getRegressors().stream()
            .filter(RegressionTree.class::isInstance)
            .map(RegressionTree.class::cast)
            .collect(Collectors.toList());

    Map<Feature, Double> summedImportance = new HashMap<>();
    for (RegressionTree tree : treesForClass) {
        for (Map.Entry<Feature, Double> perTree : RegTreeInspector.featureImportance(tree).entrySet()) {
            summedImportance.merge(perTree.getKey(), perTree.getValue(), Double::sum);
        }
    }

    // highest accumulated importance first
    List<Feature> ranked = summedImportance.entrySet().stream()
            .sorted(Map.Entry.<Feature, Double>comparingByValue().reversed())
            .map(Map.Entry::getKey)
            .collect(Collectors.toList());

    TopFeatures result = new TopFeatures();
    result.setTopFeatures(ranked);
    result.setClassIndex(classIndex);
    LabelTranslator translator = boosting.getLabelTranslator();
    result.setClassName(translator.toExtLabel(classIndex));
    return result;
}
Also used : edu.neu.ccs.pyramid.regression(edu.neu.ccs.pyramid.regression) ClassProbability(edu.neu.ccs.pyramid.classification.ClassProbability) java.util(java.util) IdTranslator(edu.neu.ccs.pyramid.dataset.IdTranslator) ClfDataSet(edu.neu.ccs.pyramid.dataset.ClfDataSet) RegTreeInspector(edu.neu.ccs.pyramid.regression.regression_tree.RegTreeInspector) Collectors(java.util.stream.Collectors) RegressionTree(edu.neu.ccs.pyramid.regression.regression_tree.RegressionTree) PredictionAnalysis(edu.neu.ccs.pyramid.classification.PredictionAnalysis) TreeRule(edu.neu.ccs.pyramid.regression.regression_tree.TreeRule) Feature(edu.neu.ccs.pyramid.feature.Feature) LabelTranslator(edu.neu.ccs.pyramid.dataset.LabelTranslator) Vector(org.apache.mahout.math.Vector) TopFeatures(edu.neu.ccs.pyramid.feature.TopFeatures) RegressionTree(edu.neu.ccs.pyramid.regression.regression_tree.RegressionTree) TopFeatures(edu.neu.ccs.pyramid.feature.TopFeatures) Feature(edu.neu.ccs.pyramid.feature.Feature) LabelTranslator(edu.neu.ccs.pyramid.dataset.LabelTranslator)

Aggregations

Feature (edu.neu.ccs.pyramid.feature.Feature)23 Vector (org.apache.mahout.math.Vector)14 FeatureList (edu.neu.ccs.pyramid.feature.FeatureList)13 Collectors (java.util.stream.Collectors)9 TopFeatures (edu.neu.ccs.pyramid.feature.TopFeatures)8 RegTreeInspector (edu.neu.ccs.pyramid.regression.regression_tree.RegTreeInspector)6 RegressionTree (edu.neu.ccs.pyramid.regression.regression_tree.RegressionTree)6 java.util (java.util)6 MultiLabelPredictionAnalysis (edu.neu.ccs.pyramid.multilabel_classification.MultiLabelPredictionAnalysis)5 edu.neu.ccs.pyramid.regression (edu.neu.ccs.pyramid.regression)5 TreeRule (edu.neu.ccs.pyramid.regression.regression_tree.TreeRule)5 ClassProbability (edu.neu.ccs.pyramid.classification.ClassProbability)4 edu.neu.ccs.pyramid.dataset (edu.neu.ccs.pyramid.dataset)4 LabelTranslator (edu.neu.ccs.pyramid.dataset.LabelTranslator)4 Pair (edu.neu.ccs.pyramid.util.Pair)4 IntStream (java.util.stream.IntStream)4 PredictionAnalysis (edu.neu.ccs.pyramid.classification.PredictionAnalysis)3 IdTranslator (edu.neu.ccs.pyramid.dataset.IdTranslator)3 IMLGradientBoosting (edu.neu.ccs.pyramid.multilabel_classification.imlgb.IMLGradientBoosting)3 ArrayList (java.util.ArrayList)3