Search in sources :

Example 16 with Feature

use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.

the class DataSetUtil method sampleFeatures.

/**
 * Builds a new data set that contains only the selected feature columns.
 * The result uses a dense or sparse representation to match the input, and
 * receives the input's labels, label translator and id translator.
 *
 * NOTE(review): the kept Feature objects are taken from the input's feature
 * list and re-indexed through setIndex; unless getAll() hands out copies this
 * also changes the indices visible through the input data set -- confirm that
 * this is intended.
 *
 * @param dataSet the data set to take the columns from
 * @param columnsToKeep column indices of dataSet to keep; the position of an
 *                      index in this list becomes its column index in the result
 * @return the newly created data set restricted to the selected columns
 */
public static ClfDataSet sampleFeatures(ClfDataSet dataSet, List<Integer> columnsToKeep) {
    final int classCount = dataSet.getNumClasses();
    final boolean hasMissing = dataSet.hasMissingValue();

    // the result uses the same storage flavour (dense or sparse) as the input
    final ClfDataSet trimmed = dataSet.isDense()
            ? new DenseClfDataSet(dataSet.getNumDataPoints(), columnsToKeep.size(), hasMissing, classCount)
            : new SparseClfDataSet(dataSet.getNumDataPoints(), columnsToKeep.size(), hasMissing, classCount);

    // copy the non-zero entries of every kept column into its new position
    final int keptCount = trimmed.getNumFeatures();
    for (int newColumn = 0; newColumn < keptCount; newColumn++) {
        int sourceColumn = columnsToKeep.get(newColumn);
        Vector columnValues = dataSet.getColumn(sourceColumn);
        for (Vector.Element entry : columnValues.nonZeroes()) {
            trimmed.setFeatureValue(entry.index(), newColumn, entry.get());
        }
    }

    // labels are carried over row by row
    final int[] sourceLabels = dataSet.getLabels();
    final int rowCount = trimmed.getNumDataPoints();
    for (int row = 0; row < rowCount; row++) {
        trimmed.setLabel(row, sourceLabels[row]);
    }

    trimmed.setLabelTranslator(dataSet.getLabelTranslator());
    trimmed.setIdTranslator(dataSet.getIdTranslator());

    // pick the kept features first, then renumber them to their new positions
    List<Feature> sourceFeatures = dataSet.getFeatureList().getAll();
    List<Feature> keptFeatures = columnsToKeep.stream()
            .map(column -> sourceFeatures.get(column))
            .collect(Collectors.toList());
    int position = 0;
    for (Feature kept : keptFeatures) {
        kept.setIndex(position);
        position++;
    }
    trimmed.setFeatureList(new FeatureList(keptFeatures));
    return trimmed;
}
Also used : FeatureList(edu.neu.ccs.pyramid.feature.FeatureList) Vector(org.apache.mahout.math.Vector) Feature(edu.neu.ccs.pyramid.feature.Feature)

Example 17 with Feature

use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.

the class DataSetUtil method concatenateByColumn.

/**
 * Creates a new multi-label data set whose columns are the columns of
 * dataSet1 followed by the columns of dataSet2.
 * The number of data points, number of classes, density, missing-value flag,
 * labels, label translator and id translator are all taken from dataSet1.
 *
 * NOTE(review): no check is made that dataSet2 has the same number of data
 * points as dataSet1 -- callers presumably guarantee this; confirm.
 *
 * @param dataSet1 supplies the leading columns and all row-level metadata
 * @param dataSet2 supplies the trailing columns
 * @return the newly built, column-wise concatenated data set
 */
public static MultiLabelClfDataSet concatenateByColumn(MultiLabelClfDataSet dataSet1, MultiLabelClfDataSet dataSet2) {
    final int rowCount = dataSet1.getNumDataPoints();
    final int leftColumns = dataSet1.getNumFeatures();
    final int rightColumns = dataSet2.getNumFeatures();

    MultiLabelClfDataSet combined = MLClfDataSetBuilder.getBuilder()
            .numDataPoints(rowCount)
            .numFeatures(leftColumns + rightColumns)
            .numClasses(dataSet1.getNumClasses())
            .density(dataSet1.density())
            .missingValue(dataSet1.hasMissingValue())
            .build();

    // columns of the first data set keep their positions
    for (int column = 0; column < leftColumns; column++) {
        for (Vector.Element entry : dataSet1.getColumn(column).nonZeroes()) {
            combined.setFeatureValue(entry.index(), column, entry.get());
        }
    }

    // columns of the second data set are shifted behind those of the first
    for (int column = 0; column < rightColumns; column++) {
        int target = leftColumns + column;
        for (Vector.Element entry : dataSet2.getColumn(column).nonZeroes()) {
            combined.setFeatureValue(entry.index(), target, entry.get());
        }
    }

    MultiLabel[] sourceLabels = dataSet1.getMultiLabels();
    for (int row = 0; row < rowCount; row++) {
        combined.setLabels(row, sourceLabels[row]);
    }

    // feature descriptors are appended in the same order as the columns
    FeatureList mergedFeatures = new FeatureList();
    for (Feature left : dataSet1.getFeatureList().getAll()) {
        mergedFeatures.add(left);
    }
    for (Feature right : dataSet2.getFeatureList().getAll()) {
        mergedFeatures.add(right);
    }
    combined.setFeatureList(mergedFeatures);

    combined.setLabelTranslator(dataSet1.getLabelTranslator());
    combined.setIdTranslator(dataSet1.getIdTranslator());
    return combined;
}
Also used : FeatureList(edu.neu.ccs.pyramid.feature.FeatureList) Vector(org.apache.mahout.math.Vector) Feature(edu.neu.ccs.pyramid.feature.Feature)

Example 18 with Feature

use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.

the class DataSetUtil method sampleFeatures.

/**
 * Builds a new multi-label data set that contains only the selected feature
 * columns. The result uses a dense or sparse representation to match the
 * input, and receives the input's labels, label translator and id translator.
 *
 * NOTE(review): the kept Feature objects are taken from the input's feature
 * list and re-indexed through setIndex; unless getAll() hands out copies this
 * also changes the indices visible through the input data set -- confirm that
 * this is intended.
 *
 * @param dataSet the data set to take the columns from
 * @param columnsToKeep column indices of dataSet to keep; the position of an
 *                      index in this list becomes its column index in the result
 * @return the newly created data set restricted to the selected columns
 */
public static MultiLabelClfDataSet sampleFeatures(MultiLabelClfDataSet dataSet, List<Integer> columnsToKeep) {
    final boolean hasMissing = dataSet.hasMissingValue();
    final int classCount = dataSet.getNumClasses();

    // the result uses the same storage flavour (dense or sparse) as the input
    final MultiLabelClfDataSet trimmed = dataSet.isDense()
            ? new DenseMLClfDataSet(dataSet.getNumDataPoints(), columnsToKeep.size(), hasMissing, classCount)
            : new SparseMLClfDataSet(dataSet.getNumDataPoints(), columnsToKeep.size(), hasMissing, classCount);

    // copy the non-zero entries of every kept column into its new position
    final int keptCount = trimmed.getNumFeatures();
    for (int newColumn = 0; newColumn < keptCount; newColumn++) {
        int sourceColumn = columnsToKeep.get(newColumn);
        Vector columnValues = dataSet.getColumn(sourceColumn);
        for (Vector.Element entry : columnValues.nonZeroes()) {
            trimmed.setFeatureValue(entry.index(), newColumn, entry.get());
        }
    }

    // every row receives the matched labels of the corresponding source row
    final MultiLabel[] sourceLabels = dataSet.getMultiLabels();
    final int rowCount = trimmed.getNumDataPoints();
    for (int row = 0; row < rowCount; row++) {
        trimmed.addLabels(row, sourceLabels[row].getMatchedLabels());
    }

    // translators are passed along as-is
    trimmed.setLabelTranslator(dataSet.getLabelTranslator());
    trimmed.setIdTranslator(dataSet.getIdTranslator());

    // pick the kept features first, then renumber them to their new positions
    List<Feature> sourceFeatures = dataSet.getFeatureList().getAll();
    List<Feature> keptFeatures = columnsToKeep.stream()
            .map(column -> sourceFeatures.get(column))
            .collect(Collectors.toList());
    int position = 0;
    for (Feature kept : keptFeatures) {
        kept.setIndex(position);
        position++;
    }
    trimmed.setFeatureList(new FeatureList(keptFeatures));
    return trimmed;
}
Also used : Feature(edu.neu.ccs.pyramid.feature.Feature) FeatureList(edu.neu.ccs.pyramid.feature.FeatureList) Vector(org.apache.mahout.math.Vector)

Example 19 with Feature

use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.

the class HMLGBInspector method topFeatures.

//todo: consider newton step and learning rate
/**
 * Ranks features for one class by summing, over all regression trees of that
 * class, the per-feature importance reported by RegTreeInspector, and keeps
 * the highest-ranked ones. Regressors that are not regression trees are ignored.
 *
 * @param boosting the boosting model to inspect
 * @param classIndex index of the class whose regressors are inspected
 * @param limit maximum number of features to return
 * @return a TopFeatures object holding the class index, the external class
 *         name and at most limit features in order of decreasing total importance
 */
public static TopFeatures topFeatures(HMLGradientBoosting boosting, int classIndex, int limit) {
    Map<Feature, Double> importanceTotals = new HashMap<>();

    for (Regressor candidate : boosting.getRegressors(classIndex)) {
        // only trees contribute to the ranking
        if (!(candidate instanceof RegressionTree)) {
            continue;
        }
        Map<Feature, Double> perTree = RegTreeInspector.featureImportance((RegressionTree) candidate);
        perTree.forEach((feature, importance) ->
                importanceTotals.put(feature, importanceTotals.getOrDefault(feature, 0.0) + importance));
    }

    Comparator<Map.Entry<Feature, Double>> byImportance = Map.Entry.comparingByValue();
    List<Feature> ranked = importanceTotals.entrySet().stream()
            .sorted(byImportance.reversed())
            .limit(limit)
            .map(Map.Entry::getKey)
            .collect(Collectors.toList());

    TopFeatures result = new TopFeatures();
    result.setTopFeatures(ranked);
    result.setClassIndex(classIndex);
    result.setClassName(boosting.getLabelTranslator().toExtLabel(classIndex));
    return result;
}
Also used : MultiLabelPredictionAnalysis(edu.neu.ccs.pyramid.multilabel_classification.MultiLabelPredictionAnalysis) edu.neu.ccs.pyramid.regression(edu.neu.ccs.pyramid.regression) IntStream(java.util.stream.IntStream) ClassProbability(edu.neu.ccs.pyramid.classification.ClassProbability) java.util(java.util) RegTreeInspector(edu.neu.ccs.pyramid.regression.regression_tree.RegTreeInspector) Collectors(java.util.stream.Collectors) IMLGradientBoosting(edu.neu.ccs.pyramid.multilabel_classification.imlgb.IMLGradientBoosting) RegressionTree(edu.neu.ccs.pyramid.regression.regression_tree.RegressionTree) TreeRule(edu.neu.ccs.pyramid.regression.regression_tree.TreeRule) Feature(edu.neu.ccs.pyramid.feature.Feature) edu.neu.ccs.pyramid.dataset(edu.neu.ccs.pyramid.dataset) Vector(org.apache.mahout.math.Vector) TopFeatures(edu.neu.ccs.pyramid.feature.TopFeatures) Pair(edu.neu.ccs.pyramid.util.Pair) RegressionTree(edu.neu.ccs.pyramid.regression.regression_tree.RegressionTree) TopFeatures(edu.neu.ccs.pyramid.feature.TopFeatures) Feature(edu.neu.ccs.pyramid.feature.Feature)

Example 20 with Feature

use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.

the class MLLogisticRegressionInspector method topFeatures.

/**
 * Ranks the features of one class by their logistic-regression weight
 * (bias excluded) and keeps the highest-weighted ones. Only features whose
 * weight is strictly greater than zero are considered.
 *
 * @param logisticRegression the model to inspect
 * @param classIndex index of the class whose weights are inspected
 * @param limit maximum number of features to return
 * @return a TopFeatures object holding the class index, the external class
 *         name and at most limit features in order of decreasing weight
 */
public static TopFeatures topFeatures(MLLogisticRegression logisticRegression, int classIndex, int limit) {
    FeatureList modelFeatures = logisticRegression.getFeatureList();
    Vector classWeights = logisticRegression.getWeights().getWeightsWithoutBiasForClass(classIndex);
    Comparator<FeatureUtility> byUtility = Comparator.comparing(FeatureUtility::getUtility);

    List<Feature> ranked = IntStream.range(0, classWeights.size())
            // pair every feature with its weight for this class
            .mapToObj(position -> new FeatureUtility(modelFeatures.get(position)).setUtility(classWeights.get(position)))
            .filter(scored -> scored.getUtility() > 0)
            .sorted(byUtility.reversed())
            .limit(limit)
            .map(FeatureUtility::getFeature)
            .collect(Collectors.toList());

    TopFeatures result = new TopFeatures();
    result.setTopFeatures(ranked);
    result.setClassIndex(classIndex);
    result.setClassName(logisticRegression.getLabelTranslator().toExtLabel(classIndex));
    return result;
}
Also used : FeatureUtility(edu.neu.ccs.pyramid.feature.FeatureUtility) FeatureUtility(edu.neu.ccs.pyramid.feature.FeatureUtility) MultiLabelPredictionAnalysis(edu.neu.ccs.pyramid.multilabel_classification.MultiLabelPredictionAnalysis) IntStream(java.util.stream.IntStream) ClassProbability(edu.neu.ccs.pyramid.classification.ClassProbability) ConstantRule(edu.neu.ccs.pyramid.regression.ConstantRule) FeatureList(edu.neu.ccs.pyramid.feature.FeatureList) Rule(edu.neu.ccs.pyramid.regression.Rule) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) IMLGradientBoosting(edu.neu.ccs.pyramid.multilabel_classification.imlgb.IMLGradientBoosting) PredictionAnalysis(edu.neu.ccs.pyramid.classification.PredictionAnalysis) List(java.util.List) Feature(edu.neu.ccs.pyramid.feature.Feature) LogisticRegression(edu.neu.ccs.pyramid.classification.logistic_regression.LogisticRegression) edu.neu.ccs.pyramid.dataset(edu.neu.ccs.pyramid.dataset) Vector(org.apache.mahout.math.Vector) TopFeatures(edu.neu.ccs.pyramid.feature.TopFeatures) LinearRule(edu.neu.ccs.pyramid.regression.LinearRule) Comparator(java.util.Comparator) ClassScoreCalculation(edu.neu.ccs.pyramid.regression.ClassScoreCalculation) TopFeatures(edu.neu.ccs.pyramid.feature.TopFeatures) FeatureList(edu.neu.ccs.pyramid.feature.FeatureList) Vector(org.apache.mahout.math.Vector) Feature(edu.neu.ccs.pyramid.feature.Feature)

Aggregations

Feature (edu.neu.ccs.pyramid.feature.Feature)23 Vector (org.apache.mahout.math.Vector)14 FeatureList (edu.neu.ccs.pyramid.feature.FeatureList)13 Collectors (java.util.stream.Collectors)9 TopFeatures (edu.neu.ccs.pyramid.feature.TopFeatures)8 RegTreeInspector (edu.neu.ccs.pyramid.regression.regression_tree.RegTreeInspector)6 RegressionTree (edu.neu.ccs.pyramid.regression.regression_tree.RegressionTree)6 java.util (java.util)6 MultiLabelPredictionAnalysis (edu.neu.ccs.pyramid.multilabel_classification.MultiLabelPredictionAnalysis)5 edu.neu.ccs.pyramid.regression (edu.neu.ccs.pyramid.regression)5 TreeRule (edu.neu.ccs.pyramid.regression.regression_tree.TreeRule)5 ClassProbability (edu.neu.ccs.pyramid.classification.ClassProbability)4 edu.neu.ccs.pyramid.dataset (edu.neu.ccs.pyramid.dataset)4 LabelTranslator (edu.neu.ccs.pyramid.dataset.LabelTranslator)4 Pair (edu.neu.ccs.pyramid.util.Pair)4 IntStream (java.util.stream.IntStream)4 PredictionAnalysis (edu.neu.ccs.pyramid.classification.PredictionAnalysis)3 IdTranslator (edu.neu.ccs.pyramid.dataset.IdTranslator)3 IMLGradientBoosting (edu.neu.ccs.pyramid.multilabel_classification.imlgb.IMLGradientBoosting)3 ArrayList (java.util.ArrayList)3