Search in sources :

Example 11 with FeatureList

use of edu.neu.ccs.pyramid.feature.FeatureList in project pyramid by cheng-li.

the class DataSetUtil method concatenateByColumn.

/**
 * Builds a new data set whose feature columns are the columns of
 * {@code dataSet1} followed by the columns of {@code dataSet2}.
 *
 * <p>The number of data points, number of classes, density, missing-value
 * flag, labels, label translator and id translator are all taken from
 * {@code dataSet1}; only feature values and the feature list are read from
 * {@code dataSet2}.
 *
 * <p>NOTE(review): the method never compares the two data point counts; it
 * presumably expects both inputs to describe the same data points in the
 * same order -- confirm against callers.
 *
 * @param dataSet1 supplies the leading feature columns and all metadata
 * @param dataSet2 supplies the trailing feature columns
 * @return a newly built data set with
 *         {@code dataSet1.getNumFeatures() + dataSet2.getNumFeatures()} features
 */
public static MultiLabelClfDataSet concatenateByColumn(MultiLabelClfDataSet dataSet1, MultiLabelClfDataSet dataSet2) {
    final int rowCount = dataSet1.getNumDataPoints();
    final int leftWidth = dataSet1.getNumFeatures();
    final int rightWidth = dataSet2.getNumFeatures();

    MultiLabelClfDataSet merged = MLClfDataSetBuilder.getBuilder()
            .numDataPoints(rowCount)
            .numFeatures(leftWidth + rightWidth)
            .numClasses(dataSet1.getNumClasses())
            .density(dataSet1.density())
            .missingValue(dataSet1.hasMissingValue())
            .build();

    // Columns of the first data set keep their original column index.
    for (int col = 0; col < leftWidth; col++) {
        for (Vector.Element cell : dataSet1.getColumn(col).nonZeroes()) {
            merged.setFeatureValue(cell.index(), col, cell.get());
        }
    }

    // Columns of the second data set are shifted right by the width of the first.
    for (int col = 0; col < rightWidth; col++) {
        final int targetCol = leftWidth + col;
        for (Vector.Element cell : dataSet2.getColumn(col).nonZeroes()) {
            merged.setFeatureValue(cell.index(), targetCol, cell.get());
        }
    }

    // Labels come from the first data set only.
    MultiLabel[] sourceLabels = dataSet1.getMultiLabels();
    for (int row = 0; row < rowCount; row++) {
        merged.setLabels(row, sourceLabels[row]);
    }

    // Feature descriptors are appended in the same order as the columns.
    FeatureList combinedFeatures = new FeatureList();
    for (Feature left : dataSet1.getFeatureList().getAll()) {
        combinedFeatures.add(left);
    }
    for (Feature right : dataSet2.getFeatureList().getAll()) {
        combinedFeatures.add(right);
    }
    merged.setFeatureList(combinedFeatures);

    merged.setLabelTranslator(dataSet1.getLabelTranslator());
    merged.setIdTranslator(dataSet1.getIdTranslator());
    return merged;
}
Also used : FeatureList(edu.neu.ccs.pyramid.feature.FeatureList) Vector(org.apache.mahout.math.Vector) Feature(edu.neu.ccs.pyramid.feature.Feature)

Example 12 with FeatureList

use of edu.neu.ccs.pyramid.feature.FeatureList in project pyramid by cheng-li.

the class DataSetUtil method sampleFeatures.

/**
 * Creates a copy of {@code dataSet} that contains only the selected feature
 * columns, in the order given by {@code columnsToKeep}.
 *
 * <p>The result is dense when the input is dense and sparse otherwise. The
 * labels, label translator and id translator are carried over from the input.
 *
 * <p>NOTE(review): the kept {@code Feature} objects are taken from the input's
 * feature list and have {@code setIndex} called on them; if {@code getAll()}
 * returns the live objects, the input data set's features are re-indexed as
 * a side effect -- confirm.
 *
 * @param dataSet the data set to take columns, labels and translators from
 * @param columnsToKeep column indices of {@code dataSet} to retain; position
 *                      {@code j} in this list becomes column {@code j} of the result
 * @return a new data set with {@code columnsToKeep.size()} features
 */
public static MultiLabelClfDataSet sampleFeatures(MultiLabelClfDataSet dataSet, List<Integer> columnsToKeep) {
    final boolean hasMissing = dataSet.hasMissingValue();
    final int classCount = dataSet.getNumClasses();

    // Preserve the storage layout (dense vs. sparse) of the input.
    MultiLabelClfDataSet trimmed = dataSet.isDense()
            ? new DenseMLClfDataSet(dataSet.getNumDataPoints(), columnsToKeep.size(), hasMissing, classCount)
            : new SparseMLClfDataSet(dataSet.getNumDataPoints(), columnsToKeep.size(), hasMissing, classCount);

    // Copy the non-zero entries of every retained column into its new position.
    final int keptCount = trimmed.getNumFeatures();
    for (int newColumn = 0; newColumn < keptCount; newColumn++) {
        int sourceColumn = columnsToKeep.get(newColumn);
        for (Vector.Element cell : dataSet.getColumn(sourceColumn).nonZeroes()) {
            trimmed.setFeatureValue(cell.index(), newColumn, cell.get());
        }
    }

    // Copy the label assignments row by row.
    MultiLabel[] sourceLabels = dataSet.getMultiLabels();
    final int rowCount = trimmed.getNumDataPoints();
    for (int row = 0; row < rowCount; row++) {
        trimmed.addLabels(row, sourceLabels[row].getMatchedLabels());
    }

    // Translators are passed through unchanged.
    trimmed.setLabelTranslator(dataSet.getLabelTranslator());
    trimmed.setIdTranslator(dataSet.getIdTranslator());

    // Select the matching feature descriptors and renumber them 0..n-1.
    List<Feature> allFeatures = dataSet.getFeatureList().getAll();
    List<Feature> keptFeatures = columnsToKeep.stream()
            .map(column -> allFeatures.get(column))
            .collect(Collectors.toList());
    int position = 0;
    for (Feature kept : keptFeatures) {
        kept.setIndex(position);
        position++;
    }
    trimmed.setFeatureList(new FeatureList(keptFeatures));
    return trimmed;
}
Also used : Feature(edu.neu.ccs.pyramid.feature.Feature) FeatureList(edu.neu.ccs.pyramid.feature.FeatureList) Vector(org.apache.mahout.math.Vector)

Example 13 with FeatureList

use of edu.neu.ccs.pyramid.feature.FeatureList in project pyramid by cheng-li.

the class MLLogisticRegressionInspector method topFeatures.

/**
 * Collects the features with the largest positive weights for one class of
 * a multi-label logistic regression model.
 *
 * <p>Only features whose weight is strictly greater than zero are considered.
 * They are ordered by descending weight and at most {@code limit} of them
 * are returned.
 *
 * @param logisticRegression the model whose weights and feature list are read
 * @param classIndex index of the class whose (bias-free) weight vector is inspected
 * @param limit maximum number of features to report
 * @return a {@code TopFeatures} holding the selected features, the class
 *         index and the external class name from the model's label translator
 */
public static TopFeatures topFeatures(MLLogisticRegression logisticRegression, int classIndex, int limit) {
    FeatureList modelFeatures = logisticRegression.getFeatureList();
    Vector classWeights = logisticRegression.getWeights().getWeightsWithoutBiasForClass(classIndex);

    Comparator<FeatureUtility> byUtility = Comparator.comparing(FeatureUtility::getUtility);
    Comparator<FeatureUtility> byUtilityDescending = byUtility.reversed();

    // Pair each feature with its weight, keep the positive ones, rank them, then truncate.
    List<Feature> ranked = IntStream.range(0, classWeights.size())
            .mapToObj(idx -> new FeatureUtility(modelFeatures.get(idx)).setUtility(classWeights.get(idx)))
            .filter(candidate -> candidate.getUtility() > 0)
            .sorted(byUtilityDescending)
            .map(candidate -> candidate.getFeature())
            .limit(limit)
            .collect(Collectors.toList());

    TopFeatures result = new TopFeatures();
    result.setTopFeatures(ranked);
    result.setClassIndex(classIndex);
    String className = logisticRegression.getLabelTranslator().toExtLabel(classIndex);
    result.setClassName(className);
    return result;
}
Also used : FeatureUtility(edu.neu.ccs.pyramid.feature.FeatureUtility) FeatureUtility(edu.neu.ccs.pyramid.feature.FeatureUtility) MultiLabelPredictionAnalysis(edu.neu.ccs.pyramid.multilabel_classification.MultiLabelPredictionAnalysis) IntStream(java.util.stream.IntStream) ClassProbability(edu.neu.ccs.pyramid.classification.ClassProbability) ConstantRule(edu.neu.ccs.pyramid.regression.ConstantRule) FeatureList(edu.neu.ccs.pyramid.feature.FeatureList) Rule(edu.neu.ccs.pyramid.regression.Rule) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) IMLGradientBoosting(edu.neu.ccs.pyramid.multilabel_classification.imlgb.IMLGradientBoosting) PredictionAnalysis(edu.neu.ccs.pyramid.classification.PredictionAnalysis) List(java.util.List) Feature(edu.neu.ccs.pyramid.feature.Feature) LogisticRegression(edu.neu.ccs.pyramid.classification.logistic_regression.LogisticRegression) edu.neu.ccs.pyramid.dataset(edu.neu.ccs.pyramid.dataset) Vector(org.apache.mahout.math.Vector) TopFeatures(edu.neu.ccs.pyramid.feature.TopFeatures) LinearRule(edu.neu.ccs.pyramid.regression.LinearRule) Comparator(java.util.Comparator) ClassScoreCalculation(edu.neu.ccs.pyramid.regression.ClassScoreCalculation) TopFeatures(edu.neu.ccs.pyramid.feature.TopFeatures) FeatureList(edu.neu.ccs.pyramid.feature.FeatureList) Vector(org.apache.mahout.math.Vector) Feature(edu.neu.ccs.pyramid.feature.Feature)

Example 14 with FeatureList

use of edu.neu.ccs.pyramid.feature.FeatureList in project pyramid by cheng-li.

the class MekaFormat method loadMLClfDatasetPre.

/**
 * Reads the sparse data lines of a file into a new multi-label data set.
 *
 * <p>Only lines that start with <code>{</code> and end with <code>}</code>
 * are treated as data points; every other line is skipped. Inside the braces,
 * entries are separated by commas and each entry is an
 * {@code "<index> <value>"} pair separated by a single space. An index found
 * in {@code labelMap} assigns that label to the data point when its value
 * parses to {@code 1.0}; an index found in {@code featureMap} sets the feature
 * at column {@code index - numClasses}.
 *
 * @param file the file to read
 * @param numClasses number of labels; also the offset subtracted from a file
 *                   index to obtain a feature column
 * @param numFeatures number of feature columns in the data set
 * @param numData number of data points in the data set
 * @param labelMap file index (as string) to label name
 * @param featureMap file index (as string) to feature name
 * @return the populated data set, with feature list and label translator set
 * @throws IOException if the file cannot be opened or read
 * @throws RuntimeException if a data line contains an index that is in
 *                          neither {@code labelMap} nor {@code featureMap}
 */
private static MultiLabelClfDataSet loadMLClfDatasetPre(File file, int numClasses, int numFeatures, int numData, Map<String, String> labelMap, Map<String, String> featureMap) throws IOException {
    MultiLabelClfDataSet dataSet = MLClfDataSetBuilder.getBuilder().numDataPoints(numData).numClasses(numClasses).numFeatures(numFeatures).build();
    // set features: column m corresponds to file index (m + numClasses)
    List<Feature> featureList = new LinkedList<>();
    for (int m = 0; m < numFeatures; m++) {
        String featureIndex = Integer.toString(m + numClasses);
        String featureName = featureMap.get(featureIndex);
        Feature feature = new Feature();
        feature.setIndex(m);
        feature.setName(featureName);
        featureList.add(feature);
    }
    dataSet.setFeatureList(new FeatureList(featureList));
    // set Label: translate the string-keyed map into an int-keyed one
    Map<Integer, String> labelIndexMap = new HashMap<>();
    for (Map.Entry<String, String> entry : labelMap.entrySet()) {
        String labelString = entry.getKey();
        String labelName = entry.getValue();
        labelIndexMap.put(Integer.parseInt(labelString), labelName);
    }
    LabelTranslator labelTranslator = new LabelTranslator(labelIndexMap);
    dataSet.setLabelTranslator(labelTranslator);
    // create feature matrix
    // try-with-resources guarantees the reader is closed even when a line
    // fails to parse or contains an unknown index.
    try (BufferedReader br = new BufferedReader(new FileReader(file))) {
        String line;
        int dataCount = 0;
        while ((line = br.readLine()) != null) {
            if ((line.startsWith("{")) && (line.endsWith("}"))) {
                line = line.substring(1, line.length() - 1);
                String[] indexValues = line.split(",");
                for (String indexValue : indexValues) {
                    String[] indexValuePair = indexValue.split(" ");
                    String index = indexValuePair[0];
                    String value = indexValuePair[1];
                    if (labelMap.containsKey(index)) {
                        double valueDouble = Double.parseDouble(value);
                        // a label is assigned only when its value is exactly 1.0
                        if (valueDouble == 1.0) {
                            dataSet.addLabel(dataCount, Integer.parseInt(index));
                        }
                    } else if (featureMap.containsKey(index)) {
                        double valueDouble = Double.parseDouble(value);
                        int indexInt = Integer.parseInt(index);
                        // file indices place labels first, so shift by numClasses
                        dataSet.setFeatureValue(dataCount, indexInt - numClasses, valueDouble);
                    } else {
                        throw new RuntimeException("Index not found in the line: " + line);
                    }
                }
                dataCount++;
            }
        }
    }
    return dataSet;
}
Also used : Feature(edu.neu.ccs.pyramid.feature.Feature) FeatureList(edu.neu.ccs.pyramid.feature.FeatureList)

Aggregations

FeatureList (edu.neu.ccs.pyramid.feature.FeatureList)14 Feature (edu.neu.ccs.pyramid.feature.Feature)13 Vector (org.apache.mahout.math.Vector)8 Collectors (java.util.stream.Collectors)2 ClassProbability (edu.neu.ccs.pyramid.classification.ClassProbability)1 PredictionAnalysis (edu.neu.ccs.pyramid.classification.PredictionAnalysis)1 LogisticRegression (edu.neu.ccs.pyramid.classification.logistic_regression.LogisticRegression)1 edu.neu.ccs.pyramid.dataset (edu.neu.ccs.pyramid.dataset)1 FeatureUtility (edu.neu.ccs.pyramid.feature.FeatureUtility)1 TopFeatures (edu.neu.ccs.pyramid.feature.TopFeatures)1 MultiLabelPredictionAnalysis (edu.neu.ccs.pyramid.multilabel_classification.MultiLabelPredictionAnalysis)1 IMLGradientBoosting (edu.neu.ccs.pyramid.multilabel_classification.imlgb.IMLGradientBoosting)1 ClassScoreCalculation (edu.neu.ccs.pyramid.regression.ClassScoreCalculation)1 ConstantRule (edu.neu.ccs.pyramid.regression.ConstantRule)1 LinearRule (edu.neu.ccs.pyramid.regression.LinearRule)1 Rule (edu.neu.ccs.pyramid.regression.Rule)1 Pair (edu.neu.ccs.pyramid.util.Pair)1 java.util (java.util)1 ArrayList (java.util.ArrayList)1 Comparator (java.util.Comparator)1