use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.
the class DataSetUtil method concatenateByColumn.
/**
 * Builds a new classification data set whose columns are the columns of
 * {@code dataSet1} followed by the columns of {@code dataSet2}.
 * The two inputs are assumed to have different feature sets.
 *
 * <p>The number of classes, the dense flag, the missing-value flag, the labels,
 * the label translator and the id translator are all taken from {@code dataSet1}.
 * The resulting feature list holds the features of {@code dataSet1} followed by
 * those of {@code dataSet2}.
 *
 * <p>NOTE(review): the {@code Feature} instances placed in the new feature list are
 * the same objects held by the inputs (no copy is made here); confirm whether
 * {@code FeatureList.add} re-indexes them and whether that sharing is intended.
 *
 * @param dataSet1 data set providing the leading columns, labels and translators
 * @param dataSet2 data set providing the trailing columns; must have the same
 *                 number of data points as {@code dataSet1}
 * @return a newly built data set with {@code numFeatures1 + numFeatures2} columns
 * @throws IllegalArgumentException if the two data sets differ in number of data points
 */
public static ClfDataSet concatenateByColumn(ClfDataSet dataSet1, ClfDataSet dataSet2) {
    int numDataPoints = dataSet1.getNumDataPoints();
    // Rows are matched purely by index, so a size mismatch would either write
    // out-of-range rows or silently leave part of the result unfilled.
    if (dataSet2.getNumDataPoints() != numDataPoints) {
        throw new IllegalArgumentException("cannot concatenate by column: dataSet1 has "
                + numDataPoints + " data points but dataSet2 has " + dataSet2.getNumDataPoints());
    }
    int numFeatures1 = dataSet1.getNumFeatures();
    int numFeatures2 = dataSet2.getNumFeatures();
    ClfDataSet dataSet = ClfDataSetBuilder.getBuilder()
            .numDataPoints(numDataPoints)
            .numFeatures(numFeatures1 + numFeatures2)
            .numClasses(dataSet1.getNumClasses())
            .dense(dataSet1.isDense())
            .missingValue(dataSet1.hasMissingValue())
            .build();

    // columns of dataSet1 occupy [0, numFeatures1); columns of dataSet2 follow
    copyColumnsInto(dataSet1, dataSet, 0);
    copyColumnsInto(dataSet2, dataSet, numFeatures1);

    int[] labels = dataSet1.getLabels();
    for (int i = 0; i < numDataPoints; i++) {
        dataSet.setLabel(i, labels[i]);
    }

    FeatureList featureList = new FeatureList();
    for (Feature feature : dataSet1.getFeatureList().getAll()) {
        featureList.add(feature);
    }
    for (Feature feature : dataSet2.getFeatureList().getAll()) {
        featureList.add(feature);
    }
    dataSet.setFeatureList(featureList);
    dataSet.setLabelTranslator(dataSet1.getLabelTranslator());
    dataSet.setIdTranslator(dataSet1.getIdTranslator());
    return dataSet;
}

/**
 * Copies every non-zero entry of every column of {@code source} into {@code target},
 * shifting the column index by {@code columnOffset}.
 */
private static void copyColumnsInto(ClfDataSet source, ClfDataSet target, int columnOffset) {
    int numColumns = source.getNumFeatures();
    for (int j = 0; j < numColumns; j++) {
        Vector column = source.getColumn(j);
        for (Vector.Element element : column.nonZeroes()) {
            // element.index() is the data point (row) index within the column vector
            target.setFeatureValue(element.index(), columnOffset + j, element.get());
        }
    }
}
use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.
the class DataSetUtil method sampleFeatures.
/**
 * Builds a new regression data set that keeps only the requested columns of
 * {@code dataSet}, in the order given by {@code columnsToKeep}.
 *
 * <p>Labels and the id translator are carried over from {@code dataSet}; the dense
 * and missing-value flags are copied as well. The kept features are re-indexed to
 * their position in the new data set.
 *
 * <p>NOTE(review): the re-indexing calls {@code setIndex} on the {@code Feature}
 * objects obtained from the original data set's feature list, so the original
 * features appear to be modified too; confirm this is intended.
 *
 * @param dataSet       source data set
 * @param columnsToKeep indices of the columns of {@code dataSet} to retain
 * @return a newly built data set with {@code columnsToKeep.size()} columns
 */
public static RegDataSet sampleFeatures(RegDataSet dataSet, List<Integer> columnsToKeep) {
    RegDataSet trimmed = RegDataSetBuilder.getBuilder()
            .numDataPoints(dataSet.getNumDataPoints())
            .numFeatures(columnsToKeep.size())
            .missingValue(dataSet.hasMissingValue())
            .dense(dataSet.isDense())
            .build();

    // copy the non-zero entries of each retained column into its new position
    for (int newColumn = 0; newColumn < trimmed.getNumFeatures(); newColumn++) {
        int sourceColumn = columnsToKeep.get(newColumn);
        Vector columnVector = dataSet.getColumn(sourceColumn);
        for (Vector.Element entry : columnVector.nonZeroes()) {
            trimmed.setFeatureValue(entry.index(), newColumn, entry.get());
        }
    }

    // copy labels
    double[] sourceLabels = dataSet.getLabels();
    for (int row = 0; row < trimmed.getNumDataPoints(); row++) {
        trimmed.setLabel(row, sourceLabels[row]);
    }

    trimmed.setIdTranslator(dataSet.getIdTranslator());

    // select the matching features and renumber them for the trimmed data set
    List<Feature> allFeatures = dataSet.getFeatureList().getAll();
    List<Feature> keptFeatures = columnsToKeep.stream()
            .map(allFeatures::get)
            .collect(Collectors.toList());
    int position = 0;
    for (Feature kept : keptFeatures) {
        kept.setIndex(position);
        position++;
    }
    trimmed.setFeatureList(new FeatureList(keptFeatures));
    return trimmed;
}
use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.
the class LSBoostInspector method topFeatures.
/**
 * Ranks the features used by the regression trees of an {@code LSBoost} model.
 *
 * <p>Only regressors in ensemble 0 that are {@code RegressionTree} instances are
 * considered. For each tree, the per-feature values returned by
 * {@code RegTreeInspector.featureImportance} are summed across trees, and the
 * features are returned in descending order of that total.
 *
 * @param boosting the boosted model to inspect
 * @return a {@code TopFeatures} holding the features sorted by decreasing total contribution
 */
public static TopFeatures topFeatures(LSBoost boosting) {
    Map<Feature, Double> totalContributions = new HashMap<>();
    List<Regressor> regressors = boosting.getEnsemble(0).getRegressors();
    List<RegressionTree> trees = regressors.stream()
            .filter(regressor -> regressor instanceof RegressionTree)
            .map(regressor -> (RegressionTree) regressor)
            .collect(Collectors.toList());
    for (RegressionTree tree : trees) {
        Map<Feature, Double> contributions = RegTreeInspector.featureImportance(tree);
        for (Map.Entry<Feature, Double> entry : contributions.entrySet()) {
            // accumulate this tree's contribution onto the running total for the feature
            totalContributions.merge(entry.getKey(), entry.getValue(), Double::sum);
        }
    }
    // largest total contribution first
    Comparator<Map.Entry<Feature, Double>> comparator = Comparator.comparing(Map.Entry::getValue);
    List<Feature> list = totalContributions.entrySet().stream()
            .sorted(comparator.reversed())
            .map(Map.Entry::getKey)
            .collect(Collectors.toList());
    TopFeatures topFeatures = new TopFeatures();
    topFeatures.setTopFeatures(list);
    return topFeatures;
}
use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.
the class LKBInspector method topFeatures.
/**
 * Ranks features for one class across several {@code LKBoost} models.
 *
 * <p>For every model, the regressors of the ensemble for {@code classIndex} that are
 * {@code RegressionTree} instances are inspected with
 * {@code RegTreeInspector.featureImportance}; the per-feature values are summed over
 * all trees of all models. The class name is looked up through the label translator
 * of the first model in the list.
 *
 * @param lkBoosts ensemble of lktbs
 * @param classIndex index of the class whose ensembles are inspected
 * @return a {@code TopFeatures} with the features sorted by decreasing total
 *         contribution, plus the class index and class name
 */
public static TopFeatures topFeatures(List<LKBoost> lkBoosts, int classIndex) {
    Map<Feature, Double> importanceByFeature = new HashMap<>();
    for (LKBoost member : lkBoosts) {
        for (Regressor candidate : member.getEnsemble(classIndex).getRegressors()) {
            // only trees are considered
            if (!(candidate instanceof RegressionTree)) {
                continue;
            }
            Map<Feature, Double> perTree = RegTreeInspector.featureImportance((RegressionTree) candidate);
            for (Map.Entry<Feature, Double> item : perTree.entrySet()) {
                double runningTotal = importanceByFeature.getOrDefault(item.getKey(), 0.0);
                importanceByFeature.put(item.getKey(), runningTotal + item.getValue());
            }
        }
    }

    // largest total contribution first
    List<Feature> ranked = importanceByFeature.entrySet().stream()
            .sorted(Map.Entry.<Feature, Double>comparingByValue(Comparator.reverseOrder()))
            .map(Map.Entry::getKey)
            .collect(Collectors.toList());

    TopFeatures result = new TopFeatures();
    result.setTopFeatures(ranked);
    result.setClassIndex(classIndex);
    String className = lkBoosts.get(0).getLabelTranslator().toExtLabel(classIndex);
    result.setClassName(className);
    return result;
}
use of edu.neu.ccs.pyramid.feature.Feature in project pyramid by cheng-li.
the class LKBInspector method topFeatures.
//todo: consider newton step and learning rate
/**
 * Ranks the features used for one class of an {@code LKBoost} model.
 * Only trees are considered: regressors in the class's ensemble that are not
 * {@code RegressionTree} instances are ignored.
 *
 * <p>The per-feature values returned by {@code RegTreeInspector.featureImportance}
 * are summed over all trees, and features are ordered by decreasing total.
 *
 * @param boosting the boosted model to inspect
 * @param classIndex index of the class whose ensemble is inspected
 * @return a {@code TopFeatures} with the ranked features, the class index, and the
 *         class name obtained from the model's label translator
 */
public static TopFeatures topFeatures(LKBoost boosting, int classIndex) {
    Map<Feature, Double> summedImportance = new HashMap<>();

    boosting.getEnsemble(classIndex).getRegressors().stream()
            .filter(regressor -> regressor instanceof RegressionTree)
            .map(regressor -> (RegressionTree) regressor)
            .collect(Collectors.toList())
            .forEach(tree -> RegTreeInspector.featureImportance(tree).forEach(
                    (feature, contribution) -> summedImportance.put(
                            feature, summedImportance.getOrDefault(feature, 0.0) + contribution)));

    // largest total contribution first
    List<Feature> rankedFeatures = summedImportance.entrySet().stream()
            .sorted(Map.Entry.<Feature, Double>comparingByValue(Comparator.reverseOrder()))
            .map(Map.Entry::getKey)
            .collect(Collectors.toList());

    TopFeatures ranking = new TopFeatures();
    ranking.setTopFeatures(rankedFeatures);
    ranking.setClassIndex(classIndex);
    ranking.setClassName(boosting.getLabelTranslator().toExtLabel(classIndex));
    return ranking;
}
Aggregations