Use of weka.attributeSelection.ASEvaluation in the project dkpro-tc by dkpro.
From the class WekaUtils, method singleLabelAttributeSelection.
/**
 * Runs Weka attribute selection on the given training data.
 *
 * @param trainData
 *            Weka training data the selection is run on
 * @param featureSearcher
 *            search method specification: the first element is the name handed to
 *            {@code ASSearch.forName}, all remaining elements are passed as its options
 * @param attributeEvaluator
 *            evaluator specification: the first element is the name handed to
 *            {@code ASEvaluation.forName}, all remaining elements are passed as its options
 * @return the attribute selection object after {@code SelectAttributes} has been called on
 *         the training data
 * @throws Exception
 *             in case of errors
 */
public static AttributeSelection singleLabelAttributeSelection(Instances trainData, List<String> featureSearcher, List<String> attributeEvaluator) throws Exception {
    // Head of the list is the searcher name, the tail holds its options
    String searcherName = featureSearcher.get(0);
    String[] searcherOptions = featureSearcher.subList(1, featureSearcher.size()).toArray(new String[0]);
    ASSearch searchMethod = ASSearch.forName(searcherName, searcherOptions);

    // Same layout for the evaluator: name first, options afterwards
    String evaluatorName = attributeEvaluator.get(0);
    String[] evaluatorOptions = attributeEvaluator.subList(1, attributeEvaluator.size()).toArray(new String[0]);
    ASEvaluation evaluator = ASEvaluation.forName(evaluatorName, evaluatorOptions);

    AttributeSelection attributeSelection = new AttributeSelection();
    attributeSelection.setSearch(searchMethod);
    attributeSelection.setEvaluator(evaluator);
    attributeSelection.SelectAttributes(trainData);
    return attributeSelection;
}
Use of weka.attributeSelection.ASEvaluation in the project dkpro-tc by dkpro.
From the class WekaUtils, method featureSelectionMultilabel.
/**
 * Feature selection for multi-label data using Mulan.
 * <p>
 * Ranks the attributes with the configured evaluator, writes the evaluation score of every
 * feature attribute to the feature selection results file, and configures a {@link Remove}
 * filter (with inverted selection) from the top-ranked attribute indices plus all label
 * indices.
 *
 * @param aContext
 *            Lab context, used to locate the feature selection results file
 * @param trainData
 *            training data
 * @param attributeEvaluator
 *            evaluator specification: the first element is the name handed to
 *            {@code ASEvaluation.forName}, all remaining elements are passed as its options
 * @param labelTransformationMethod
 *            transformation method; supported values are
 *            {@code "LabelPowersetAttributeEvaluator"} and
 *            {@code "BinaryRelevanceAttributeEvaluator"}
 * @param numLabelsToKeep
 *            number of top-ranked attributes to keep in addition to the label attributes
 * @return the configured remove filter, or {@code null} (meaning: no filtering) if
 *         {@code numLabelsToKeep} is negative or larger than the number of ranked attributes
 * @throws TextClassificationException
 *             if the label transformation method is not supported or any other error occurs
 */
public static Remove featureSelectionMultilabel(TaskContext aContext, Instances trainData, List<String> attributeEvaluator, String labelTransformationMethod, int numLabelsToKeep) throws TextClassificationException {
    // file to hold the results of attribute selection
    File fsResultsFile = getFile(aContext, TEST_TASK_OUTPUT_KEY, WekaTestTask.featureSelectionFile, AccessMode.READWRITE);
    // filter for reducing dimension of attributes
    Remove filterRemove = new Remove();
    try {
        MultiLabelInstances mulanInstances = convertMekaInstancesToMulanInstances(trainData);
        int numLabels = mulanInstances.getNumLabels();

        String evaluatorName = attributeEvaluator.get(0);
        String[] evaluatorOptions = attributeEvaluator.subList(1, attributeEvaluator.size()).toArray(new String[0]);
        ASEvaluation eval = ASEvaluation.forName(evaluatorName, evaluatorOptions);

        // The Mulan evaluator has to be instantiated by hand for each supported
        // transformation method (Mulan offers no command-line style factory for this).
        AttributeEvaluator attributeSelectionFilter;
        if ("LabelPowersetAttributeEvaluator".equals(labelTransformationMethod)) {
            attributeSelectionFilter = new LabelPowersetAttributeEvaluator(eval, mulanInstances);
        } else if ("BinaryRelevanceAttributeEvaluator".equals(labelTransformationMethod)) {
            attributeSelectionFilter = new BinaryRelevanceAttributeEvaluator(eval, mulanInstances, "max", "none", "rank");
        } else {
            throw new TextClassificationException("This Label Transformation Method is not supported.");
        }

        Ranker r = new Ranker();
        int[] result = r.search(attributeSelectionFilter, mulanInstances);

        // collect evaluation for *all* attributes and write to file
        StringBuilder evalFile = new StringBuilder();
        for (Attribute att : mulanInstances.getFeatureAttributes()) {
            evalFile.append(att.name())
                    .append(": ")
                    .append(attributeSelectionFilter.evaluateAttribute(att.index() - numLabels))
                    .append("\n");
        }
        FileUtils.writeStringToFile(fsResultsFile, evalFile.toString(), "utf-8");

        // Fewer ranked attributes than requested (or a negative request) => no filtering.
        // Checked explicitly instead of relying on an exception from System.arraycopy, so
        // that unrelated index errors are no longer silently reported as "no filtering".
        if (numLabelsToKeep < 0 || numLabelsToKeep > result.length) {
            return null;
        }

        // create a filter to reduce the dimension of the attributes:
        // top-ranked attribute indices first, followed by all label indices
        int[] toKeep = new int[numLabelsToKeep + numLabels];
        System.arraycopy(result, 0, toKeep, 0, numLabelsToKeep);
        int[] labelIndices = mulanInstances.getLabelIndices();
        System.arraycopy(labelIndices, 0, toKeep, numLabelsToKeep, numLabels);
        filterRemove.setAttributeIndicesArray(toKeep);
        filterRemove.setInvertSelection(true);
        filterRemove.setInputFormat(mulanInstances.getDataSet());
    } catch (TextClassificationException e) {
        // already the right type (e.g. unsupported transformation method): do not re-wrap
        throw e;
    } catch (Exception e) {
        throw new TextClassificationException(e);
    }
    return filterRemove;
}
Aggregations