Search in sources :

Example 1 with ASEvaluation

use of weka.attributeSelection.ASEvaluation in project dkpro-tc by dkpro.

From the class WekaUtils, method singleLabelAttributeSelection.

/**
 * Runs Weka attribute selection on single-label training data.
 *
 * @param trainData
 *            Weka training instances the selection is run on
 * @param featureSearcher
 *            search method specification: the first element is the name handed to
 *            {@code ASSearch.forName}, all remaining elements are passed as its options
 * @param attributeEvaluator
 *            evaluator specification: the first element is the name handed to
 *            {@code ASEvaluation.forName}, all remaining elements are passed as its options
 * @return the attribute selection object after {@code SelectAttributes} was invoked on the
 *         training data
 * @throws Exception
 *             in case of errors
 */
public static AttributeSelection singleLabelAttributeSelection(Instances trainData, List<String> featureSearcher, List<String> attributeEvaluator) throws Exception {
    AttributeSelection attributeSelection = new AttributeSelection();

    // Search method: leading element is the name, the tail holds the options
    String searcherName = featureSearcher.get(0);
    String[] searcherOptions = featureSearcher.subList(1, featureSearcher.size()).toArray(new String[0]);
    ASSearch searchMethod = ASSearch.forName(searcherName, searcherOptions);

    // Attribute evaluator: same layout as the search method specification
    String evaluatorName = attributeEvaluator.get(0);
    String[] evaluatorOptions = attributeEvaluator.subList(1, attributeEvaluator.size()).toArray(new String[0]);
    ASEvaluation evaluator = ASEvaluation.forName(evaluatorName, evaluatorOptions);

    attributeSelection.setSearch(searchMethod);
    attributeSelection.setEvaluator(evaluator);
    attributeSelection.SelectAttributes(trainData);
    return attributeSelection;
}
Also used : ASEvaluation(weka.attributeSelection.ASEvaluation) AttributeSelection(weka.attributeSelection.AttributeSelection) ASSearch(weka.attributeSelection.ASSearch)

Example 2 with ASEvaluation

use of weka.attributeSelection.ASEvaluation in project dkpro-tc by dkpro.

From the class WekaUtils, method featureSelectionMultilabel.

/**
 * Feature selection using Mulan.
 * <p>
 * Evaluates every feature attribute with the configured evaluator, writes the per-attribute
 * scores to the feature selection results file, and builds a {@code Remove} filter that keeps
 * the leading {@code numLabelsToKeep} entries of the ranker result together with all label
 * attributes.
 *
 * @param aContext
 *            Lab context used to locate the feature selection results file
 * @param trainData
 *            training data; converted to Mulan instances before evaluation
 * @param attributeEvaluator
 *            evaluator specification: the first element is the name handed to
 *            {@code ASEvaluation.forName}, all remaining elements are passed as its options
 * @param labelTransformationMethod
 *            transformation method; either {@code "LabelPowersetAttributeEvaluator"} or
 *            {@code "BinaryRelevanceAttributeEvaluator"}
 * @param numLabelsToKeep
 *            number of ranked feature attributes to keep (despite the name, this does not
 *            count labels; all label attributes are always kept in addition)
 * @return the configured remove filter, or {@code null} if there are fewer ranked attributes
 *         than requested, in which case no filtering should be applied
 * @throws TextClassificationException
 *             if the transformation method is not supported or any other error occurs
 */
public static Remove featureSelectionMultilabel(TaskContext aContext, Instances trainData, List<String> attributeEvaluator, String labelTransformationMethod, int numLabelsToKeep) throws TextClassificationException {
    // file to hold the results of attribute selection
    File fsResultsFile = getFile(aContext, TEST_TASK_OUTPUT_KEY, WekaTestTask.featureSelectionFile, AccessMode.READWRITE);
    // filter for reducing dimension of attributes
    Remove filterRemove = new Remove();
    try {
        MultiLabelInstances mulanInstances = convertMekaInstancesToMulanInstances(trainData);
        ASEvaluation eval = ASEvaluation.forName(attributeEvaluator.get(0), attributeEvaluator.subList(1, attributeEvaluator.size()).toArray(new String[0]));
        AttributeEvaluator attributeSelectionFilter;
        // The Mulan evaluator has to be picked by name and constructed by hand
        // (this is complicated due to missing commandline support of mulan).
        // Constant-first equals: a null method name ends up in the "not supported" branch
        // instead of raising a NullPointerException.
        if ("LabelPowersetAttributeEvaluator".equals(labelTransformationMethod)) {
            attributeSelectionFilter = new LabelPowersetAttributeEvaluator(eval, mulanInstances);
        } else if ("BinaryRelevanceAttributeEvaluator".equals(labelTransformationMethod)) {
            attributeSelectionFilter = new BinaryRelevanceAttributeEvaluator(eval, mulanInstances, "max", "none", "rank");
        } else {
            throw new TextClassificationException("This Label Transformation Method is not supported.");
        }
        Ranker r = new Ranker();
        int[] result = r.search(attributeSelectionFilter, mulanInstances);
        // collect evaluation for *all* attributes and write to file
        int numLabels = mulanInstances.getNumLabels();
        StringBuilder evalFile = new StringBuilder();
        for (Attribute att : mulanInstances.getFeatureAttributes()) {
            evalFile.append(att.name()).append(": ").append(attributeSelectionFilter.evaluateAttribute(att.index() - numLabels)).append("\n");
        }
        FileUtils.writeStringToFile(fsResultsFile, evalFile.toString(), "utf-8");
        // fewer ranked attributes than we want => no filtering.
        // Checked explicitly because System.arraycopy is only documented to throw
        // IndexOutOfBoundsException, not necessarily ArrayIndexOutOfBoundsException.
        if (numLabelsToKeep > result.length) {
            return null;
        }
        // create a filter to reduce the dimension of the attributes:
        // kept indices = leading ranked features followed by all label indices
        int[] toKeep = new int[numLabelsToKeep + numLabels];
        System.arraycopy(result, 0, toKeep, 0, numLabelsToKeep);
        int[] labelIndices = mulanInstances.getLabelIndices();
        System.arraycopy(labelIndices, 0, toKeep, numLabelsToKeep, numLabels);
        filterRemove.setAttributeIndicesArray(toKeep);
        // inverted selection: the listed indices are the ones that are kept
        filterRemove.setInvertSelection(true);
        filterRemove.setInputFormat(mulanInstances.getDataSet());
    } catch (TextClassificationException e) {
        // already the declared exception type; rethrow as-is so the message is not buried
        // inside a second wrapper by the generic handler below
        throw e;
    } catch (ArrayIndexOutOfBoundsException ignored) {
        // kept for backward compatibility: an out-of-bounds array access anywhere above
        // has always been treated as "fewer attributes than we want" => no filtering
        return null;
    } catch (Exception e) {
        throw new TextClassificationException(e);
    }
    return filterRemove;
}
Also used : TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) Attribute(weka.core.Attribute) Remove(weka.filters.unsupervised.attribute.Remove) MultiLabelInstances(mulan.data.MultiLabelInstances) AttributeEvaluator(weka.attributeSelection.AttributeEvaluator) BinaryRelevanceAttributeEvaluator(mulan.dimensionalityReduction.BinaryRelevanceAttributeEvaluator) LabelPowersetAttributeEvaluator(mulan.dimensionalityReduction.LabelPowersetAttributeEvaluator) Ranker(mulan.dimensionalityReduction.Ranker) FileNotFoundException(java.io.FileNotFoundException) InvalidDataFormatException(mulan.data.InvalidDataFormatException) IOException(java.io.IOException) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) ASEvaluation(weka.attributeSelection.ASEvaluation) BinaryRelevanceAttributeEvaluator(mulan.dimensionalityReduction.BinaryRelevanceAttributeEvaluator) LabelPowersetAttributeEvaluator(mulan.dimensionalityReduction.LabelPowersetAttributeEvaluator) File(java.io.File)

Aggregations

ASEvaluation (weka.attributeSelection.ASEvaluation)2 File (java.io.File)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 InvalidDataFormatException (mulan.data.InvalidDataFormatException)1 MultiLabelInstances (mulan.data.MultiLabelInstances)1 BinaryRelevanceAttributeEvaluator (mulan.dimensionalityReduction.BinaryRelevanceAttributeEvaluator)1 LabelPowersetAttributeEvaluator (mulan.dimensionalityReduction.LabelPowersetAttributeEvaluator)1 Ranker (mulan.dimensionalityReduction.Ranker)1 TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException)1 ASSearch (weka.attributeSelection.ASSearch)1 AttributeEvaluator (weka.attributeSelection.AttributeEvaluator)1 AttributeSelection (weka.attributeSelection.AttributeSelection)1 Attribute (weka.core.Attribute)1 Remove (weka.filters.unsupervised.attribute.Remove)1