Search in sources :

Example 1 with MultiLabelInstances

use of mulan.data.MultiLabelInstances in project dkpro-tc by dkpro.

From the class WekaUtils, method featureSelectionMultilabel.

/**
 * Feature selection using Mulan.
 * <p>
 * Converts the training data to Mulan's multi-label format, ranks the feature attributes with the
 * configured evaluator, writes the evaluation score of every feature attribute to the feature
 * selection results file, and finally configures a {@link Remove} filter (with inverted
 * selection) on the top-ranked feature indices plus all label indices.
 *
 * @param aContext
 *            Lab context, used to locate the feature selection results file
 * @param trainData
 *            training data; it is passed to {@code convertMekaInstancesToMulanInstances}
 * @param attributeEvaluator
 *            evaluator specification: the first element is the evaluator class name, all
 *            remaining elements are passed to it as options
 * @param labelTransformationMethod
 *            transformation method; only {@code "LabelPowersetAttributeEvaluator"} and
 *            {@code "BinaryRelevanceAttributeEvaluator"} are supported
 * @param numLabelsToKeep
 *            number of top-ranked <em>feature</em> attributes to keep (despite its name this
 *            does not count labels; all label attributes are always kept)
 * @return the configured remove filter, or {@code null} if {@code numLabelsToKeep} is negative
 *         or larger than the number of ranked attributes, in which case no filtering should be
 *         applied (the results file is still written in that case)
 * @throws TextClassificationException
 *             if the transformation method is not supported or any other error occurs
 */
public static Remove featureSelectionMultilabel(TaskContext aContext, Instances trainData, List<String> attributeEvaluator, String labelTransformationMethod, int numLabelsToKeep) throws TextClassificationException {
    // file to hold the results of attribute selection
    File fsResultsFile = getFile(aContext, TEST_TASK_OUTPUT_KEY, WekaTestTask.featureSelectionFile, AccessMode.READWRITE);
    // filter for reducing dimension of attributes
    Remove filterRemove = new Remove();
    try {
        MultiLabelInstances mulanInstances = convertMekaInstancesToMulanInstances(trainData);
        ASEvaluation eval = ASEvaluation.forName(attributeEvaluator.get(0), attributeEvaluator.subList(1, attributeEvaluator.size()).toArray(new String[0]));
        AttributeEvaluator attributeSelectionFilter;
        // Only the following Mulan transformation methods are supported; they are instantiated
        // by hand here (configuration is complicated due to missing commandline support of
        // mulan). Comparing from the constant side makes a null method name end up in the
        // "not supported" branch instead of raising a NullPointerException.
        if ("LabelPowersetAttributeEvaluator".equals(labelTransformationMethod)) {
            attributeSelectionFilter = new LabelPowersetAttributeEvaluator(eval, mulanInstances);
        } else if ("BinaryRelevanceAttributeEvaluator".equals(labelTransformationMethod)) {
            attributeSelectionFilter = new BinaryRelevanceAttributeEvaluator(eval, mulanInstances, "max", "none", "rank");
        } else {
            throw new TextClassificationException("This Label Transformation Method is not supported.");
        }
        Ranker r = new Ranker();
        int[] result = r.search(attributeSelectionFilter, mulanInstances);
        int numLabels = mulanInstances.getNumLabels();
        // collect evaluation for *all* attributes and write to file
        StringBuilder evalFile = new StringBuilder();
        for (Attribute att : mulanInstances.getFeatureAttributes()) {
            // the evaluator is addressed by the attribute index shifted down by the label count
            evalFile.append(att.name()).append(": ").append(attributeSelectionFilter.evaluateAttribute(att.index() - numLabels)).append("\n");
        }
        FileUtils.writeStringToFile(fsResultsFile, evalFile.toString(), "utf-8");
        // less attributes than we want (or a nonsensical negative request) => no filtering.
        // This is checked explicitly rather than by catching an index exception from the
        // array copy below, so that unrelated indexing bugs are not silently turned into
        // "no filtering".
        if (numLabelsToKeep < 0 || numLabelsToKeep > result.length) {
            return null;
        }
        // create a filter to reduce the dimension of the attributes:
        // first the top-ranked features, then every label index
        int[] toKeep = new int[numLabelsToKeep + numLabels];
        System.arraycopy(result, 0, toKeep, 0, numLabelsToKeep);
        int[] labelIndices = mulanInstances.getLabelIndices();
        System.arraycopy(labelIndices, 0, toKeep, numLabelsToKeep, numLabels);
        filterRemove.setAttributeIndicesArray(toKeep);
        filterRemove.setInvertSelection(true);
        filterRemove.setInputFormat(mulanInstances.getDataSet());
    } catch (TextClassificationException e) {
        // already the right type (e.g. unsupported transformation method): do not wrap it again
        throw e;
    } catch (Exception e) {
        throw new TextClassificationException(e);
    }
    return filterRemove;
}
Also used : TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) Attribute(weka.core.Attribute) Remove(weka.filters.unsupervised.attribute.Remove) MultiLabelInstances(mulan.data.MultiLabelInstances) AttributeEvaluator(weka.attributeSelection.AttributeEvaluator) BinaryRelevanceAttributeEvaluator(mulan.dimensionalityReduction.BinaryRelevanceAttributeEvaluator) LabelPowersetAttributeEvaluator(mulan.dimensionalityReduction.LabelPowersetAttributeEvaluator) Ranker(mulan.dimensionalityReduction.Ranker) FileNotFoundException(java.io.FileNotFoundException) InvalidDataFormatException(mulan.data.InvalidDataFormatException) IOException(java.io.IOException) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) ASEvaluation(weka.attributeSelection.ASEvaluation) BinaryRelevanceAttributeEvaluator(mulan.dimensionalityReduction.BinaryRelevanceAttributeEvaluator) LabelPowersetAttributeEvaluator(mulan.dimensionalityReduction.LabelPowersetAttributeEvaluator) File(java.io.File)

Example 2 with MultiLabelInstances

use of mulan.data.MultiLabelInstances in project dkpro-tc by dkpro.

From the class WekaUtils, method convertMekaInstancesToMulanInstances.

/**
 * Wraps instances in Meka format into Mulan's {@link MultiLabelInstances}.
 * <p>
 * Every attribute positioned before the class index is registered as a label, using the
 * attribute name as the label name. All labels are added as root nodes, so hierarchical
 * relationships among labels cannot be expressed.
 *
 * @param instances
 *            the instances to wrap
 * @return the multi-label instances backed by the given data set
 * @throws InvalidDataFormatException
 *             in case of data format error
 */
public static MultiLabelInstances convertMekaInstancesToMulanInstances(Instances instances) throws InvalidDataFormatException {
    final LabelsMetaDataImpl metaData = new LabelsMetaDataImpl();
    // attributes 0 .. classIndex-1 are treated as the labels
    final int labelCount = instances.classIndex();
    int position = 0;
    while (position < labelCount) {
        metaData.addRootNode(new LabelNodeImpl(instances.attribute(position).name()));
        position++;
    }
    return new MultiLabelInstances(instances, metaData);
}
Also used : LabelNodeImpl(mulan.data.LabelNodeImpl) LabelsMetaDataImpl(mulan.data.LabelsMetaDataImpl) MultiLabelInstances(mulan.data.MultiLabelInstances)

Aggregations

MultiLabelInstances (mulan.data.MultiLabelInstances)2 File (java.io.File)1 FileNotFoundException (java.io.FileNotFoundException)1 IOException (java.io.IOException)1 InvalidDataFormatException (mulan.data.InvalidDataFormatException)1 LabelNodeImpl (mulan.data.LabelNodeImpl)1 LabelsMetaDataImpl (mulan.data.LabelsMetaDataImpl)1 BinaryRelevanceAttributeEvaluator (mulan.dimensionalityReduction.BinaryRelevanceAttributeEvaluator)1 LabelPowersetAttributeEvaluator (mulan.dimensionalityReduction.LabelPowersetAttributeEvaluator)1 Ranker (mulan.dimensionalityReduction.Ranker)1 TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException)1 ASEvaluation (weka.attributeSelection.ASEvaluation)1 AttributeEvaluator (weka.attributeSelection.AttributeEvaluator)1 Attribute (weka.core.Attribute)1 Remove (weka.filters.unsupervised.attribute.Remove)1