Search in sources:

Example 86 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class BaselinePairFeatureTest method extractTest.

/**
 * Runs the always-zero pair feature extractor through a segmenter engine and checks that it
 * yields exactly one feature, named "BaselineFeature", with value 0.
 */
@Test
public void extractTest() throws Exception {
    AnalysisEngine segmenterEngine = createEngine(createEngineDescription(BreakIteratorSegmenter.class));
    PairFeatureExtractor baselineExtractor = new AlwaysZeroPairFeatureExtractor();

    Set<Feature> extracted = runExtractor(segmenterEngine, baselineExtractor);

    // exactly one feature is expected ...
    assertEquals(1, extracted.size());
    // ... and every returned feature must be the zero-valued baseline feature
    for (Feature baseline : extracted) {
        assertFeature("BaselineFeature", 0, baseline);
    }
}
Also used : AlwaysZeroPairFeatureExtractor(org.dkpro.tc.features.pair.core.AlwaysZeroPairFeatureExtractor) PairFeatureExtractor(org.dkpro.tc.api.features.PairFeatureExtractor) AnalysisEngineDescription(org.apache.uima.analysis_engine.AnalysisEngineDescription) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 87 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class WekaUtils method getFeatureValues.

/**
 * Encodes the features of one instance as an array of attribute values.
 * <p>
 * The array has one slot per attribute in the store; slots that no feature writes to keep the
 * array default of {@code 0.0}. {@link Number} values are stored as their double value,
 * {@link Boolean} values as {@code 1.0} / {@code 0.0}, and any other non-null value as the
 * index of its string form within the (nominal or string) attribute. Unknown values of string
 * attributes are added to the attribute on the fly.
 * <p>
 * Features whose name is not registered in the attribute store are skipped.
 *
 * @param attributeStore
 *            weka attribute store
 * @param instance
 *            tc instances
 * @return array of double values
 * @throws IllegalArgumentException
 *             if a known feature has a {@code null} value, if a non-numeric, non-boolean value
 *             belongs to an attribute that is neither nominal nor string, or if a value is not
 *             defined for its nominal attribute
 */
private static double[] getFeatureValues(AttributeStore attributeStore, Instance instance) {
    double[] featureValues = new double[attributeStore.getAttributes().size()];
    for (Feature feature : instance.getFeatures()) {
        try {
            String featureName = feature.getName();
            if (!attributeStore.containsAttributeName(featureName)) {
                // ignore unseen attributes - checked explicitly so that the common case does
                // not rely on an exception being thrown and caught
                continue;
            }
            Attribute attribute = attributeStore.getAttribute(featureName);
            // read the value once; every branch below works on the same object
            Object featureValue = feature.getValue();
            double attributeValue;
            if (featureValue instanceof Number) {
                // numeric attribute
                attributeValue = ((Number) featureValue).doubleValue();
            } else if (featureValue instanceof Boolean) {
                // boolean attribute
                attributeValue = (Boolean) featureValue ? 1.0d : 0.0d;
            } else if (featureValue == null) {
                // null
                throw new IllegalArgumentException("You have an instance which doesn't specify a value for the feature " + featureName);
            } else {
                // nominal or string
                if (!attribute.isNominal() && !attribute.isString()) {
                    throw new IllegalArgumentException("Attribute neither nominal nor string: " + featureValue);
                }
                String stringValue = featureValue.toString();
                int valIndex = attribute.indexOfValue(stringValue);
                if (valIndex == -1) {
                    if (attribute.isNominal()) {
                        throw new IllegalArgumentException("Value not defined for given nominal attribute!");
                    } else {
                        // string attributes are open-ended: register the new value and use its index
                        attribute.addStringValue(stringValue);
                        valIndex = attribute.indexOfValue(stringValue);
                    }
                }
                attributeValue = valIndex;
            }
            int offset = attributeStore.getAttributeOffset(attribute.name());
            if (offset != -1) {
                featureValues[offset] = attributeValue;
            }
        } catch (NullPointerException ignored) {
            // Fallback only: the explicit check above handles unseen attributes. The catch is
            // kept so that any lookup that still fails with a NullPointerException skips the
            // feature, exactly as before, instead of aborting the whole instance.
            // NOTE(review): this can also hide genuine bugs - consider removing it once the
            // AttributeStore contract for unknown names is confirmed.
        }
    }
    return featureValues;
}
Also used : Attribute(weka.core.Attribute) Feature(org.dkpro.tc.api.features.Feature)

Example 88 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class MekaDataWriter method getFeatureValues.

/**
 * Encodes the features of one instance as an array of attribute values.
 * <p>
 * The array has one slot per attribute in the store; slots that no feature writes to keep the
 * array default of {@code 0.0}. {@link Number} values are stored as their double value,
 * {@link Boolean} values as {@code 1.0} / {@code 0.0}, and any other non-null value as the
 * index of its string form within the (nominal or string) attribute. Unknown values of string
 * attributes are added to the attribute on the fly.
 * <p>
 * Features whose name is not registered in the attribute store are skipped.
 *
 * @param attributeStore
 *            store providing the attributes and their offsets
 * @param instance
 *            instance whose features are encoded
 * @return array of double values, indexed by attribute offset
 * @throws IllegalArgumentException
 *             if a known feature has a {@code null} value, if a non-numeric, non-boolean value
 *             belongs to an attribute that is neither nominal nor string, or if a value is not
 *             defined for its nominal attribute
 */
private double[] getFeatureValues(AttributeStore attributeStore, Instance instance) {
    double[] featureValues = new double[attributeStore.getAttributes().size()];
    for (Feature feature : instance.getFeatures()) {
        try {
            String featureName = feature.getName();
            if (!attributeStore.containsAttributeName(featureName)) {
                // ignore unseen attributes - checked explicitly so that the common case does
                // not rely on an exception being thrown and caught
                continue;
            }
            Attribute attribute = attributeStore.getAttribute(featureName);
            // read the value once; every branch below works on the same object
            Object featureValue = feature.getValue();
            double attributeValue;
            if (featureValue instanceof Number) {
                // numeric attribute
                attributeValue = ((Number) featureValue).doubleValue();
            } else if (featureValue instanceof Boolean) {
                // boolean attribute
                attributeValue = (Boolean) featureValue ? 1.0d : 0.0d;
            } else if (featureValue == null) {
                // null
                throw new IllegalArgumentException("You have an instance which doesn't specify a value for the feature " + featureName);
            } else {
                // nominal or string
                if (!attribute.isNominal() && !attribute.isString()) {
                    throw new IllegalArgumentException("Attribute neither nominal nor string: " + featureValue);
                }
                String stringValue = featureValue.toString();
                int valIndex = attribute.indexOfValue(stringValue);
                if (valIndex == -1) {
                    if (attribute.isNominal()) {
                        throw new IllegalArgumentException("Value not defined for given nominal attribute!");
                    } else {
                        // string attributes are open-ended: register the new value and use its index
                        attribute.addStringValue(stringValue);
                        valIndex = attribute.indexOfValue(stringValue);
                    }
                }
                attributeValue = valIndex;
            }
            int offset = attributeStore.getAttributeOffset(attribute.name());
            if (offset != -1) {
                featureValues[offset] = attributeValue;
            }
        } catch (NullPointerException ignored) {
            // Fallback only: the explicit check above handles unseen attributes. The catch is
            // kept so that any lookup that still fails with a NullPointerException skips the
            // feature, exactly as before, instead of aborting the whole instance.
            // NOTE(review): this can also hide genuine bugs - consider removing it once the
            // AttributeStore contract for unknown names is confirmed.
        }
    }
    return featureValues;
}
Also used : Attribute(weka.core.Attribute) Feature(org.dkpro.tc.api.features.Feature)

Example 89 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class WekaFeatureEncoder method getAttributeStore.

/**
 * Builds an attribute store holding one attribute per distinct feature name found in the
 * given instances.
 * <p>
 * Instances and their features are visited in iteration order; the first feature seen for a
 * name determines the attribute, later features with the same name are skipped.
 *
 * @param instances
 *            instances whose features are collected
 * @return the populated attribute store
 * @throws TextClassificationException
 *             propagated from the calls made while building the store
 */
public static AttributeStore getAttributeStore(Collection<Instance> instances) throws TextClassificationException {
    AttributeStore store = new AttributeStore();
    for (Instance current : instances) {
        for (Feature candidate : current.getFeatures()) {
            String name = candidate.getName();
            if (store.containsAttributeName(name)) {
                // an attribute for this name was already registered by an earlier feature
                continue;
            }
            store.addAttribute(name, featureToAttribute(candidate));
        }
    }
    return store;
}
Also used : AttributeStore(org.dkpro.tc.ml.weka.util.AttributeStore) Instance(org.dkpro.tc.api.features.Instance) Attribute(weka.core.Attribute) Feature(org.dkpro.tc.api.features.Feature)

Example 90 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class FeatureTestUtil method assertFeatures.

/**
 * Asserts that the given set contains at least one feature with the expected name and that
 * every feature with that name has the expected integer value.
 *
 * @param expectedName
 *            name of the feature that must be present
 * @param expectedValue
 *            value every feature with that name must have
 * @param features
 *            features to search; must not be {@code null}
 */
public static void assertFeatures(String expectedName, int expectedValue, Set<Feature> features) {
    Assert.assertNotNull("Feature set must not be null", features);
    boolean found = false;
    for (Feature f : features) {
        if (f.getName().equals(expectedName)) {
            found = true;
            // the cast fails with a ClassCastException if the stored value is not an Integer
            Assert.assertEquals("Unexpected value for feature '" + expectedName + "'", expectedValue, (int) f.getValue());
        }
    }
    Assert.assertTrue("No feature named '" + expectedName + "' found", found);
}
Also used : Feature(org.dkpro.tc.api.features.Feature)

Aggregations

Feature (org.dkpro.tc.api.features.Feature)94 Test (org.junit.Test)48 Instance (org.dkpro.tc.api.features.Instance)30 ArrayList (java.util.ArrayList)29 HashSet (java.util.HashSet)21 FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature)17 AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine)16 TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget)16 JCas (org.apache.uima.jcas.JCas)15 AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription)13 File (java.io.File)8 Attribute (weka.core.Attribute)8 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)7 Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)6 TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException)5 Chunk (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.Chunk)4 OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger)4 BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter)4 Instances (weka.core.Instances)4 IOException (java.io.IOException)3