Search in sources:

Example 41 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

In the class InstanceIdFeatureTest, the method instanceIdFeatureTest:

/**
 * Checks the values produced by the three {@code InstanceIdFeature.retrieve} overloads
 * for a CAS carrying a {@code JCasId} of 123 and a single classification target with id 0.
 *
 * <p>Expected values: {@code "123_0"} for (jcas, target), {@code "123"} for (jcas) alone,
 * and {@code "123_5_0"} for (jcas, target, 5).
 *
 * @throws Exception
 *             if the engine cannot be created or the CAS cannot be processed
 */
@Test
public void instanceIdFeatureTest() throws Exception {
    AnalysisEngine engine = createEngine(NoOpAnnotator.class);
    JCas jcas = engine.newJCas();
    jcas.setDocumentLanguage("en");
    engine.process(jcas);

    // one classification target covering the first character, with id 0
    TextClassificationTarget unit1 = new TextClassificationTarget(jcas, 0, 1);
    unit1.setId(0);
    unit1.addToIndexes();

    // document-level id that prefixes every generated instance id
    JCasId id = new JCasId(jcas);
    id.setId(123);
    id.addToIndexes();

    Feature feature = InstanceIdFeature.retrieve(jcas, unit1);
    Feature feature2 = InstanceIdFeature.retrieve(jcas);
    Feature feature3 = InstanceIdFeature.retrieve(jcas, unit1, 5);

    // JUnit's contract is assertEquals(expected, actual); keeping that order makes
    // failure messages report the right side as "expected".
    assertEquals("123_0", feature.getValue());
    assertEquals("123", feature2.getValue());
    assertEquals("123_5_0", feature3.getValue());
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) JCas(org.apache.uima.jcas.JCas) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngine(org.apache.uima.analysis_engine.AnalysisEngine) Test(org.junit.Test)

Example 42 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

In the class InstanceExtractor, the method getUnitInstances:

/**
 * Builds one {@link Instance} for every {@link TextClassificationTarget} indexed in the CAS.
 *
 * <p>Each instance receives, in this order: the instance-id feature (only when
 * {@code addInstanceId} is set), the features of every configured extractor, the outcomes,
 * the weight, the id of the CAS and the id of the target as its sequence position.
 * No sequence id is assigned here.
 *
 * @param jcas
 *            the CAS to read targets and the {@link JCasId} from
 * @param supportSparseFeatures
 *            {@code true} to collect features via {@code getSparse}, {@code false} to use
 *            {@code getDense}
 * @return the instances, in the iteration order of the selected targets
 * @throws TextClassificationException
 *             if a configured extractor does not implement {@link FeatureExtractor}
 */
public List<Instance> getUnitInstances(JCas jcas, boolean supportSparseFeatures) throws TextClassificationException {
    final int documentId = JCasUtil.selectSingle(jcas, JCasId.class).getId();
    final Collection<TextClassificationTarget> units = JCasUtil.select(jcas, TextClassificationTarget.class);
    final List<Instance> result = new ArrayList<>();

    for (TextClassificationTarget unit : units) {
        final Instance current = new Instance();

        if (addInstanceId) {
            current.addFeature(InstanceIdFeature.retrieve(jcas, unit));
        }

        for (FeatureExtractorResource_ImplBase extractor : featureExtractors) {
            final boolean isUnitExtractor = extractor instanceof FeatureExtractor;
            if (!isUnitExtractor) {
                throw new TextClassificationException("Feature extractor does not implement interface [" + FeatureExtractor.class.getName() + "]: " + extractor.getResourceName());
            }
            if (!supportSparseFeatures) {
                current.addFeatures(getDense(jcas, unit, extractor));
            } else {
                current.addFeatures(getSparse(jcas, unit, extractor));
            }
        }

        // outcome label(s) and weight come from the target itself
        current.setOutcomes(getOutcomes(jcas, unit));
        current.setWeight(getWeight(jcas, unit));
        current.setJcasId(documentId);
        current.setSequencePosition(unit.getId());

        result.add(current);
    }

    return result;
}
Also used : JCasId(org.dkpro.tc.api.type.JCasId) FeatureExtractor(org.dkpro.tc.api.features.FeatureExtractor) PairFeatureExtractor(org.dkpro.tc.api.features.PairFeatureExtractor) Instance(org.dkpro.tc.api.features.Instance) TextClassificationException(org.dkpro.tc.api.exception.TextClassificationException) ArrayList(java.util.ArrayList) TextClassificationTarget(org.dkpro.tc.api.type.TextClassificationTarget) Feature(org.dkpro.tc.api.features.Feature) InstanceIdFeature(org.dkpro.tc.core.feature.InstanceIdFeature) FeatureExtractorResource_ImplBase(org.dkpro.tc.api.features.FeatureExtractorResource_ImplBase)

Example 43 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

In the class FeatureTestUtil, the method assertFeatures:

/**
 * Asserts that the given feature set contains at least one feature named
 * {@code expectedName} and that every feature with that name carries a numeric value
 * equal to {@code expectedValue} within {@code epsilon}.
 *
 * @param expectedName
 *            name of the feature that must be present
 * @param expectedValue
 *            numeric value the matching feature(s) are expected to hold
 * @param features
 *            the features to search; must not be {@code null}
 * @param epsilon
 *            maximum absolute difference tolerated between expected and actual value
 */
public static void assertFeatures(String expectedName, double expectedValue, Set<Feature> features, double epsilon) {
    Assert.assertNotNull("Feature set must not be null", features);
    boolean found = false;
    for (Feature f : features) {
        if (f.getName().equals(expectedName)) {
            found = true;
            Object actual = f.getValue();
            // Accept any numeric wrapper (Integer, Float, ...) instead of failing with a
            // ClassCastException when the value is not exactly a Double.
            Assert.assertTrue("Feature [" + expectedName + "] does not hold a numeric value: " + actual, actual instanceof Number);
            Assert.assertEquals("Unexpected value for feature [" + expectedName + "]", expectedValue, ((Number) actual).doubleValue(), epsilon);
        }
    }
    Assert.assertTrue("No feature named [" + expectedName + "] found", found);
}
Also used : Feature(org.dkpro.tc.api.features.Feature)

Example 44 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

In the class WekaDataWriter, the method getFeatureValues:

/**
 * Encodes the features of an instance as a {@code double} array with one slot per
 * attribute in the store; slots that no feature writes to stay at {@code 0.0}.
 *
 * <p>Encoding per feature type:
 * <ul>
 * <li>NUMERIC: the value's {@code doubleValue()}</li>
 * <li>BOOLEAN: {@code 1.0}/{@code 0.0} for a {@link Boolean}, otherwise the numeric value
 * of a {@link Double} or {@link Integer}</li>
 * <li>anything else: the index of the value's string form within the attribute; a string
 * attribute gets an unseen value added, a nominal attribute does not</li>
 * </ul>
 *
 * <p>NOTE(review): any {@link NullPointerException} raised while handling a feature is
 * swallowed and the feature is skipped. The original intent is to ignore attributes
 * unknown to the store, but the catch also hides null feature values - confirm this is
 * wanted.
 *
 * @param attributeStore
 *            supplies the attributes and their offsets in the result array
 * @param instance
 *            the instance whose features are encoded
 * @return the encoded values, sized to the number of attributes in the store
 * @throws IllegalArgumentException
 *             if a nominal/string feature maps to an attribute that is neither, or if a
 *             value is not defined for a nominal attribute
 */
private double[] getFeatureValues(AttributeStore attributeStore, Instance instance) {
    double[] values = new double[attributeStore.getAttributes().size()];

    for (Feature current : instance.getFeatures()) {
        try {
            Attribute target = attributeStore.getAttribute(current.getName());
            Object raw = current.getValue();
            double encoded;

            if (current.getType() == FeatureType.NUMERIC) {
                encoded = ((Number) raw).doubleValue();
            } else if (current.getType() == FeatureType.BOOLEAN) {
                if (raw instanceof Boolean) {
                    // true/false is mapped onto 1/0
                    encoded = ((Boolean) raw).booleanValue() ? 1.0d : 0.0d;
                } else if (raw instanceof Double) {
                    // value is already numeric
                    encoded = ((Double) raw).doubleValue();
                } else {
                    encoded = ((Integer) raw).doubleValue();
                }
            } else {
                // nominal or string feature: encode as index into the attribute's values
                if (!(target.isNominal() || target.isString())) {
                    throw new IllegalArgumentException("Attribute neither nominal nor string: " + raw);
                }
                String text = raw.toString();
                int index = target.indexOfValue(text);
                if (index == -1) {
                    if (target.isNominal()) {
                        throw new IllegalArgumentException("Value not defined for given nominal attribute!");
                    }
                    // string attributes accept new values on the fly
                    target.addStringValue(text);
                    index = target.indexOfValue(text);
                }
                encoded = index;
            }

            int slot = attributeStore.getAttributeOffset(target.name());
            if (slot != -1) {
                values[slot] = encoded;
            }
        } catch (NullPointerException ignored) {
            // ignore unseen attributes
        }
    }

    return values;
}
Also used : Attribute(weka.core.Attribute) Feature(org.dkpro.tc.api.features.Feature)

Example 45 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

In the class WekaUtilTest, the method instanceToArffTest:

/**
 * Writes three instances (two numeric features, one string feature whose name contains
 * braces, and one feature holding a {@code Values} constant) to an ARFF file and prints
 * the file content.
 *
 * <p>NOTE(review): this test makes no assertions on the written content; it only fails if
 * writing or reading the file throws.
 *
 * @throws Exception
 *             if the file cannot be created, written or read
 */
@Test
public void instanceToArffTest() throws Exception {
    Instance i1 = new Instance();
    i1.addFeature(new Feature("feature1", 2, FeatureType.NUMERIC));
    i1.addFeature(new Feature("feature2", 2, FeatureType.NUMERIC));
    i1.addFeature(new Feature("feature3_{{", "a", FeatureType.STRING));
    i1.addFeature(new Feature("feature4", Values.VALUE_1, FeatureType.NUMERIC));
    i1.setOutcomes("1");

    Instance i2 = new Instance();
    i2.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    i2.addFeature(new Feature("feature2", 1, FeatureType.NUMERIC));
    i2.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));
    i2.addFeature(new Feature("feature4", Values.VALUE_2, FeatureType.NUMERIC));
    i2.setOutcomes("2");

    Instance i3 = new Instance();
    i3.addFeature(new Feature("feature1", 1, FeatureType.NUMERIC));
    i3.addFeature(new Feature("feature2", 1, FeatureType.NUMERIC));
    i3.addFeature(new Feature("feature3_{{", "b", FeatureType.STRING));
    i3.addFeature(new Feature("feature4", Values.VALUE_3, FeatureType.NUMERIC));
    i3.setOutcomes("2");

    List<Instance> iList = new ArrayList<>();
    iList.add(i1);
    iList.add(i2);
    iList.add(i3);

    File outfile = new File("target/test/out.txt");
    // Create the parent directory only; calling mkdirs() on the file itself would create
    // a directory named "out.txt" where the output file is supposed to go.
    outfile.getParentFile().mkdirs();
    outfile.createNewFile();
    outfile.deleteOnExit();

    WekaUtils.instanceListToArffFile(outfile, iList);
    System.out.println(FileUtils.readFileToString(outfile, "utf-8"));
}
Also used : Instance(org.dkpro.tc.api.features.Instance) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature) File(java.io.File) Test(org.junit.Test)

Aggregations

Feature (org.dkpro.tc.api.features.Feature)94 Test (org.junit.Test)48 Instance (org.dkpro.tc.api.features.Instance)30 ArrayList (java.util.ArrayList)29 HashSet (java.util.HashSet)21 FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature)17 AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine)16 TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget)16 JCas (org.apache.uima.jcas.JCas)15 AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription)13 File (java.io.File)8 Attribute (weka.core.Attribute)8 Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token)7 Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence)6 TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException)5 Chunk (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.Chunk)4 OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger)4 BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter)4 Instances (weka.core.Instances)4 IOException (java.io.IOException)3