Search in sources :

Example 26 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class SharedNEsFeatureExtractorTest method extractTest1.

/**
 * Checks the "SharedNEs" feature of {@link SharedNEsFeatureExtractor} in two steps:
 * first with a named entity indexed only in {@code jcas1} (expected value {@code false}),
 * then after a named entity with the same offsets is also indexed in {@code jcas2}
 * (expected value {@code true}). Exactly one feature is expected in both cases.
 */
@Test
public void extractTest1() throws Exception {
    // Step 1: only the first view holds a named entity.
    NamedEntity firstViewEntity = new NamedEntity(jcas1, 0, 4);
    firstViewEntity.addToIndexes();
    SharedNEsFeatureExtractor sharedNes = new SharedNEsFeatureExtractor();

    Set<Feature> onlyFirstView = sharedNes.extract(jcas1, jcas2);
    assertEquals(1, onlyFirstView.size());
    // The size check above guarantees exactly one element to inspect.
    assertFeature("SharedNEs", false, onlyFirstView.iterator().next());

    // Step 2: index an entity with the same span in the second view and extract again.
    NamedEntity secondViewEntity = new NamedEntity(jcas2, 0, 4);
    secondViewEntity.addToIndexes();

    Set<Feature> bothViews = sharedNes.extract(jcas1, jcas2);
    assertEquals(1, bothViews.size());
    assertFeature("SharedNEs", true, bothViews.iterator().next());
}
Also used : NamedEntity(de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity) FeatureTestUtil.assertFeature(org.dkpro.tc.testing.FeatureTestUtil.assertFeature) Feature(org.dkpro.tc.api.features.Feature) SharedNEsFeatureExtractor(org.dkpro.tc.features.pair.core.ne.SharedNEsFeatureExtractor) Test(org.junit.Test)

Example 27 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class NGramPPipelineTest method testViewBlindFeaturesMarkedWithLocalView.

/**
 * Runs the n-gram pair pipeline with only view-blind unigrams enabled and with the
 * "mark view-blind n-grams with local view" option switched on, then verifies that
 * the resulting feature names carry a {@code view1allNG}/{@code view2allNG} prefix
 * and that the first instance has the expected number of set and unset features.
 */
@Test
public void testViewBlindFeaturesMarkedWithLocalView() throws Exception {
    NGramPPipelineTest test = new NGramPPipelineTest();
    test.initialize();
    test.parameters = new Object[] {
            LuceneNGramPFE.PARAM_UNIQUE_EXTRACTOR_NAME, "123",
            LuceneNGramPFE.PARAM_NGRAM_MIN_N, 1,
            LuceneNGramPFE.PARAM_NGRAM_MAX_N, 1,
            LuceneNGramPFE.PARAM_USE_VIEW1_NGRAMS_AS_FEATURES, false,
            LuceneNGramPFE.PARAM_USE_VIEW2_NGRAMS_AS_FEATURES, false,
            LuceneNGramPFE.PARAM_USE_VIEWBLIND_NGRAMS_AS_FEATURES, true,
            LuceneNGramPFE.PARAM_MARK_VIEWBLIND_NGRAMS_WITH_LOCAL_VIEW, true,
            LuceneNGramPFE.PARAM_SOURCE_LOCATION, test.lucenePath,
            LuceneNGramPMetaCollector.PARAM_TARGET_LOCATION, test.lucenePath };
    test.runPipeline();

    assertTrue(test.featureNames.first().startsWith("view1allNG")
            || test.featureNames.first().startsWith("view2allNG"));
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes
    // the failure message report the right value as "expected".
    assertEquals(12, test.featureNames.size());
    assertTrue(test.featureNames.contains("view1allNG_mice"));
    assertTrue(test.featureNames.contains("view2allNG_mice"));

    // Count features of the first instance whose value is 1 versus anything else.
    int pos = 0;
    int neg = 0;
    for (Feature feature : test.instanceList.get(0).getFeatures()) {
        // A primitive int is enough here; no need to box the value again.
        int value = ((Double) feature.getValue()).intValue();
        if (value == 1) {
            pos++;
        } else {
            neg++;
        }
    }
    assertEquals(8, pos);
    assertEquals(4, neg);
}
Also used : Feature(org.dkpro.tc.api.features.Feature) Test(org.junit.Test)

Example 28 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class KeywordCPPipelineTest method testNonBinaryFeatureValues.

// TODO: Write a symmetry test. Note that features will be the same. Needs different dataset.
/**
 * Runs the keyword combo-pair pipeline with binary feature values disabled and
 * verifies how many features of the first instance take each of the values 0 to 4.
 */
@Test
public void testNonBinaryFeatureValues() throws Exception {
    KeywordCPPipelineTest test = new KeywordCPPipelineTest();
    test.initialize();
    test.parameters = new Object[] {
            LuceneKeywordCPFE.PARAM_UNIQUE_EXTRACTOR_NAME, "123",
            LuceneKeywordCPFE.PARAM_USE_VIEW1_KEYWORD_NGRAMS_AS_FEATURES, false,
            LuceneKeywordCPFE.PARAM_USE_VIEW2_KEYWORD_NGRAMS_AS_FEATURES, false,
            LuceneKeywordCPFE.PARAM_USE_VIEWBLIND_KEYWORD_NGRAMS_AS_FEATURES, false,
            LuceneKeywordCPFE.PARAM_NGRAM_KEYWORDS_FILE, "src/test/resources/data/keywordlist.txt",
            LuceneKeywordCPFE.PARAM_SOURCE_LOCATION, test.lucenePath,
            LuceneKeywordCPMetaCollector.PARAM_TARGET_LOCATION, test.lucenePath,
            LuceneKeywordCPFE.PARAM_NGRAM_BINARY_FEATURE_VALUES_COMBO, false,
            LuceneKeywordCPFE.PARAM_KEYWORD_NGRAM_MAX_N_COMBO, 2,
            LuceneKeywordCPFE.PARAM_KEYWORD_NGRAM_SYMMETRY_COMBO, true };
    test.runPipeline();

    // countsByValue[v] holds the number of features whose integer value is v (0..4).
    // Values outside that range are ignored, exactly as before.
    int[] countsByValue = new int[5];
    for (Feature feature : test.instanceList.get(0).getFeatures()) {
        // Use a primitive int: the former "new Integer(value) == n" relied on a
        // deprecated constructor and allocated a throwaway object per comparison.
        int value = ((Double) feature.getValue()).intValue();
        if (value >= 0 && value < countsByValue.length) {
            countsByValue[value]++;
        }
    }

    // assertEquals takes (expected, actual) in that order.
    assertEquals(1, countsByValue[4]);
    assertEquals(6, countsByValue[3]);
    assertEquals(9, countsByValue[2]);
    assertEquals(8, countsByValue[1]);
    assertEquals(0, countsByValue[0]);
}
Also used : Feature(org.dkpro.tc.api.features.Feature) Test(org.junit.Test)

Example 29 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class NamedEntityPerSentenceRatio method extract.

/**
 * Counts the {@code Organization}, {@code Person} and {@code Location} annotations
 * covered by the target and emits, for each type, the absolute count and the count
 * per covered sentence (rounded to two decimal places).
 *
 * @param view    the JCas to read annotations from
 * @param aTarget the annotation whose covered span is inspected
 * @return six numeric features, or an empty set when the target covers no sentence
 */
@Override
public Set<Feature> extract(JCas view, TextClassificationTarget aTarget) throws TextClassificationException {
    Set<Feature> result = new TreeSet<Feature>();

    int organizations = JCasUtil.selectCovered(view, Organization.class, aTarget).size();
    int persons = JCasUtil.selectCovered(view, Person.class, aTarget).size();
    int locations = JCasUtil.selectCovered(view, Location.class, aTarget).size();
    int sentences = JCasUtil.selectCovered(view, Sentence.class, aTarget).size();

    // Without a covered sentence no feature is produced at all (also avoids dividing by zero).
    if (sentences <= 0) {
        return result;
    }

    result.add(new Feature("NrOfOrganizationEntities", organizations, FeatureType.NUMERIC));
    result.add(new Feature("NrOfPersonEntities", persons, FeatureType.NUMERIC));
    result.add(new Feature("NrOfLocationEntities", locations, FeatureType.NUMERIC));
    result.add(new Feature("NrOfOrganizationEntitiesPerSent", perSentence(organizations, sentences), FeatureType.NUMERIC));
    result.add(new Feature("NrOfPersonEntitiesPerSent", perSentence(persons, sentences), FeatureType.NUMERIC));
    result.add(new Feature("NrOfLocationEntitiesPerSent", perSentence(locations, sentences), FeatureType.NUMERIC));
    return result;
}

/** Returns {@code entityCount / sentenceCount} as a float rounded to two decimal places. */
private static float perSentence(int entityCount, int sentenceCount) {
    return Math.round(((float) entityCount / sentenceCount) * 100f) / 100f;
}
Also used : Organization(de.tudarmstadt.ukp.dkpro.core.api.ner.type.Organization) TreeSet(java.util.TreeSet) Feature(org.dkpro.tc.api.features.Feature) Person(de.tudarmstadt.ukp.dkpro.core.api.ner.type.Person) Sentence(de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence) Location(de.tudarmstadt.ukp.dkpro.core.api.ner.type.Location)

Example 30 with Feature

use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.

the class LibsvmDataFormatWriter method writeClassifierFormat.

/**
 * Writes one tab-separated line per instance: the outcome (raw for regression,
 * otherwise looked up in {@code outcomeMap}), whatever {@code injectSequenceId}
 * returns, and then the {@code featureId:value} pairs sorted by ascending feature id.
 * Features whose absolute value is below 1e-8 are left out. After all instances are
 * written, the instance-id, feature-name and outcome mappings are written to the
 * output directory. The writer is always closed and reset to {@code null} afterwards.
 *
 * @param in the instances to write
 * @throws AnalysisEngineProcessException wrapping any exception raised while writing
 */
@Override
public void writeClassifierFormat(Collection<Instance> in) throws AnalysisEngineProcessException {
    try {
        if (featureNames2id == null) {
            createFeatureNameMap();
        }
        initClassifierFormat();

        List<Instance> snapshot = new ArrayList<>(in);
        for (Instance current : snapshot) {
            Map<Integer, Double> nonZeroValues = new HashMap<>();
            recordInstanceId(current, maxId++, index2instanceId);

            // Collect every feature whose value is not (numerically) zero.
            for (Feature feature : current.getFeatures()) {
                Integer featureId = featureNames2id.get(feature.getName());
                Double featureValue = toValue(feature.getValue());
                boolean isZero = Math.abs(featureValue) < 0.00000001;
                if (!isZero) {
                    nonZeroValues.put(featureId, featureValue);
                }
            }

            // Feature ids are emitted in ascending order.
            List<Integer> sortedIds = new ArrayList<Integer>(nonZeroValues.keySet());
            Collections.sort(sortedIds);

            if (isRegression()) {
                bw.append(current.getOutcome() + "\t");
            } else {
                bw.append(outcomeMap.get(current.getOutcome()) + "\t");
            }
            bw.append(injectSequenceId(current));

            for (int pos = 0; pos < sortedIds.size(); pos++) {
                if (pos > 0) {
                    // separator goes between pairs only, never after the last one
                    bw.append("\t");
                }
                Integer featureId = sortedIds.get(pos);
                Double featureValue = nonZeroValues.get(featureId);
                bw.append("" + featureId.toString() + ":" + featureValue.toString());
            }
            bw.append("\n");
        }

        writeMapping(outputDirectory, INDEX2INSTANCEID, index2instanceId);
        writeFeatureName2idMapping(outputDirectory, AdapterFormat.getFeatureNameMappingFilename(), featureNames2id);
        writeOutcomeMapping(outputDirectory, AdapterFormat.getOutcomeMappingFilename(), outcomeMap);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    } finally {
        IOUtils.closeQuietly(bw);
        // important, we reopen the stream only if the pointer is null!
        bw = null;
    }
}
Also used : Instance(org.dkpro.tc.api.features.Instance) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Feature(org.dkpro.tc.api.features.Feature) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException) IOException(java.io.IOException) AnalysisEngineProcessException(org.apache.uima.analysis_engine.AnalysisEngineProcessException)

Aggregations

Feature (org.dkpro.tc.api.features.Feature): 94, Test (org.junit.Test): 48, Instance (org.dkpro.tc.api.features.Instance): 30, ArrayList (java.util.ArrayList): 29, HashSet (java.util.HashSet): 21, FeatureTestUtil.assertFeature (org.dkpro.tc.testing.FeatureTestUtil.assertFeature): 17, AnalysisEngine (org.apache.uima.analysis_engine.AnalysisEngine): 16, TextClassificationTarget (org.dkpro.tc.api.type.TextClassificationTarget): 16, JCas (org.apache.uima.jcas.JCas): 15, AnalysisEngineDescription (org.apache.uima.analysis_engine.AnalysisEngineDescription): 13, File (java.io.File): 8, Attribute (weka.core.Attribute): 8, Token (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token): 7, Sentence (de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence): 6, TextClassificationException (org.dkpro.tc.api.exception.TextClassificationException): 5, Chunk (de.tudarmstadt.ukp.dkpro.core.api.syntax.type.chunk.Chunk): 4, OpenNlpPosTagger (de.tudarmstadt.ukp.dkpro.core.opennlp.OpenNlpPosTagger): 4, BreakIteratorSegmenter (de.tudarmstadt.ukp.dkpro.core.tokit.BreakIteratorSegmenter): 4, Instances (weka.core.Instances): 4, IOException (java.io.IOException): 3