use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class SharedNEsFeatureExtractorTest method extractTest1.
/**
 * Checks the "SharedNEs" feature produced by {@link SharedNEsFeatureExtractor} for a pair of views.
 * <p>
 * First only {@code jcas1} carries a {@link NamedEntity}; the single extracted feature is expected to be
 * {@code false}. After a {@link NamedEntity} with the same offsets is added to {@code jcas2}, the single
 * extracted feature is expected to be {@code true}.
 */
@Test
public void extractTest1() throws Exception {
    SharedNEsFeatureExtractor sharedNeExtractor = new SharedNEsFeatureExtractor();

    // Phase 1: a named entity exists in the first view only.
    NamedEntity entityInFirstView = new NamedEntity(jcas1, 0, 4);
    entityInFirstView.addToIndexes();

    Set<Feature> extracted = sharedNeExtractor.extract(jcas1, jcas2);
    assertEquals(1, extracted.size());
    for (Feature current : extracted) {
        assertFeature("SharedNEs", false, current);
    }

    // Phase 2: the second view receives a named entity at the same offsets.
    NamedEntity entityInSecondView = new NamedEntity(jcas2, 0, 4);
    entityInSecondView.addToIndexes();

    extracted = sharedNeExtractor.extract(jcas1, jcas2);
    assertEquals(1, extracted.size());
    for (Feature current : extracted) {
        assertFeature("SharedNEs", true, current);
    }
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class NGramPPipelineTest method testViewBlindFeaturesMarkedWithLocalView.
/**
 * Runs the n-gram pair pipeline with only view-blind n-grams enabled and with those n-grams marked by
 * their local view (unigrams only, since min and max n are both 1).
 * <p>
 * Expectations: the first feature name starts with {@code view1allNG} or {@code view2allNG}, there are
 * 12 feature names in total, "mice" appears once per view prefix, and the first instance has 8 features
 * whose value is 1 and 4 features with any other value.
 */
@Test
public void testViewBlindFeaturesMarkedWithLocalView() throws Exception {
    NGramPPipelineTest test = new NGramPPipelineTest();
    test.initialize();
    test.parameters = new Object[] {
            LuceneNGramPFE.PARAM_UNIQUE_EXTRACTOR_NAME, "123",
            LuceneNGramPFE.PARAM_NGRAM_MIN_N, 1,
            LuceneNGramPFE.PARAM_NGRAM_MAX_N, 1,
            LuceneNGramPFE.PARAM_USE_VIEW1_NGRAMS_AS_FEATURES, false,
            LuceneNGramPFE.PARAM_USE_VIEW2_NGRAMS_AS_FEATURES, false,
            LuceneNGramPFE.PARAM_USE_VIEWBLIND_NGRAMS_AS_FEATURES, true,
            LuceneNGramPFE.PARAM_MARK_VIEWBLIND_NGRAMS_WITH_LOCAL_VIEW, true,
            LuceneNGramPFE.PARAM_SOURCE_LOCATION, test.lucenePath,
            LuceneNGramPMetaCollector.PARAM_TARGET_LOCATION, test.lucenePath };
    test.runPipeline();

    assertTrue(test.featureNames.first().startsWith("view1allNG")
            || test.featureNames.first().startsWith("view2allNG"));
    // JUnit's contract is assertEquals(expected, actual); keeping that order makes failure messages
    // report "expected 12 but was N" instead of the reverse.
    assertEquals(12, test.featureNames.size());
    assertTrue(test.featureNames.contains("view1allNG_mice"));
    assertTrue(test.featureNames.contains("view2allNG_mice"));

    int pos = 0;
    int neg = 0;
    for (Feature feature : test.instanceList.get(0).getFeatures()) {
        // Feature values are cast to Double here; truncate to a primitive int for counting.
        int value = ((Double) feature.getValue()).intValue();
        if (value == 1) {
            pos++;
        } else {
            neg++;
        }
    }
    assertEquals(8, pos);
    assertEquals(4, neg);
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class KeywordCPPipelineTest method testNonBinaryFeatureValues.
// TODO: Write a symmetry test. Note that features will be the same. Needs different dataset.
/**
 * Runs the keyword combo-pair pipeline with binary feature values switched off
 * ({@code PARAM_NGRAM_BINARY_FEATURE_VALUES_COMBO = false}) and checks the distribution of the
 * integer-truncated feature values of the first instance.
 * <p>
 * Expected histogram: one feature with value 4, six with 3, nine with 2, eight with 1 and none with 0.
 * Values outside the range 0..4 are not counted.
 */
@Test
public void testNonBinaryFeatureValues() throws Exception {
    KeywordCPPipelineTest test = new KeywordCPPipelineTest();
    test.initialize();
    test.parameters = new Object[] {
            LuceneKeywordCPFE.PARAM_UNIQUE_EXTRACTOR_NAME, "123",
            LuceneKeywordCPFE.PARAM_USE_VIEW1_KEYWORD_NGRAMS_AS_FEATURES, false,
            LuceneKeywordCPFE.PARAM_USE_VIEW2_KEYWORD_NGRAMS_AS_FEATURES, false,
            LuceneKeywordCPFE.PARAM_USE_VIEWBLIND_KEYWORD_NGRAMS_AS_FEATURES, false,
            LuceneKeywordCPFE.PARAM_NGRAM_KEYWORDS_FILE, "src/test/resources/data/keywordlist.txt",
            LuceneKeywordCPFE.PARAM_SOURCE_LOCATION, test.lucenePath,
            LuceneKeywordCPMetaCollector.PARAM_TARGET_LOCATION, test.lucenePath,
            LuceneKeywordCPFE.PARAM_NGRAM_BINARY_FEATURE_VALUES_COMBO, false,
            LuceneKeywordCPFE.PARAM_KEYWORD_NGRAM_MAX_N_COMBO, 2,
            LuceneKeywordCPFE.PARAM_KEYWORD_NGRAM_SYMMETRY_COMBO, true };
    test.runPipeline();

    int four = 0;
    int three = 0;
    int two = 0;
    int one = 0;
    int zero = 0;
    for (Feature feature : test.instanceList.get(0).getFeatures()) {
        // Use a primitive int: the former "new Integer(value) == n" boxed via a deprecated constructor
        // only to be unboxed again for the comparison.
        int value = ((Double) feature.getValue()).intValue();
        switch (value) {
        case 4:
            four++;
            break;
        case 3:
            three++;
            break;
        case 2:
            two++;
            break;
        case 1:
            one++;
            break;
        case 0:
            zero++;
            break;
        default:
            // values outside 0..4 are intentionally not counted
            break;
        }
    }
    // assertEquals(expected, actual) so that failure messages read the right way round.
    assertEquals(1, four);
    assertEquals(6, three);
    assertEquals(9, two);
    assertEquals(8, one);
    assertEquals(0, zero);
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class NamedEntityPerSentenceRatio method extract.
/**
 * Counts the {@link Organization}, {@link Person} and {@link Location} annotations covered by the
 * target and emits, for each of the three types, the raw count and the count per covered
 * {@link Sentence}, rounded to two decimal places.
 *
 * @param view
 *            the JCas view the annotations are selected from
 * @param aTarget
 *            the classification target whose span restricts the selection
 * @return six numeric features, or an empty set when the target covers no sentence
 * @throws TextClassificationException
 *             declared by the extractor contract; not thrown directly by this method body
 */
@Override
public Set<Feature> extract(JCas view, TextClassificationTarget aTarget) throws TextClassificationException {
    Set<Feature> features = new TreeSet<Feature>();

    int organizations = JCasUtil.selectCovered(view, Organization.class, aTarget).size();
    int persons = JCasUtil.selectCovered(view, Person.class, aTarget).size();
    int locations = JCasUtil.selectCovered(view, Location.class, aTarget).size();
    int sentences = JCasUtil.selectCovered(view, Sentence.class, aTarget).size();

    // Without a sentence there is no denominator for the ratios, so no feature is emitted at all.
    if (sentences <= 0) {
        return features;
    }

    features.add(new Feature("NrOfOrganizationEntities", organizations, FeatureType.NUMERIC));
    features.add(new Feature("NrOfPersonEntities", persons, FeatureType.NUMERIC));
    features.add(new Feature("NrOfLocationEntities", locations, FeatureType.NUMERIC));

    // Scale by 100, round to the nearest int and scale back: keeps two decimal places.
    float organizationsPerSentence = Math.round(((float) organizations / sentences) * 100f) / 100f;
    features.add(new Feature("NrOfOrganizationEntitiesPerSent", organizationsPerSentence, FeatureType.NUMERIC));

    float personsPerSentence = Math.round(((float) persons / sentences) * 100f) / 100f;
    features.add(new Feature("NrOfPersonEntitiesPerSent", personsPerSentence, FeatureType.NUMERIC));

    float locationsPerSentence = Math.round(((float) locations / sentences) * 100f) / 100f;
    features.add(new Feature("NrOfLocationEntitiesPerSent", locationsPerSentence, FeatureType.NUMERIC));

    return features;
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class LibsvmDataFormatWriter method writeClassifierFormat.
/**
 * Writes the given instances to the writer {@code bw}, one line per instance, and afterwards stores
 * the index-to-instance-id, feature-name-to-id and outcome mappings in the output directory.
 * <p>
 * Each line consists of the outcome (the raw outcome for regression, otherwise its entry in
 * {@code outcomeMap}) followed by a tab, the result of {@code injectSequenceId}, and the
 * tab-separated {@code id:value} pairs of all features whose absolute value is not below 1e-8,
 * ordered by ascending feature id.
 *
 * @param in
 *            the instances to write
 * @throws AnalysisEngineProcessException
 *             wrapping any exception raised while building or writing the output
 */
@Override
public void writeClassifierFormat(Collection<Instance> in) throws AnalysisEngineProcessException {
    try {
        if (featureNames2id == null) {
            createFeatureNameMap();
        }
        initClassifierFormat();

        List<Instance> snapshot = new ArrayList<>(in);
        for (Instance current : snapshot) {
            Map<Integer, Double> sparseValues = new HashMap<>();
            recordInstanceId(current, maxId++, index2instanceId);

            for (Feature feature : current.getFeatures()) {
                Integer featureId = featureNames2id.get(feature.getName());
                Double featureValue = toValue(feature.getValue());
                // Keep everything that is not (approximately) zero. The negated "<" is deliberate:
                // it retains NaN values exactly as a "skip if below threshold" check would.
                if (!(Math.abs(featureValue) < 0.00000001)) {
                    sparseValues.put(featureId, featureValue);
                }
            }

            List<Integer> sortedIds = new ArrayList<Integer>(sparseValues.keySet());
            Collections.sort(sortedIds);

            if (!isRegression()) {
                bw.append(outcomeMap.get(current.getOutcome()) + "\t");
            } else {
                bw.append(current.getOutcome() + "\t");
            }
            bw.append(injectSequenceId(current));

            // Emit the delimiter before every pair except the first, so the line has no trailing tab.
            String delimiter = "";
            for (Integer featureId : sortedIds) {
                Double featureValue = sparseValues.get(featureId);
                bw.append(delimiter);
                bw.append("" + featureId.toString() + ":" + featureValue.toString());
                delimiter = "\t";
            }
            bw.append("\n");
        }

        writeMapping(outputDirectory, INDEX2INSTANCEID, index2instanceId);
        writeFeatureName2idMapping(outputDirectory, AdapterFormat.getFeatureNameMappingFilename(),
                featureNames2id);
        writeOutcomeMapping(outputDirectory, AdapterFormat.getOutcomeMappingFilename(), outcomeMap);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    } finally {
        IOUtils.closeQuietly(bw);
        // important, we reopen the stream only if the pointer is null!
        bw = null;
    }
}
Aggregations