use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class CharacterNGram method extract.
/**
 * Produces one boolean feature for every key of {@code topKSet}, stating whether that
 * n-gram occurs among the character n-grams collected for the given target.
 *
 * @param jCas the CAS being processed; this method does not read it directly
 * @param aTarget the classification target handed to the character n-gram helper
 * @return a set holding one feature per {@code topKSet} key, each named
 *         {@code getFeaturePrefix() + "_" + ngram}
 * @throws TextClassificationException propagated from the calls made here, if any raise it
 */
@Override
public Set<Feature> extract(JCas jCas, TextClassificationTarget aTarget) throws TextClassificationException {
    Set<Feature> result = new HashSet<Feature>();

    // '^' and '$' are passed through to the helper, presumably as begin/end boundary
    // markers -- TODO confirm against CharacterNGramMC.
    FrequencyDistribution<String> targetNgrams = CharacterNGramMC.getAnnotationCharacterNgrams(
            aTarget, ngramLowerCase, ngramMinN, ngramMaxN, '^', '$');

    for (String candidate : topKSet.getKeys()) {
        boolean present = targetNgrams.getKeys().contains(candidate);
        String featureName = getFeaturePrefix() + "_" + candidate;

        if (!present) {
            // NOTE(review): the extra 'true' argument presumably flags the value as a
            // default value -- verify against the Feature constructor.
            result.add(new Feature(featureName, 0, true, FeatureType.BOOLEAN));
            continue;
        }
        result.add(new Feature(featureName, 1, FeatureType.BOOLEAN));
    }
    return result;
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class NgramUnitTest method evaluateExtractedFeatures.
/**
 * Reads the JSON feature file located in {@code output}, parses every line into an
 * {@link Instance} and verifies the extraction result: eight instances in total, each
 * carrying exactly one feature, two of them with value 1.0 and six with value 0.0.
 *
 * @param output directory containing the file named {@code JsonDataWriter.JSON_FILE_NAME}
 * @throws Exception if the file cannot be read or a line cannot be parsed
 * @throws IllegalStateException if a feature value is neither 1.0 nor 0.0
 */
private void evaluateExtractedFeatures(File output) throws Exception {
    Gson gson = new Gson();
    List<String> lines = FileUtils.readLines(new File(output, JsonDataWriter.JSON_FILE_NAME), "utf-8");

    // Each line of the file is parsed on its own as one Instance.
    List<Instance> instances = new ArrayList<>();
    for (String line : lines) {
        instances.add(gson.fromJson(line, Instance.class));
    }
    assertEquals(8, instances.size());

    int numFeatValueOne = 0;
    int numFeatValuesZero = 0;
    for (Instance instance : instances) {
        List<Feature> features = new ArrayList<Feature>(instance.getFeatures());
        assertEquals(1, features.size());

        Object value = features.get(0).getValue();
        // Cast once; this fails with a ClassCastException if the value is not a Double.
        double numeric = (double) value;
        if (numeric == 1.0) {
            numFeatValueOne++;
        } else if (numeric == 0.0) {
            numFeatValuesZero++;
        } else {
            throw new IllegalStateException("Value should either be 1.0 or 0.0 but was [" + value + "]");
        }
    }

    assertEquals(2, numFeatValueOne);
    assertEquals(6, numFeatValuesZero);
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class DiffNounChunkCharacterLengthTest method extractTest1.
/**
 * Adds a chunk spanning offsets 0-4 to both {@code jcas1} and {@code jcas2} and expects
 * the single extracted feature "DiffNounPhraseCharacterLength" to be 0.0.
 */
@Test
public void extractTest1() throws Exception {
    // Identical chunk spans in both CASes.
    Chunk firstChunk = new Chunk(jcas1, 0, 4);
    firstChunk.addToIndexes();
    Chunk secondChunk = new Chunk(jcas2, 0, 4);
    secondChunk.addToIndexes();

    Set<Feature> extracted = new DiffNounChunkCharacterLength().extract(jcas1, jcas2);

    // Exactly one feature is expected, so it can be fetched directly after the size check.
    assertEquals(1, extracted.size());
    Feature onlyFeature = extracted.iterator().next();
    FeatureTestUtil.assertFeature("DiffNounPhraseCharacterLength", 0.0, onlyFeature, 0.0001);
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class DiffNounChunkCharacterLengthTest method extractTest2.
/**
 * Adds a chunk spanning offsets 0-4 to {@code jcas1} and one spanning 0-7 to {@code jcas2},
 * and expects the single extracted feature "DiffNounPhraseCharacterLength" to be -3.0.
 */
@Test
public void extractTest2() throws Exception {
    // The chunk in the second CAS is three characters longer than the one in the first.
    Chunk shorterChunk = new Chunk(jcas1, 0, 4);
    shorterChunk.addToIndexes();
    Chunk longerChunk = new Chunk(jcas2, 0, 7);
    longerChunk.addToIndexes();

    Set<Feature> extracted = new DiffNounChunkCharacterLength().extract(jcas1, jcas2);

    // Exactly one feature is expected, so it can be fetched directly after the size check.
    assertEquals(1, extracted.size());
    Feature onlyFeature = extracted.iterator().next();
    assertFeature("DiffNounPhraseCharacterLength", -3.0, onlyFeature, 0.0001);
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class DiffNrOfSentencesPairFeatureExtractorTest method testExtract.
/**
 * Runs a {@code BreakIteratorSegmenter} engine over two English texts and expects the
 * single extracted feature "DiffNrOfSentences" to have the value 1.
 */
@Test
public void testExtract() throws ResourceInitializationException, AnalysisEngineProcessException, TextClassificationException {
    AnalysisEngine segmenter = createEngine(createEngineDescription(BreakIteratorSegmenter.class));

    // First document.
    JCas firstCas = segmenter.newJCas();
    firstCas.setDocumentLanguage("en");
    firstCas.setDocumentText("This is the text of view 1. And some more.");
    segmenter.process(firstCas);

    // Second document.
    JCas secondCas = segmenter.newJCas();
    secondCas.setDocumentLanguage("en");
    secondCas.setDocumentText("This is the text of view 2.");
    segmenter.process(secondCas);

    Set<Feature> extracted = new DiffNrOfSentencesPairFeatureExtractor().extract(firstCas, secondCas);

    // Exactly one feature is expected, so it can be fetched directly after the size check.
    assertEquals(1, extracted.size());
    Feature onlyFeature = extracted.iterator().next();
    assertFeature("DiffNrOfSentences", 1, onlyFeature);
}
Aggregations