use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class LuceneKeywordCPFE method extract.
/**
 * Builds the combined keyword n-gram features for a pair of views.
 *
 * <p>Keyword n-grams are collected separately for each view (using the per-view
 * min/max n settings), merged through {@code ComboUtils.getCombinedNgrams}, and
 * then handed to {@code addToFeatureArray} together with {@code topKSetCombo}.
 *
 * @param view1 the first view; must hold exactly one {@code TextClassificationTarget}
 * @param view2 the second view; must hold exactly one {@code TextClassificationTarget}
 * @return the set populated by {@code addToFeatureArray}
 * @throws TextClassificationException declared by the extractor contract
 */
@Override
public Set<Feature> extract(JCas view1, JCas view2) throws TextClassificationException {
    // Resolve the single classification target of each view up front.
    TextClassificationTarget firstTarget =
            JCasUtil.selectSingle(view1, TextClassificationTarget.class);
    TextClassificationTarget secondTarget =
            JCasUtil.selectSingle(view2, TextClassificationTarget.class);

    // Per-view keyword n-gram frequency distributions.
    FrequencyDistribution<String> firstNgrams = KeywordNGramUtils.getDocumentKeywordNgrams(
            view1, firstTarget, ngramMinN1, ngramMaxN1,
            markSentenceBoundary, markSentenceLocation, includeCommas, keywords);
    FrequencyDistribution<String> secondNgrams = KeywordNGramUtils.getDocumentKeywordNgrams(
            view2, secondTarget, ngramMinN2, ngramMaxN2,
            markSentenceBoundary, markSentenceLocation, includeCommas, keywords);

    // Cross-view combinations of the two distributions.
    FrequencyDistribution<String> combinedNgrams = ComboUtils.getCombinedNgrams(
            firstNgrams, secondNgrams, ngramMinNCombo, ngramMaxNCombo, ngramUseSymmetricalCombos);

    // NOTE(review): prefix is presumably read by addToFeatureArray when naming features - confirm.
    prefix = "comboKNG";

    Set<Feature> result = new HashSet<Feature>();
    addToFeatureArray(combinedNgrams, topKSetCombo, result);
    return result;
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class DiffNrOfCharactersPairFeatureExtractorTest method testExtract.
/**
 * Runs {@code DiffNrOfCharactersPairFeatureExtractor} on two segmented documents
 * and expects exactly one feature, named "DiffNrOfCharacters", with value 16.
 */
@Test
public void testExtract() throws ResourceInitializationException, AnalysisEngineProcessException, TextClassificationException {
    AnalysisEngine segmenter = createEngine(createEngineDescription(BreakIteratorSegmenter.class));

    // First view: the longer document.
    JCas firstView = segmenter.newJCas();
    firstView.setDocumentLanguage("en");
    firstView.setDocumentText("This is the text of view 1. And some more.");
    segmenter.process(firstView);

    // Second view: the shorter document.
    JCas secondView = segmenter.newJCas();
    secondView.setDocumentLanguage("en");
    secondView.setDocumentText("This is the text of view 2");
    segmenter.process(secondView);

    Set<Feature> extracted =
            new DiffNrOfCharactersPairFeatureExtractor().extract(firstView, secondView);

    // The size check guarantees the iterator below yields the only feature.
    assertEquals(1, extracted.size());
    assertFeature("DiffNrOfCharacters", 16, extracted.iterator().next());
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class EmoticonRatio method extract.
/**
 * Computes the ratio of {@code POS_EMO} annotations to {@code Token} annotations
 * covered by the classification target.
 *
 * <p>When the target covers no tokens the ratio is reported as {@code 0.0}.
 * Without this guard the floating-point division would produce {@code NaN}
 * (0/0) or {@code Infinity} (n/0) as the feature value.
 *
 * @param jCas the CAS holding the annotations
 * @param aTarget the span within which emoticons and tokens are counted
 * @return a single-element set with a numeric feature named after this class
 * @throws TextClassificationException declared by the extractor contract
 */
@Override
public Set<Feature> extract(JCas jCas, TextClassificationTarget aTarget) throws TextClassificationException {
    int nrOfEmoticons = JCasUtil.selectCovered(jCas, POS_EMO.class, aTarget).size();
    int nrOfTokens = JCasUtil.selectCovered(jCas, Token.class, aTarget).size();

    // Guard the denominator: an empty target must not yield NaN/Infinity.
    double ratio = (nrOfTokens == 0) ? 0.0 : (double) nrOfEmoticons / nrOfTokens;

    return new Feature(EmoticonRatio.class.getSimpleName(), ratio, FeatureType.NUMERIC).asSet();
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class NETest method nEFeatureExtractorTest.
/**
 * Indexes one location, one person and one organization (all at offsets 0-5)
 * plus two sentences (0-15 and 15-22) inside a target spanning 0-22, then
 * checks that {@code NamedEntityPerSentenceRatio} yields six features whose
 * values match those passed to {@code testFeatures}.
 */
@Test
public void nEFeatureExtractorTest() throws Exception {
    AnalysisEngine noOpEngine = createEngine(NoOpAnnotator.class);
    JCas cas = noOpEngine.newJCas();
    noOpEngine.process(cas);

    TextClassificationTarget target = new TextClassificationTarget(cas, 0, 22);
    target.addToIndexes();

    // Annotations are created first and indexed afterwards, in the same order.
    Location location = new Location(cas, 0, 5);
    Person person = new Person(cas, 0, 5);
    Organization organization = new Organization(cas, 0, 5);
    Sentence firstSentence = new Sentence(cas, 0, 15);
    Sentence secondSentence = new Sentence(cas, 15, 22);

    location.addToIndexes();
    person.addToIndexes();
    organization.addToIndexes();
    firstSentence.addToIndexes();
    secondSentence.addToIndexes();

    Set<Feature> extracted = new NamedEntityPerSentenceRatio().extract(cas, target);

    assertEquals(6, extracted.size());
    testFeatures(extracted, 1, 1, 1, 0.5f, 0.5f, 0.5f);
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class QuestionRatioTest method questionRatioFeatureExtractorTest.
/**
 * Segments a four-sentence text, wraps the whole document in a classification
 * target, and expects {@code QuestionsRatioFeatureExtractor} to return a single
 * {@code FN_QUESTION_RATIO} feature with value 0.5.
 */
@Test
public void questionRatioFeatureExtractorTest() throws Exception {
    AnalysisEngine segmenter = createEngine(createEngineDescription(BreakIteratorSegmenter.class));

    JCas cas = segmenter.newJCas();
    cas.setDocumentLanguage("en");
    cas.setDocumentText("Is he a tester???? Really?? He is a tester! Oh yes.");
    segmenter.process(cas);

    // The target covers the entire document text.
    int documentLength = cas.getDocumentText().length();
    TextClassificationTarget wholeDocument = new TextClassificationTarget(cas, 0, documentLength);
    wholeDocument.addToIndexes();

    QuestionsRatioFeatureExtractor ratioExtractor = new QuestionsRatioFeatureExtractor();
    List<Feature> extracted = new ArrayList<Feature>(ratioExtractor.extract(cas, wholeDocument));

    // The size check guarantees index 0 is the only feature.
    Assert.assertEquals(1, extracted.size());
    assertFeature(FN_QUESTION_RATIO, 0.5, extracted.get(0));
}
Aggregations