use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.
the class LuceneKeywordCPFE method extract.
/**
 * Builds the combined keyword n-gram features for a pair of views.
 *
 * <p>The single {@link TextClassificationTarget} of each view is looked up, the keyword n-grams
 * of both views are collected via {@code KeywordNGramUtils.getDocumentKeywordNgrams}, merged by
 * {@code ComboUtils.getCombinedNgrams}, and handed to {@code addToFeatureArray} together with
 * {@code topKSetCombo}.
 *
 * @param view1 the first view; must contain exactly one target for {@code selectSingle}
 * @param view2 the second view; must contain exactly one target for {@code selectSingle}
 * @return the features that {@code addToFeatureArray} placed into a fresh set
 * @throws TextClassificationException declared by the extractor contract
 */
@Override
public Set<Feature> extract(JCas view1, JCas view2) throws TextClassificationException {
    // Resolve the classification target of each view up front.
    TextClassificationTarget targetOfView1 =
            JCasUtil.selectSingle(view1, TextClassificationTarget.class);
    TextClassificationTarget targetOfView2 =
            JCasUtil.selectSingle(view2, TextClassificationTarget.class);

    // Per-view keyword n-grams; each view uses its own min/max n-gram size.
    FrequencyDistribution<String> ngramsOfView1 = KeywordNGramUtils.getDocumentKeywordNgrams(
            view1, targetOfView1, ngramMinN1, ngramMaxN1,
            markSentenceBoundary, markSentenceLocation, includeCommas, keywords);
    FrequencyDistribution<String> ngramsOfView2 = KeywordNGramUtils.getDocumentKeywordNgrams(
            view2, targetOfView2, ngramMinN2, ngramMaxN2,
            markSentenceBoundary, markSentenceLocation, includeCommas, keywords);

    // Cross-view combinations of the two n-gram distributions.
    FrequencyDistribution<String> combinedNgrams = ComboUtils.getCombinedNgrams(
            ngramsOfView1, ngramsOfView2, ngramMinNCombo, ngramMaxNCombo,
            ngramUseSymmetricalCombos);

    // The prefix is assigned before the features are built, as in the original ordering
    // (presumably addToFeatureArray reads it for feature naming — confirm in the base class).
    prefix = "comboKNG";

    Set<Feature> result = new HashSet<Feature>();
    addToFeatureArray(combinedNgrams, topKSetCombo, result);
    return result;
}
use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.
the class NETest method nEFeatureExtractorTest.
/**
 * Runs {@code NamedEntityPerSentenceRatio} on a CAS holding one Location, one Person and one
 * Organization annotation plus two sentences, and checks that six features come back with the
 * values passed to {@code testFeatures}.
 */
@Test
public void nEFeatureExtractorTest() throws Exception {
    AnalysisEngine noOpEngine = createEngine(NoOpAnnotator.class);
    JCas cas = noOpEngine.newJCas();
    noOpEngine.process(cas);

    // The classification target covers offsets 0..22, i.e. both sentences below.
    TextClassificationTarget target = new TextClassificationTarget(cas, 0, 22);
    target.addToIndexes();

    // One named entity of each kind, all on the span 0..5.
    Location location = new Location(cas, 0, 5);
    Person person = new Person(cas, 0, 5);
    Organization organization = new Organization(cas, 0, 5);

    // Two adjacent sentences: 0..15 and 15..22.
    Sentence firstSentence = new Sentence(cas, 0, 15);
    Sentence secondSentence = new Sentence(cas, 15, 22);

    location.addToIndexes();
    person.addToIndexes();
    organization.addToIndexes();
    firstSentence.addToIndexes();
    secondSentence.addToIndexes();

    NamedEntityPerSentenceRatio ratioExtractor = new NamedEntityPerSentenceRatio();
    Set<Feature> extracted = ratioExtractor.extract(cas, target);

    assertEquals(6, extracted.size());
    // Three 1s followed by three 0.5f — presumably per-type counts and per-sentence ratios;
    // confirm the parameter meaning against testFeatures.
    testFeatures(extracted, 1, 1, 1, 0.5f, 0.5f, 0.5f);
}
use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.
the class QuestionRatioTest method questionRatioFeatureExtractorTest.
/**
 * Segments a short English text with {@code BreakIteratorSegmenter}, runs
 * {@code QuestionsRatioFeatureExtractor} over a target spanning the whole text, and expects a
 * single {@code FN_QUESTION_RATIO} feature with value 0.5.
 */
@Test
public void questionRatioFeatureExtractorTest() throws Exception {
    AnalysisEngine segmenter = createEngine(createEngineDescription(BreakIteratorSegmenter.class));

    JCas cas = segmenter.newJCas();
    cas.setDocumentLanguage("en");
    cas.setDocumentText("Is he a tester???? Really?? He is a tester! Oh yes.");
    segmenter.process(cas);

    // The target spans the complete document text.
    int textLength = cas.getDocumentText().length();
    TextClassificationTarget wholeText = new TextClassificationTarget(cas, 0, textLength);
    wholeText.addToIndexes();

    QuestionsRatioFeatureExtractor ratioExtractor = new QuestionsRatioFeatureExtractor();
    List<Feature> extracted = new ArrayList<Feature>(ratioExtractor.extract(cas, wholeText));

    // Exactly one feature is expected, so checking the first element covers the whole list.
    Assert.assertEquals(1, extracted.size());
    assertFeature(FN_QUESTION_RATIO, 0.5, extracted.get(0));
}
use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.
the class InstanceIdFeatureTest method instanceIdFeatureTest.
/**
 * Checks the values returned by the three {@code InstanceIdFeature.retrieve} overloads for a
 * CAS whose {@code JCasId} is 123 and whose single {@code TextClassificationTarget} has id 0:
 * with a target, without a target, and with a target plus the extra integer argument 5.
 */
@Test
public void instanceIdFeatureTest() throws Exception {
    AnalysisEngine engine = createEngine(NoOpAnnotator.class);
    JCas jcas = engine.newJCas();
    jcas.setDocumentLanguage("en");
    engine.process(jcas);

    TextClassificationTarget unit1 = new TextClassificationTarget(jcas, 0, 1);
    unit1.setId(0);
    unit1.addToIndexes();

    JCasId id = new JCasId(jcas);
    id.setId(123);
    id.addToIndexes();

    Feature feature = InstanceIdFeature.retrieve(jcas, unit1);
    Feature feature2 = InstanceIdFeature.retrieve(jcas);
    Feature feature3 = InstanceIdFeature.retrieve(jcas, unit1, 5);

    // assertEquals takes (expected, actual); with the arguments in this order a failing
    // assertion reports the literal as the expectation and the retrieved value as the result.
    assertEquals("123_0", feature.getValue());
    assertEquals("123", feature2.getValue());
    assertEquals("123_5_0", feature3.getValue());
}
use of org.dkpro.tc.api.type.TextClassificationTarget in project dkpro-tc by dkpro.
the class DocumentModeAnnotator method process.
/**
 * Adds one {@link TextClassificationTarget} spanning the entire document text to the CAS.
 *
 * <p>Nothing is added when {@code featureMode} differs from {@code Constants.FM_DOCUMENT} or
 * when the CAS already holds at least one {@link TextClassificationTarget}.
 *
 * @param aJCas the CAS to annotate
 * @throws AnalysisEngineProcessException declared by the analysis engine contract
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    boolean documentMode = featureMode.equals(Constants.FM_DOCUMENT);
    // Short-circuit keeps the original check order: mode first, existing target second.
    if (!documentMode || JCasUtil.exists(aJCas, TextClassificationTarget.class)) {
        return;
    }

    int documentEnd = aJCas.getDocumentText().length();
    TextClassificationTarget wholeDocument = new TextClassificationTarget(aJCas, 0, documentEnd);
    wholeDocument.addToIndexes();
}
Aggregations