use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class InitialCharacterUpperCaseTest method initialLetterTest.
/**
 * Checks that {@link InitialCharacterUpperCase} produces exactly one feature, named
 * {@link InitialCharacterUpperCase#FEATURE_NAME}, whose value is 1.0 (within 0.1) when the
 * classification target spans offsets 3-8 ("Loves") of the text "he Loves it".
 *
 * @throws Exception if engine creation, processing, or feature extraction fails
 */
@Test
public void initialLetterTest() throws Exception {
    AnalysisEngine segmenter = createEngine(createEngineDescription(BreakIteratorSegmenter.class));

    JCas cas = segmenter.newJCas();
    cas.setDocumentLanguage("en");
    cas.setDocumentText("he Loves it");
    segmenter.process(cas);

    // The target covers the capitalised word "Loves".
    TextClassificationTarget target = new TextClassificationTarget(cas, 3, 8);
    target.addToIndexes();

    Set<Feature> extracted = new InitialCharacterUpperCase().extract(cas, target);
    List<Feature> asList = new ArrayList<>(extracted);
    Assert.assertEquals(1, extracted.size());

    Feature first = asList.get(0);
    Assert.assertEquals(InitialCharacterUpperCase.FEATURE_NAME, first.getName());
    Assert.assertEquals(1.0, (double) first.getValue(), 0.1);
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class PronounRatioTest method posContextFeatureExtractorTest.
/**
 * Runs a segmenter and an English POS tagger over two short sentences, extracts features with
 * {@link PronounRatioFeatureExtractor} for a target spanning the whole document, and checks
 * that seven features are returned. Features named {@code FN_HE_RATIO} and {@code FN_WE_RATIO}
 * are additionally checked against 0.5 and 0.0 respectively when they occur in the result.
 *
 * @throws Exception if engine creation, processing, or feature extraction fails
 */
@Test
public void posContextFeatureExtractorTest() throws Exception {
    AnalysisEngineDescription segmenter = createEngineDescription(BreakIteratorSegmenter.class);
    AnalysisEngineDescription tagger = createEngineDescription(OpenNlpPosTagger.class,
            OpenNlpPosTagger.PARAM_LANGUAGE, "en");
    AnalysisEngine pipeline = createEngine(createEngineDescription(segmenter, tagger));

    JCas cas = pipeline.newJCas();
    cas.setDocumentLanguage("en");
    cas.setDocumentText("He is no tester. I am a tester.");
    pipeline.process(cas);

    // One target covering the complete document text.
    TextClassificationTarget target = new TextClassificationTarget(cas, 0,
            cas.getDocumentText().length());
    target.addToIndexes();

    List<Feature> ratios = new ArrayList<Feature>(
            new PronounRatioFeatureExtractor().extract(cas, target));
    Assert.assertEquals(7, ratios.size());

    for (Feature candidate : ratios) {
        String name = candidate.getName();
        if (name.equals(FN_HE_RATIO)) {
            assertFeature(FN_HE_RATIO, 0.5, candidate);
        } else if (name.equals(FN_WE_RATIO)) {
            assertFeature(FN_WE_RATIO, 0.0, candidate);
        }
    }
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class EmoticonRatioTest method emoticonRatioFeatureExtractorTest.
/**
 * Builds a tokenised CAS for a short tweet, annotates the trailing ";-)" (offsets 31-34) as a
 * {@code POS_EMO}, and checks that {@link EmoticonRatio} returns a single feature named after
 * the extractor class with a value of 0.14 (tolerance 0.01).
 *
 * @throws Exception if engine creation, processing, or feature extraction fails
 */
@Test
public void emoticonRatioFeatureExtractorTest() throws Exception {
    AnalysisEngine noOpEngine = createEngine(createEngineDescription(NoOpAnnotator.class));
    TokenBuilder<Token, Sentence> tokenBuilder = TokenBuilder.create(Token.class, Sentence.class);

    JCas cas = noOpEngine.newJCas();
    cas.setDocumentLanguage("en");
    tokenBuilder.buildTokens(cas, "This is a very emotional tweet ;-)");

    // Manually tag the emoticon at the end of the text.
    POS_EMO emoticon = new POS_EMO(cas);
    emoticon.setBegin(31);
    emoticon.setEnd(34);
    emoticon.addToIndexes();

    noOpEngine.process(cas);

    // One target covering the complete document text.
    TextClassificationTarget target = new TextClassificationTarget(cas, 0,
            cas.getDocumentText().length());
    target.addToIndexes();

    List<Feature> extracted = new ArrayList<Feature>(new EmoticonRatio().extract(cas, target));
    Assert.assertEquals(1, extracted.size());

    // Exactly one feature is present at this point, so check it directly.
    assertFeature(EmoticonRatio.class.getSimpleName(), 0.14, extracted.get(0), 0.01);
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class CrfSuiteFeatureFormatExtractionIterator method next.
/**
 * Renders the next run of consecutive instances that share the same id, starting at the
 * current {@code insIdx}.
 *
 * <p>Each instance becomes one line: the outcome (passed through
 * {@code LabelSubstitutor.labelReplacement}), a tab, and the features as tab-separated
 * {@code name=value} pairs. A line whose id differs from the id seen so far (initially
 * {@code idInitVal}) gets a trailing {@code \t__BOS__}. Lines are separated by {@code \n}.
 * When the following instance has a different id, or there is no following instance,
 * {@code appendEOS} is invoked and the run ends. {@code insIdx} is left pointing at the first
 * instance that was not consumed.
 *
 * @return the accumulated text; empty if {@code insIdx} is already at the end of the list
 * @throws UnsupportedOperationException wrapping any exception raised while building the text
 */
@Override
public StringBuilder next() {
    StringBuilder out = new StringBuilder();
    try {
        String currentSeqId = idInitVal;
        boolean markBos = false;
        while (insIdx < instances.size()) {
            Instance current = instances.get(insIdx);
            if (!currentSeqId.equals(getId(current))) {
                markBos = true;
                currentSeqId = getId(current);
            }

            out.append(LabelSubstitutor.labelReplacement(current.getOutcome())).append("\t");

            // Tab goes between features only, never after the last one.
            String separator = "";
            for (Feature f : current.getFeatures()) {
                out.append(separator).append(f.getName() + "=" + f.getValue());
                separator = "\t";
            }

            // Mark first line of new sequence with an additional __BOS__
            if (markBos) {
                out.append("\t").append("__BOS__");
                markBos = false;
            }

            // Peek ahead: the sequence ends when there is no successor or its id differs.
            boolean hasSuccessor = insIdx + 1 < instances.size();
            boolean sequenceEnds = !hasSuccessor
                    || !currentSeqId.equals(getId(instances.get(insIdx + 1)));
            if (sequenceEnds) {
                appendEOS(out);
                insIdx++;
                break;
            }

            out.append("\n");
            insIdx++;
        }
    } catch (Exception e) {
        throw new UnsupportedOperationException(e);
    }
    return out;
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class CrfSuiteDataWriterTest method prepareFeatures.
/**
 * Populates {@code instances} with five instances built from two feature lists.
 *
 * <p>Instances 1-3 form sequence 0 (positions 0-2) and instances 4-5 form sequence 1
 * (positions 0-1). Instances 1, 3 and 4 share the same "Water" feature list object, while
 * instances 2 and 5 share the "Fanta" list. Outcomes are "1", "2", "3", "4" and "4".
 *
 * @throws Exception declared by the original signature; propagated from the called
 *         constructors and setters if they fail
 */
private void prepareFeatures() throws Exception {
    // Feature list A: feature1, feature2, feature3 (in that order).
    List<Feature> waterFeatures = new ArrayList<>();
    waterFeatures.add(new Feature("feature1", 1.0, FeatureType.NUMERIC));
    waterFeatures.add(new Feature("feature2", 0.0, FeatureType.NUMERIC));
    waterFeatures.add(new Feature("feature3", "Water", FeatureType.STRING));

    // Feature list B: feature2 precedes feature1 here.
    List<Feature> fantaFeatures = new ArrayList<>();
    fantaFeatures.add(new Feature("feature2", 0.5, FeatureType.NUMERIC));
    fantaFeatures.add(new Feature("feature1", 0.5, FeatureType.NUMERIC));
    fantaFeatures.add(new Feature("feature3", "Fanta", FeatureType.STRING));

    // Sequence 0: three instances.
    Instance first = new Instance(waterFeatures, "1");
    first.setSequenceId(0);
    first.setSequencePosition(0);

    Instance second = new Instance(fantaFeatures, "2");
    second.setSequenceId(0);
    second.setSequencePosition(1);

    Instance third = new Instance(waterFeatures, "3");
    third.setSequenceId(0);
    third.setSequencePosition(2);

    // Sequence 1: two instances, both with outcome "4".
    Instance fourth = new Instance(waterFeatures, "4");
    fourth.setSequenceId(1);
    fourth.setSequencePosition(0);

    Instance fifth = new Instance(fantaFeatures, "4");
    fifth.setSequenceId(1);
    fifth.setSequencePosition(1);

    instances.add(first);
    instances.add(second);
    instances.add(third);
    instances.add(fourth);
    instances.add(fifth);
}
Aggregations