use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class CrfSuiteFeatureFormatExtractionIteratorTest method buildFeatures.
/**
 * Populates {@code fs} with five instances built from two feature lists.
 *
 * <p>Instances 1-3 are placed in sequence 0 (positions 0..2) and instances 4-5 in
 * sequence 1 (positions 0..1). Every instance is assigned JCas id 0.
 *
 * @throws Exception declared for the benefit of callers; nothing in this body is
 *         visibly checked
 */
private void buildFeatures() throws Exception {
    fs = new ArrayList<>();

    // Two feature lists with the same feature names; the second lists
    // feature2 before feature1.
    List<Feature> features1 = new ArrayList<Feature>();
    features1.add(new Feature("feature1", 1.0, FeatureType.NUMERIC));
    features1.add(new Feature("feature2", 0.0, FeatureType.NUMERIC));
    features1.add(new Feature("feature3", "Water", FeatureType.STRING));

    List<Feature> features2 = new ArrayList<Feature>();
    features2.add(new Feature("feature2", 0.5, FeatureType.NUMERIC));
    features2.add(new Feature("feature1", 0.5, FeatureType.NUMERIC));
    features2.add(new Feature("feature3", "Fanta", FeatureType.STRING));

    // --- sequence 0 ---
    Instance instance1 = new Instance(features1, "1");
    instance1.setJcasId(0);
    instance1.setSequenceId(0);
    instance1.setSequencePosition(0);

    Instance instance2 = new Instance(features2, "2");
    // Set the JCas id on the instance being configured (previously this was
    // applied to instance1 again by copy-paste).
    instance2.setJcasId(0);
    instance2.setSequenceId(0);
    instance2.setSequencePosition(1);

    Instance instance3 = new Instance(features1, "3");
    instance3.setJcasId(0);
    instance3.setSequenceId(0);
    instance3.setSequencePosition(2);

    // --- sequence 1 ---
    Instance instance4 = new Instance(features1, "4");
    instance4.setJcasId(0);
    instance4.setSequenceId(1);
    instance4.setSequencePosition(0);

    // NOTE(review): the outcome "4" repeats instance4's label - confirm this is intended.
    Instance instance5 = new Instance(features2, "4");
    instance5.setJcasId(0);
    instance5.setSequenceId(1);
    instance5.setSequencePosition(1);

    fs.add(instance1);
    fs.add(instance2);
    fs.add(instance3);
    fs.add(instance4);
    fs.add(instance5);
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class TypeTokenPairFeatureExtractorTest method testExtract.
/**
 * Runs the type/token pair extractor on the two prepared JCas fixtures and checks
 * that it yields a single feature, which is then verified with
 * {@code assertFeature}.
 */
@Test
public void testExtract() throws TextClassificationException {
    Set<Feature> extracted = new TypeTokenPairFeatureExtractor().extract(jcas1, jcas2);

    // Exactly one feature is expected from this extractor.
    assertEquals(1, extracted.size());

    // The set holds a single element; iterate to reach it.
    for (Feature f : extracted) {
        assertFeature("DiffTypeTokenRatio", 1.33, f, 0.1);
    }
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class AdjectiveEndingFeatureExtractor method extract.
/**
 * Computes, as percentages, how often adjectives end in a set of common suffixes
 * and how often adverbs end in "ly".
 *
 * <p>Adjectives are taken from the {@code POS_ADJ} annotations covered by
 * {@code aTarget}; each adjective is counted for at most one suffix (the first
 * match in the chain below). When no adjective (or no adverb) is found, the
 * corresponding feature values are 0.
 *
 * @param jcas    the CAS holding the POS annotations
 * @param aTarget the classification target restricting the adjective selection
 * @return the set of nine numeric suffix-percentage features
 */
@Override
public Set<Feature> extract(JCas jcas, TextClassificationTarget aTarget) {
    double able = 0;
    double al = 0;
    double ful = 0;
    double ible = 0;
    double ic = 0;
    double ive = 0;
    double less = 0;
    double ous = 0;
    double ly = 0;

    // n = number of adjectives inside the target
    int n = 0;
    for (POS_ADJ adj : JCasUtil.selectCovered(jcas, POS_ADJ.class, aTarget)) {
        n++;
        String text = adj.getCoveredText().toLowerCase();
        if (text.endsWith("able")) {
            able++;
        } else if (text.endsWith("al")) {
            al++;
        } else if (text.endsWith("ful")) {
            ful++;
        } else if (text.endsWith("ible")) {
            ible++;
        } else if (text.endsWith("ic")) {
            ic++;
        } else if (text.endsWith("ive")) {
            ive++;
        } else if (text.endsWith("less")) {
            less++;
        } else if (text.endsWith("ous")) {
            ous++;
        }
    }

    // m = number of adverbs.
    // NOTE(review): unlike the adjective loop, this selects adverbs from the whole
    // JCas rather than only those covered by aTarget - confirm whether
    // selectCovered(jcas, POS_ADV.class, aTarget) was intended.
    int m = 0;
    for (POS_ADV adv : JCasUtil.select(jcas, POS_ADV.class)) {
        m++;
        String text = adv.getCoveredText().toLowerCase();
        if (text.endsWith("ly")) {
            ly++;
        }
    }

    // The third constructor argument is true exactly when the value is the
    // zero fallback (presumably the "default value" flag - confirm against Feature).
    boolean noAdjectives = n == 0;
    boolean noAdverbs = m == 0;

    Set<Feature> featSet = new HashSet<Feature>();
    featSet.add(new Feature(ADJ_ENDING1, n > 0 ? able * 100 / n : 0, noAdjectives, FeatureType.NUMERIC));
    featSet.add(new Feature(ADJ_ENDING2, n > 0 ? al * 100 / n : 0, noAdjectives, FeatureType.NUMERIC));
    featSet.add(new Feature(ADJ_ENDING3, n > 0 ? ful * 100 / n : 0, noAdjectives, FeatureType.NUMERIC));
    featSet.add(new Feature(ADJ_ENDING4, n > 0 ? ible * 100 / n : 0, noAdjectives, FeatureType.NUMERIC));
    featSet.add(new Feature(ADJ_ENDING5, n > 0 ? less * 100 / n : 0, noAdjectives, FeatureType.NUMERIC));
    featSet.add(new Feature(ADJ_ENDING6, n > 0 ? ous * 100 / n : 0, noAdjectives, FeatureType.NUMERIC));
    featSet.add(new Feature(ADJ_ENDING7, n > 0 ? ive * 100 / n : 0, noAdjectives, FeatureType.NUMERIC));
    featSet.add(new Feature(ADJ_ENDING8, n > 0 ? ic * 100 / n : 0, noAdjectives, FeatureType.NUMERIC));
    // The adverb feature's flag must follow the adverb count m, not the
    // adjective count n, because its value is guarded by m.
    featSet.add(new Feature(ADV_ENDING9, m > 0 ? ly * 100 / m : 0, noAdverbs, FeatureType.NUMERIC));
    return featSet;
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class POSRatioFeatureExtractor method extract.
/**
 * Computes the share of each part-of-speech class among all {@code POS}
 * annotations covered by {@code aTarget}.
 *
 * <p>Nouns and proper nouns are reported together in a single noun ratio. If the
 * target covers no POS annotation at all, every ratio is 0 rather than NaN.
 *
 * @param jcas    the CAS holding the POS annotations
 * @param aTarget the classification target whose covered annotations are counted
 * @return the set of eleven numeric ratio features
 * @throws TextClassificationException declared by the extractor interface
 */
@Override
public Set<Feature> extract(JCas jcas, TextClassificationTarget aTarget) throws TextClassificationException {
    Set<Feature> features = new HashSet<Feature>();

    double posCount = selectCovered(jcas, POS.class, aTarget).size();
    // With no POS annotations every count below is 0 as well; dividing by 1
    // instead of 0.0 yields 0 for each ratio and avoids 0/0 = NaN feature values.
    double total = posCount > 0 ? posCount : 1;

    double adj = selectCovered(jcas, POS_ADJ.class, aTarget).size() / total;
    double adv = selectCovered(jcas, POS_ADV.class, aTarget).size() / total;
    double art = selectCovered(jcas, POS_DET.class, aTarget).size() / total;
    double card = selectCovered(jcas, POS_NUM.class, aTarget).size() / total;
    double conj = selectCovered(jcas, POS_CONJ.class, aTarget).size() / total;
    double noun = selectCovered(jcas, POS_NOUN.class, aTarget).size() / total;
    double propNoun = selectCovered(jcas, POS_PROPN.class, aTarget).size() / total;
    double other = selectCovered(jcas, POS_X.class, aTarget).size() / total;
    double prep = selectCovered(jcas, POS_ADP.class, aTarget).size() / total;
    double pron = selectCovered(jcas, POS_PRON.class, aTarget).size() / total;
    double punc = selectCovered(jcas, POS_PUNCT.class, aTarget).size() / total;
    double verb = selectCovered(jcas, POS_VERB.class, aTarget).size() / total;

    features.add(new Feature(FN_ADJ_RATIO, adj, FeatureType.NUMERIC));
    features.add(new Feature(FN_ADV_RATIO, adv, FeatureType.NUMERIC));
    features.add(new Feature(FN_ART_RATIO, art, FeatureType.NUMERIC));
    features.add(new Feature(FN_CARD_RATIO, card, FeatureType.NUMERIC));
    features.add(new Feature(FN_CONJ_RATIO, conj, FeatureType.NUMERIC));
    // Common and proper nouns are merged into one noun ratio.
    features.add(new Feature(FN_N_RATIO, noun + propNoun, FeatureType.NUMERIC));
    features.add(new Feature(FN_O_RATIO, other, FeatureType.NUMERIC));
    features.add(new Feature(FN_PR_RATIO, pron, FeatureType.NUMERIC));
    features.add(new Feature(FN_PP_RATIO, prep, FeatureType.NUMERIC));
    features.add(new Feature(FN_PUNC_RATIO, punc, FeatureType.NUMERIC));
    features.add(new Feature(FN_V_RATIO, verb, FeatureType.NUMERIC));
    return features;
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class NumberOfHashTags method extract.
/**
 * Counts the matches of {@code HASHTAG_PATTERN} in the document text spanned by
 * {@code aTarget} and returns that count as a single numeric feature named after
 * this class.
 *
 * @param jCas    the CAS providing the document text
 * @param aTarget the target whose begin/end offsets delimit the text to scan
 * @return a set containing the one hashtag-count feature
 * @throws TextClassificationException declared by the extractor interface
 */
@Override
public Set<Feature> extract(JCas jCas, TextClassificationTarget aTarget) throws TextClassificationException {
    // Restrict the scan to the text between the target's offsets.
    String targetText = jCas.getDocumentText().substring(aTarget.getBegin(), aTarget.getEnd());
    Matcher matcher = HASHTAG_PATTERN.matcher(targetText);

    int count = 0;
    while (matcher.find()) {
        count += 1;
    }

    Feature hashTagCount = new Feature(NumberOfHashTags.class.getSimpleName(), count, FeatureType.NUMERIC);
    return hashTagCount.asSet();
}
Aggregations