use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class CosineSimilarityTest method testCosSimWithPosTags.
/**
 * Runs the cosine-similarity pipeline with {@code PARAM_NGRAM_ANNO_TYPE} set to the POS
 * annotation type and checks that exactly one feature, named
 * {@code "SimilarityCosineSimilarity"}, is produced and that every feature of the first
 * instance has the value 0.2 (within {@code epsilon}).
 *
 * @throws Exception if initialization or the pipeline run fails
 */
@Test
public void testCosSimWithPosTags() throws Exception {
    CosineSimilarityTest test = new CosineSimilarityTest();
    test.initialize();
    test.parameters = new Object[] { CosineFeatureExtractor.PARAM_UNIQUE_EXTRACTOR_NAME, "123", CosineFeatureExtractor.PARAM_SOURCE_LOCATION, test.lucenePath.toString(), IdfPairMetaCollector.PARAM_TARGET_LOCATION, test.lucenePath.toString(), CosineFeatureExtractor.PARAM_NGRAM_ANNO_TYPE, "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS" };
    test.runPipeline();
    // assertEquals takes (expected, actual); keeping that order makes a failure message
    // report the real expectation and shows both values instead of a bare "false".
    assertEquals("SimilarityCosineSimilarity", test.featureNames.first());
    assertEquals(1, test.featureNames.size());
    for (Feature feat : test.instanceList.get(0).getFeatures()) {
        assertEquals(0.2, (double) feat.getValue(), epsilon);
    }
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class LuceneKeywordPFE method extract.
/**
 * Builds the keyword n-gram feature set for a pair of views.
 *
 * <p>Keyword n-grams are collected for each view separately and for both views together.
 * Depending on the configuration flags, each distribution is passed to
 * {@code addToFeatureArray} together with the matching top-k set. The {@code prefix} field
 * is assigned before every such call (presumably it is read by {@code addToFeatureArray}
 * to name the features; confirm against that method).
 *
 * @param view1 first view of the pair
 * @param view2 second view of the pair
 * @return the accumulated features; empty when no feature flag is enabled
 * @throws TextClassificationException declared by the overridden method
 */
@Override
public Set<Feature> extract(JCas view1, JCas view2) throws TextClassificationException {
    TextClassificationTarget targetOfView1 = JCasUtil.selectSingle(view1, TextClassificationTarget.class);
    TextClassificationTarget targetOfView2 = JCasUtil.selectSingle(view2, TextClassificationTarget.class);

    FrequencyDistribution<String> ngramsOfView1 = KeywordNGramUtils.getDocumentKeywordNgrams(view1, targetOfView1, ngramMinN1, ngramMaxN1, markSentenceBoundary, markSentenceLocation, includeCommas, keywords);
    FrequencyDistribution<String> ngramsOfView2 = KeywordNGramUtils.getDocumentKeywordNgrams(view2, targetOfView2, ngramMinN2, ngramMaxN2, markSentenceBoundary, markSentenceLocation, includeCommas, keywords);
    FrequencyDistribution<String> ngramsOfBothViews = getViewNgrams(view1, view2);

    Set<Feature> collected = new HashSet<>();

    // View-specific features, each matched against its own top-k set.
    if (useView1NgramsAsFeatures) {
        prefix = "keyNG1";
        collected = addToFeatureArray(ngramsOfView1, topKSetView1, collected);
    }
    if (useView2NgramsAsFeatures) {
        prefix = "keyNG2";
        collected = addToFeatureArray(ngramsOfView2, topKSetView2, collected);
    }

    // View-blind features use the shared top-k set, either per view with a
    // view-specific prefix or once over the combined distribution.
    if (useViewBlindNgramsAsFeatures) {
        if (markViewBlindNgramsWithLocalView) {
            prefix = "keyNGall1";
            collected = addToFeatureArray(ngramsOfView1, topKSet, collected);
            prefix = "keyNGall2";
            collected = addToFeatureArray(ngramsOfView2, topKSet, collected);
        } else {
            prefix = "keyNG";
            collected = addToFeatureArray(ngramsOfBothViews, topKSet, collected);
        }
    }
    return collected;
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class LuceneNGramPFE method extract.
/**
 * Builds the n-gram feature set for a pair of views.
 *
 * <p>Token n-grams are collected for each view separately and for both views together.
 * Depending on the configuration flags, each distribution is passed to
 * {@code addToFeatureArray} together with the matching top-k set. The {@code prefix} field
 * is assigned before every such call (presumably it is read by {@code addToFeatureArray}
 * to name the features; confirm against that method).
 *
 * @param view1 first view of the pair
 * @param view2 second view of the pair
 * @return the accumulated features; empty when no feature flag is enabled
 * @throws TextClassificationException declared by the overridden method
 */
@Override
public Set<Feature> extract(JCas view1, JCas view2) throws TextClassificationException {
    TextClassificationTarget targetOfView1 = JCasUtil.selectSingle(view1, TextClassificationTarget.class);
    TextClassificationTarget targetOfView2 = JCasUtil.selectSingle(view2, TextClassificationTarget.class);

    FrequencyDistribution<String> ngramsOfView1 = NGramUtils.getDocumentNgrams(view1, targetOfView1, ngramLowerCase, filterPartialStopwordMatches, ngramMinN1, ngramMaxN1, stopwords, Token.class);
    FrequencyDistribution<String> ngramsOfView2 = NGramUtils.getDocumentNgrams(view2, targetOfView2, ngramLowerCase, filterPartialStopwordMatches, ngramMinN2, ngramMaxN2, stopwords, Token.class);
    FrequencyDistribution<String> ngramsOfBothViews = getViewNgrams(view1, view2);

    Set<Feature> collected = new HashSet<>();

    // View-specific features, each matched against its own top-k set.
    if (useView1NgramsAsFeatures) {
        prefix = "view1NG";
        collected = addToFeatureArray(ngramsOfView1, topKSetView1, collected);
    }
    if (useView2NgramsAsFeatures) {
        prefix = "view2NG";
        collected = addToFeatureArray(ngramsOfView2, topKSetView2, collected);
    }

    // View-blind features use the shared top-k set, either per view with a
    // view-specific prefix or once over the combined distribution.
    if (useViewBlindNgramsAsFeatures) {
        if (markViewBlindNgramsWithLocalView) {
            prefix = "view1allNG";
            collected = addToFeatureArray(ngramsOfView1, topKSet, collected);
            prefix = "view2allNG";
            collected = addToFeatureArray(ngramsOfView2, topKSet, collected);
        } else {
            prefix = "allNG";
            collected = addToFeatureArray(ngramsOfBothViews, topKSet, collected);
        }
    }
    return collected;
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class SentenceRatioPerDocument method extract.
/**
 * Emits a single numeric feature: the result of {@code getRatio} applied to the number of
 * {@link Sentence} annotations covered by the target and the value returned by
 * {@code getMax()}.
 *
 * @param jcas    the CAS holding the annotations
 * @param aTarget the target whose covered sentences are counted
 * @return a one-element set containing the {@code FEATURE_NAME} feature
 * @throws TextClassificationException declared by the overridden method
 */
@Override
public Set<Feature> extract(JCas jcas, TextClassificationTarget aTarget) throws TextClassificationException {
    // getMax() is queried before the sentences are selected, as in the original ordering.
    final long max = getMax();
    final int sentenceCount = JCasUtil.selectCovered(jcas, Sentence.class, aTarget).size();
    Feature ratioFeature = new Feature(FEATURE_NAME, getRatio(sentenceCount, max), FeatureType.NUMERIC);
    return ratioFeature.asSet();
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class SkipWordNGram method extract.
@Override
public Set<Feature> extract(JCas jcas, TextClassificationTarget aTarget) throws TextClassificationException {
Set<Feature> features = new HashSet<Feature>();
FrequencyDistribution<String> documentNgrams = SkipWordNGramMC.getDocumentSkipNgrams(jcas, aTarget, ngramLowerCase, filterPartialStopwordMatches, ngramMinN, ngramMaxN, skipSize, stopwords);
for (String topNgram : topKSet.getKeys()) {
if (documentNgrams.getKeys().contains(topNgram)) {
features.add(new Feature(getFeaturePrefix() + "_" + topNgram, 1, FeatureType.BOOLEAN));
} else {
features.add(new Feature(getFeaturePrefix() + "_" + topNgram, 0, true, FeatureType.BOOLEAN));
}
}
return features;
}
Aggregations