use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class LuceneNGramCPFE method extract.
/**
 * Extracts the combined n-gram features for a pair of views.
 *
 * <p>The n-grams of each view are collected for that view's
 * {@code TextClassificationTarget} using the per-view min/max n settings,
 * then merged through {@code ComboUtils.getCombinedNgrams} and handed to
 * {@code addToFeatureArray} together with {@code topKSetCombo}.
 *
 * @param view1 the first view of the pair
 * @param view2 the second view of the pair
 * @return the feature set produced by {@code addToFeatureArray}
 * @throws TextClassificationException declared by the extractor contract
 */
@Override
public Set<Feature> extract(JCas view1, JCas view2) throws TextClassificationException {
    // Look up the classification target annotation of each view.
    TextClassificationTarget targetOfView1 = JCasUtil.selectSingle(view1, TextClassificationTarget.class);
    TextClassificationTarget targetOfView2 = JCasUtil.selectSingle(view2, TextClassificationTarget.class);

    // Per-view n-gram distributions; each view uses its own min/max n.
    FrequencyDistribution<String> ngramsOfView1 = NGramUtils.getDocumentNgrams(
            view1, targetOfView1, ngramLowerCase, filterPartialStopwordMatches,
            ngramMinN1, ngramMaxN1, stopwords, Token.class);
    FrequencyDistribution<String> ngramsOfView2 = NGramUtils.getDocumentNgrams(
            view2, targetOfView2, ngramLowerCase, filterPartialStopwordMatches,
            ngramMinN2, ngramMaxN2, stopwords, Token.class);

    FrequencyDistribution<String> comboNgrams = ComboUtils.getCombinedNgrams(
            ngramsOfView1, ngramsOfView2, ngramMinNCombo, ngramMaxNCombo, ngramUseSymmetricalCombos);

    // The prefix field must be set before addToFeatureArray runs
    // (presumably it is used there to name the features -- verify).
    prefix = "comboNG";
    return addToFeatureArray(comboNgrams, topKSetCombo, new HashSet<Feature>());
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class DiffNounChunkTokenLengthTest method extractTest1.
/**
 * Indexes one chunk spanning offsets 0-4 in each view and expects the
 * extractor to yield exactly one "DiffNounPhraseTokenLength" feature
 * with value 0.0 (tolerance 0.0001).
 */
@Test
public void extractTest1() throws Exception {
    new Chunk(jcas1, 0, 4).addToIndexes();
    new Chunk(jcas2, 0, 4).addToIndexes();

    Set<Feature> extracted = new DiffNounChunkTokenLength().extract(jcas1, jcas2);

    assertEquals(1, extracted.size());
    // Size was just asserted to be 1, so the set holds a single element.
    Feature onlyFeature = extracted.iterator().next();
    assertFeature("DiffNounPhraseTokenLength", 0.0, onlyFeature, 0.0001);
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class DiffNounChunkTokenLengthTest method extractTest2.
/**
 * Indexes a chunk spanning offsets 0-4 in the first view and 0-7 in the
 * second, and expects exactly one "DiffNounPhraseTokenLength" feature
 * with value -1.0 (tolerance 0.0001).
 */
@Test
public void extractTest2() throws Exception {
    new Chunk(jcas1, 0, 4).addToIndexes();
    new Chunk(jcas2, 0, 7).addToIndexes();

    Set<Feature> extracted = new DiffNounChunkTokenLength().extract(jcas1, jcas2);

    assertEquals(1, extracted.size());
    // Size was just asserted to be 1, so the set holds a single element.
    Feature onlyFeature = extracted.iterator().next();
    assertFeature("DiffNounPhraseTokenLength", -1.0, onlyFeature, 0.0001);
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class DiffNrOfTokensPairFeatureExtractorTest method testExtract.
/**
 * Segments two short English texts with a BreakIteratorSegmenter engine
 * and expects the pair extractor to yield exactly one "DiffNrOfTokens"
 * feature with value 4.
 */
@Test
public void testExtract() throws ResourceInitializationException, AnalysisEngineProcessException, TextClassificationException {
    AnalysisEngine segmenter = createEngine(createEngineDescription(BreakIteratorSegmenter.class));

    // First view: two sentences.
    JCas firstView = segmenter.newJCas();
    firstView.setDocumentLanguage("en");
    firstView.setDocumentText("This is the text of view 1. And some more.");
    segmenter.process(firstView);

    // Second view: a single sentence.
    JCas secondView = segmenter.newJCas();
    secondView.setDocumentLanguage("en");
    secondView.setDocumentText("This is the text of view 2.");
    segmenter.process(secondView);

    Set<Feature> extracted = new DiffNrOfTokensPairFeatureExtractor().extract(firstView, secondView);

    assertEquals(1, extracted.size());
    // Size was just asserted to be 1, so the set holds a single element.
    Feature onlyFeature = extracted.iterator().next();
    assertFeature("DiffNrOfTokens", 4, onlyFeature);
}
use of org.dkpro.tc.api.features.Feature in project dkpro-tc by dkpro.
the class NGramCPPipelineTest method testNonBinaryFeatureValues.
// TODO: Write a symmetry test. Note that features will be the same. Needs different dataset.
/**
 * Runs the pipeline with binary combo feature values switched off
 * ({@code PARAM_NGRAM_BINARY_FEATURE_VALUES_COMBO = false}) and checks how
 * the feature values of the first instance are distributed: four features
 * with value 2, twelve with value 1, and none with value 0.
 *
 * @throws Exception if pipeline initialisation or execution fails
 */
@Test
public void testNonBinaryFeatureValues() throws Exception {
    NGramCPPipelineTest test = new NGramCPPipelineTest();
    test.initialize();
    test.parameters = new Object[] {
            LuceneNGramCPFE.PARAM_UNIQUE_EXTRACTOR_NAME, "123",
            LuceneNGramCPFE.PARAM_USE_VIEW1_NGRAMS_AS_FEATURES, false,
            LuceneNGramCPFE.PARAM_USE_VIEW2_NGRAMS_AS_FEATURES, false,
            LuceneNGramCPFE.PARAM_USE_VIEWBLIND_NGRAMS_AS_FEATURES, false,
            LuceneNGramCPFE.PARAM_SOURCE_LOCATION, test.lucenePath,
            LuceneNGramPMetaCollector.PARAM_TARGET_LOCATION, test.lucenePath,
            LuceneNGramCPFE.PARAM_NGRAM_BINARY_FEATURE_VALUES_COMBO, false,
            LuceneNGramCPFE.PARAM_NGRAM_MAX_N_COMBO, 2,
            LuceneNGramCPFE.PARAM_NGRAM_SYMMETRY_COMBO, true };
    test.runPipeline();

    int two = 0;
    int one = 0;
    int zero = 0;
    for (Feature feature : test.instanceList.get(0).getFeatures()) {
        // Primitive int avoids the deprecated Integer(int) boxing constructor.
        int value = ((Double) feature.getValue()).intValue();
        switch (value) {
        case 2:
            two++;
            break;
        case 1:
            one++;
            break;
        case 0:
            zero++;
            break;
        default:
            // Other values are not counted by this test.
            break;
        }
    }

    // JUnit's contract is assertEquals(expected, actual); keeping that order
    // makes failure messages report the right value as "expected".
    assertEquals(4, two);
    assertEquals(12, one);
    assertEquals(0, zero);
}
Aggregations