Search in sources:

Example 1 with TextAnalyzer

use of org.apache.stanbol.commons.opennlp.TextAnalyzer in project stanbol by apache.

the class TextAnalyzerTest method testSingleSentenceDefaultConfig.

/**
 * Analyses a single sentence with an analyzer that was created without an
 * explicit configuration, then verifies the configuration flags reported by
 * the analyzer and the analysed sentence itself.
 */
@Test
public void testSingleSentenceDefaultConfig() {
    final TextAnalyzer defaultAnalyzer = new TextAnalyzer(openNLP, LANGUAGE);
    final AnalysedText sentence = defaultAnalyzer.analyseSentence(SINGLE_SENTENCE);
    assertNotNull(sentence);

    // flags reported when no TextAnalyzerConfig is handed to the constructor
    assertFalse(defaultAnalyzer.getConfig().isSimpleTokenizerForced());
    assertTrue(defaultAnalyzer.getConfig().isPosTaggerEnable());
    assertTrue(defaultAnalyzer.getConfig().isPosTypeChunkerEnabled());
    assertTrue(defaultAnalyzer.getConfig().isChunkerEnabled());
    assertTrue(defaultAnalyzer.getConfig().isPosTypeChunkerForced());

    // NOTE(review): the two boolean arguments presumably mean "POS tags expected"
    // and "chunks expected" - confirm against checkSingleSentence
    checkSingleSentence(sentence, SINGLE_SENTENCE_TOKENS, true, true);
}
Also used : AnalysedText(org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText) TextAnalyzer(org.apache.stanbol.commons.opennlp.TextAnalyzer) Test(org.junit.Test)

Example 2 with TextAnalyzer

use of org.apache.stanbol.commons.opennlp.TextAnalyzer in project stanbol by apache.

the class TextAnalyzerTest method testSingleSentenceChunkerConfig.

/**
 * Analyses a single sentence with a configuration in which forcing of the
 * POS type chunker is switched off, then verifies the configuration flags
 * reported by the analyzer and the analysed sentence itself.
 */
@Test
public void testSingleSentenceChunkerConfig() {
    final TextAnalyzerConfig customConfig = new TextAnalyzerConfig();
    customConfig.forcePosTypeChunker(false);
    final TextAnalyzer textAnalyzer = new TextAnalyzer(openNLP, LANGUAGE, customConfig);
    final AnalysedText sentence = textAnalyzer.analyseSentence(SINGLE_SENTENCE);
    assertNotNull(sentence);

    // only the "POS type chunker forced" flag is expected to be false;
    // no other setter was called on the configuration
    assertFalse(textAnalyzer.getConfig().isSimpleTokenizerForced());
    assertTrue(textAnalyzer.getConfig().isPosTaggerEnable());
    assertTrue(textAnalyzer.getConfig().isChunkerEnabled());
    assertTrue(textAnalyzer.getConfig().isPosTypeChunkerEnabled());
    assertFalse(textAnalyzer.getConfig().isPosTypeChunkerForced());

    // NOTE(review): the two boolean arguments presumably mean "POS tags expected"
    // and "chunks expected" - confirm against checkSingleSentence
    checkSingleSentence(sentence, SINGLE_SENTENCE_TOKENS, true, true);
}
Also used : AnalysedText(org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText) TextAnalyzer(org.apache.stanbol.commons.opennlp.TextAnalyzer) TextAnalyzerConfig(org.apache.stanbol.commons.opennlp.TextAnalyzer.TextAnalyzerConfig) Test(org.junit.Test)

Example 3 with TextAnalyzer

use of org.apache.stanbol.commons.opennlp.TextAnalyzer in project stanbol by apache.

the class TextAnalyzerTest method testSingleSentenceNoChunkerNoPosConfig.

/**
 * Analyses a single sentence with the POS tagger disabled while the chunker
 * stays explicitly enabled, then verifies the configuration flags reported
 * by the analyzer and the analysed sentence itself.
 */
@Test
public void testSingleSentenceNoChunkerNoPosConfig() {
    final TextAnalyzerConfig customConfig = new TextAnalyzerConfig();
    customConfig.enablePosTagger(false);
    // enabling the chunker must have no effect on the chunks when POS tagging is off
    customConfig.enableChunker(true);
    final TextAnalyzer textAnalyzer = new TextAnalyzer(openNLP, LANGUAGE, customConfig);
    final AnalysedText sentence = textAnalyzer.analyseSentence(SINGLE_SENTENCE);
    assertNotNull(sentence);

    // the configuration still reports the chunker as enabled; only the
    // POS tagger flag is expected to be false
    assertFalse(textAnalyzer.getConfig().isSimpleTokenizerForced());
    assertFalse(textAnalyzer.getConfig().isPosTaggerEnable());
    assertTrue(textAnalyzer.getConfig().isChunkerEnabled());
    assertTrue(textAnalyzer.getConfig().isPosTypeChunkerEnabled());
    assertTrue(textAnalyzer.getConfig().isPosTypeChunkerForced());

    // NOTE(review): the two boolean arguments presumably mean "POS tags expected"
    // and "chunks expected" - confirm against checkSingleSentence
    checkSingleSentence(sentence, SINGLE_SENTENCE_TOKENS, false, false);
}
Also used : AnalysedText(org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText) TextAnalyzer(org.apache.stanbol.commons.opennlp.TextAnalyzer) TextAnalyzerConfig(org.apache.stanbol.commons.opennlp.TextAnalyzer.TextAnalyzerConfig) Test(org.junit.Test)

Example 4 with TextAnalyzer

use of org.apache.stanbol.commons.opennlp.TextAnalyzer in project stanbol by apache.

the class TextAnalyzerTest method testMultipleSentenceDefaultConfig.

/**
 * Analyses a text consisting of several sentences with an analyzer created
 * without an explicit configuration. Every sentence returned by the iterator
 * is checked against the entry of {@code MULTIPLE_SENTENCE_TOKENS} at the
 * same position, and exactly three sentences are expected in total.
 */
@Test
public void testMultipleSentenceDefaultConfig() {
    TextAnalyzer analyzer = new TextAnalyzer(openNLP, LANGUAGE);
    Iterator<AnalysedText> analysedSentences = analyzer.analyse(MULTI_SENTENCES);
    assertNotNull(analysedSentences);
    int sentenceCount = 0;
    while (analysedSentences.hasNext()) {
        AnalysedText analysed = analysedSentences.next();
        // Report surplus sentences as a regular assertion failure instead of
        // letting the array access below throw ArrayIndexOutOfBoundsException.
        assertTrue("Analyzer returned more sentences than the "
                + MULTIPLE_SENTENCE_TOKENS.length + " expected token sets",
                sentenceCount < MULTIPLE_SENTENCE_TOKENS.length);
        // NOTE(review): the two boolean arguments presumably mean "POS tags expected"
        // and "chunks expected" - confirm against checkSingleSentence
        checkSingleSentence(analysed, MULTIPLE_SENTENCE_TOKENS[sentenceCount], true, true);
        sentenceCount++;
    }
    // include the actual count so a failure is diagnosable from the report alone
    assertTrue("Expected 3 analysed sentences but found " + sentenceCount, sentenceCount == 3);
}
Also used : AnalysedText(org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText) TextAnalyzer(org.apache.stanbol.commons.opennlp.TextAnalyzer) Test(org.junit.Test)

Example 5 with TextAnalyzer

use of org.apache.stanbol.commons.opennlp.TextAnalyzer in project stanbol by apache.

the class TextAnalyzerTest method testSingleSentenceNoChunkerConfig.

/**
 * Analyses a single sentence with the chunker disabled, then verifies the
 * configuration flags reported by the analyzer and the analysed sentence
 * itself.
 */
@Test
public void testSingleSentenceNoChunkerConfig() {
    final TextAnalyzerConfig customConfig = new TextAnalyzerConfig();
    customConfig.enableChunker(false);
    final TextAnalyzer textAnalyzer = new TextAnalyzer(openNLP, LANGUAGE, customConfig);
    final AnalysedText sentence = textAnalyzer.analyseSentence(SINGLE_SENTENCE);
    assertNotNull(sentence);

    // only the "chunker enabled" flag is expected to be false;
    // no other setter was called on the configuration
    assertFalse(textAnalyzer.getConfig().isSimpleTokenizerForced());
    assertTrue(textAnalyzer.getConfig().isPosTaggerEnable());
    assertFalse(textAnalyzer.getConfig().isChunkerEnabled());
    assertTrue(textAnalyzer.getConfig().isPosTypeChunkerEnabled());
    assertTrue(textAnalyzer.getConfig().isPosTypeChunkerForced());

    // NOTE(review): the two boolean arguments presumably mean "POS tags expected"
    // and "chunks expected" - confirm against checkSingleSentence
    checkSingleSentence(sentence, SINGLE_SENTENCE_TOKENS, true, false);
}
Also used : AnalysedText(org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText) TextAnalyzer(org.apache.stanbol.commons.opennlp.TextAnalyzer) TextAnalyzerConfig(org.apache.stanbol.commons.opennlp.TextAnalyzer.TextAnalyzerConfig) Test(org.junit.Test)

Aggregations

TextAnalyzer (org.apache.stanbol.commons.opennlp.TextAnalyzer): 5, AnalysedText (org.apache.stanbol.commons.opennlp.TextAnalyzer.AnalysedText): 5, Test (org.junit.Test): 5, TextAnalyzerConfig (org.apache.stanbol.commons.opennlp.TextAnalyzer.TextAnalyzerConfig): 3