Search in sources:

Example 26 with AnalyzedToken

Use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.

Class ManualTaggerAdapterTest, method testMultiplePOS.

@Test
public void testMultiplePOS() throws Exception {
    List<String> l = Arrays.asList("inflectedform2");
    List<AnalyzedTokenReadings> analyzedTokenReadings = tagger.tag(l);
    assertNotNull(analyzedTokenReadings);
    assertEquals(1, analyzedTokenReadings.size());
    AnalyzedTokenReadings analyzedTokenReading = analyzedTokenReadings.get(0);
    assertEquals("inflectedform2", analyzedTokenReading.getToken());
    assertNotNull(analyzedTokenReading.getReadings());
    assertEquals(3, analyzedTokenReading.getReadingsLength());
    AnalyzedToken analyzedToken;
    analyzedToken = analyzedTokenReading.getReadings().get(0);
    assertEquals("POS1a", analyzedToken.getPOSTag());
    assertEquals("inflectedform2", analyzedToken.getToken());
    assertEquals("lemma2", analyzedToken.getLemma());
    analyzedToken = analyzedTokenReading.getReadings().get(1);
    assertEquals("POS1b", analyzedToken.getPOSTag());
    assertEquals("inflectedform2", analyzedToken.getToken());
    assertEquals("lemma2", analyzedToken.getLemma());
    analyzedToken = analyzedTokenReading.getReadings().get(2);
    assertEquals("POS1c", analyzedToken.getPOSTag());
    assertEquals("inflectedform2", analyzedToken.getToken());
    assertEquals("lemma2", analyzedToken.getLemma());
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)

Example 27 with AnalyzedToken

Use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.

Class ManualTaggerAdapterTest, method testMultipleLemma.

@Test
public void testMultipleLemma() throws Exception {
    List<String> l = Arrays.asList("inflectedform3");
    List<AnalyzedTokenReadings> analyzedTokenReadings = tagger.tag(l);
    assertNotNull(analyzedTokenReadings);
    assertEquals(1, analyzedTokenReadings.size());
    AnalyzedTokenReadings analyzedTokenReading = analyzedTokenReadings.get(0);
    assertEquals("inflectedform3", analyzedTokenReading.getToken());
    assertNotNull(analyzedTokenReading.getReadings());
    assertEquals(4, analyzedTokenReading.getReadingsLength());
    AnalyzedToken analyzedToken;
    analyzedToken = analyzedTokenReading.getReadings().get(0);
    assertEquals("inflectedform3", analyzedToken.getToken());
    assertEquals("lemma3a", analyzedToken.getLemma());
    assertEquals("POS3a", analyzedToken.getPOSTag());
    analyzedToken = analyzedTokenReading.getReadings().get(1);
    assertEquals("inflectedform3", analyzedToken.getToken());
    assertEquals("lemma3b", analyzedToken.getLemma());
    assertEquals("POS3b", analyzedToken.getPOSTag());
    analyzedToken = analyzedTokenReading.getReadings().get(2);
    assertEquals("inflectedform3", analyzedToken.getToken());
    assertEquals("lemma3c", analyzedToken.getLemma());
    assertEquals("POS3c", analyzedToken.getPOSTag());
    analyzedToken = analyzedTokenReading.getReadings().get(3);
    assertEquals("inflectedform3", analyzedToken.getToken());
    assertEquals("lemma3d", analyzedToken.getLemma());
    assertEquals("POS3d", analyzedToken.getPOSTag());
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)
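
Both ManualTaggerAdapterTest methods above follow the same pattern: a single inflected form is tagged and yields several AnalyzedToken readings that share the token string but differ in lemma and/or POS tag. The sketch below walks such readings using only the calls visible in these tests (tag, getToken, getReadings, getLemma, getPOSTag); the Tagger parameter stands in for the test fixture's tagger field (not shown on this page), and the ReadingsPrinter class name is made up for illustration.

import java.util.Arrays;
import java.util.List;

import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.tagging.Tagger;

class ReadingsPrinter {

    // Prints every reading (lemma + POS tag) produced for a single word form.
    static void printReadings(Tagger tagger, String word) throws Exception {
        List<AnalyzedTokenReadings> result = tagger.tag(Arrays.asList(word));
        for (AnalyzedTokenReadings readings : result) {
            for (AnalyzedToken reading : readings.getReadings()) {
                System.out.println(readings.getToken()
                        + " -> lemma=" + reading.getLemma()
                        + ", pos=" + reading.getPOSTag());
            }
        }
    }
}

Against the fixture used above, printReadings(tagger, "inflectedform3") would print four lemma/POS pairs for the same token, matching what testMultipleLemma asserts.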

Example 28 with AnalyzedToken

Use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.

Class RuleFilterEvaluatorTest, method testDuplicateKey.

@Test(expected = RuntimeException.class)
public void testDuplicateKey() throws Exception {
    AnalyzedTokenReadings[] readingsList = {
        new AnalyzedTokenReadings(new AnalyzedToken("fake1", "SENT_START", null), 0),
        new AnalyzedTokenReadings(new AnalyzedToken("fake1", "pos", null), 0),
        new AnalyzedTokenReadings(new AnalyzedToken("fake2", "pos", null), 0)
    };
    eval.getResolvedArguments("year:\\1 year:\\2", readingsList, Arrays.asList(1, 2));
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)

Example 29 with AnalyzedToken

Use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.

Class GermanHelperTest, method testHasReadingOfType.

@Test
public void testHasReadingOfType() throws Exception {
    AnalyzedTokenReadings readings = new AnalyzedTokenReadings(new AnalyzedToken("der", "ART:DEF:DAT:SIN:FEM", null), 0);
    assertTrue(GermanHelper.hasReadingOfType(readings, GermanToken.POSType.DETERMINER));
    assertFalse(GermanHelper.hasReadingOfType(readings, GermanToken.POSType.NOMEN));
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)
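
GermanHelper.hasReadingOfType answers whether any reading attached to the token belongs to a given word class. As a rough stand-in (not the project's actual implementation), the same question can be asked with a plain prefix check over the readings, since AnalyzedTokenReadings is iterable over its AnalyzedToken readings, as Example 30 below also shows. The tag prefixes used here ("ART" for determiners, "SUB" for nouns) are an assumption based on the German tagset visible in the test.

import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;

class ReadingTypeCheck {

    // Rough stand-in for the kind of check GermanHelper.hasReadingOfType performs:
    // does any reading of this token carry a POS tag with the given prefix?
    // The real helper maps tags to GermanToken.POSType values; the prefix match
    // here is a simplification for illustration.
    static boolean hasReadingWithTagPrefix(AnalyzedTokenReadings readings, String tagPrefix) {
        for (AnalyzedToken reading : readings) {
            String posTag = reading.getPOSTag();
            if (posTag != null && posTag.startsWith(tagPrefix)) {
                return true;
            }
        }
        return false;
    }
}

For the readings built in the test above, hasReadingWithTagPrefix(readings, "ART") would return true and hasReadingWithTagPrefix(readings, "SUB") false, mirroring the two assertions.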

Example 30 with AnalyzedToken

Use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.

Class LanguageToolFilter, method incrementToken.

@Override
public boolean incrementToken() throws IOException {
    if (posStack.size() > 0) {
        String pop = posStack.pop();
        restoreState(current);
        termAtt.append(pop);
        posIncrAtt.setPositionIncrement(0);
        typeAtt.setType("pos");
        return true;
    }
    if (tokenIter == null || !tokenIter.hasNext()) {
        // there are no remaining tokens from the current sentence... are there more sentences?
        if (input.incrementToken()) {
            // a new sentence is available: process it.
            String sentenceStr = termAtt.toString();
            collectedInput.append(sentenceStr);
            if (sentenceStr.length() >= 255) {
                // this is only a fragment of a long sentence that the tokenizer cut at 255 chars;
                // keep collecting the fragments and analyze the full sentence later.
                // See https://github.com/languagetool-org/languagetool/issues/364
                return true;
            } else {
                sentenceStr = collectedInput.toString();
                collectedInput.setLength(0);
            }
            AnalyzedSentence sentence = languageTool.getAnalyzedSentence(sentenceStr);
            List<AnalyzedTokenReadings> tokenBuffer = Arrays.asList(sentence.getTokens());
            tokenIter = tokenBuffer.iterator();
            /*
         * it should not be possible to have a sentence with 0 words, check just in case. returning
         * EOS isn't the best either, but it's the behavior of the original code.
         */
            if (!tokenIter.hasNext()) {
                return false;
            }
        } else {
            // no more sentences, end of stream!
            return false;
        }
    }
    // Clear the attributes, since new tokens are being created here.
    clearAttributes();
    AnalyzedTokenReadings tr = tokenIter.next();
    // add POS tag for sentence start.
    if (tr.isSentenceStart()) {
        // TODO: would be needed so negated tokens can match on something (see testNegatedMatchAtSentenceStart())
        // but breaks other cases:
        //termAtt.append("SENT_START");
        typeAtt.setType("pos");
        String posTag = tr.getAnalyzedToken(0).getPOSTag();
        String lemma = tr.getAnalyzedToken(0).getLemma();
        if (toLowerCase) {
            termAtt.append(POS_PREFIX.toLowerCase()).append(posTag.toLowerCase());
            if (lemma != null) {
                termAtt.append(LEMMA_PREFIX.toLowerCase()).append(lemma.toLowerCase());
            }
        } else {
            termAtt.append(POS_PREFIX).append(posTag);
            if (lemma != null) {
                termAtt.append(LEMMA_PREFIX).append(lemma);
            }
        }
        return true;
    }
    // Skip whitespace tokens.
    if (tr.isWhitespace()) {
        return this.incrementToken();
    }
    offsetAtt.setOffset(tr.getStartPos(), tr.getEndPos());
    for (AnalyzedToken token : tr) {
        if (token.getPOSTag() != null) {
            if (toLowerCase) {
                posStack.push(POS_PREFIX.toLowerCase() + token.getPOSTag().toLowerCase());
            } else {
                posStack.push(POS_PREFIX + token.getPOSTag());
            }
        }
        if (token.getLemma() != null) {
            if (toLowerCase) {
                posStack.push(LEMMA_PREFIX.toLowerCase() + token.getLemma().toLowerCase());
            } else {
                // chances are good this is the same for all loop iterations, store it anyway...
                posStack.push(LEMMA_PREFIX + token.getLemma());
            }
        }
    }
    current = captureState();
    if (toLowerCase) {
        termAtt.append(tr.getAnalyzedToken(0).getToken().toLowerCase());
    } else {
        termAtt.append(tr.getAnalyzedToken(0).getToken());
    }
    return true;
}
Also used : AnalyzedSentence(org.languagetool.AnalyzedSentence) AnalyzedToken(org.languagetool.AnalyzedToken) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)
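
The filter's index terms all come from the AnalyzedSentence that JLanguageTool produces for each collected sentence. The following is a minimal standalone sketch of that analysis step, using only calls that appear in the snippet above (getAnalyzedSentence, getTokens, isSentenceStart, isWhitespace, getStartPos, getEndPos, and iteration over the readings). The choice of AmericanEnglish and the AnalyzeDemo class name are assumptions for illustration, and plain labels are printed instead of the filter's POS_PREFIX/LEMMA_PREFIX constants, whose values are not shown here.

import java.util.Arrays;
import java.util.List;

import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.JLanguageTool;
import org.languagetool.language.AmericanEnglish;

class AnalyzeDemo {

    public static void main(String[] args) throws Exception {
        // The language is an assumption for this sketch; the filter itself is
        // handed an already configured JLanguageTool instance by its caller.
        JLanguageTool languageTool = new JLanguageTool(new AmericanEnglish());
        AnalyzedSentence sentence = languageTool.getAnalyzedSentence("A short example sentence.");
        List<AnalyzedTokenReadings> tokens = Arrays.asList(sentence.getTokens());
        for (AnalyzedTokenReadings tr : tokens) {
            if (tr.isSentenceStart() || tr.isWhitespace()) {
                // the filter treats these specially; skip them in this sketch
                continue;
            }
            for (AnalyzedToken reading : tr) {
                System.out.println(tr.getToken() + " [" + tr.getStartPos() + "," + tr.getEndPos()
                        + "] lemma=" + reading.getLemma() + " pos=" + reading.getPOSTag());
            }
        }
    }
}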

Aggregations

AnalyzedToken (org.languagetool.AnalyzedToken): 89
AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings): 48
ArrayList (java.util.ArrayList): 43
Matcher (java.util.regex.Matcher): 16
Test (org.junit.Test): 16
IOException (java.io.IOException): 9
Pattern (java.util.regex.Pattern): 7
Nullable (org.jetbrains.annotations.Nullable): 6
TaggedWord (org.languagetool.tagging.TaggedWord): 6
RuleMatch (org.languagetool.rules.RuleMatch): 4
Synthesizer (org.languagetool.synthesis.Synthesizer): 4
InputStream (java.io.InputStream): 2
HashMap (java.util.HashMap): 2
LinkedHashSet (java.util.LinkedHashSet): 2
Scanner (java.util.Scanner): 2
TreeSet (java.util.TreeSet): 2
DictionaryLookup (morfologik.stemming.DictionaryLookup): 2
IStemmer (morfologik.stemming.IStemmer): 2
AnalyzedSentence (org.languagetool.AnalyzedSentence): 2
ChunkTag (org.languagetool.chunking.ChunkTag): 2