Search in sources:

Example 36 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class ManualTaggerAdapterTest method testMultipleLemma.

/**
 * Tags the single word "inflectedform3" and asserts that exactly one token comes back
 * carrying four readings, whose lemmas and POS tags appear in the order a, b, c, d.
 */
@Test
public void testMultipleLemma() throws Exception {
    final String form = "inflectedform3";
    // Expected lemma / POS tag of each reading, indexed by reading position.
    final String[] expectedLemmas = { "lemma3a", "lemma3b", "lemma3c", "lemma3d" };
    final String[] expectedPosTags = { "POS3a", "POS3b", "POS3c", "POS3d" };

    List<String> words = Arrays.asList(form);
    List<AnalyzedTokenReadings> tagged = tagger.tag(words);
    assertNotNull(tagged);
    assertEquals(1, tagged.size());

    AnalyzedTokenReadings tokenReadings = tagged.get(0);
    assertEquals(form, tokenReadings.getToken());
    assertNotNull(tokenReadings.getReadings());
    assertEquals(4, tokenReadings.getReadingsLength());

    for (int idx = 0; idx < expectedLemmas.length; idx++) {
        AnalyzedToken reading = tokenReadings.getReadings().get(idx);
        assertEquals(form, reading.getToken());
        assertEquals(expectedLemmas[idx], reading.getLemma());
        assertEquals(expectedPosTags[idx], reading.getPOSTag());
    }
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)

Example 37 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class RuleFilterEvaluatorTest method testDuplicateKey.

/**
 * Passes an argument string in which the key "year" occurs twice to
 * {@code getResolvedArguments} and expects a {@link RuntimeException}.
 */
@Test(expected = RuntimeException.class)
public void testDuplicateKey() throws Exception {
    AnalyzedTokenReadings sentStart =
            new AnalyzedTokenReadings(new AnalyzedToken("fake1", "SENT_START", null), 0);
    AnalyzedTokenReadings firstToken =
            new AnalyzedTokenReadings(new AnalyzedToken("fake1", "pos", null), 0);
    AnalyzedTokenReadings secondToken =
            new AnalyzedTokenReadings(new AnalyzedToken("fake2", "pos", null), 0);
    AnalyzedTokenReadings[] readingsList = { sentStart, firstToken, secondToken };

    // Both "year" entries reference a different token (\1 and \2) under the same key.
    eval.getResolvedArguments("year:\\1 year:\\2", readingsList, Arrays.asList(1, 2));
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)

Example 38 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class AvsAnRule method match.

/**
 * Scans the sentence for the articles "a" / "an" and reports a match whenever
 * {@code getCorrectDeterminerFor} returns the other article for the token that follows.
 *
 * <p>A token tagged {@code DT} is remembered as the pending article; tokens made up only of
 * hyphens, double quotes, parentheses or square brackets leave it pending; any other token
 * clears it.
 *
 * @param sentence the analyzed sentence to check
 * @return the rule matches found, converted via {@code toRuleMatchArray}
 */
@Override
public RuleMatch[] match(AnalyzedSentence sentence) {
    AnalyzedTokenReadings[] tokens = sentence.getTokensWithoutWhitespace();
    List<RuleMatch> found = new ArrayList<>();
    // Index of the pending article token; 0 means "none" (token 0 is SENT_START).
    int articleIdx = 0;
    // Start at 1 to skip SENT_START.
    for (int pos = 1; pos < tokens.length; pos++) {
        AnalyzedTokenReadings current = tokens[pos];
        String article = null;
        if (articleIdx > 0) {
            article = tokens[articleIdx].getToken();
        }
        // Only an article at index 1 (right after SENT_START) is compared case-insensitively.
        boolean atSentenceStart = articleIdx == 1;
        boolean isA = atSentenceStart ? "a".equalsIgnoreCase(article) : "a".equals(article);
        boolean isAn = atSentenceStart ? "an".equalsIgnoreCase(article) : "an".equals(article);
        if (isA || isAn) {
            Determiner expected = getCorrectDeterminerFor(current);
            String msg = null;
            if (isA && expected == Determiner.AN) {
                String replacement;
                if (StringTools.startsWithUppercase(article)) {
                    replacement = "An";
                } else {
                    replacement = "an";
                }
                msg = "Use <suggestion>" + replacement + "</suggestion> instead of '" + article
                        + "' if the following word starts with a vowel sound, e.g. 'an article', 'an hour'";
            } else if (isAn && expected == Determiner.A) {
                String replacement;
                if (StringTools.startsWithUppercase(article)) {
                    replacement = "A";
                } else {
                    replacement = "a";
                }
                msg = "Use <suggestion>" + replacement + "</suggestion> instead of '" + article
                        + "' if the following word doesn't start with a vowel sound, e.g. 'a sentence', 'a university'";
            }
            if (msg != null) {
                AnalyzedTokenReadings articleToken = tokens[articleIdx];
                found.add(new RuleMatch(this, articleToken.getStartPos(), articleToken.getEndPos(), msg, "Wrong article"));
            }
        }
        if (current.hasPosTag("DT")) {
            articleIdx = pos;
        } else if (!current.getToken().matches("[-\"()\\[\\]]+")) {
            // Punctuation-only tokens keep the pending article, e.g. the quote in
            // >>an "industry party"<<; everything else resets it.
            articleIdx = 0;
        }
    }
    return toRuleMatchArray(found);
}
Also used : ArrayList(java.util.ArrayList) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 39 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class EnglishPartialPosTagFilter method tag.

/**
 * Tags the given token on its own and runs the result through the disambiguator.
 *
 * @param token the token text to tag
 * @return the tokens of the disambiguated one-token sentence, as a list
 * @throws RuntimeException wrapping the {@link IOException} if tagging or disambiguation fails
 */
@Override
protected List<AnalyzedTokenReadings> tag(String token) {
    try {
        List<AnalyzedTokenReadings> readings = tagger.tag(Collections.singletonList(token));
        // Wrap the readings in a sentence, since the disambiguator is called with an AnalyzedSentence.
        AnalyzedSentence sentence =
                new AnalyzedSentence(readings.toArray(new AnalyzedTokenReadings[readings.size()]));
        AnalyzedTokenReadings[] resolved = disambiguator.disambiguate(sentence).getTokens();
        return Arrays.asList(resolved);
    } catch (IOException e) {
        throw new RuntimeException("Could not tag and disambiguate '" + token + "'", e);
    }
}
Also used : AnalyzedSentence(org.languagetool.AnalyzedSentence) IOException(java.io.IOException) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 40 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class EnglishChunkerTest method testContractions.

/**
 * Analyzes "I'll be there" with the English language and checks the chunk tags of
 * selected tokens: the apostrophe and "ll" are expected to carry no chunk tags.
 */
@Test
public void testContractions() throws Exception {
    AnalyzedTokenReadings[] tokens =
            new JLanguageTool(new English()).getAnalyzedSentence("I'll be there").getTokens();
    ChunkTag nounPhraseStart = new ChunkTag("B-NP-singular");
    ChunkTag insideVerbPhrase = new ChunkTag("I-VP");

    assertThat(tokens[1].getChunkTags().get(0), is(nounPhraseStart));
    // "'" and "ll" cannot be mapped as we tokenize differently
    assertThat(tokens[2].getChunkTags().size(), is(0));
    assertThat(tokens[3].getChunkTags().size(), is(0));
    assertThat(tokens[5].getChunkTags().get(0), is(insideVerbPhrase));
}
Also used : English(org.languagetool.language.English) AnalyzedSentence(org.languagetool.AnalyzedSentence) JLanguageTool(org.languagetool.JLanguageTool) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)

Aggregations

AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings): 116; AnalyzedToken (org.languagetool.AnalyzedToken): 48; ArrayList (java.util.ArrayList): 47; AnalyzedSentence (org.languagetool.AnalyzedSentence): 21; Test (org.junit.Test): 16; RuleMatch (org.languagetool.rules.RuleMatch): 14; Matcher (java.util.regex.Matcher): 13; IOException (java.io.IOException): 7; Nullable (org.jetbrains.annotations.Nullable): 6; JLanguageTool (org.languagetool.JLanguageTool): 6; Pattern (java.util.regex.Pattern): 5; ChunkTag (org.languagetool.chunking.ChunkTag): 5; English (org.languagetool.language.English): 3; TaggedWord (org.languagetool.tagging.TaggedWord): 3; InputStream (java.io.InputStream): 2; HashMap (java.util.HashMap): 2; List (java.util.List): 2; Scanner (java.util.Scanner): 2; TreeSet (java.util.TreeSet): 2; ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2