Search in sources:

Example 86 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

Class ManualTaggerAdapterTest, method testMultipleWords.

/**
 * Tags two words in a single call and checks, for each resulting token,
 * that the surface form is preserved and the expected number of readings
 * is reported.
 */
@Test
public void testMultipleWords() throws Exception {
    List<AnalyzedTokenReadings> tagged = tagger.tag(Arrays.asList("inflectedform2", "inflectedform3"));
    assertNotNull(tagged);
    assertEquals(2, tagged.size());

    // First word: only the count is checked here; the readings themselves
    // are covered by #testMultipleLemma().
    AnalyzedTokenReadings first = tagged.get(0);
    assertEquals("inflectedform2", first.getToken());
    assertNotNull(first.getReadings());
    assertEquals(3, first.getReadingsLength());

    // Second word: only the count is checked here; the readings themselves
    // are covered by #testMultiplePOS().
    AnalyzedTokenReadings second = tagged.get(1);
    assertEquals("inflectedform3", second.getToken());
    assertNotNull(second.getReadings());
    assertEquals(4, second.getReadingsLength());
}
Also used : AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)

Example 87 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

Class WordTokenizer, method run.

/**
 * Reads text line by line from standard input, analyzes each line with a
 * {@code JLanguageTool} for the given language, and writes every
 * non-whitespace token to standard output, one token per line.
 *
 * @param lang short language code handed to {@code Languages.getLanguageForShortCode}
 * @throws IOException if reading the input, analyzing a line, or writing the output fails
 */
private void run(final String lang) throws IOException {
    JLanguageTool langTool = new JLanguageTool(Languages.getLanguageForShortCode(lang));
    // try-with-resources guarantees that the writer is flushed and closed even
    // if closing the reader fails, and that a failure while closing does not
    // hide an exception thrown from the loop (it is attached as suppressed).
    try (BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
         BufferedWriter out = new BufferedWriter(new OutputStreamWriter(System.out))) {
        String line;
        while ((line = in.readLine()) != null) {
            AnalyzedTokenReadings[] tokens = langTool.getRawAnalyzedSentence(line).getTokensWithoutWhitespace();
            for (AnalyzedTokenReadings token : tokens) {
                out.write(token.getToken());
                out.write("\n");
            }
        }
    }
}
Also used : InputStreamReader(java.io.InputStreamReader) JLanguageTool(org.languagetool.JLanguageTool) BufferedReader(java.io.BufferedReader) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 88 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

Class UnifierTest, method testAddNeutralElement.

/**
 * Feeds two readings of an adjective, registers a comma as a neutral
 * element, then feeds a noun, and checks that the unified sequence contains
 * the singular-neuter adjective reading, the comma and the noun.
 */
@Test
public void testAddNeutralElement() {
    UnifierConfiguration config = new UnifierConfiguration();
    config.setEquivalence("number", "singular", preparePOSElement(".*[\\.:]sg:.*"));
    config.setEquivalence("number", "plural", preparePOSElement(".*[\\.:]pl:.*"));
    config.setEquivalence("gender", "feminine", preparePOSElement(".*[\\.:]f([\\.:].*)?"));
    config.setEquivalence("gender", "masculine", preparePOSElement(".*[\\.:]m([\\.:].*)?"));
    config.setEquivalence("gender", "neutral", preparePOSElement(".*[\\.:]n([\\.:].*)?"));
    Unifier unifier = config.createUnifier();

    // Both features are requested with a null list of types.
    Map<String, List<String>> features = new HashMap<>();
    features.put("number", null);
    features.put("gender", null);

    // Two readings of the same surface form: one tagged plural, one singular neuter.
    AnalyzedToken adjectivePlural = new AnalyzedToken("osobiste", "adj:pl:nom.acc.voc:f.n.m2.m3:pos:aff", "osobisty");
    AnalyzedToken adjectiveSingular = new AnalyzedToken("osobiste", "adj:sg:nom.acc.voc:n:pos:aff", "osobisty");
    AnalyzedToken noun = new AnalyzedToken("godło", "subst:sg:nom.acc.voc:n", "godło");
    AnalyzedToken commaToken = new AnalyzedToken(",", "comma", ",");

    // The call order matters: adjective readings, then the neutral comma, then the noun.
    unifier.isUnified(adjectivePlural, features, false);
    unifier.isUnified(adjectiveSingular, features, true);
    unifier.addNeutralElement(new AnalyzedTokenReadings(commaToken, 0));
    boolean nounUnified = unifier.isUnified(noun, features, true);
    assertEquals(true, nounUnified);

    String expectedSequence = "[osobiste[osobisty/adj:sg:nom.acc.voc:n:pos:aff*], ,[,/comma*], godło[godło/subst:sg:nom.acc.voc:n*]]";
    assertEquals(expectedSequence, Arrays.toString(unifier.getFinalUnified()));
    unifier.reset();
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)

Example 89 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

Class ManualTaggerAdapter, method tag.

/**
 * Looks up every token in the manual tagger (using its lower-cased form)
 * and wraps the results as {@code AnalyzedTokenReadings}.
 *
 * @param sentenceTokens the tokens to tag, in sentence order
 * @return one {@code AnalyzedTokenReadings} per input token, in the same order;
 *         a token with no manual tags gets a single reading whose POS tag and
 *         lemma are both {@code null}
 * @throws IOException declared by the signature; not thrown directly by this body
 */
@Override
public List<AnalyzedTokenReadings> tag(List<String> sentenceTokens) throws IOException {
    List<AnalyzedTokenReadings> result = new ArrayList<>();
    // Running offset passed to each AnalyzedTokenReadings; it advances by the
    // length of each token only.
    int offset = 0;
    for (String token : sentenceTokens) {
        List<TaggedWord> taggedWords = manualTagger.tag(token.toLowerCase());
        List<AnalyzedToken> readings = new ArrayList<>();
        if (taggedWords.isEmpty()) {
            // Unknown word: keep the token but leave POS tag and lemma unset.
            readings.add(new AnalyzedToken(token, null, null));
        } else {
            for (TaggedWord taggedWord : taggedWords) {
                // The reading keeps the original (not lower-cased) surface form.
                readings.add(new AnalyzedToken(token, taggedWord.getPosTag(), taggedWord.getLemma()));
            }
        }
        result.add(new AnalyzedTokenReadings(readings, offset));
        offset += token.length();
    }
    return result;
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) TaggedWord(org.languagetool.tagging.TaggedWord) ArrayList(java.util.ArrayList) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 90 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

Class MissingGenitiveFinder, method hasEsGenitive.

/**
 * Checks whether the "-es" variant of a word is known to the tagger.
 * A trailing {@code s} is replaced by {@code es}; a word that does not end
 * in {@code s} is passed to the tagger unchanged.
 *
 * @param tagger the tagger used to look up the candidate form
 * @param word the word whose "-es" variant is tested
 * @return {@code true} if at least one returned reading reports {@code isTagged()}
 * @throws IOException if the tagger fails
 */
private boolean hasEsGenitive(GermanTagger tagger, String word) throws IOException {
    String candidate = word.replaceFirst("s$", "es");
    List<AnalyzedTokenReadings> tagged = tagger.tag(Collections.singletonList(candidate));
    return tagged.stream().anyMatch(r -> r.isTagged());
}
Also used : AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Aggregations

AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings)116 AnalyzedToken (org.languagetool.AnalyzedToken)48 ArrayList (java.util.ArrayList)47 AnalyzedSentence (org.languagetool.AnalyzedSentence)21 Test (org.junit.Test)16 RuleMatch (org.languagetool.rules.RuleMatch)14 Matcher (java.util.regex.Matcher)13 IOException (java.io.IOException)7 Nullable (org.jetbrains.annotations.Nullable)6 JLanguageTool (org.languagetool.JLanguageTool)6 Pattern (java.util.regex.Pattern)5 ChunkTag (org.languagetool.chunking.ChunkTag)5 English (org.languagetool.language.English)3 TaggedWord (org.languagetool.tagging.TaggedWord)3 InputStream (java.io.InputStream)2 HashMap (java.util.HashMap)2 List (java.util.List)2 Scanner (java.util.Scanner)2 TreeSet (java.util.TreeSet)2 ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap)2