Search in sources :

Example 31 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class MultiWordChunkerTest method testDisambiguateMultiSpace.

/**
 * Runs the Ukrainian multi-word chunker over a sentence whose two words are
 * separated by two consecutive spaces and asserts that the readings of token 1
 * contain {@code <adv>} and the readings of token 4 contain {@code </adv>}.
 */
@Test
public void testDisambiguateMultiSpace() throws Exception {
    Disambiguator multiWordChunker = new MultiWordChunker("/uk/multiwords.txt");
    JLanguageTool langTool = new JLanguageTool(new Ukrainian());

    // Note the double space in the input text.
    AnalyzedSentence input = langTool.getAnalyzedSentence("для  годиться.");
    AnalyzedTokenReadings[] chunkedTokens = multiWordChunker.disambiguate(input).getTokens();

    String openingReadings = chunkedTokens[1].getReadings().toString();
    assertTrue(openingReadings.contains("<adv>"));

    String closingReadings = chunkedTokens[4].getReadings().toString();
    assertTrue(closingReadings.contains("</adv>"));
}
Also used : Ukrainian(org.languagetool.language.Ukrainian) AnalyzedSentence(org.languagetool.AnalyzedSentence) JLanguageTool(org.languagetool.JLanguageTool) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)

Example 32 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class RuleFilterEvaluatorTest method testGetResolvedArguments.

/**
 * Resolves the argument string {@code year:\1 month:\2} against two fake token
 * readings and asserts that exactly two entries come back, mapping "year" to
 * the first token and "month" to the second.
 */
@Test
public void testGetResolvedArguments() throws Exception {
    AnalyzedTokenReadings firstReadings = new AnalyzedTokenReadings(new AnalyzedToken("fake1", "pos", null), 0);
    AnalyzedTokenReadings secondReadings = new AnalyzedTokenReadings(new AnalyzedToken("fake2", "pos", null), 0);
    AnalyzedTokenReadings[] readingsList = { firstReadings, secondReadings };

    Map<String, String> resolved = eval.getResolvedArguments("year:\\1 month:\\2", readingsList, Arrays.asList(1, 1));

    String year = resolved.get("year");
    assertThat(year, is("fake1"));
    String month = resolved.get("month");
    assertThat(month, is("fake2"));
    assertThat(resolved.size(), is(2));
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)

Example 33 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class RuleFilterEvaluatorTest method testGetResolvedArgumentsWithColon.

/**
 * Resolves an argument whose value itself contains a colon
 * ({@code regex:(?:foo[xyz])bar}) and asserts that the whole text after the
 * first colon is returned as the single value for the key "regex".
 */
@Test
public void testGetResolvedArgumentsWithColon() throws Exception {
    AnalyzedTokenReadings onlyReadings = new AnalyzedTokenReadings(new AnalyzedToken("fake1", "pos", null), 0);
    AnalyzedTokenReadings[] readingsList = { onlyReadings };

    Map<String, String> resolved = eval.getResolvedArguments("regex:(?:foo[xyz])bar", readingsList, Arrays.asList(1, 1));

    String regexValue = resolved.get("regex");
    assertThat(regexValue, is("(?:foo[xyz])bar"));
    assertThat(resolved.size(), is(1));
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)

Example 34 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class DisambiguationRuleTest method testDisambiguationRulesFromXML.

/**
 * Checks every disambiguation rule against the examples attached to it.
 *
 * <p>For each rule two kinds of examples are verified:
 * <ul>
 *   <li><b>Untouched examples</b>: applying the rule must leave the string
 *       representation of the analyzed sentence unchanged.</li>
 *   <li><b>Disambiguated examples</b>: the example must carry
 *       {@code <marker>...</marker>} markup, the token at the marked position
 *       must show the declared input forms before the rule is applied and the
 *       declared output forms afterwards, and applying the rule must actually
 *       change the sentence.</li>
 * </ul>
 *
 * @param rules        all disambiguation rules of the language; also passed to
 *                     {@code disambiguateUntil} together with the current rule id
 * @param languageTool used to produce raw analyzed sentences for the example texts
 * @param lang         language under test, used for {@code disambiguateUntil} and in messages
 * @throws IOException if analyzing or disambiguating a sentence fails
 */
private void testDisambiguationRulesFromXML(List<DisambiguationPatternRule> rules, JLanguageTool languageTool, Language lang) throws IOException {
    for (DisambiguationPatternRule rule : rules) {
        String id = rule.getId();
        List<String> goodSentences = rule.getUntouchedExamples();
        if (goodSentences != null) {
            for (String rawGoodSentence : goodSentences) {
                // enable indentation use
                String goodSentence = cleanXML(rawGoodSentence.replaceAll("[\\n\\t]+", ""));
                assertTrue(goodSentence.trim().length() > 0);
                AnalyzedSentence sent = disambiguateUntil(lang, rules, id, languageTool.getRawAnalyzedSentence(goodSentence));
                AnalyzedSentence sentToReplace = disambiguateUntil(lang, rules, id, languageTool.getRawAnalyzedSentence(goodSentence));
                // note: we're testing only if string representations are equal
                // it's because getRawAnalyzedSentence does not set all properties
                // in AnalyzedSentence, and during equal test they are set for the
                // left-hand side
                assertEquals("The untouched example (" + goodSentence + ") for " + lang.getName() + " rule [" + rule + "] was touched!", sent.toString(), rule.replace(sentToReplace).toString());
            }
        }
        List<DisambiguatedExample> examples = rule.getExamples();
        if (examples != null) {
            for (DisambiguatedExample example : examples) {
                String outputForms = example.getDisambiguated();
                assertTrue("No output form found for: " + id, outputForms != null);
                assertTrue(outputForms.trim().length() > 0);

                String exampleText = example.getExample();
                int markerOpen = exampleText.indexOf("<marker>");
                int markerClose = exampleText.indexOf("</marker>");
                // Validate the raw indexOf results: once the tag length has been
                // subtracted, a missing closing tag is no longer -1 and would
                // slip through a "== -1" check.
                if (markerOpen == -1 || markerClose == -1 || markerClose < markerOpen) {
                    fail(lang + ": No position markup ('<marker>...</marker>') in disambiguated example in rule " + rule);
                }
                int expectedMatchStart = markerOpen;
                // NOTE(review): the opening tag length is subtracted presumably because
                // token positions refer to the text after cleanXML removed the markup - confirm.
                int expectedMatchEnd = markerClose - "<marker>".length();

                String inputForms = example.getAmbiguous();
                assertTrue("No input form found for: " + id, inputForms != null);
                assertTrue(inputForms.trim().length() > 0);
                assertTrue("Input and output forms for rule " + id + " are the same!", !outputForms.equals(inputForms));

                // Each use below gets its own freshly analyzed sentence instead of a shared instance.
                // NOTE(review): presumably because disambiguation modifies the sentence it is given - confirm.
                String cleanedExample = cleanXML(exampleText);
                AnalyzedSentence cleanInput = languageTool.getRawAnalyzedSentence(cleanedExample);
                AnalyzedSentence sent = disambiguateUntil(lang, rules, id, languageTool.getRawAnalyzedSentence(cleanedExample));
                AnalyzedSentence disambiguatedSent = rule.replace(disambiguateUntil(lang, rules, id, languageTool.getRawAnalyzedSentence(cleanedExample)));
                assertTrue("Disambiguated sentence is equal to the non-disambiguated sentence for rule: " + id + ". The sentence was: " + sent, !cleanInput.equals(disambiguatedSent));
                assertTrue("Disambiguated sentence is equal to the input sentence for rule: " + id + ". The sentence was: " + sent, !sent.equals(disambiguatedSent));

                // Find the marked token before the rule is applied and compare it with the input forms.
                String reading = "";
                String annotations = "";
                for (AnalyzedTokenReadings readings : sent.getTokens()) {
                    if (readings.isSentenceStart() && !inputForms.contains("<S>")) {
                        continue;
                    }
                    if (readings.getStartPos() == expectedMatchStart) {
                        AnalyzedTokenReadings[] r = { readings };
                        reading = new AnalyzedSentence(r).toShortString(",");
                        annotations = readings.getHistoricalAnnotations();
                        int startPos = readings.getStartPos();
                        int endPos = readings.getEndPos();
                        assertTrue("Wrong marker position in the example for the rule " + id + ": got " + startPos + "-" + endPos + ", expected " + expectedMatchStart + "-" + expectedMatchEnd, startPos == expectedMatchStart && endPos == expectedMatchEnd);
                        break;
                    }
                }
                assertEquals("The input form for the rule " + id + " in the example: " + example + " is different than expected (expected " + inputForms + " but got " + sortForms(reading) + "). The token has been changed by the disambiguator: " + annotations, inputForms, sortForms(reading));

                // Find the marked token after the rule was applied and compare it with the output forms.
                for (AnalyzedTokenReadings readings : disambiguatedSent.getTokens()) {
                    if (readings.isSentenceStart() && !outputForms.contains("<S>")) {
                        continue;
                    }
                    if (readings.getStartPos() == expectedMatchStart) {
                        AnalyzedTokenReadings[] r = { readings };
                        reading = new AnalyzedSentence(r).toShortString(",");
                        assertTrue(readings.getStartPos() == expectedMatchStart && readings.getEndPos() == expectedMatchEnd);
                        break;
                    }
                }
                assertEquals("The output form for the rule " + id + " in the example: " + example + " is different than expected (expected " + outputForms + " but got " + sortForms(reading) + "). The token has been changed by the disambiguator: " + annotations, outputForms, sortForms(reading));
            }
        }
    }
}
Also used : AnalyzedSentence(org.languagetool.AnalyzedSentence) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Example 35 with AnalyzedTokenReadings

use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.

the class ManualTaggerAdapterTest method testMultiplePOS.

/**
 * Tags the single word "inflectedform2" and asserts that it yields exactly one
 * {@link AnalyzedTokenReadings} with three readings, which share the same token
 * and the lemma "lemma2" and carry the POS tags POS1a, POS1b and POS1c in that order.
 */
@Test
public void testMultiplePOS() throws Exception {
    final String form = "inflectedform2";
    List<AnalyzedTokenReadings> tagged = tagger.tag(Arrays.asList(form));
    assertNotNull(tagged);
    assertEquals(1, tagged.size());

    AnalyzedTokenReadings tokenReadings = tagged.get(0);
    assertEquals(form, tokenReadings.getToken());
    assertNotNull(tokenReadings.getReadings());
    assertEquals(3, tokenReadings.getReadingsLength());

    // The readings are expected in exactly this order.
    String[] expectedPosTags = { "POS1a", "POS1b", "POS1c" };
    for (int i = 0; i < expectedPosTags.length; i++) {
        AnalyzedToken reading = tokenReadings.getReadings().get(i);
        assertEquals(expectedPosTags[i], reading.getPOSTag());
        assertEquals(form, reading.getToken());
        assertEquals("lemma2", reading.getLemma());
    }
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)

Aggregations

AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings): 116; AnalyzedToken (org.languagetool.AnalyzedToken): 48; ArrayList (java.util.ArrayList): 47; AnalyzedSentence (org.languagetool.AnalyzedSentence): 21; Test (org.junit.Test): 16; RuleMatch (org.languagetool.rules.RuleMatch): 14; Matcher (java.util.regex.Matcher): 13; IOException (java.io.IOException): 7; Nullable (org.jetbrains.annotations.Nullable): 6; JLanguageTool (org.languagetool.JLanguageTool): 6; Pattern (java.util.regex.Pattern): 5; ChunkTag (org.languagetool.chunking.ChunkTag): 5; English (org.languagetool.language.English): 3; TaggedWord (org.languagetool.tagging.TaggedWord): 3; InputStream (java.io.InputStream): 2; HashMap (java.util.HashMap): 2; List (java.util.List): 2; Scanner (java.util.Scanner): 2; TreeSet (java.util.TreeSet): 2; ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap): 2