Search in sources :

Example 21 with AnalyzedToken

use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.

the class RuleFilterEvaluatorTest method testGetResolvedArgumentsWithColon.

/**
 * Resolves a filter argument whose value itself contains a colon (a regex with a
 * non-capturing group) and expects a single entry keyed by {@code regex} whose value is
 * the complete regular expression.
 */
@Test
public void testGetResolvedArgumentsWithColon() throws Exception {
    AnalyzedToken fakeToken = new AnalyzedToken("fake1", "pos", null);
    AnalyzedTokenReadings[] tokenReadings = new AnalyzedTokenReadings[] { new AnalyzedTokenReadings(fakeToken, 0) };
    String filterArgs = "regex:(?:foo[xyz])bar";
    Map<String, String> resolved = eval.getResolvedArguments(filterArgs, tokenReadings, Arrays.asList(1, 1));
    // the value must keep its own colon instead of being cut at it
    assertThat(resolved.get("regex"), is("(?:foo[xyz])bar"));
    assertThat(resolved.size(), is(1));
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)

Example 22 with AnalyzedToken

use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.

the class UnifierTest method testMultipleFeatsWithMultipleTypes.

/**
 * Unifies an ambiguous adjective form with an ambiguous noun on three features at once
 * (number, gender, case), with several equivalence types registered per feature, and
 * checks that the same readings are reported when the readings are fed in a different order.
 */
@Test
public void testMultipleFeatsWithMultipleTypes() {
    UnifierConfiguration config = new UnifierConfiguration();
    // number
    config.setEquivalence("number", "singular", preparePOSElement(".*[\\.:]sg:.*"));
    config.setEquivalence("number", "plural", preparePOSElement(".*[\\.:]pl:.*"));
    // gender ("masculine" is registered once per m1/m2/m3 tag)
    config.setEquivalence("gender", "feminine", preparePOSElement(".*[\\.:]f([\\.:].*)?"));
    config.setEquivalence("gender", "masculine", preparePOSElement(".*[\\.:]m1([\\.:].*)?"));
    config.setEquivalence("gender", "masculine", preparePOSElement(".*[\\.:]m2([\\.:].*)?"));
    config.setEquivalence("gender", "masculine", preparePOSElement(".*[\\.:]m3([\\.:].*)?"));
    config.setEquivalence("gender", "neutral1", preparePOSElement(".*[\\.:]n1(?:[\\.:].*)?"));
    config.setEquivalence("gender", "neutral2", preparePOSElement(".*[\\.:]n2(?:[\\.:].*)?"));
    // case
    config.setEquivalence("case", "nominativus", preparePOSElement(".*[\\.:]nom[\\.:]?.*"));
    config.setEquivalence("case", "accusativus", preparePOSElement(".*[\\.:]acc[\\.:]?.*"));
    config.setEquivalence("case", "dativus", preparePOSElement(".*[\\.:]dat[\\.:]?.*"));
    config.setEquivalence("case", "vocativus", preparePOSElement(".*[\\.:]voc[\\.:]?.*"));
    Unifier unifier = config.createUnifier();

    // four readings of the adjective form "niezgorsze"
    AnalyzedToken adjSgAcc = new AnalyzedToken("niezgorsze", "adj:sg:acc:n1.n2:pos", "niezgorszy");
    AnalyzedToken adjPlAcc = new AnalyzedToken("niezgorsze", "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos", "niezgorszy");
    AnalyzedToken adjPlNomVoc = new AnalyzedToken("niezgorsze", "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos", "niezgorszy");
    AnalyzedToken adjSgNomVoc = new AnalyzedToken("niezgorsze", "adj:sg:nom.voc:n1.n2:pos", "niezgorszy");
    // three readings of the noun form "lekarstwo"
    AnalyzedToken nounAcc = new AnalyzedToken("lekarstwo", "subst:sg:acc:n2", "lekarstwo");
    AnalyzedToken nounNom = new AnalyzedToken("lekarstwo", "subst:sg:nom:n2", "lekarstwo");
    AnalyzedToken nounVoc = new AnalyzedToken("lekarstwo", "subst:sg:voc:n2", "lekarstwo");

    // a null type list is stored for every feature
    Map<String, List<String>> features = new HashMap<>();
    for (String featureName : new String[] { "number", "gender", "case" }) {
        features.put(featureName, null);
    }

    // now test the simplified interface; 'true' is passed with the final reading of each token
    // (presumably a "last reading" flag - confirm against Unifier.isUnified)
    unifier.isUnified(adjSgAcc, features, false);
    unifier.isUnified(adjPlAcc, features, false);
    unifier.isUnified(adjPlNomVoc, features, false);
    unifier.isUnified(adjSgNomVoc, features, true);
    unifier.isUnified(nounAcc, features, false);
    unifier.isUnified(nounNom, features, false);
    assertTrue(unifier.isUnified(nounVoc, features, true));
    assertEquals("[niezgorsze[niezgorszy/adj:sg:acc:n1.n2:pos*,niezgorszy/adj:sg:nom.voc:n1.n2:pos*], "
        + "lekarstwo[lekarstwo/subst:sg:acc:n2*,lekarstwo/subst:sg:nom:n2*,lekarstwo/subst:sg:voc:n2*]]",
        Arrays.toString(unifier.getUnifiedTokens()));
    unifier.reset();

    // test in a different order
    unifier.isUnified(adjPlAcc, features, false);
    unifier.isUnified(adjSgAcc, features, false);
    unifier.isUnified(adjSgNomVoc, features, false);
    unifier.isUnified(adjPlNomVoc, features, true);
    unifier.isUnified(nounNom, features, false);
    unifier.isUnified(nounVoc, features, false);
    assertTrue(unifier.isUnified(nounAcc, features, true));
    assertEquals("[niezgorsze[niezgorszy/adj:sg:acc:n1.n2:pos*,niezgorszy/adj:sg:nom.voc:n1.n2:pos*], "
        + "lekarstwo[lekarstwo/subst:sg:nom:n2*,lekarstwo/subst:sg:voc:n2*,lekarstwo/subst:sg:acc:n2*]]",
        Arrays.toString(unifier.getUnifiedTokens()));
    unifier.reset();
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) Test(org.junit.Test)

Example 23 with AnalyzedToken

use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.

the class UnifierTest method testMultipleFeats.

/**
 * Checks if all tokens share the same set of features to be unified.
 *
 * <p>Two features (number and gender) are unified at the same time. The scenarios below
 * run one after another against the same {@code Unifier} instance, which is reset
 * between them, so the order of the calls matters.
 */
@Test
public void testMultipleFeats() {
    UnifierConfiguration unifierConfig = new UnifierConfiguration();
    unifierConfig.setEquivalence("number", "singular", preparePOSElement(".*[\\.:]sg:.*"));
    unifierConfig.setEquivalence("number", "plural", preparePOSElement(".*[\\.:]pl:.*"));
    unifierConfig.setEquivalence("gender", "feminine", preparePOSElement(".*[\\.:]f([\\.:].*)?"));
    unifierConfig.setEquivalence("gender", "masculine", preparePOSElement(".*[\\.:]m([\\.:].*)?"));
    unifierConfig.setEquivalence("gender", "neutral", preparePOSElement(".*[\\.:]n([\\.:].*)?"));
    Unifier uni = unifierConfig.createUnifier();
    // note: despite the "sing" prefix, sing1a, sing1b and sing2 carry plural feminine tags
    AnalyzedToken sing1 = new AnalyzedToken("mały", "adj:sg:blahblah:m", "mały");
    AnalyzedToken sing1a = new AnalyzedToken("mały", "adj:pl:blahblah:f", "mały");
    AnalyzedToken sing1b = new AnalyzedToken("mały", "adj:pl:blahblah:f", "mały");
    AnalyzedToken sing2 = new AnalyzedToken("zgarbiony", "adj:pl:blahblah:f", "zgarbiony");
    AnalyzedToken sing3 = new AnalyzedToken("człowiek", "subst:sg:blahblah:m", "człowiek");
    // both features are stored with a null type list
    Map<String, List<String>> equiv = new HashMap<>();
    equiv.put("number", null);
    equiv.put("gender", null);
    // the readings of the first token are OR-ed together, every following step is AND-ed in
    boolean satisfied = uni.isSatisfied(sing1, equiv);
    satisfied |= uni.isSatisfied(sing1a, equiv);
    satisfied |= uni.isSatisfied(sing1b, equiv);
    uni.startUnify();
    satisfied &= uni.isSatisfied(sing2, equiv);
    uni.startNextToken();
    satisfied &= uni.isSatisfied(sing3, equiv);
    uni.startNextToken();
    satisfied &= uni.getFinalUnificationValue(equiv);
    // expected not to unify over all three tokens
    assertEquals(false, satisfied);
    uni.reset();
    //now test the simplified interface
    // 'true' is passed with the final reading of each token
    // (presumably a "last reading" flag - confirm against Unifier.isUnified)
    uni.isUnified(sing1, equiv, false);
    uni.isUnified(sing1a, equiv, false);
    uni.isUnified(sing1b, equiv, true);
    uni.isUnified(sing2, equiv, true);
    assertEquals(false, uni.isUnified(sing3, equiv, true));
    uni.reset();
    // an adjective with a plural and a singular reading, followed by a singular neuter noun:
    // only the singular adjective reading is expected in the result
    sing1a = new AnalyzedToken("osobiste", "adj:pl:nom.acc.voc:f.n.m2.m3:pos:aff", "osobisty");
    sing1b = new AnalyzedToken("osobiste", "adj:sg:nom.acc.voc:n:pos:aff", "osobisty");
    sing2 = new AnalyzedToken("godło", "subst:sg:nom.acc.voc:n", "godło");
    uni.isUnified(sing1a, equiv, false);
    uni.isUnified(sing1b, equiv, true);
    assertEquals(true, uni.isUnified(sing2, equiv, true));
    assertEquals("[osobiste[osobisty/adj:sg:nom.acc.voc:n:pos:aff*], godło[godło/subst:sg:nom.acc.voc:n*]]", Arrays.toString(uni.getFinalUnified()));
    uni.reset();
    //now test a case when the last reading doesn't match at all
    sing1a = new AnalyzedToken("osobiste", "adj:pl:nom.acc.voc:f.n.m2.m3:pos:aff", "osobisty");
    sing1b = new AnalyzedToken("osobiste", "adj:sg:nom.acc.voc:n:pos:aff", "osobisty");
    AnalyzedToken sing2a = new AnalyzedToken("godło", "subst:sg:nom.acc.voc:n", "godło");
    // the "indecl" tag contains neither a number nor a gender marker
    AnalyzedToken sing2b = new AnalyzedToken("godło", "indecl", "godło");
    uni.isUnified(sing1a, equiv, false);
    uni.isUnified(sing1b, equiv, true);
    uni.isUnified(sing2a, equiv, false);
    assertEquals(true, uni.isUnified(sing2b, equiv, true));
    // the non-matching "indecl" reading is expected to be absent from the result
    assertEquals("[osobiste[osobisty/adj:sg:nom.acc.voc:n:pos:aff*], godło[godło/subst:sg:nom.acc.voc:n*]]", Arrays.toString(uni.getFinalUnified()));
    uni.reset();
    //check if two features are left out correctly (both match)
    AnalyzedToken plur1 = new AnalyzedToken("zgarbieni", "adj:pl:foobar:m", "zgarbiony");
    AnalyzedToken plur2 = new AnalyzedToken("zgarbieni", "adj:pl:blabla:m", "zgarbiony");
    AnalyzedToken plur3 = new AnalyzedToken("ludzie", "subst:pl:blabla:m", "człowiek");
    AnalyzedToken plur4 = new AnalyzedToken("ludzie", "subst:pl:pampam:m", "człowiek");
    uni.isUnified(plur1, equiv, false);
    uni.isUnified(plur2, equiv, true);
    uni.isUnified(plur3, equiv, false);
    assertTrue(uni.isUnified(plur4, equiv, true));
    // all four readings agree in number and gender, so all of them are expected in the result
    assertEquals("[zgarbieni[zgarbiony/adj:pl:foobar:m*,zgarbiony/adj:pl:blabla:m*], " + "ludzie[człowiek/subst:pl:blabla:m*,człowiek/subst:pl:pampam:m*]]", Arrays.toString(uni.getFinalUnified()));
    //check with a sequence of many tokens
    uni.reset();
    AnalyzedToken case1a = new AnalyzedToken("xx", "abc:sg:f", "xx");
    AnalyzedToken case1b = new AnalyzedToken("xx", "cde:pl:f", "xx");
    AnalyzedToken case2a = new AnalyzedToken("yy", "abc:pl:f", "yy");
    AnalyzedToken case2b = new AnalyzedToken("yy", "cde:as:f", "yy");
    AnalyzedToken case2c = new AnalyzedToken("yy", "cde:pl:c", "yy");
    AnalyzedToken case2d = new AnalyzedToken("yy", "abc:sg:f", "yy");
    AnalyzedToken case2e = new AnalyzedToken("yy", "efg:aa:e", "yy");
    uni.isUnified(case1a, equiv, false);
    uni.isUnified(case1b, equiv, true);
    uni.isUnified(case2a, equiv, false);
    uni.isUnified(case2b, equiv, false);
    uni.isUnified(case2c, equiv, false);
    uni.isUnified(case2d, equiv, false);
    assertTrue(uni.isUnified(case2e, equiv, true));
    // of the five "yy" readings only the two that carry both a known number and gender are expected
    assertEquals("[xx[xx/abc:sg:f*,xx/cde:pl:f*], yy[yy/abc:pl:f*,yy/abc:sg:f*]]", Arrays.toString(uni.getFinalUnified()));
    uni.reset();
    // three tokens in a row; the lemmas differ per reading (xx1/xx2, yy1/yy2) so that the
    // surviving readings can be told apart in the expected strings
    AnalyzedToken tokenComplex1_1 = new AnalyzedToken("xx", "abc:sg:f", "xx1");
    AnalyzedToken tokenComplex1_2 = new AnalyzedToken("xx", "cde:pl:f", "xx2");
    AnalyzedToken tokenComplex2_1 = new AnalyzedToken("yy", "abc:sg:f", "yy1");
    AnalyzedToken tokenComplex2_2 = new AnalyzedToken("yy", "cde:pl:f", "yy2");
    AnalyzedToken tokenComplex3 = new AnalyzedToken("zz", "cde:sg:f", "zz");
    uni.isUnified(tokenComplex1_1, equiv, false);
    uni.isUnified(tokenComplex1_2, equiv, true);
    uni.isUnified(tokenComplex2_1, equiv, false);
    uni.isUnified(tokenComplex2_2, equiv, true);
    //both readings of tokenComplex1 and tokenComplex2 should be here:
    assertEquals("[xx[xx1/abc:sg:f*,xx2/cde:pl:f*], yy[yy1/abc:sg:f*,yy2/cde:pl:f*]]", Arrays.toString(uni.getFinalUnified()));
    assertTrue(uni.isUnified(tokenComplex3, equiv, true));
    //only one reading of tokenComplex1 and tokenComplex2 - as only one agrees with tokenComplex3
    assertEquals("[xx[xx1/abc:sg:f*], yy[yy1/abc:sg:f*], zz[zz/cde:sg:f*]]", Arrays.toString(uni.getFinalUnified()));
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) Test(org.junit.Test)

Example 24 with AnalyzedToken

use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.

the class UnifierTest method testUnificationCase.

/**
 * Trivial unification: tests whether the character case of the tokens is the same.
 *
 * <p>Three equivalence types are registered for the "case-sensitivity" feature
 * (lowercase, uppercase, alluppercase); each scenario asks for one of them and
 * feeds two tokens to the unifier.
 */
@Test
public void testUnificationCase() {
    UnifierConfiguration config = new UnifierConfiguration();
    config.setEquivalence("case-sensitivity", "lowercase", new PatternToken("\\p{Ll}+", true, true, false));
    config.setEquivalence("case-sensitivity", "uppercase", new PatternToken("\\p{Lu}\\p{Ll}+", true, true, false));
    config.setEquivalence("case-sensitivity", "alluppercase", new PatternToken("\\p{Lu}+$", true, true, false));
    Unifier unifier = config.createUnifier();

    AnalyzedToken lowerWord = new AnalyzedToken("lower", "JJR", "lower");
    AnalyzedToken lowercaseWord = new AnalyzedToken("lowercase", "JJ", "lowercase");
    AnalyzedToken capitalizedWord = new AnalyzedToken("Uppercase", "JJ", "Uppercase");
    AnalyzedToken capitalizedName = new AnalyzedToken("John", "NNP", "John");
    AnalyzedToken allCapsJohn = new AnalyzedToken("JOHN", "NNP", "John");
    AnalyzedToken allCapsJames = new AnalyzedToken("JAMES", "NNP", "James");

    // the requested type list is mutated in place between scenarios
    List<String> caseTypes = new ArrayList<>();
    caseTypes.add("lowercase");
    Map<String, List<String>> features = new HashMap<>();
    features.put("case-sensitivity", caseTypes);

    // "lowercase": two lowercase words agree
    // NOTE(review): here both tokens are fed before startUnify(), unlike the scenarios below - confirm intended
    boolean unified = unifier.isSatisfied(lowerWord, features);
    unified &= unifier.isSatisfied(lowercaseWord, features);
    unifier.startUnify();
    unified &= unifier.getFinalUnificationValue(features);
    assertEquals(true, unified);
    unifier.reset();

    // "lowercase": capitalized name followed by a lowercase word does not agree
    unified = unifier.isSatisfied(capitalizedName, features);
    unifier.startUnify();
    unified &= unifier.isSatisfied(lowercaseWord, features);
    unified &= unifier.getFinalUnificationValue(features);
    assertEquals(false, unified);
    unifier.reset();

    // "lowercase": capitalized word followed by a lowercase word does not agree
    unified = unifier.isSatisfied(capitalizedWord, features);
    unifier.startUnify();
    unified &= unifier.isSatisfied(lowerWord, features);
    unified &= unifier.getFinalUnificationValue(features);
    assertEquals(false, unified);
    unifier.reset();

    // "lowercase": two capitalized words do not agree either
    unified = unifier.isSatisfied(capitalizedName, features);
    unifier.startUnify();
    unified &= unifier.isSatisfied(capitalizedWord, features);
    unified &= unifier.getFinalUnificationValue(features);
    assertEquals(false, unified);
    unifier.reset();

    // "uppercase": the same two capitalized words now agree
    features.clear();
    caseTypes.clear();
    caseTypes.add("uppercase");
    features.put("case-sensitivity", caseTypes);
    unified = unifier.isSatisfied(capitalizedName, features);
    unifier.startUnify();
    unified &= unifier.isSatisfied(capitalizedWord, features);
    unified &= unifier.getFinalUnificationValue(features);
    assertEquals(true, unified);
    unifier.reset();

    // "alluppercase": capitalized words do not agree
    features.clear();
    caseTypes.clear();
    caseTypes.add("alluppercase");
    features.put("case-sensitivity", caseTypes);
    unified = unifier.isSatisfied(capitalizedName, features);
    unifier.startUnify();
    unified &= unifier.isSatisfied(capitalizedWord, features);
    unified &= unifier.getFinalUnificationValue(features);
    assertEquals(false, unified);
    unifier.reset();

    // "alluppercase": two all-caps words agree
    unified = unifier.isSatisfied(allCapsJames, features);
    unifier.startUnify();
    unified &= unifier.isSatisfied(allCapsJohn, features);
    unified &= unifier.getFinalUnificationValue(features);
    assertEquals(true, unified);
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) Test(org.junit.Test)

Example 25 with AnalyzedToken

use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.

the class UnifierTest method testUnificationNumber.

/**
 * Slightly non-trivial unification: tests whether the grammatical number of the tokens
 * is the same, using "singular" and "plural" equivalences defined over the POS tag.
 */
@Test
public void testUnificationNumber() {
    UnifierConfiguration config = new UnifierConfiguration();
    config.setEquivalence("number", "singular", preparePOSElement(".*[\\.:]sg:.*"));
    config.setEquivalence("number", "plural", preparePOSElement(".*[\\.:]pl:.*"));
    Unifier unifier = config.createUnifier();

    AnalyzedToken adjSingular = new AnalyzedToken("mały", "adj:sg:blahblah", "mały");
    AnalyzedToken nounSingular = new AnalyzedToken("człowiek", "subst:sg:blahblah", "człowiek");

    List<String> numberTypes = new ArrayList<>();
    numberTypes.add("singular");
    Map<String, List<String>> features = new HashMap<>();
    features.put("number", numberTypes);

    // singular adjective + singular noun, only "singular" requested
    boolean unified = unifier.isSatisfied(adjSingular, features);
    unifier.startUnify();
    unified &= unifier.isSatisfied(nounSingular, features);
    unified &= unifier.getFinalUnificationValue(features);
    assertEquals(true, unified);
    unifier.reset();

    // for multiple readings - OR for interpretations, AND for tokens
    AnalyzedToken adjPlural = new AnalyzedToken("mały", "adj:pl:blahblah", "mały");
    unified = unifier.isSatisfied(adjSingular, features);
    unified |= unifier.isSatisfied(adjPlural, features);
    unifier.startUnify();
    unified &= unifier.isSatisfied(nounSingular, features);
    unified &= unifier.getFinalUnificationValue(features);
    assertEquals(true, unified);
    unifier.reset();

    // check if any of the equivalences is there: both "singular" and "plural" requested
    numberTypes.add("plural");
    features.clear();
    features.put("number", numberTypes);
    adjPlural = new AnalyzedToken("mały", "adj:pl:blahblah", "mały");
    unified = unifier.isSatisfied(adjSingular, features);
    unified |= unifier.isSatisfied(adjPlural, features);
    unifier.startUnify();
    unified &= unifier.isSatisfied(nounSingular, features);
    unified &= unifier.getFinalUnificationValue(features);
    assertEquals(true, unified);
    unifier.reset();

    // now test all possible feature equivalences by leaving the type blank (null list)
    adjPlural = new AnalyzedToken("mały", "adj:pl:blahblah", "mały");
    features.clear();
    features.put("number", null);
    unified = unifier.isSatisfied(adjSingular, features);
    unified |= unifier.isSatisfied(adjPlural, features);
    unifier.startUnify();
    unified &= unifier.isSatisfied(nounSingular, features);
    unified &= unifier.getFinalUnificationValue(features);
    assertEquals(true, unified);
    unifier.reset();

    // test non-agreeing tokens with blank types: plural adjective vs. singular noun
    unified = unifier.isSatisfied(adjPlural, features);
    unifier.startUnify();
    unified &= unifier.isSatisfied(nounSingular, features);
    unified &= unifier.getFinalUnificationValue(features);
    assertEquals(false, unified);
    unifier.reset();
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) Test(org.junit.Test)

Aggregations

AnalyzedToken (org.languagetool.AnalyzedToken): 89, AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings): 48, ArrayList (java.util.ArrayList): 43, Matcher (java.util.regex.Matcher): 16, Test (org.junit.Test): 16, IOException (java.io.IOException): 9, Pattern (java.util.regex.Pattern): 7, Nullable (org.jetbrains.annotations.Nullable): 6, TaggedWord (org.languagetool.tagging.TaggedWord): 6, RuleMatch (org.languagetool.rules.RuleMatch): 4, Synthesizer (org.languagetool.synthesis.Synthesizer): 4, InputStream (java.io.InputStream): 2, HashMap (java.util.HashMap): 2, LinkedHashSet (java.util.LinkedHashSet): 2, Scanner (java.util.Scanner): 2, TreeSet (java.util.TreeSet): 2, DictionaryLookup (morfologik.stemming.DictionaryLookup): 2, IStemmer (morfologik.stemming.IStemmer): 2, AnalyzedSentence (org.languagetool.AnalyzedSentence): 2, ChunkTag (org.languagetool.chunking.ChunkTag): 2