Use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.
From the class RuleFilterEvaluatorTest, method testGetResolvedArgumentsWithColon.
/**
 * Resolves the argument string {@code "regex:(?:foo[xyz])bar"}, whose value part
 * contains a colon of its own, and checks that the result holds exactly one entry:
 * key {@code "regex"} mapped to the complete value {@code "(?:foo[xyz])bar"}.
 */
@Test
public void testGetResolvedArgumentsWithColon() throws Exception {
  // A single placeholder token is enough: the argument under test is a literal.
  AnalyzedToken placeholderToken = new AnalyzedToken("fake1", "pos", null);
  AnalyzedTokenReadings[] patternTokens = { new AnalyzedTokenReadings(placeholderToken, 0) };

  Map<String, String> resolved =
      eval.getResolvedArguments("regex:(?:foo[xyz])bar", patternTokens, Arrays.asList(1, 1));

  assertThat(resolved.get("regex"), is("(?:foo[xyz])bar"));
  assertThat(resolved.size(), is(1));
}
Use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.
From the class UnifierTest, method testMultipleFeatsWithMultipleTypes.
/**
 * Unifies a two-token sequence ("niezgorsze" + "lekarstwo") on number, gender and
 * case at the same time. Each token carries several readings; the test checks which
 * readings are reported by {@code getUnifiedTokens()}, and then repeats the run with
 * the readings supplied in a different order.
 */
@Test
public void testMultipleFeatsWithMultipleTypes() {
  UnifierConfiguration unifierConfig = new UnifierConfiguration();
  unifierConfig.setEquivalence("number", "singular", preparePOSElement(".*[\\.:]sg:.*"));
  unifierConfig.setEquivalence("number", "plural", preparePOSElement(".*[\\.:]pl:.*"));
  unifierConfig.setEquivalence("gender", "feminine", preparePOSElement(".*[\\.:]f([\\.:].*)?"));
  // NOTE(review): "masculine" is registered three times (m1, m2, m3); confirm how
  // setEquivalence treats a repeated feature/type pair.
  unifierConfig.setEquivalence("gender", "masculine", preparePOSElement(".*[\\.:]m1([\\.:].*)?"));
  unifierConfig.setEquivalence("gender", "masculine", preparePOSElement(".*[\\.:]m2([\\.:].*)?"));
  unifierConfig.setEquivalence("gender", "masculine", preparePOSElement(".*[\\.:]m3([\\.:].*)?"));
  unifierConfig.setEquivalence("gender", "neutral1", preparePOSElement(".*[\\.:]n1(?:[\\.:].*)?"));
  unifierConfig.setEquivalence("gender", "neutral2", preparePOSElement(".*[\\.:]n2(?:[\\.:].*)?"));
  unifierConfig.setEquivalence("case", "nominativus", preparePOSElement(".*[\\.:]nom[\\.:]?.*"));
  unifierConfig.setEquivalence("case", "accusativus", preparePOSElement(".*[\\.:]acc[\\.:]?.*"));
  unifierConfig.setEquivalence("case", "dativus", preparePOSElement(".*[\\.:]dat[\\.:]?.*"));
  unifierConfig.setEquivalence("case", "vocativus", preparePOSElement(".*[\\.:]voc[\\.:]?.*"));
  Unifier uni = unifierConfig.createUnifier();

  // Four readings of the adjective "niezgorsze": two singular, two plural.
  AnalyzedToken adjSgAcc = new AnalyzedToken("niezgorsze", "adj:sg:acc:n1.n2:pos", "niezgorszy");
  AnalyzedToken adjPlAcc = new AnalyzedToken("niezgorsze", "adj:pl:acc:m2.m3.f.n1.n2.p2.p3:pos", "niezgorszy");
  AnalyzedToken adjPlNomVoc = new AnalyzedToken("niezgorsze", "adj:pl:nom.voc:m2.m3.f.n1.n2.p2.p3:pos", "niezgorszy");
  AnalyzedToken adjSgNomVoc = new AnalyzedToken("niezgorsze", "adj:sg:nom.voc:n1.n2:pos", "niezgorszy");
  // Three singular readings of the noun "lekarstwo", differing only in case.
  AnalyzedToken nounSgAcc = new AnalyzedToken("lekarstwo", "subst:sg:acc:n2", "lekarstwo");
  AnalyzedToken nounSgNom = new AnalyzedToken("lekarstwo", "subst:sg:nom:n2", "lekarstwo");
  AnalyzedToken nounSgVoc = new AnalyzedToken("lekarstwo", "subst:sg:voc:n2", "lekarstwo");

  // A null type list is used for every feature, i.e. no explicit type restriction.
  Map<String, List<String>> equiv = new HashMap<>();
  equiv.put("number", null);
  equiv.put("gender", null);
  equiv.put("case", null);

  // Feed the readings one by one through isUnified; the boolean is true on the
  // last reading of each token (presumably the "last reading" flag - confirm).
  uni.isUnified(adjSgAcc, equiv, false);
  uni.isUnified(adjPlAcc, equiv, false);
  uni.isUnified(adjPlNomVoc, equiv, false);
  uni.isUnified(adjSgNomVoc, equiv, true);
  uni.isUnified(nounSgAcc, equiv, false);
  uni.isUnified(nounSgNom, equiv, false);
  assertTrue(uni.isUnified(nounSgVoc, equiv, true));
  // Only the singular adjective readings are expected alongside all noun readings.
  assertEquals("[niezgorsze[niezgorszy/adj:sg:acc:n1.n2:pos*,niezgorszy/adj:sg:nom.voc:n1.n2:pos*], "
      + "lekarstwo[lekarstwo/subst:sg:acc:n2*,lekarstwo/subst:sg:nom:n2*,lekarstwo/subst:sg:voc:n2*]]",
      Arrays.toString(uni.getUnifiedTokens()));
  uni.reset();

  // Same tokens, readings supplied in a different order.
  uni.isUnified(adjPlAcc, equiv, false);
  uni.isUnified(adjSgAcc, equiv, false);
  uni.isUnified(adjSgNomVoc, equiv, false);
  uni.isUnified(adjPlNomVoc, equiv, true);
  uni.isUnified(nounSgNom, equiv, false);
  uni.isUnified(nounSgVoc, equiv, false);
  assertTrue(uni.isUnified(nounSgAcc, equiv, true));
  // The noun readings are expected in the order they were fed (nom, voc, acc).
  assertEquals("[niezgorsze[niezgorszy/adj:sg:acc:n1.n2:pos*,niezgorszy/adj:sg:nom.voc:n1.n2:pos*], "
      + "lekarstwo[lekarstwo/subst:sg:nom:n2*,lekarstwo/subst:sg:voc:n2*,lekarstwo/subst:sg:acc:n2*]]",
      Arrays.toString(uni.getUnifiedTokens()));
  uni.reset();
}
Use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.
From the class UnifierTest, method testMultipleFeats.
/**
 * Checks if all tokens share the same set of features to be unified, here
 * "number" and "gender" together.
 *
 * <p>The first scenario drives the unifier step by step ({@code isSatisfied},
 * {@code startUnify}, {@code startNextToken}, {@code getFinalUnificationValue});
 * the remaining scenarios use the simplified {@code isUnified} call and inspect the
 * readings returned by {@code getFinalUnified()}.
 */
@Test
public void testMultipleFeats() {
  UnifierConfiguration unifierConfig = new UnifierConfiguration();
  unifierConfig.setEquivalence("number", "singular", preparePOSElement(".*[\\.:]sg:.*"));
  unifierConfig.setEquivalence("number", "plural", preparePOSElement(".*[\\.:]pl:.*"));
  unifierConfig.setEquivalence("gender", "feminine", preparePOSElement(".*[\\.:]f([\\.:].*)?"));
  unifierConfig.setEquivalence("gender", "masculine", preparePOSElement(".*[\\.:]m([\\.:].*)?"));
  unifierConfig.setEquivalence("gender", "neutral", preparePOSElement(".*[\\.:]n([\\.:].*)?"));
  Unifier uni = unifierConfig.createUnifier();

  // "mały" has three readings (sg:m, pl:f, pl:f), "zgarbiony" is pl:f, "człowiek" is sg:m.
  AnalyzedToken sing1 = new AnalyzedToken("mały", "adj:sg:blahblah:m", "mały");
  AnalyzedToken sing1a = new AnalyzedToken("mały", "adj:pl:blahblah:f", "mały");
  AnalyzedToken sing1b = new AnalyzedToken("mały", "adj:pl:blahblah:f", "mały");
  AnalyzedToken sing2 = new AnalyzedToken("zgarbiony", "adj:pl:blahblah:f", "zgarbiony");
  AnalyzedToken sing3 = new AnalyzedToken("człowiek", "subst:sg:blahblah:m", "człowiek");

  // A null type list is used for both features, i.e. no explicit type restriction.
  Map<String, List<String>> equiv = new HashMap<>();
  equiv.put("number", null);
  equiv.put("gender", null);

  // Step-by-step interface: readings of the first token are OR-ed, tokens are AND-ed.
  boolean satisfied = uni.isSatisfied(sing1, equiv);
  satisfied |= uni.isSatisfied(sing1a, equiv);
  satisfied |= uni.isSatisfied(sing1b, equiv);
  uni.startUnify();
  satisfied &= uni.isSatisfied(sing2, equiv);
  uni.startNextToken();
  satisfied &= uni.isSatisfied(sing3, equiv);
  uni.startNextToken();
  satisfied &= uni.getFinalUnificationValue(equiv);
  assertEquals(false, satisfied);
  uni.reset();

  // Now test the simplified interface on the same tokens; the result must again be false.
  uni.isUnified(sing1, equiv, false);
  uni.isUnified(sing1a, equiv, false);
  uni.isUnified(sing1b, equiv, true);
  uni.isUnified(sing2, equiv, true);
  assertEquals(false, uni.isUnified(sing3, equiv, true));
  uni.reset();

  // Two readings of "osobiste" (plural and singular neuter) followed by singular neuter
  // "godło": only the singular reading of the adjective is expected in the result.
  sing1a = new AnalyzedToken("osobiste", "adj:pl:nom.acc.voc:f.n.m2.m3:pos:aff", "osobisty");
  sing1b = new AnalyzedToken("osobiste", "adj:sg:nom.acc.voc:n:pos:aff", "osobisty");
  sing2 = new AnalyzedToken("godło", "subst:sg:nom.acc.voc:n", "godło");
  uni.isUnified(sing1a, equiv, false);
  uni.isUnified(sing1b, equiv, true);
  assertTrue(uni.isUnified(sing2, equiv, true));
  assertEquals("[osobiste[osobisty/adj:sg:nom.acc.voc:n:pos:aff*], godło[godło/subst:sg:nom.acc.voc:n*]]", Arrays.toString(uni.getFinalUnified()));
  uni.reset();

  // Now test a case when the last reading doesn't match at all ("indecl" carries
  // neither number nor gender); the expected result is the same as above.
  sing1a = new AnalyzedToken("osobiste", "adj:pl:nom.acc.voc:f.n.m2.m3:pos:aff", "osobisty");
  sing1b = new AnalyzedToken("osobiste", "adj:sg:nom.acc.voc:n:pos:aff", "osobisty");
  AnalyzedToken sing2a = new AnalyzedToken("godło", "subst:sg:nom.acc.voc:n", "godło");
  AnalyzedToken sing2b = new AnalyzedToken("godło", "indecl", "godło");
  uni.isUnified(sing1a, equiv, false);
  uni.isUnified(sing1b, equiv, true);
  uni.isUnified(sing2a, equiv, false);
  assertTrue(uni.isUnified(sing2b, equiv, true));
  assertEquals("[osobiste[osobisty/adj:sg:nom.acc.voc:n:pos:aff*], godło[godło/subst:sg:nom.acc.voc:n*]]", Arrays.toString(uni.getFinalUnified()));
  uni.reset();

  // Check if two features are left out correctly (both match): every reading is pl:m,
  // so all four readings are expected in the result.
  AnalyzedToken plur1 = new AnalyzedToken("zgarbieni", "adj:pl:foobar:m", "zgarbiony");
  AnalyzedToken plur2 = new AnalyzedToken("zgarbieni", "adj:pl:blabla:m", "zgarbiony");
  AnalyzedToken plur3 = new AnalyzedToken("ludzie", "subst:pl:blabla:m", "człowiek");
  AnalyzedToken plur4 = new AnalyzedToken("ludzie", "subst:pl:pampam:m", "człowiek");
  uni.isUnified(plur1, equiv, false);
  uni.isUnified(plur2, equiv, true);
  uni.isUnified(plur3, equiv, false);
  assertTrue(uni.isUnified(plur4, equiv, true));
  assertEquals("[zgarbieni[zgarbiony/adj:pl:foobar:m*,zgarbiony/adj:pl:blabla:m*], " + "ludzie[człowiek/subst:pl:blabla:m*,człowiek/subst:pl:pampam:m*]]", Arrays.toString(uni.getFinalUnified()));

  // Check with a sequence of many readings: of the five "yy" readings only the two
  // that agree with an "xx" reading (pl:f and sg:f) are expected in the result.
  uni.reset();
  AnalyzedToken case1a = new AnalyzedToken("xx", "abc:sg:f", "xx");
  AnalyzedToken case1b = new AnalyzedToken("xx", "cde:pl:f", "xx");
  AnalyzedToken case2a = new AnalyzedToken("yy", "abc:pl:f", "yy");
  AnalyzedToken case2b = new AnalyzedToken("yy", "cde:as:f", "yy");
  AnalyzedToken case2c = new AnalyzedToken("yy", "cde:pl:c", "yy");
  AnalyzedToken case2d = new AnalyzedToken("yy", "abc:sg:f", "yy");
  AnalyzedToken case2e = new AnalyzedToken("yy", "efg:aa:e", "yy");
  uni.isUnified(case1a, equiv, false);
  uni.isUnified(case1b, equiv, true);
  uni.isUnified(case2a, equiv, false);
  uni.isUnified(case2b, equiv, false);
  uni.isUnified(case2c, equiv, false);
  uni.isUnified(case2d, equiv, false);
  assertTrue(uni.isUnified(case2e, equiv, true));
  assertEquals("[xx[xx/abc:sg:f*,xx/cde:pl:f*], yy[yy/abc:pl:f*,yy/abc:sg:f*]]", Arrays.toString(uni.getFinalUnified()));
  uni.reset();

  // Three-token sequence: the third token narrows down the readings of the first two.
  AnalyzedToken tokenComplex1_1 = new AnalyzedToken("xx", "abc:sg:f", "xx1");
  AnalyzedToken tokenComplex1_2 = new AnalyzedToken("xx", "cde:pl:f", "xx2");
  AnalyzedToken tokenComplex2_1 = new AnalyzedToken("yy", "abc:sg:f", "yy1");
  AnalyzedToken tokenComplex2_2 = new AnalyzedToken("yy", "cde:pl:f", "yy2");
  AnalyzedToken tokenComplex3 = new AnalyzedToken("zz", "cde:sg:f", "zz");
  uni.isUnified(tokenComplex1_1, equiv, false);
  uni.isUnified(tokenComplex1_2, equiv, true);
  uni.isUnified(tokenComplex2_1, equiv, false);
  uni.isUnified(tokenComplex2_2, equiv, true);
  // Both readings of tokenComplex1 and tokenComplex2 should be here:
  assertEquals("[xx[xx1/abc:sg:f*,xx2/cde:pl:f*], yy[yy1/abc:sg:f*,yy2/cde:pl:f*]]", Arrays.toString(uni.getFinalUnified()));
  assertTrue(uni.isUnified(tokenComplex3, equiv, true));
  // Only one reading of tokenComplex1 and tokenComplex2 - as only one agrees with tokenComplex3
  assertEquals("[xx[xx1/abc:sg:f*], yy[yy1/abc:sg:f*], zz[zz/cde:sg:f*]]", Arrays.toString(uni.getFinalUnified()));
}
Use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.
From the class UnifierTest, method testUnificationCase.
/**
 * Trivial unification: tests if the character case of the tokens is the same.
 *
 * <p>Three types of the feature "case-sensitivity" are configured (lowercase,
 * uppercase, alluppercase). Each scenario restricts the feature to a single type
 * and checks whether a pair of tokens unifies under that restriction.
 */
@Test
public void testUnificationCase() {
  UnifierConfiguration unifierConfig = new UnifierConfiguration();
  PatternToken elLower = new PatternToken("\\p{Ll}+", true, true, false);
  PatternToken elUpper = new PatternToken("\\p{Lu}\\p{Ll}+", true, true, false);
  PatternToken elAllUpper = new PatternToken("\\p{Lu}+$", true, true, false);
  unifierConfig.setEquivalence("case-sensitivity", "lowercase", elLower);
  unifierConfig.setEquivalence("case-sensitivity", "uppercase", elUpper);
  unifierConfig.setEquivalence("case-sensitivity", "alluppercase", elAllUpper);

  AnalyzedToken lower1 = new AnalyzedToken("lower", "JJR", "lower");
  AnalyzedToken lower2 = new AnalyzedToken("lowercase", "JJ", "lowercase");
  AnalyzedToken upper1 = new AnalyzedToken("Uppercase", "JJ", "Uppercase");
  AnalyzedToken upper2 = new AnalyzedToken("John", "NNP", "John");
  AnalyzedToken upperAll1 = new AnalyzedToken("JOHN", "NNP", "John");
  AnalyzedToken upperAll2 = new AnalyzedToken("JAMES", "NNP", "James");

  Unifier uni = unifierConfig.createUnifier();
  Map<String, List<String>> equiv = new HashMap<>();
  // The same list instance is cleared and refilled for each type tested below.
  List<String> caseTypes = new ArrayList<>();
  caseTypes.add("lowercase");
  equiv.put("case-sensitivity", caseTypes);

  // "lowercase" only: two lowercase tokens (both passed before startUnify()) unify.
  boolean satisfied = uni.isSatisfied(lower1, equiv);
  satisfied &= uni.isSatisfied(lower2, equiv);
  uni.startUnify();
  satisfied &= uni.getFinalUnificationValue(equiv);
  assertTrue(satisfied);
  uni.reset();

  // "lowercase" only: capitalized token followed by a lowercase one does not unify.
  satisfied = uni.isSatisfied(upper2, equiv);
  uni.startUnify();
  satisfied &= uni.isSatisfied(lower2, equiv);
  satisfied &= uni.getFinalUnificationValue(equiv);
  assertEquals(false, satisfied);
  uni.reset();

  // Same check with the other capitalized/lowercase pair.
  satisfied = uni.isSatisfied(upper1, equiv);
  uni.startUnify();
  satisfied &= uni.isSatisfied(lower1, equiv);
  satisfied &= uni.getFinalUnificationValue(equiv);
  assertEquals(false, satisfied);
  uni.reset();

  // "lowercase" only: two capitalized tokens do not unify either.
  satisfied = uni.isSatisfied(upper2, equiv);
  uni.startUnify();
  satisfied &= uni.isSatisfied(upper1, equiv);
  satisfied &= uni.getFinalUnificationValue(equiv);
  assertEquals(false, satisfied);
  uni.reset();

  // Switch to "uppercase" only: the two capitalized tokens now unify.
  equiv.clear();
  caseTypes.clear();
  caseTypes.add("uppercase");
  equiv.put("case-sensitivity", caseTypes);
  satisfied = uni.isSatisfied(upper2, equiv);
  uni.startUnify();
  satisfied &= uni.isSatisfied(upper1, equiv);
  satisfied &= uni.getFinalUnificationValue(equiv);
  assertTrue(satisfied);
  uni.reset();

  // Switch to "alluppercase" only: capitalized tokens do not unify...
  equiv.clear();
  caseTypes.clear();
  caseTypes.add("alluppercase");
  equiv.put("case-sensitivity", caseTypes);
  satisfied = uni.isSatisfied(upper2, equiv);
  uni.startUnify();
  satisfied &= uni.isSatisfied(upper1, equiv);
  satisfied &= uni.getFinalUnificationValue(equiv);
  assertEquals(false, satisfied);
  uni.reset();

  // ...but two all-uppercase tokens do.
  satisfied = uni.isSatisfied(upperAll2, equiv);
  uni.startUnify();
  satisfied &= uni.isSatisfied(upperAll1, equiv);
  satisfied &= uni.getFinalUnificationValue(equiv);
  assertTrue(satisfied);
}
Use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.
From the class UnifierTest, method testUnificationNumber.
/**
 * Slightly non-trivial unification: tests if the grammatical number is the same.
 *
 * <p>The feature "number" has the types "singular" and "plural". The scenarios cover
 * a single-reading pair, a first token with two readings, an explicit list of both
 * types, a blank ({@code null}) type list, and finally a non-agreeing pair.
 */
@Test
public void testUnificationNumber() {
  UnifierConfiguration unifierConfig = new UnifierConfiguration();
  unifierConfig.setEquivalence("number", "singular", preparePOSElement(".*[\\.:]sg:.*"));
  unifierConfig.setEquivalence("number", "plural", preparePOSElement(".*[\\.:]pl:.*"));
  Unifier uni = unifierConfig.createUnifier();

  AnalyzedToken sing1 = new AnalyzedToken("mały", "adj:sg:blahblah", "mały");
  AnalyzedToken sing2 = new AnalyzedToken("człowiek", "subst:sg:blahblah", "człowiek");
  Map<String, List<String>> equiv = new HashMap<>();
  List<String> numberTypes = new ArrayList<>();
  numberTypes.add("singular");
  equiv.put("number", numberTypes);

  // Two singular tokens, restricted to "singular": unified.
  boolean satisfied = uni.isSatisfied(sing1, equiv);
  uni.startUnify();
  satisfied &= uni.isSatisfied(sing2, equiv);
  satisfied &= uni.getFinalUnificationValue(equiv);
  assertTrue(satisfied);
  uni.reset();

  // For multiple readings - OR for interpretations, AND for tokens:
  // "mały" gets an additional plural reading, which must not prevent unification.
  AnalyzedToken sing1a = new AnalyzedToken("mały", "adj:pl:blahblah", "mały");
  satisfied = uni.isSatisfied(sing1, equiv);
  satisfied |= uni.isSatisfied(sing1a, equiv);
  uni.startUnify();
  satisfied &= uni.isSatisfied(sing2, equiv);
  satisfied &= uni.getFinalUnificationValue(equiv);
  assertTrue(satisfied);
  uni.reset();

  // Check if any of the equivalences is there: both types are now listed explicitly.
  numberTypes.add("plural");
  equiv.clear();
  equiv.put("number", numberTypes);
  sing1a = new AnalyzedToken("mały", "adj:pl:blahblah", "mały");
  satisfied = uni.isSatisfied(sing1, equiv);
  satisfied |= uni.isSatisfied(sing1a, equiv);
  uni.startUnify();
  satisfied &= uni.isSatisfied(sing2, equiv);
  satisfied &= uni.getFinalUnificationValue(equiv);
  assertTrue(satisfied);
  uni.reset();

  // Now test all possible feature equivalences by leaving type blank (null list).
  sing1a = new AnalyzedToken("mały", "adj:pl:blahblah", "mały");
  equiv.clear();
  equiv.put("number", null);
  satisfied = uni.isSatisfied(sing1, equiv);
  satisfied |= uni.isSatisfied(sing1a, equiv);
  uni.startUnify();
  satisfied &= uni.isSatisfied(sing2, equiv);
  satisfied &= uni.getFinalUnificationValue(equiv);
  assertTrue(satisfied);
  uni.reset();

  // Test non-agreeing tokens with blank types: plural reading only vs. singular noun.
  satisfied = uni.isSatisfied(sing1a, equiv);
  uni.startUnify();
  satisfied &= uni.isSatisfied(sing2, equiv);
  satisfied &= uni.getFinalUnificationValue(equiv);
  assertEquals(false, satisfied);
  uni.reset();
}
Aggregations