Use of org.languagetool.AnalyzedToken in the project languagetool by languagetool-org.
From the class UnifierTest, method testNegation.
/**
 * Exercises number/gender unification over determiner + adjective + noun triples and checks
 * that the negated result behaves as expected, both through the step-by-step calls
 * ({@code isSatisfied} / {@code startUnify} / {@code startNextToken} /
 * {@code getFinalUnificationValue}) and through the simplified {@code isUnified} call.
 *
 * <p>Every fixture's POS tag agrees with its variable name: tokens named {@code plur} carry a
 * {@code pl} tag and nouns carry the {@code subst} prefix, so the "plural" cases really feed
 * plural readings to the unifier.
 */
@Test
public void testNegation() {
  UnifierConfiguration unifierConfig = new UnifierConfiguration();
  unifierConfig.setEquivalence("number", "singular", preparePOSElement(".*[\\.:]sg:.*"));
  unifierConfig.setEquivalence("number", "plural", preparePOSElement(".*[\\.:]pl:.*"));
  unifierConfig.setEquivalence("gender", "feminine", preparePOSElement(".*:f"));
  unifierConfig.setEquivalence("gender", "masculine", preparePOSElement(".*:m"));
  Unifier uni = unifierConfig.createUnifier();

  // Latin adjectives
  AnalyzedToken sing_masc = new AnalyzedToken("parvus", "adj:sg:blahblah:m", "parvus");
  AnalyzedToken plur_masc = new AnalyzedToken("parvi", "adj:pl:blahblah:m", "parvus");
  AnalyzedToken plur_fem = new AnalyzedToken("parvae", "adj:pl:blahblah:f", "parvus");
  AnalyzedToken sing_fem = new AnalyzedToken("parva", "adj:sg:blahblah:f", "parvus");
  // Let's pretend Latin has determiners
  AnalyzedToken det_sing_fem = new AnalyzedToken("una", "det:sg:blahblah:f", "unus");
  AnalyzedToken det_plur_fem = new AnalyzedToken("unae", "det:pl:blahblah:f", "unus");
  AnalyzedToken det_sing_masc = new AnalyzedToken("unus", "det:sg:blahblah:m", "unus");
  AnalyzedToken det_plur_masc = new AnalyzedToken("uni", "det:pl:blahblah:m", "unus");
  // and nouns
  AnalyzedToken subst_sing_fem = new AnalyzedToken("discrepatio", "subst:sg:blahblah:f", "discrepatio");
  AnalyzedToken subst_plur_fem = new AnalyzedToken("discrepationes", "subst:pl:blahblah:f", "discrepatio");
  AnalyzedToken subst_sing_masc = new AnalyzedToken("homo", "subst:sg:blahblah:m", "homo");
  AnalyzedToken subst_plur_masc = new AnalyzedToken("homines", "subst:pl:blahblah:m", "homo");

  // now we should have 4x4x4 combinations...
  // Both features are requested with a null type list.
  Map<String, List<String>> equiv = new HashMap<>();
  equiv.put("number", null);
  equiv.put("gender", null);

  // Step-by-step interface: all three tokens are singular masculine, so they unify.
  boolean satisfied = uni.isSatisfied(det_sing_masc, equiv);
  uni.startUnify();
  satisfied &= uni.isSatisfied(sing_masc, equiv);
  uni.startNextToken();
  satisfied &= uni.isSatisfied(subst_sing_masc, equiv);
  uni.startNextToken();
  satisfied &= uni.getFinalUnificationValue(equiv);
  assertEquals(true, satisfied);
  uni.reset();

  // now test the simplified interface
  uni.isUnified(det_sing_masc, equiv, true);
  uni.isUnified(sing_masc, equiv, true);
  assertEquals(true, uni.isUnified(subst_sing_masc, equiv, true));
  uni.reset();

  // now let's negate this
  // traditional way: the negation of a successful unification must be false
  satisfied = uni.isSatisfied(det_sing_masc, equiv);
  uni.startUnify();
  satisfied &= uni.isSatisfied(sing_masc, equiv);
  uni.startNextToken();
  satisfied &= uni.isSatisfied(subst_sing_masc, equiv);
  uni.startNextToken();
  satisfied &= uni.getFinalUnificationValue(equiv);
  assertEquals(false, !satisfied);
  uni.reset();

  // now test the simplified interface
  uni.isUnified(det_sing_masc, equiv, true);
  uni.isUnified(sing_masc, equiv, true);
  assertEquals(false, !uni.isUnified(subst_sing_masc, equiv, true));
  uni.reset();

  // From here on every triple disagrees somewhere, so the negated result must be true.

  // determiner is feminine, adjective and noun are masculine
  uni.isUnified(det_sing_fem, equiv, true);
  uni.isUnified(sing_masc, equiv, true);
  assertEquals(true, !uni.isUnified(subst_sing_masc, equiv, true));
  uni.reset();

  // adjective is feminine, determiner and noun are masculine
  uni.isUnified(det_sing_masc, equiv, true);
  uni.isUnified(sing_fem, equiv, true);
  assertEquals(true, !uni.isUnified(subst_sing_masc, equiv, true));
  uni.reset();

  // noun is feminine, determiner and adjective are masculine
  uni.isUnified(det_sing_masc, equiv, true);
  uni.isUnified(sing_masc, equiv, true);
  assertEquals(true, !uni.isUnified(subst_sing_fem, equiv, true));
  uni.reset();

  // adjective is plural, noun is feminine; determiner is singular masculine
  uni.isUnified(det_sing_masc, equiv, true);
  uni.isUnified(plur_masc, equiv, true);
  assertEquals(true, !uni.isUnified(subst_sing_fem, equiv, true));
  uni.reset();

  // adjective is plural feminine, noun is feminine; determiner is singular masculine
  uni.isUnified(det_sing_masc, equiv, true);
  uni.isUnified(plur_fem, equiv, true);
  assertEquals(true, !uni.isUnified(subst_sing_fem, equiv, true));
  uni.reset();

  // gender agrees everywhere, but the noun is singular while the others are plural
  uni.isUnified(det_plur_fem, equiv, true);
  uni.isUnified(plur_fem, equiv, true);
  assertEquals(true, !uni.isUnified(subst_sing_fem, equiv, true));
  uni.reset();

  // gender agrees everywhere, but the determiner is singular while the others are plural
  uni.isUnified(det_sing_fem, equiv, true);
  uni.isUnified(plur_fem, equiv, true);
  assertEquals(true, !uni.isUnified(subst_plur_fem, equiv, true));
  uni.reset();

  // determiner is singular, noun is masculine; adjective is plural feminine
  uni.isUnified(det_sing_fem, equiv, true);
  uni.isUnified(plur_fem, equiv, true);
  assertEquals(true, !uni.isUnified(subst_plur_masc, equiv, true));
  uni.reset();

  // number agrees everywhere, but the adjective is feminine while the others are masculine
  uni.isUnified(det_plur_masc, equiv, true);
  uni.isUnified(plur_fem, equiv, true);
  assertEquals(true, !uni.isUnified(subst_plur_masc, equiv, true));
  uni.reset();
}
Use of org.languagetool.AnalyzedToken in the project languagetool by languagetool-org.
From the class UnifierTest, method testAddNeutralElement.
/**
 * Unifies a two-reading adjective with a noun while a comma is inserted between them through
 * {@code addNeutralElement}, then checks both the boolean result and the string form of
 * {@code getFinalUnified()}.
 */
@Test
public void testAddNeutralElement() {
  // Fixtures: two readings of the same surface form, a noun, and a comma.
  AnalyzedToken adjPluralReading =
      new AnalyzedToken("osobiste", "adj:pl:nom.acc.voc:f.n.m2.m3:pos:aff", "osobisty");
  AnalyzedToken adjSingularReading =
      new AnalyzedToken("osobiste", "adj:sg:nom.acc.voc:n:pos:aff", "osobisty");
  AnalyzedToken noun = new AnalyzedToken("godło", "subst:sg:nom.acc.voc:n", "godło");
  AnalyzedToken commaToken = new AnalyzedToken(",", "comma", ",");

  // Equivalence classes: two for number, three for gender.
  UnifierConfiguration config = new UnifierConfiguration();
  config.setEquivalence("number", "singular", preparePOSElement(".*[\\.:]sg:.*"));
  config.setEquivalence("number", "plural", preparePOSElement(".*[\\.:]pl:.*"));
  config.setEquivalence("gender", "feminine", preparePOSElement(".*[\\.:]f([\\.:].*)?"));
  config.setEquivalence("gender", "masculine", preparePOSElement(".*[\\.:]m([\\.:].*)?"));
  config.setEquivalence("gender", "neutral", preparePOSElement(".*[\\.:]n([\\.:].*)?"));
  Unifier unifier = config.createUnifier();

  // Both features are requested with a null type list.
  Map<String, List<String>> features = new HashMap<>();
  features.put("number", null);
  features.put("gender", null);

  // NOTE(review): the third argument looks like a "last reading of this token" flag -
  // confirm against Unifier.isUnified.
  unifier.isUnified(adjPluralReading, features, false);
  unifier.isUnified(adjSingularReading, features, true);
  unifier.addNeutralElement(new AnalyzedTokenReadings(commaToken, 0));
  boolean nounUnified = unifier.isUnified(noun, features, true);
  assertEquals(true, nounUnified);

  // Only the singular neuter adjective reading is expected, with the comma between it and the noun.
  String finalUnified = Arrays.toString(unifier.getFinalUnified());
  assertEquals("[osobiste[osobisty/adj:sg:nom.acc.voc:n:pos:aff*], ,[,/comma*], godło[godło/subst:sg:nom.acc.voc:n*]]", finalUnified);
  unifier.reset();
}
Use of org.languagetool.AnalyzedToken in the project languagetool by languagetool-org.
From the class UnifierTest, method testUnificationNumberGender.
/**
 * Slightly non-trivial unification: tests whether grammatical number and gender are the same.
 *
 * <p>Three readings of an adjective are offered for the first position and one noun reading for
 * the second. The test expects unification to succeed and {@code getUnifiedTokens()} to contain
 * only the singular masculine adjective reading plus the noun.
 */
@Test
public void testUnificationNumberGender() {
  // One pattern token per equivalence type, each matching on a POS regular expression only.
  PatternToken singular = new PatternToken("", false, false, false);
  singular.setPosToken(new PatternToken.PosToken(".*[\\.:]sg:.*", true, false));
  PatternToken plural = new PatternToken("", false, false, false);
  plural.setPosToken(new PatternToken.PosToken(".*[\\.:]pl:.*", true, false));
  PatternToken feminine = new PatternToken("", false, false, false);
  feminine.setPosToken(new PatternToken.PosToken(".*[\\.:]f", true, false));
  PatternToken masculine = new PatternToken("", false, false, false);
  masculine.setPosToken(new PatternToken.PosToken(".*[\\.:]m", true, false));

  // Registration order is the same as before: singular, plural, feminine, masculine.
  UnifierConfiguration config = new UnifierConfiguration();
  config.setEquivalence("number", "singular", singular);
  config.setEquivalence("number", "plural", plural);
  config.setEquivalence("gender", "feminine", feminine);
  config.setEquivalence("gender", "masculine", masculine);
  Unifier unifier = config.createUnifier();

  AnalyzedToken adjSgMasc = new AnalyzedToken("mały", "adj:sg:blahblah:m", "mały");
  AnalyzedToken adjSgFem = new AnalyzedToken("mała", "adj:sg:blahblah:f", "mały");
  AnalyzedToken adjPlMasc = new AnalyzedToken("małe", "adj:pl:blahblah:m", "mały");
  AnalyzedToken nounSgMasc = new AnalyzedToken("człowiek", "subst:sg:blahblah:m", "człowiek");

  Map<String, List<String>> features = new HashMap<>();
  features.put("number", null);
  features.put("gender", null);

  // First position: the non-short-circuit |= keeps every isSatisfied call executing.
  boolean unified = unifier.isSatisfied(adjSgMasc, features);
  unified |= unifier.isSatisfied(adjSgFem, features);
  unified |= unifier.isSatisfied(adjPlMasc, features);
  unifier.startUnify();

  // Second position: the noun, followed by the overall verdict.
  unified &= unifier.isSatisfied(nounSgMasc, features);
  unifier.startNextToken();
  unified &= unifier.getFinalUnificationValue(features);
  assertEquals(true, unified);

  String unifiedTokens = Arrays.toString(unifier.getUnifiedTokens());
  assertEquals("[mały[mały/adj:sg:blahblah:m*], człowiek[człowiek/subst:sg:blahblah:m*]]", unifiedTokens);
  unifier.reset();
}
Use of org.languagetool.AnalyzedToken in the project languagetool by languagetool-org.
From the class ManualTaggerAdapter, method tag.
/**
 * Builds one {@link AnalyzedTokenReadings} per input token from the entries that
 * {@code manualTagger} returns for the lower-cased token.
 *
 * <p>Each returned entry becomes one {@link AnalyzedToken} carrying the original (not
 * lower-cased) token text. A token with no entries gets a single reading whose POS tag and
 * lemma are both {@code null}. The start position passed for each token is the summed length
 * of all preceding tokens.
 *
 * @param sentenceTokens the tokens of one sentence, in order
 * @return one readings object per input token, in input order
 * @throws IOException declared by the overridden method
 */
@Override
public List<AnalyzedTokenReadings> tag(List<String> sentenceTokens) throws IOException {
  List<AnalyzedTokenReadings> tokenReadings = new ArrayList<>(sentenceTokens.size());
  int pos = 0;
  for (String word : sentenceTokens) {
    // Lower-case with a fixed locale: the no-argument toLowerCase() uses the JVM default
    // locale, so the lookup key would change with the machine's settings.
    List<TaggedWord> manualTags = manualTagger.tag(word.toLowerCase(java.util.Locale.ROOT));
    List<AnalyzedToken> readings = new ArrayList<>();
    for (TaggedWord manualTag : manualTags) {
      readings.add(new AnalyzedToken(word, manualTag.getPosTag(), manualTag.getLemma()));
    }
    if (readings.isEmpty()) {
      // No entry found: keep the token, but without POS tag and lemma.
      readings.add(new AnalyzedToken(word, null, null));
    }
    tokenReadings.add(new AnalyzedTokenReadings(readings, pos));
    pos += word.length();
  }
  return tokenReadings;
}
Use of org.languagetool.AnalyzedToken in the project languagetool by languagetool-org.
From the class EnglishChunkFilterTest, method testPluralByPluralNoun.
/**
 * Replaces the token at index 3 ('books') with one that carries an NNS and a VBZ reading,
 * then asserts the chunk string in which the noun phrase is marked plural.
 */
@Test
public void testPluralByPluralNoun() throws IOException {
  List<ChunkTaggedToken> tokens = makeTokens("I/X have/N-VP ten/B-NP books/I-NP ./.");

  // Readings for 'books': plural noun and third-person verb.
  AnalyzedToken nounReading = new AnalyzedToken("books", "NNS", "book");
  AnalyzedToken verbReading = new AnalyzedToken("books", "VBZ", "book");
  AnalyzedTokenReadings booksReadings =
      new AnalyzedTokenReadings(Arrays.asList(nounReading, verbReading), 0);
  List<ChunkTag> booksChunkTags = Collections.singletonList(new ChunkTag("I-NP"));

  // Swap the existing 'books' token for the one that has readings attached.
  final int booksIndex = 3;
  tokens.remove(booksIndex);
  tokens.add(booksIndex, new ChunkTaggedToken("books", booksChunkTags, booksReadings));

  assertChunks(tokens, "I/X have/N-VP ten/B-NP-plural books/E-NP-plural ./.");
}
Aggregations