Search in sources :

Example 86 with AnalyzedToken

use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.

the class UnifierTest method testNegation.

/**
 * Verifies that the outcome of unification can be negated: a
 * determiner/adjective/noun sequence that agrees in number and gender must
 * unify (so its negation is false), while sequences that disagree in number
 * or gender must not unify (so their negation is true).
 *
 * <p>Both the step-by-step interface ({@code isSatisfied} /
 * {@code startUnify} / {@code startNextToken} /
 * {@code getFinalUnificationValue}) and the simplified {@code isUnified}
 * interface are exercised.
 */
@Test
public void testNegation() {
    UnifierConfiguration unifierConfig = new UnifierConfiguration();
    unifierConfig.setEquivalence("number", "singular", preparePOSElement(".*[\\.:]sg:.*"));
    unifierConfig.setEquivalence("number", "plural", preparePOSElement(".*[\\.:]pl:.*"));
    unifierConfig.setEquivalence("gender", "feminine", preparePOSElement(".*:f"));
    unifierConfig.setEquivalence("gender", "masculine", preparePOSElement(".*:m"));
    Unifier uni = unifierConfig.createUnifier();

    // Latin adjectives; every POS tag now agrees with the number and gender in the variable name
    AnalyzedToken adjSingMasc = new AnalyzedToken("parvus", "adj:sg:blahblah:m", "parvus");
    AnalyzedToken adjPlurMasc = new AnalyzedToken("parvi", "adj:pl:blahblah:m", "parvus");
    AnalyzedToken adjPlurFem = new AnalyzedToken("parvae", "adj:pl:blahblah:f", "parvus");
    AnalyzedToken adjSingFem = new AnalyzedToken("parva", "adj:sg:blahblah:f", "parvus");
    // Let's pretend Latin has determiners
    AnalyzedToken detSingFem = new AnalyzedToken("una", "det:sg:blahblah:f", "unus");
    AnalyzedToken detPlurFem = new AnalyzedToken("unae", "det:pl:blahblah:f", "unus");
    AnalyzedToken detSingMasc = new AnalyzedToken("unus", "det:sg:blahblah:m", "unus");
    AnalyzedToken detPlurMasc = new AnalyzedToken("uni", "det:pl:blahblah:m", "unus");
    // and nouns
    AnalyzedToken substSingFem = new AnalyzedToken("discrepatio", "subst:sg:blahblah:f", "discrepatio");
    AnalyzedToken substPlurFem = new AnalyzedToken("discrepationes", "subst:pl:blahblah:f", "discrepatio");
    AnalyzedToken substSingMasc = new AnalyzedToken("homo", "subst:sg:blahblah:m", "homo");
    AnalyzedToken substPlurMasc = new AnalyzedToken("homines", "subst:pl:blahblah:m", "homo");

    // now we should have 4x4x4 combinations...
    // A null type list is passed for each feature; no explicit types are selected here.
    Map<String, List<String>> equiv = new HashMap<>();
    equiv.put("number", null);
    equiv.put("gender", null);

    // agreeing sequence, step-by-step interface
    boolean satisfied = uni.isSatisfied(detSingMasc, equiv);
    uni.startUnify();
    satisfied &= uni.isSatisfied(adjSingMasc, equiv);
    uni.startNextToken();
    satisfied &= uni.isSatisfied(substSingMasc, equiv);
    uni.startNextToken();
    satisfied &= uni.getFinalUnificationValue(equiv);
    assertEquals(true, satisfied);
    uni.reset();

    // agreeing sequence, simplified interface
    uni.isUnified(detSingMasc, equiv, true);
    uni.isUnified(adjSingMasc, equiv, true);
    assertEquals(true, uni.isUnified(substSingMasc, equiv, true));
    uni.reset();

    // now let's negate this: the negation of a successful unification is false
    // traditional way
    satisfied = uni.isSatisfied(detSingMasc, equiv);
    uni.startUnify();
    satisfied &= uni.isSatisfied(adjSingMasc, equiv);
    uni.startNextToken();
    satisfied &= uni.isSatisfied(substSingMasc, equiv);
    uni.startNextToken();
    satisfied &= uni.getFinalUnificationValue(equiv);
    assertEquals(false, !satisfied);
    uni.reset();

    // simplified interface
    uni.isUnified(detSingMasc, equiv, true);
    uni.isUnified(adjSingMasc, equiv, true);
    assertEquals(false, !uni.isUnified(substSingMasc, equiv, true));
    uni.reset();

    // From here on every sequence disagrees somewhere, so the negation is true.

    // first token (determiner) disagrees in gender with the rest
    uni.isUnified(detSingFem, equiv, true);
    uni.isUnified(adjSingMasc, equiv, true);
    assertEquals(true, !uni.isUnified(substSingMasc, equiv, true));
    uni.reset();

    // second token (adjective) disagrees in gender
    uni.isUnified(detSingMasc, equiv, true);
    uni.isUnified(adjSingFem, equiv, true);
    assertEquals(true, !uni.isUnified(substSingMasc, equiv, true));
    uni.reset();

    // third token (noun) disagrees in gender
    uni.isUnified(detSingMasc, equiv, true);
    uni.isUnified(adjSingMasc, equiv, true);
    assertEquals(true, !uni.isUnified(substSingFem, equiv, true));
    uni.reset();

    // second token disagrees in number, third token disagrees in gender
    uni.isUnified(detSingMasc, equiv, true);
    uni.isUnified(adjPlurMasc, equiv, true);
    assertEquals(true, !uni.isUnified(substSingFem, equiv, true));
    uni.reset();

    // second token disagrees in both number and gender
    uni.isUnified(detSingMasc, equiv, true);
    uni.isUnified(adjPlurFem, equiv, true);
    assertEquals(true, !uni.isUnified(substSingFem, equiv, true));
    uni.reset();

    // third token disagrees in number
    uni.isUnified(detPlurFem, equiv, true);
    uni.isUnified(adjPlurFem, equiv, true);
    assertEquals(true, !uni.isUnified(substSingFem, equiv, true));
    uni.reset();

    // first token disagrees in number with the rest
    uni.isUnified(detSingFem, equiv, true);
    uni.isUnified(adjPlurFem, equiv, true);
    assertEquals(true, !uni.isUnified(substPlurFem, equiv, true));
    uni.reset();

    // first token disagrees in number, third token disagrees in gender
    uni.isUnified(detSingFem, equiv, true);
    uni.isUnified(adjPlurFem, equiv, true);
    assertEquals(true, !uni.isUnified(substPlurMasc, equiv, true));
    uni.reset();

    // second token disagrees in gender although all numbers agree
    uni.isUnified(detPlurMasc, equiv, true);
    uni.isUnified(adjPlurFem, equiv, true);
    assertEquals(true, !uni.isUnified(substPlurMasc, equiv, true));
    uni.reset();
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) Test(org.junit.Test)

Example 87 with AnalyzedToken

use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.

the class UnifierTest method testAddNeutralElement.

/**
 * Unifies an adjective (fed in as two readings) with a noun while a comma,
 * registered through {@code addNeutralElement}, sits between them.
 *
 * <p>The expected final sequence contains only the singular neuter reading
 * of the adjective, then the comma, then the noun.
 */
@Test
public void testAddNeutralElement() {
    // Features to unify on; a null list is passed instead of explicit types.
    Map<String, List<String>> features = new HashMap<>();
    features.put("number", null);
    features.put("gender", null);

    UnifierConfiguration config = new UnifierConfiguration();
    config.setEquivalence("number", "singular", preparePOSElement(".*[\\.:]sg:.*"));
    config.setEquivalence("number", "plural", preparePOSElement(".*[\\.:]pl:.*"));
    config.setEquivalence("gender", "feminine", preparePOSElement(".*[\\.:]f([\\.:].*)?"));
    config.setEquivalence("gender", "masculine", preparePOSElement(".*[\\.:]m([\\.:].*)?"));
    config.setEquivalence("gender", "neutral", preparePOSElement(".*[\\.:]n([\\.:].*)?"));
    Unifier unifier = config.createUnifier();

    // Two readings of the same surface form "osobiste": one plural, one singular neuter.
    AnalyzedToken adjPluralReading =
        new AnalyzedToken("osobiste", "adj:pl:nom.acc.voc:f.n.m2.m3:pos:aff", "osobisty");
    AnalyzedToken adjSingularReading =
        new AnalyzedToken("osobiste", "adj:sg:nom.acc.voc:n:pos:aff", "osobisty");
    AnalyzedToken noun = new AnalyzedToken("godło", "subst:sg:nom.acc.voc:n", "godło");
    AnalyzedTokenReadings neutralComma =
        new AnalyzedTokenReadings(new AnalyzedToken(",", "comma", ","), 0);

    // The third argument is false for the first reading and true for the last one.
    unifier.isUnified(adjPluralReading, features, false);
    unifier.isUnified(adjSingularReading, features, true);
    unifier.addNeutralElement(neutralComma);
    boolean nounUnified = unifier.isUnified(noun, features, true);
    assertEquals(true, nounUnified);

    String expected =
        "[osobiste[osobisty/adj:sg:nom.acc.voc:n:pos:aff*], ,[,/comma*], godło[godło/subst:sg:nom.acc.voc:n*]]";
    assertEquals(expected, Arrays.toString(unifier.getFinalUnified()));
    unifier.reset();
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings) Test(org.junit.Test)

Example 88 with AnalyzedToken

use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.

the class UnifierTest method testUnificationNumberGender.

/**
 * Slightly non-trivial unification: tests whether grammatical number and
 * gender are the same across tokens.
 *
 * <p>Three readings are offered for the first token and one for the
 * second; the expected unified output keeps only the singular masculine
 * reading of the first token together with the noun.
 */
@Test
public void testUnificationNumberGender() {
    // Build the four POS-based equivalence patterns up front.
    PatternToken singular = new PatternToken("", false, false, false);
    singular.setPosToken(new PatternToken.PosToken(".*[\\.:]sg:.*", true, false));
    PatternToken plural = new PatternToken("", false, false, false);
    plural.setPosToken(new PatternToken.PosToken(".*[\\.:]pl:.*", true, false));
    PatternToken feminine = new PatternToken("", false, false, false);
    feminine.setPosToken(new PatternToken.PosToken(".*[\\.:]f", true, false));
    PatternToken masculine = new PatternToken("", false, false, false);
    masculine.setPosToken(new PatternToken.PosToken(".*[\\.:]m", true, false));

    // Registration order is kept: number (singular, plural), then gender (feminine, masculine).
    UnifierConfiguration config = new UnifierConfiguration();
    config.setEquivalence("number", "singular", singular);
    config.setEquivalence("number", "plural", plural);
    config.setEquivalence("gender", "feminine", feminine);
    config.setEquivalence("gender", "masculine", masculine);
    Unifier unifier = config.createUnifier();

    Map<String, List<String>> features = new HashMap<>();
    features.put("number", null);
    features.put("gender", null);

    // Readings offered for the first token
    AnalyzedToken adjSingMasc = new AnalyzedToken("mały", "adj:sg:blahblah:m", "mały");
    AnalyzedToken adjSingFem = new AnalyzedToken("mała", "adj:sg:blahblah:f", "mały");
    AnalyzedToken adjPlurMasc = new AnalyzedToken("małe", "adj:pl:blahblah:m", "mały");
    // Single reading of the second token
    AnalyzedToken noun = new AnalyzedToken("człowiek", "subst:sg:blahblah:m", "człowiek");

    // The first token's readings are OR-ed together; the later checks are AND-ed in.
    boolean unified = uni(unifier, adjSingMasc, features);
    unified |= uni(unifier, adjSingFem, features);
    unified |= uni(unifier, adjPlurMasc, features);
    unifier.startUnify();
    unified &= uni(unifier, noun, features);
    unifier.startNextToken();
    unified &= unifier.getFinalUnificationValue(features);
    assertEquals(true, unified);

    String expected = "[mały[mały/adj:sg:blahblah:m*], człowiek[człowiek/subst:sg:blahblah:m*]]";
    assertEquals(expected, Arrays.toString(unifier.getUnifiedTokens()));
    unifier.reset();
}

/** Thin forwarding helper around {@code Unifier.isSatisfied} to keep the test body compact. */
private static boolean uni(Unifier unifier, AnalyzedToken token, Map<String, List<String>> features) {
    return unifier.isSatisfied(token, features);
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) Test(org.junit.Test)

Example 89 with AnalyzedToken

use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.

the class ManualTaggerAdapter method tag.

/**
 * Tags each token of a sentence using the wrapped manual tagger.
 *
 * <p>Each token is looked up in lower-cased form; one {@link AnalyzedToken}
 * is created per returned tag, always carrying the token's original
 * spelling. A token without any tag gets a single reading whose POS tag and
 * lemma are both {@code null}. Start positions are the running sum of the
 * lengths of the preceding tokens.
 *
 * @param sentenceTokens the tokens of the sentence, in order
 * @return one {@link AnalyzedTokenReadings} per input token, in input order
 * @throws IOException declared by the signature; not thrown directly by this body
 */
@Override
public List<AnalyzedTokenReadings> tag(List<String> sentenceTokens) throws IOException {
    List<AnalyzedTokenReadings> result = new ArrayList<>();
    int startPos = 0;
    for (String token : sentenceTokens) {
        // NOTE(review): toLowerCase() uses the JVM default locale; confirm this is intended.
        List<TaggedWord> tagged = manualTagger.tag(token.toLowerCase());
        List<AnalyzedToken> readings = new ArrayList<>();
        if (tagged.isEmpty()) {
            // No tag found: keep the token with an untagged reading.
            readings.add(new AnalyzedToken(token, null, null));
        } else {
            for (TaggedWord entry : tagged) {
                readings.add(new AnalyzedToken(token, entry.getPosTag(), entry.getLemma()));
            }
        }
        result.add(new AnalyzedTokenReadings(readings, startPos));
        startPos += token.length();
    }
    return result;
}
Also used : AnalyzedToken(org.languagetool.AnalyzedToken) TaggedWord(org.languagetool.tagging.TaggedWord) ArrayList(java.util.ArrayList) AnalyzedTokenReadings(org.languagetool.AnalyzedTokenReadings)

Aggregations

AnalyzedToken (org.languagetool.AnalyzedToken): 89; AnalyzedTokenReadings (org.languagetool.AnalyzedTokenReadings): 48; ArrayList (java.util.ArrayList): 43; Matcher (java.util.regex.Matcher): 16; Test (org.junit.Test): 16; IOException (java.io.IOException): 9; Pattern (java.util.regex.Pattern): 7; Nullable (org.jetbrains.annotations.Nullable): 6; TaggedWord (org.languagetool.tagging.TaggedWord): 6; RuleMatch (org.languagetool.rules.RuleMatch): 4; Synthesizer (org.languagetool.synthesis.Synthesizer): 4; InputStream (java.io.InputStream): 2; HashMap (java.util.HashMap): 2; LinkedHashSet (java.util.LinkedHashSet): 2; Scanner (java.util.Scanner): 2; TreeSet (java.util.TreeSet): 2; DictionaryLookup (morfologik.stemming.DictionaryLookup): 2; IStemmer (morfologik.stemming.IStemmer): 2; AnalyzedSentence (org.languagetool.AnalyzedSentence): 2; ChunkTag (org.languagetool.chunking.ChunkTag): 2