use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class MultiWordChunkerTest method testDisambiguateMultiSpace.
/**
 * Analyzes a short Ukrainian sentence, passes it through a {@code MultiWordChunker}
 * built from {@code /uk/multiwords.txt}, and expects the readings of token 1 to
 * contain {@code <adv>} and the readings of token 4 to contain {@code </adv>}.
 */
@Test
public void testDisambiguateMultiSpace() throws Exception {
  Disambiguator multiWordChunker = new MultiWordChunker("/uk/multiwords.txt");
  JLanguageTool langTool = new JLanguageTool(new Ukrainian());

  AnalyzedSentence input = langTool.getAnalyzedSentence("для годиться.");
  AnalyzedTokenReadings[] chunked = multiWordChunker.disambiguate(input).getTokens();

  // Opening marker is expected on the first word token.
  String openingReadings = chunked[1].getReadings().toString();
  assertTrue(openingReadings.contains("<adv>"));

  // Closing marker is expected on the token at index 4.
  String closingReadings = chunked[4].getReadings().toString();
  assertTrue(closingReadings.contains("</adv>"));
}
use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class RuleFilterEvaluatorTest method testGetResolvedArguments.
/**
 * Resolves the argument string {@code year:\1 month:\2} against two single-reading
 * tokens and expects exactly two entries: {@code year} mapped to the first token's
 * text and {@code month} mapped to the second token's text.
 */
@Test
public void testGetResolvedArguments() throws Exception {
  AnalyzedTokenReadings firstToken = new AnalyzedTokenReadings(new AnalyzedToken("fake1", "pos", null), 0);
  AnalyzedTokenReadings secondToken = new AnalyzedTokenReadings(new AnalyzedToken("fake2", "pos", null), 0);
  AnalyzedTokenReadings[] patternTokens = new AnalyzedTokenReadings[] { firstToken, secondToken };

  Map<String, String> resolved =
      eval.getResolvedArguments("year:\\1 month:\\2", patternTokens, Arrays.asList(1, 1));

  assertThat(resolved.get("year"), is("fake1"));
  assertThat(resolved.get("month"), is("fake2"));
  assertThat(resolved.size(), is(2));
}
use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class RuleFilterEvaluatorTest method testGetResolvedArgumentsWithColon.
/**
 * Resolves an argument whose value itself contains a colon
 * ({@code regex:(?:foo[xyz])bar}) and expects a single {@code regex} entry holding
 * the full value after the first colon.
 */
@Test
public void testGetResolvedArgumentsWithColon() throws Exception {
  AnalyzedTokenReadings onlyToken = new AnalyzedTokenReadings(new AnalyzedToken("fake1", "pos", null), 0);
  AnalyzedTokenReadings[] patternTokens = new AnalyzedTokenReadings[] { onlyToken };

  Map<String, String> resolved =
      eval.getResolvedArguments("regex:(?:foo[xyz])bar", patternTokens, Arrays.asList(1, 1));

  assertThat(resolved.get("regex"), is("(?:foo[xyz])bar"));
  assertThat(resolved.size(), is(1));
}
use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class DisambiguationRuleTest method testDisambiguationRulesFromXML.
/**
 * Checks the examples attached to every disambiguation rule in {@code rules}.
 *
 * <p>For each rule two kinds of examples are verified:
 * <ul>
 *   <li><b>Untouched examples</b>: after running the disambiguation up to this rule
 *       (via {@code disambiguateUntil}), applying the rule must leave the string
 *       representation of the sentence unchanged.</li>
 *   <li><b>Disambiguated examples</b>: the example must carry
 *       {@code <marker>...</marker>} markup, the token starting at the marker must
 *       show the declared input forms before the rule is applied and the declared
 *       output forms afterwards.</li>
 * </ul>
 *
 * @param rules        all disambiguation rules; also handed to {@code disambiguateUntil}
 * @param languageTool used to produce the raw analysis of each example sentence
 * @param lang         language under test; used for {@code disambiguateUntil} and in messages
 * @throws IOException declared for the analysis/disambiguation calls
 */
private void testDisambiguationRulesFromXML(List<DisambiguationPatternRule> rules, JLanguageTool languageTool, Language lang) throws IOException {
  for (DisambiguationPatternRule rule : rules) {
    String id = rule.getId();
    if (rule.getUntouchedExamples() != null) {
      List<String> goodSentences = rule.getUntouchedExamples();
      for (String goodSentence : goodSentences) {
        // enable indentation use
        goodSentence = goodSentence.replaceAll("[\\n\\t]+", "");
        goodSentence = cleanXML(goodSentence);
        assertTrue(goodSentence.trim().length() > 0);
        // Two independent analyses of the same text: one is kept as the reference,
        // the other is handed to rule.replace().
        AnalyzedSentence sent = disambiguateUntil(lang, rules, id, languageTool.getRawAnalyzedSentence(goodSentence));
        AnalyzedSentence sentToReplace = disambiguateUntil(lang, rules, id, languageTool.getRawAnalyzedSentence(goodSentence));
        //note: we're testing only if string representations are equal
        //it's because getRawAnalyzedSentence does not set all properties
        //in AnalyzedSentence, and during equal test they are set for the
        //left-hand side
        assertEquals("The untouched example (" + goodSentence + ") for " + lang.getName() + " rule " + rule + "] was touched!", sent.toString(), rule.replace(sentToReplace).toString());
      }
    }
    List<DisambiguatedExample> examples = rule.getExamples();
    if (examples != null) {
      for (DisambiguatedExample example : examples) {
        String outputForms = example.getDisambiguated();
        assertTrue("No output form found for: " + id, outputForms != null);
        assertTrue(outputForms.trim().length() > 0);
        int expectedMatchStart = example.getExample().indexOf("<marker>");
        int closingMarkerPos = example.getExample().indexOf("</marker>");
        // Validate the raw indexOf() results BEFORE subtracting the opening tag length:
        // a missing closing tag yields -1 here, which would turn into -9 after the
        // subtraction and slip past a plain "== -1" check. A closing tag that does not
        // come after the complete opening tag is rejected as well.
        if (expectedMatchStart == -1 || closingMarkerPos < expectedMatchStart + "<marker>".length()) {
          fail(lang + ": No position markup ('<marker>...</marker>') in disambiguated example in rule " + rule);
        }
        // End offset of the marked text once the opening tag is no longer counted
        // (token positions refer to the cleanXML() output; presumably that strips the markup).
        int expectedMatchEnd = closingMarkerPos - "<marker>".length();
        String inputForms = example.getAmbiguous();
        assertTrue("No input form found for: " + id, inputForms != null);
        assertTrue(inputForms.trim().length() > 0);
        assertTrue("Input and output forms for rule " + id + " are the same!", !outputForms.equals(inputForms));
        // Three separate analyses of the example: untouched raw input, input disambiguated
        // up to this rule, and the latter with this rule applied on top.
        AnalyzedSentence cleanInput = languageTool.getRawAnalyzedSentence(cleanXML(example.getExample()));
        AnalyzedSentence sent = disambiguateUntil(lang, rules, id, languageTool.getRawAnalyzedSentence(cleanXML(example.getExample())));
        AnalyzedSentence disambiguatedSent = rule.replace(disambiguateUntil(lang, rules, id, languageTool.getRawAnalyzedSentence(cleanXML(example.getExample()))));
        assertTrue("Disambiguated sentence is equal to the non-disambiguated sentence for rule: " + id + ". The sentence was: " + sent, !cleanInput.equals(disambiguatedSent));
        assertTrue("Disambiguated sentence is equal to the input sentence for rule: " + id + ". The sentence was: " + sent, !sent.equals(disambiguatedSent));
        String reading = "";
        String annotations = "";
        // Find the marked token in the sentence BEFORE this rule is applied.
        for (AnalyzedTokenReadings readings : sent.getTokens()) {
          // Skip the sentence-start token unless the example explicitly refers to it.
          if (readings.isSentenceStart() && !inputForms.contains("<S>")) {
            continue;
          }
          if (readings.getStartPos() == expectedMatchStart) {
            AnalyzedTokenReadings[] r = { readings };
            reading = new AnalyzedSentence(r).toShortString(",");
            annotations = readings.getHistoricalAnnotations();
            int startPos = readings.getStartPos();
            int endPos = readings.getEndPos();
            assertTrue("Wrong marker position in the example for the rule " + id + ": got " + startPos + "-" + endPos + ", expected " + expectedMatchStart + "-" + expectedMatchEnd, startPos == expectedMatchStart && endPos == expectedMatchEnd);
            break;
          }
        }
        assertEquals("The input form for the rule " + id + " in the example: " + example + " is different than expected (expected " + inputForms + " but got " + sortForms(reading) + "). The token has been changed by the disambiguator: " + annotations, inputForms, sortForms(reading));
        // Find the marked token in the sentence AFTER this rule is applied.
        for (AnalyzedTokenReadings readings : disambiguatedSent.getTokens()) {
          if (readings.isSentenceStart() && !outputForms.contains("<S>")) {
            continue;
          }
          if (readings.getStartPos() == expectedMatchStart) {
            AnalyzedTokenReadings[] r = { readings };
            reading = new AnalyzedSentence(r).toShortString(",");
            assertTrue(readings.getStartPos() == expectedMatchStart && readings.getEndPos() == expectedMatchEnd);
            break;
          }
        }
        assertEquals("The output form for the rule " + id + " in the example: " + example + " is different than expected (expected " + outputForms + " but got " + sortForms(reading) + "). The token has been changed by the disambiguator: " + annotations, outputForms, sortForms(reading));
      }
    }
  }
}
use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class ManualTaggerAdapterTest method testMultiplePOS.
/**
 * Tags the single word {@code inflectedform2} and expects one token with three
 * readings. The readings must carry the POS tags {@code POS1a}, {@code POS1b} and
 * {@code POS1c}, in that order, each with token {@code inflectedform2} and lemma
 * {@code lemma2}.
 */
@Test
public void testMultiplePOS() throws Exception {
  List<AnalyzedTokenReadings> tagged = tagger.tag(Arrays.asList("inflectedform2"));
  assertNotNull(tagged);
  assertEquals(1, tagged.size());

  AnalyzedTokenReadings onlyToken = tagged.get(0);
  assertEquals("inflectedform2", onlyToken.getToken());
  assertNotNull(onlyToken.getReadings());
  assertEquals(3, onlyToken.getReadingsLength());

  // Every reading shares token and lemma; only the POS tag differs by position.
  String[] expectedPosTags = { "POS1a", "POS1b", "POS1c" };
  for (int i = 0; i < expectedPosTags.length; i++) {
    AnalyzedToken currentReading = onlyToken.getReadings().get(i);
    assertEquals(expectedPosTags[i], currentReading.getPOSTag());
    assertEquals("inflectedform2", currentReading.getToken());
    assertEquals("lemma2", currentReading.getLemma());
  }
}
Aggregations