use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class ManualTaggerAdapterTest method testMultipleWords.
/**
 * Tags two inflected forms in a single call and checks that one
 * {@link AnalyzedTokenReadings} is returned per input token, each carrying
 * the expected token text and number of readings.
 */
@Test
public void testMultipleWords() throws Exception {
  List<AnalyzedTokenReadings> tagged = tagger.tag(Arrays.asList("inflectedform2", "inflectedform3"));
  assertNotNull(tagged);
  // One result entry per input token.
  assertEquals(2, tagged.size());

  // First token; the individual readings are tested by #testMultipleLemma().
  AnalyzedTokenReadings first = tagged.get(0);
  assertEquals("inflectedform2", first.getToken());
  assertNotNull(first.getReadings());
  assertEquals(3, first.getReadingsLength());

  // Second token; the individual readings are tested by #testMultiplePOS().
  AnalyzedTokenReadings second = tagged.get(1);
  assertEquals("inflectedform3", second.getToken());
  assertNotNull(second.getReadings());
  assertEquals(4, second.getReadingsLength());
}
use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class WordTokenizer method run.
/**
 * Reads text line by line from standard input, analyzes each line with a
 * {@link JLanguageTool} created for the given language, and writes every
 * token returned by {@code getTokensWithoutWhitespace()} to standard output,
 * one token per line.
 *
 * <p>Both streams are managed by try-with-resources, so the writer is always
 * flushed and closed, even if analysis fails or closing the reader throws.
 *
 * @param lang short language code passed to {@code Languages.getLanguageForShortCode}
 * @throws IOException if reading, analyzing, or writing fails
 */
private void run(final String lang) throws IOException {
  JLanguageTool langTool = new JLanguageTool(Languages.getLanguageForShortCode(lang));
  // Resources are closed in reverse declaration order: 'out' (which flushes
  // pending output) first, then 'in'. A failure while closing one no longer
  // prevents the other from being closed.
  // NOTE(review): both streams use the platform default charset, as before.
  try (BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
       BufferedWriter out = new BufferedWriter(new OutputStreamWriter(System.out))) {
    String line;
    while ((line = in.readLine()) != null) {
      AnalyzedTokenReadings[] atr = langTool.getRawAnalyzedSentence(line).getTokensWithoutWhitespace();
      for (AnalyzedTokenReadings a : atr) {
        out.write(a.getToken());
        out.write("\n");
      }
    }
  }
}
use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class UnifierTest method testAddNeutralElement.
/**
 * Unifies two readings of "osobiste" with the noun "godło" on number and
 * gender, with a comma added as a neutral element in between, and checks
 * the textual form of the final unified sequence.
 */
@Test
public void testAddNeutralElement() {
  // Equivalence definitions: POS-tag patterns for each number and gender value.
  UnifierConfiguration config = new UnifierConfiguration();
  config.setEquivalence("number", "singular", preparePOSElement(".*[\\.:]sg:.*"));
  config.setEquivalence("number", "plural", preparePOSElement(".*[\\.:]pl:.*"));
  config.setEquivalence("gender", "feminine", preparePOSElement(".*[\\.:]f([\\.:].*)?"));
  config.setEquivalence("gender", "masculine", preparePOSElement(".*[\\.:]m([\\.:].*)?"));
  config.setEquivalence("gender", "neutral", preparePOSElement(".*[\\.:]n([\\.:].*)?"));
  Unifier unifier = config.createUnifier();

  // Both features are requested with a null type list.
  Map<String, List<String>> features = new HashMap<>();
  features.put("number", null);
  features.put("gender", null);

  AnalyzedToken adjectivePlural = new AnalyzedToken("osobiste", "adj:pl:nom.acc.voc:f.n.m2.m3:pos:aff", "osobisty");
  AnalyzedToken adjectiveSingular = new AnalyzedToken("osobiste", "adj:sg:nom.acc.voc:n:pos:aff", "osobisty");
  AnalyzedToken noun = new AnalyzedToken("godło", "subst:sg:nom.acc.voc:n", "godło");
  AnalyzedToken commaToken = new AnalyzedToken(",", "comma", ",");

  // Feed the two readings of the adjective; return values are intentionally ignored.
  // NOTE(review): the boolean presumably marks the last reading of a token — confirm against Unifier.
  unifier.isUnified(adjectivePlural, features, false);
  unifier.isUnified(adjectiveSingular, features, true);
  // The comma is added as a neutral element before the noun is checked.
  unifier.addNeutralElement(new AnalyzedTokenReadings(commaToken, 0));
  assertEquals(true, unifier.isUnified(noun, features, true));

  String unified = Arrays.toString(unifier.getFinalUnified());
  assertEquals("[osobiste[osobisty/adj:sg:nom.acc.voc:n:pos:aff*], ,[,/comma*], godło[godło/subst:sg:nom.acc.voc:n*]]", unified);
  unifier.reset();
}
use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class ManualTaggerAdapter method tag.
/**
 * Tags every token by looking up its lower-cased form in the manual tagger.
 *
 * <p>Each tag found yields one {@link AnalyzedToken} that keeps the token's
 * original casing. A token with no tags gets a single reading whose POS tag
 * and lemma are {@code null}. The start position of each result is the sum
 * of the lengths of all preceding tokens.
 *
 * @param sentenceTokens the tokens to tag, in sentence order
 * @return one {@link AnalyzedTokenReadings} per input token, in the same order
 * @throws IOException declared by the overridden method
 */
@Override
public List<AnalyzedTokenReadings> tag(List<String> sentenceTokens) throws IOException {
  List<AnalyzedTokenReadings> tokenReadings = new ArrayList<>();
  int pos = 0;
  for (String word : sentenceTokens) {
    List<AnalyzedToken> l = new ArrayList<>();
    // Lower-case with a fixed locale so the lookup key does not depend on the
    // JVM default locale (e.g. under a Turkish default locale "I" would
    // otherwise become dotless "ı" and the lookup would miss).
    List<TaggedWord> manualTags = manualTagger.tag(word.toLowerCase(java.util.Locale.ROOT));
    for (TaggedWord manualTag : manualTags) {
      l.add(new AnalyzedToken(word, manualTag.getPosTag(), manualTag.getLemma()));
    }
    if (l.isEmpty()) {
      // Unknown word: keep a single untagged reading.
      l.add(new AnalyzedToken(word, null, null));
    }
    tokenReadings.add(new AnalyzedTokenReadings(l, pos));
    pos += word.length();
  }
  return tokenReadings;
}
use of org.languagetool.AnalyzedTokenReadings in project languagetool by languagetool-org.
the class MissingGenitiveFinder method hasEsGenitive.
/**
 * Checks whether the "-es" variant of the given word is known to the tagger.
 *
 * <p>The variant is built by replacing a trailing "s" with "es"; a word
 * without a trailing "s" is passed to the tagger unchanged.
 *
 * @param tagger the tagger used to look up the variant
 * @param word the word whose "-es" variant is checked
 * @return {@code true} if any returned reading reports {@code isTagged()}
 * @throws IOException if tagging fails
 */
private boolean hasEsGenitive(GermanTagger tagger, String word) throws IOException {
  String candidate = word.replaceFirst("s$", "es");
  boolean tagged = false;
  for (AnalyzedTokenReadings tokenReadings : tagger.tag(Collections.singletonList(candidate))) {
    if (tokenReadings.isTagged()) {
      tagged = true;
      break;
    }
  }
  return tagged;
}
Aggregations