Use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.
Class ManualTaggerAdapterTest, method testMultiplePOS.
/**
 * Tags the single word "inflectedform2" and verifies that it comes back as one
 * token with three readings that share the lemma "lemma2" but carry the POS tags
 * "POS1a", "POS1b" and "POS1c", in that order.
 */
@Test
public void testMultiplePOS() throws Exception {
  final String word = "inflectedform2";
  final String[] expectedPosTags = { "POS1a", "POS1b", "POS1c" };

  List<AnalyzedTokenReadings> tagged = tagger.tag(Arrays.asList(word));
  assertNotNull(tagged);
  assertEquals(1, tagged.size());

  AnalyzedTokenReadings tokenReadings = tagged.get(0);
  assertEquals(word, tokenReadings.getToken());
  assertNotNull(tokenReadings.getReadings());
  assertEquals(expectedPosTags.length, tokenReadings.getReadingsLength());

  // Every reading must expose the expected POS tag plus the shared token and lemma.
  for (int i = 0; i < expectedPosTags.length; i++) {
    AnalyzedToken reading = tokenReadings.getReadings().get(i);
    assertEquals(expectedPosTags[i], reading.getPOSTag());
    assertEquals(word, reading.getToken());
    assertEquals("lemma2", reading.getLemma());
  }
}
Use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.
Class ManualTaggerAdapterTest, method testMultipleLemma.
/**
 * Tags the single word "inflectedform3" and verifies that it comes back as one
 * token with four readings, each pairing a distinct lemma ("lemma3a".."lemma3d")
 * with its matching POS tag ("POS3a".."POS3d"), in that order.
 */
@Test
public void testMultipleLemma() throws Exception {
  final String word = "inflectedform3";
  final String[] expectedLemmas = { "lemma3a", "lemma3b", "lemma3c", "lemma3d" };
  final String[] expectedPosTags = { "POS3a", "POS3b", "POS3c", "POS3d" };

  List<AnalyzedTokenReadings> tagged = tagger.tag(Arrays.asList(word));
  assertNotNull(tagged);
  assertEquals(1, tagged.size());

  AnalyzedTokenReadings tokenReadings = tagged.get(0);
  assertEquals(word, tokenReadings.getToken());
  assertNotNull(tokenReadings.getReadings());
  assertEquals(expectedLemmas.length, tokenReadings.getReadingsLength());

  // The two expectation arrays are parallel: index i describes reading i.
  for (int i = 0; i < expectedLemmas.length; i++) {
    AnalyzedToken reading = tokenReadings.getReadings().get(i);
    assertEquals(word, reading.getToken());
    assertEquals(expectedLemmas[i], reading.getLemma());
    assertEquals(expectedPosTags[i], reading.getPOSTag());
  }
}
Use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.
Class RuleFilterEvaluatorTest, method testDuplicateKey.
/**
 * Passes an argument string in which the key "year" appears twice and expects
 * the call to fail with a RuntimeException.
 */
@Test(expected = RuntimeException.class)
public void testDuplicateKey() throws Exception {
  AnalyzedTokenReadings sentenceStart =
      new AnalyzedTokenReadings(new AnalyzedToken("fake1", "SENT_START", null), 0);
  AnalyzedTokenReadings firstToken =
      new AnalyzedTokenReadings(new AnalyzedToken("fake1", "pos", null), 0);
  AnalyzedTokenReadings secondToken =
      new AnalyzedTokenReadings(new AnalyzedToken("fake2", "pos", null), 0);
  AnalyzedTokenReadings[] readingsList = { sentenceStart, firstToken, secondToken };

  // "year" is bound to both \1 and \2 on purpose.
  eval.getResolvedArguments("year:\\1 year:\\2", readingsList, Arrays.asList(1, 2));
}
Use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.
Class GermanHelperTest, method testHasReadingOfType.
/**
 * Builds readings for "der" tagged "ART:DEF:DAT:SIN:FEM" and checks that
 * GermanHelper reports them as DETERMINER but not as NOMEN.
 */
@Test
public void testHasReadingOfType() throws Exception {
  AnalyzedToken derToken = new AnalyzedToken("der", "ART:DEF:DAT:SIN:FEM", null);
  AnalyzedTokenReadings readings = new AnalyzedTokenReadings(derToken, 0);

  boolean reportedAsDeterminer =
      GermanHelper.hasReadingOfType(readings, GermanToken.POSType.DETERMINER);
  assertTrue(reportedAsDeterminer);

  boolean reportedAsNoun =
      GermanHelper.hasReadingOfType(readings, GermanToken.POSType.NOMEN);
  assertFalse(reportedAsNoun);
}
Use of org.languagetool.AnalyzedToken in project languagetool by languagetool-org.
Class LanguageToolFilter, method incrementToken.
/**
 * Produces the next token of this stream.
 *
 * <p>Each input token is treated as a sentence and analyzed by {@code languageTool}.
 * For every non-whitespace analyzed token the surface form is emitted first; the
 * prefixed POS tags and lemmas of its readings are queued on {@code posStack} and
 * emitted by the following calls with a position increment of 0 and type "pos".
 * The sentence-start token is emitted as a single "pos"-typed term.
 *
 * @return {@code true} if a token was produced, {@code false} at end of stream
 * @throws IOException if the wrapped stream or the sentence analysis fails
 */
@Override
public boolean incrementToken() throws IOException {
  // Drain queued POS/lemma terms first; each is stacked on the attribute
  // snapshot captured for the word it belongs to.
  if (!posStack.isEmpty()) {
    String queuedTerm = posStack.pop();
    restoreState(current);
    termAtt.append(queuedTerm);
    posIncrAtt.setPositionIncrement(0);
    typeAtt.setType("pos");
    return true;
  }

  boolean sentenceConsumed = tokenIter == null || !tokenIter.hasNext();
  if (sentenceConsumed) {
    // No tokens left from the current sentence: pull the next one from the input.
    if (!input.incrementToken()) {
      // no more sentences, end of stream!
      return false;
    }
    String sentenceStr = termAtt.toString();
    collectedInput.append(sentenceStr);
    if (sentenceStr.length() >= 255) {
      // Chunks of 255+ characters are only accumulated here and analyzed once a
      // shorter chunk arrives (presumably because the upstream tokenizer splits
      // long sentences - confirm against the tokenizer in use).
      // See https://github.com/languagetool-org/languagetool/issues/364
      return true;
    }
    sentenceStr = collectedInput.toString();
    collectedInput.setLength(0);

    AnalyzedSentence analyzed = languageTool.getAnalyzedSentence(sentenceStr);
    tokenIter = Arrays.asList(analyzed.getTokens()).iterator();
    // A sentence with 0 tokens should not happen; if it does, the stream is
    // reported as finished, which is the behavior of the original code.
    if (!tokenIter.hasNext()) {
      return false;
    }
  }

  // New tokens are being created, so stale attribute values must be wiped.
  clearAttributes();
  AnalyzedTokenReadings tr = tokenIter.next();

  if (tr.isSentenceStart()) {
    // TODO: would be needed so negated tokens can match on something (see testNegatedMatchAtSentenceStart())
    // but breaks other cases:
    //termAtt.append("SENT_START");
    typeAtt.setType("pos");
    AnalyzedToken startToken = tr.getAnalyzedToken(0);
    String posTag = startToken.getPOSTag();
    String lemma = startToken.getLemma();
    termAtt.append(toLowerCase ? POS_PREFIX.toLowerCase() : POS_PREFIX)
        .append(toLowerCase ? posTag.toLowerCase() : posTag);
    if (lemma != null) {
      termAtt.append(toLowerCase ? LEMMA_PREFIX.toLowerCase() : LEMMA_PREFIX)
          .append(toLowerCase ? lemma.toLowerCase() : lemma);
    }
    return true;
  }

  // Whitespace tokens are skipped by moving straight on to the next token.
  if (tr.isWhitespace()) {
    return incrementToken();
  }

  offsetAtt.setOffset(tr.getStartPos(), tr.getEndPos());

  // Queue the POS tag and lemma of every reading for the follow-up calls.
  for (AnalyzedToken reading : tr) {
    String readingPos = reading.getPOSTag();
    if (readingPos != null) {
      posStack.push(toLowerCase
          ? POS_PREFIX.toLowerCase() + readingPos.toLowerCase()
          : POS_PREFIX + readingPos);
    }
    String readingLemma = reading.getLemma();
    if (readingLemma != null) {
      // The lemma is likely identical across readings; it is queued every time anyway.
      posStack.push(toLowerCase
          ? LEMMA_PREFIX.toLowerCase() + readingLemma.toLowerCase()
          : LEMMA_PREFIX + readingLemma);
    }
  }

  // Snapshot the attributes before the surface form is written, so the queued
  // terms start from the same offsets with an empty term.
  current = captureState();
  String surface = tr.getAnalyzedToken(0).getToken();
  termAtt.append(toLowerCase ? surface.toLowerCase() : surface);
  return true;
}
Aggregations