Use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.
From the class LexicalAttributeFeatureTest, method testCheckInternalMultipleEntries.
/**
 * Builds a single token "demande" (tagged "V") inside the sentence "je demande", asks a
 * {@link LexicalAttributeFeature} for its {@code Person} attribute, and verifies that exactly
 * two outcomes come back, each of them being either "1" or "3".
 */
@Test
public void testCheckInternalMultipleEntries() throws Exception {
    // Select the test configuration file and reload the configuration from scratch.
    System.setProperty("config.file", "src/test/resources/testWithLex.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();
    final String sessionId = "test";

    // The token under test: the second word of the sentence, spanning "demande".
    Sentence sentence = new Sentence("je demande", sessionId);
    TokenSequence tokenSequence = new TokenSequence(sentence, sessionId);
    int tokenStart = "je ".length();
    int tokenEnd = "je demande".length();
    Token verb = new Token("demande", tokenSequence, 1, tokenStart, tokenEnd, sessionId);
    final PosTaggedToken taggedVerb = new PosTaggedToken(verb, new Decision("V", 1.0), sessionId);

    // Address function that always resolves to the tagged token built above.
    PosTaggedTokenAddressFunction<PosTaggerContext> addressFunction = new AbstractPosTaggedTokenAddressFunction() {
        @Override
        protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext context, RuntimeEnvironment env) {
            return this.generateResult(taggedVerb);
        }
    };

    StringLiteralFeature<PosTaggedTokenWrapper> personAttribute = new StringLiteralFeature<>(LexicalAttribute.Person.name());
    LexicalAttributeFeature<PosTaggerContext> feature = new LexicalAttributeFeature<>(addressFunction, personAttribute);

    PosTaggerContext context = new PosTaggerContextImpl(verb, new PosTagSequence(tokenSequence));
    RuntimeEnvironment env = new RuntimeEnvironment();

    List<WeightedOutcome<String>> outcomes = feature.checkInternal(context, env).getOutcome();
    System.out.println(outcomes);

    // Every returned value must be one of the two accepted persons...
    for (WeightedOutcome<String> weighted : outcomes) {
        String person = weighted.getOutcome();
        boolean firstOrThird = "1".equals(person) || "3".equals(person);
        assertTrue(firstOrThird);
    }
    // ...and there must be exactly two of them.
    assertEquals(2, outcomes.size());
}
Use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.
From the class PatternTokeniserTest, method testTokenise.
/**
 * Tokenises "Je n'ai pas l'ourang-outan sur www.google.com." with a {@link PatternTokeniser}
 * configured with two separator patterns and a "URL" placeholder annotation over the web
 * address, then verifies that the first token sequence holds the nine expected analysis texts.
 */
@Test
public void testTokenise() throws Exception {
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();
    final String sessionId = "test";

    String[] labels = new String[0];
    final Sentence sentence = new Sentence("Je n'ai pas l'ourang-outan sur www.google.com.", sessionId);

    // Annotate the span covering "www.google.com" with a placeholder named "URL".
    int urlStart = "Je n'ai pas l'ourang-outan sur ".length();
    int urlEnd = "Je n'ai pas l'ourang-outan sur www.google.com".length();
    List<Annotation<TokenPlaceholder>> annotations = new ArrayList<>();
    annotations.add(new Annotation<TokenPlaceholder>(urlStart, urlEnd, new TokenPlaceholder("URL", ""), labels));
    sentence.addAnnotations(annotations);

    List<String> separatorRules = new ArrayList<String>();
    separatorRules.add("IS_NOT_SEPARATOR -_");
    separatorRules.add("IS_SEPARATOR_AFTER '");
    TokeniserPatternManager patternManager = new TokeniserPatternManager(separatorRules, sessionId);
    PatternTokeniser tokeniser = new PatternTokeniser(null, patternManager, null, 1, sessionId);

    TokenSequence tokenSequence = tokeniser.tokenise(sentence).get(0);
    LOG.debug(tokenSequence.toString());
    assertEquals(9, tokenSequence.size());

    // Expected analysis text at each position of the sequence.
    String[] expectedTexts = { "Je", "n'", "ai", "pas", "l'", "ourang-outan", "sur", "URL", "." };
    int position = 0;
    for (Token token : tokenSequence) {
        // Positions beyond the expectation table are not checked.
        if (position < expectedTexts.length) {
            assertEquals(expectedTexts[position], token.getAnalyisText());
        }
        position++;
    }
}
Use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.
From the class AndRangeFeature, method checkInternal.
/**
 * Evaluates the criterion on every token whose index lies between the outcomes of the start and
 * end features (both inclusive) and combines the individual outcomes with a logical AND.
 *
 * <p>The start index is raised to 0 if negative and the end index is lowered to the last index
 * of the token sequence if it exceeds it.
 *
 * @param tokenWrapper the wrapper from which the reference token is resolved
 * @param env the runtime environment handed on to the nested features
 * @return the AND of all criterion outcomes in the range, or {@code null} when the token cannot
 *         be resolved, when either bound feature yields no result, when the clamped range is
 *         empty, or when the criterion yields no result for any token reached in the range
 * @throws TalismaneException declared by the feature contract
 */
@Override
public FeatureResult<Boolean> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    TokenWrapper resolvedWrapper = this.getToken(tokenWrapper, env);
    if (resolvedWrapper == null) {
        return null;
    }
    Token referenceToken = resolvedWrapper.getToken();

    // Both bounds are always evaluated before either result is inspected.
    FeatureResult<Integer> lowerBound = startFeature.check(resolvedWrapper, env);
    FeatureResult<Integer> upperBound = endFeature.check(resolvedWrapper, env);
    if (lowerBound == null || upperBound == null) {
        return null;
    }

    int rawFirst = lowerBound.getOutcome();
    int rawLast = upperBound.getOutcome();

    // Clamp the requested range to the valid indexes of the sequence.
    int firstIndex = Math.max(rawFirst, 0);
    int lastIndex = Math.min(rawLast, referenceToken.getTokenSequence().size() - 1);
    if (firstIndex > lastIndex) {
        return null;
    }

    boolean allSatisfied = true;
    for (int index = firstIndex; index <= lastIndex; index++) {
        Token candidate = referenceToken.getTokenSequence().get(index);
        FeatureResult<Boolean> verdict = this.criterion.check(candidate, env);
        if (verdict == null) {
            // A missing verdict anywhere in the range invalidates the whole result.
            return null;
        }
        allSatisfied = allSatisfied && verdict.getOutcome();
    }
    return this.generateResult(allSatisfied);
}
Use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.
From the class TokeniserPatternsFeature, method checkInternal.
/**
 * Collects, for the wrapped token, the name of every pattern match whose index equals the first
 * entry of that pattern's indexes to test; each name is emitted with a weight of 1.0.
 *
 * @param tokenWrapper the wrapper providing the token whose matches are inspected
 * @param env the runtime environment (not used by this method)
 * @return the result generated from the list of weighted pattern names, which may be empty
 * @throws TalismaneException declared by the feature contract
 */
@Override
public FeatureResult<List<WeightedOutcome<String>>> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    List<WeightedOutcome<String>> patternNames = new ArrayList<WeightedOutcome<String>>();
    Token token = tokenWrapper.getToken();

    for (TokenPatternMatch match : token.getMatches()) {
        // Skip matches that are not located at the pattern's first index to test.
        if (match.getIndex() != match.getPattern().getIndexesToTest().get(0)) {
            continue;
        }
        String patternName = match.getPattern().getName();
        patternNames.add(new WeightedOutcome<String>(patternName, 1.0));
    }

    return this.generateResult(patternNames);
}
Use of com.joliciel.talismane.tokeniser.Token in project talismane by joliciel-informatique.
From the class UnknownWordFeature, method checkInternal.
/**
 * Reports whether the resolved token has no possible pos-tags at all.
 *
 * @param tokenWrapper the wrapper from which the token is resolved
 * @param env the runtime environment used to resolve the token
 * @return a result holding {@code true} when the token's set of possible pos-tags is empty and
 *         {@code false} otherwise, or {@code null} when no token can be resolved
 * @throws TalismaneException declared by the feature contract
 */
@Override
public FeatureResult<Boolean> checkInternal(TokenWrapper tokenWrapper, RuntimeEnvironment env) throws TalismaneException {
    TokenWrapper resolvedWrapper = this.getToken(tokenWrapper, env);
    if (resolvedWrapper == null) {
        return null;
    }

    boolean hasNoPosTags = resolvedWrapper.getToken().getPossiblePosTags().isEmpty();
    return this.generateResult(hasNoPosTags);
}
Aggregations