use of com.joliciel.talismane.rawText.Sentence in project talismane by joliciel-informatique.
the class LexicalAttributeFeatureTest method testCheckInternal.
/**
 * Checks that a {@link LexicalAttributeFeature} asked for the
 * {@code Gender} attribute of the token "dame" (tagged "NC") in the
 * sentence "une dame" yields exactly one outcome, whose value is "f".
 *
 * @throws Exception if anything in the set-up or feature check fails
 */
@Test
public void testCheckInternal() throws Exception {
    // Point the configuration at the lexicon-enabled test config and drop
    // any cached config before loading.
    System.setProperty("config.file", "src/test/resources/testWithLex.conf");
    ConfigFactory.invalidateCaches();
    // The returned Config is not used directly in this test; the call is
    // kept as part of the original set-up sequence.
    ConfigFactory.load();
    final String sessionId = "test";

    Sentence sentence = new Sentence("une dame", sessionId);
    TokenSequence tokenSequence = new TokenSequence(sentence, sessionId);
    // Token "dame" covers the span right after "une " up to the end of the text.
    Token token = new Token("dame", tokenSequence, 1, "une ".length(), "une dame".length(), sessionId);
    Decision decision = new Decision("NC", 1.0);
    final PosTaggedToken posTaggedToken = new PosTaggedToken(token, decision, sessionId);

    // Address function stub: always resolves to the pos-tagged token built above.
    PosTaggedTokenAddressFunction<PosTaggerContext> addressFunction = new AbstractPosTaggedTokenAddressFunction() {
        @Override
        protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext context, RuntimeEnvironment env) {
            return this.generateResult(posTaggedToken);
        }
    };

    StringLiteralFeature<PosTaggedTokenWrapper> gender = new StringLiteralFeature<>(LexicalAttribute.Gender.name());
    LexicalAttributeFeature<PosTaggerContext> feature = new LexicalAttributeFeature<>(addressFunction, gender);

    PosTagSequence history = new PosTagSequence(tokenSequence);
    PosTaggerContext context = new PosTaggerContextImpl(token, history);
    RuntimeEnvironment env = new RuntimeEnvironment();

    FeatureResult<List<WeightedOutcome<String>>> featureResult = feature.checkInternal(context, env);
    List<WeightedOutcome<String>> outcomes = featureResult.getOutcome();
    System.out.println(outcomes);

    // Assert the size first: an empty list then fails with a readable
    // assertion message instead of an IndexOutOfBoundsException on get(0).
    assertEquals(1, outcomes.size());
    assertEquals("f", outcomes.get(0).getOutcome());
}
use of com.joliciel.talismane.rawText.Sentence in project talismane by joliciel-informatique.
the class LexicalAttributeFeatureTest method testCheckInternalMultipleEntries.
/**
 * Checks the {@code Person} attribute outcomes for the token "demande"
 * (tagged "V") in the sentence "je demande": every outcome must be
 * either "1" or "3", and there must be exactly two outcomes.
 *
 * @throws Exception if anything in the set-up or feature check fails
 */
@Test
public void testCheckInternalMultipleEntries() throws Exception {
    System.setProperty("config.file", "src/test/resources/testWithLex.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();
    final String sessionId = "test";

    final String text = "je demande";
    Sentence sentence = new Sentence(text, sessionId);
    TokenSequence tokens = new TokenSequence(sentence, sessionId);

    // The verb starts right after "je " and runs to the end of the text.
    final int verbStart = "je ".length();
    final int verbEnd = text.length();
    Token verb = new Token("demande", tokens, 1, verbStart, verbEnd, sessionId);

    Decision verbDecision = new Decision("V", 1.0);
    final PosTaggedToken taggedVerb = new PosTaggedToken(verb, verbDecision, sessionId);

    // Stub address function which always hands back the tagged verb.
    PosTaggedTokenAddressFunction<PosTaggerContext> fixedAddress = new AbstractPosTaggedTokenAddressFunction() {
        @Override
        protected FeatureResult<PosTaggedTokenWrapper> checkInternal(PosTaggerContext context, RuntimeEnvironment env) {
            return this.generateResult(taggedVerb);
        }
    };

    StringLiteralFeature<PosTaggedTokenWrapper> personAttribute = new StringLiteralFeature<>(LexicalAttribute.Person.name());
    LexicalAttributeFeature<PosTaggerContext> personFeature = new LexicalAttributeFeature<>(fixedAddress, personAttribute);

    PosTagSequence tagHistory = new PosTagSequence(tokens);
    PosTaggerContext taggerContext = new PosTaggerContextImpl(verb, tagHistory);
    RuntimeEnvironment runtime = new RuntimeEnvironment();

    List<WeightedOutcome<String>> persons = personFeature.checkInternal(taggerContext, runtime).getOutcome();
    System.out.println(persons);

    for (WeightedOutcome<String> weighted : persons) {
        String value = weighted.getOutcome();
        boolean firstOrThird = "1".equals(value) || "3".equals(value);
        assertTrue(firstOrThird);
    }
    assertEquals(2, persons.size());
}
use of com.joliciel.talismane.rawText.Sentence in project talismane by joliciel-informatique.
the class SimpleTokeniserTest method testTokenise.
/**
 * Tokenises "Click http://www.blah-di-blah.com now" with a "URL"
 * {@link TokenPlaceholder} annotation over the URL span, then checks
 * that three tokens ("Click", "URL", "now") are produced and that the
 * sentence carries three matching {@link TokenBoundary} annotations
 * with the expected start/end offsets.
 *
 * @throws Exception if anything in the set-up or tokenisation fails
 */
@Test
public void testTokenise() throws Exception {
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();
    final String sessionId = "test";
    String[] labels = new String[0];

    final Sentence sentence = new Sentence("Click http://www.blah-di-blah.com now", sessionId);

    // Offsets expressed as prefix lengths of the sentence text.
    final int clickStart = "".length();
    final int clickEnd = "Click".length();
    final int urlStart = "Click ".length();
    final int urlEnd = "Click http://www.blah-di-blah.com".length();
    final int nowStart = "Click http://www.blah-di-blah.com ".length();
    final int nowEnd = "Click http://www.blah-di-blah.com now".length();

    // Mark the URL span with a placeholder before tokenising.
    List<Annotation<TokenPlaceholder>> placeholders = new ArrayList<>();
    placeholders.add(new Annotation<TokenPlaceholder>(urlStart, urlEnd, new TokenPlaceholder("URL", ""), labels));
    sentence.addAnnotations(placeholders);

    SimpleTokeniser tokeniser = new SimpleTokeniser(sessionId);
    TokenSequence tokens = tokeniser.tokeniseSentence(sentence);
    System.out.println(tokens.toString());

    assertEquals(3, tokens.size());
    assertEquals("Click", tokens.get(0).getAnalyisText());
    assertEquals("URL", tokens.get(1).getAnalyisText());
    assertEquals("now", tokens.get(2).getAnalyisText());

    List<Annotation<TokenBoundary>> boundaries = sentence.getAnnotations(TokenBoundary.class);
    assertEquals(3, boundaries.size());

    // First boundary: "Click"
    assertEquals(clickStart, boundaries.get(0).getStart());
    assertEquals(clickEnd, boundaries.get(0).getEnd());
    assertEquals("Click", boundaries.get(0).getData().getAnalysisText());

    // Second boundary: the URL placeholder
    assertEquals(urlStart, boundaries.get(1).getStart());
    assertEquals("URL", boundaries.get(1).getData().getAnalysisText());
    assertEquals(urlEnd, boundaries.get(1).getEnd());

    // Third boundary: "now"
    assertEquals(nowStart, boundaries.get(2).getStart());
    assertEquals(nowEnd, boundaries.get(2).getEnd());
    assertEquals("now", boundaries.get(2).getData().getAnalysisText());
}
use of com.joliciel.talismane.rawText.Sentence in project talismane by joliciel-informatique.
the class TokenSequenceTest method testSimpleAddByIndex.
/**
 * Adds tokens to a {@link TokenSequence} over "The quick brown fox." by
 * character span, out of order and with a duplicate and two partial
 * spans, then checks that five tokens remain, that the first one is
 * "The", that each token's index matches its iteration position, and
 * that nine token splits are reported.
 *
 * @throws Exception if anything in the set-up fails
 */
@Test
public void testSimpleAddByIndex() throws Exception {
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();
    final String sessionId = "test";
    final Sentence sentence = new Sentence("The quick brown fox.", sessionId);
    TokenSequence tokens = new TokenSequence(sentence, sessionId);

    // {start, end} spans, added in this exact order.
    final int[][] spans = {
        { 16, 19 }, // fox
        { 4, 9 },   // quick
        { 4, 9 },   // quick - should be ignored
        { 0, 3 },   // The
        { 19, 20 }, // .
        { 10, 12 }, // br - should be removed by brown
        { 12, 15 }, // own - should be removed by brown
        { 10, 15 }, // brown
    };
    for (int[] span : spans) {
        tokens.addToken(span[0], span[1]);
    }

    assertEquals(5, tokens.size());

    int expectedIndex = 0;
    for (Token current : tokens) {
        LOG.debug(current.getAnalyisText());
        if (expectedIndex == 0) {
            assertEquals("The", current.getAnalyisText());
        }
        assertEquals(expectedIndex, current.getIndex());
        expectedIndex++;
    }

    LOG.debug("Token splits:");
    for (int split : tokens.getTokenSplits()) {
        LOG.debug("" + split);
    }
    assertEquals(9, tokens.getTokenSplits().size());
}
use of com.joliciel.talismane.rawText.Sentence in project talismane by joliciel-informatique.
the class TokenSequenceTest method testTokeniseSentenceWithPlaceholders.
/**
 * Builds a {@link TokenSequence} over a sentence containing an e-mail
 * address and a URL, each covered by a {@link TokenPlaceholder}
 * annotation ("Email" and "URL"), calls {@code findDefaultTokens()},
 * and checks both the list including white space (19 tokens) and the
 * sequence itself (11 tokens): analysis text and separator flag of
 * every token.
 *
 * @throws Exception if anything in the set-up fails
 */
@Test
public void testTokeniseSentenceWithPlaceholders() throws Exception {
    System.setProperty("config.file", "src/test/resources/test.conf");
    ConfigFactory.invalidateCaches();
    final Config config = ConfigFactory.load();
    final String sessionId = "test";
    String[] labels = new String[0];

    final Sentence sentence = new Sentence("Write to me at joe.schome@test.com, otherwise go to http://test.com.", sessionId);

    // Placeholder spans, expressed as prefix lengths of the sentence text.
    final int emailStart = "Write to me at ".length();
    final int emailEnd = "Write to me at joe.schome@test.com".length();
    final int urlStart = "Write to me at joe.schome@test.com, otherwise go to ".length();
    final int urlEnd = "Write to me at joe.schome@test.com, otherwise go to http://test.com".length();

    final List<Annotation<TokenPlaceholder>> placeholders = new ArrayList<>();
    Annotation<TokenPlaceholder> emailPlaceholder = new Annotation<>(emailStart, emailEnd, new TokenPlaceholder("Email", "blah"), labels);
    placeholders.add(emailPlaceholder);
    Annotation<TokenPlaceholder> urlPlaceholder = new Annotation<>(urlStart, urlEnd, new TokenPlaceholder("URL", "blah"), labels);
    placeholders.add(urlPlaceholder);
    sentence.addAnnotations(placeholders);

    TokenSequence tokenSequence = new TokenSequence(sentence, sessionId);
    tokenSequence.findDefaultTokens();

    assertEquals(19, tokenSequence.listWithWhiteSpace().size());
    assertEquals(11, tokenSequence.size());

    // Expected tokens when white space is included, position by position.
    final String[] spacedTexts = {
        "Write", " ", "to", " ", "me", " ", "at", " ", "Email", ",",
        " ", "otherwise", " ", "go", " ", "to", " ", "URL", "."
    };
    final boolean[] spacedSeparators = {
        false, true, false, true, false, true, false, true, false, true,
        true, false, true, false, true, false, true, false, true
    };

    int position = 0;
    for (Token token : tokenSequence.listWithWhiteSpace()) {
        // Positions past the expectation table are not checked.
        if (position < spacedTexts.length) {
            assertEquals(spacedTexts[position], token.getAnalyisText());
            assertEquals(spacedSeparators[position], token.isSeparator());
        }
        position++;
    }

    // Expected tokens of the sequence itself (no white space).
    final String[] plainTexts = {
        "Write", "to", "me", "at", "Email", ",", "otherwise", "go", "to", "URL", "."
    };
    final boolean[] plainSeparators = {
        false, false, false, false, false, true, false, false, false, false, true
    };

    position = 0;
    for (Token token : tokenSequence) {
        // Positions past the expectation table are not checked.
        if (position < plainTexts.length) {
            assertEquals(plainTexts[position], token.getAnalyisText());
            assertEquals(plainSeparators[position], token.isSeparator());
        }
        position++;
    }
}
Aggregations