Use of com.yahoo.language.process.TokenType in the vespa project by vespa-engine.
From class LinguisticsAnnotatorTestCase, method requireThatTermAnnotationsAreEmptyIfOrigIsLowerCase.
/**
 * Expects a single TERM annotation without a value on span(0, 3) for every
 * token built by {@code newToken("foo", "foo", type, specialToken)}.
 *
 * Special tokens are checked for every {@link TokenType}; non-special tokens
 * are only checked for types that report themselves as indexable.
 */
@Test
public void requireThatTermAnnotationsAreEmptyIfOrigIsLowerCase() {
    SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
    expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM));

    for (boolean special : new boolean[] {true, false}) {
        for (TokenType tokenType : TokenType.values()) {
            // Only non-special tokens of a non-indexable type are left out.
            if (special || tokenType.isIndexable()) {
                assertAnnotations(expected, "foo", newToken("foo", "foo", tokenType, special));
            }
        }
    }
}
Use of com.yahoo.language.process.TokenType in the vespa project by vespa-engine.
From class SimpleTokenTestCase, method requireThatTypeAccessorsWork.
/**
 * Checks the type accessors of {@link SimpleToken}.
 *
 * A freshly constructed token must report {@code TokenType.UNKNOWN}, and
 * every type passed to {@code setType} must be returned by {@code getType}.
 * A second token built from the same string must then equal the first one
 * only while both carry the same type.
 */
@Test
public void requireThatTypeAccessorsWork() {
    SimpleToken token = new SimpleToken("foo");
    assertEquals(TokenType.UNKNOWN, token.getType());
    for (TokenType candidate : TokenType.values()) {
        token.setType(candidate);
        assertEquals(candidate, token.getType());
    }

    // 'token' now keeps the last type assigned by the loop above.
    SimpleToken other = new SimpleToken("foo");
    for (TokenType candidate : TokenType.values()) {
        other.setType(candidate);
        if (candidate != token.getType()) {
            assertFalse(token.equals(other));
            continue;
        }
        assertEquals(token, other);
    }
}
Use of com.yahoo.language.process.TokenType in the vespa project by vespa-engine.
From class LinguisticsAnnotatorTestCase, method requireThatTermAnnotationsAreLowerCased.
/**
 * Expects a TERM annotation carrying the string value "bar" on span(0, 3)
 * for every token built by {@code newToken("foo", "BAR", type, specialToken)},
 * i.e. the upper-case "BAR" must show up lower-cased in the annotation.
 *
 * Special tokens are checked for every {@link TokenType}; non-special tokens
 * are only checked for types that report themselves as indexable.
 */
@Test
public void requireThatTermAnnotationsAreLowerCased() {
    SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
    Annotation loweredTerm = new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar"));
    expected.spanList().span(0, 3).annotate(loweredTerm);

    for (boolean special : new boolean[] {true, false}) {
        for (TokenType tokenType : TokenType.values()) {
            // Only non-special tokens of a non-indexable type are left out.
            if (special || tokenType.isIndexable()) {
                assertAnnotations(expected, "foo", newToken("foo", "BAR", tokenType, special));
            }
        }
    }
}
Use of com.yahoo.language.process.TokenType in the vespa project by vespa-engine.
From class LinguisticsAnnotatorTestCase, method requireThatIndexableTokenStringsAreAnnotated.
/**
 * Expects a TERM annotation carrying the string value "bar" on span(0, 3)
 * for every indexable {@link TokenType}, using tokens built by
 * {@code newToken("foo", "bar", type)}. Non-indexable types are not checked.
 */
@Test
public void requireThatIndexableTokenStringsAreAnnotated() {
    SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
    Annotation term = new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar"));
    expected.spanList().span(0, 3).annotate(term);

    for (TokenType tokenType : TokenType.values()) {
        if (tokenType.isIndexable()) {
            assertAnnotations(expected, "foo", newToken("foo", "bar", tokenType));
        }
    }
}
Use of com.yahoo.language.process.TokenType in the vespa project by vespa-engine.
From class LinguisticsAnnotatorTestCase, method requireThatMaxTermOccurencesIsHonored.
/**
 * Feeds twice {@code AnnotatorConfig.DEFAULT_MAX_TERM_OCCURRENCES} copies of
 * the same token and expects TERM annotations for only the first
 * {@code DEFAULT_MAX_TERM_OCCURRENCES} of them.
 *
 * The input text is the term "foo" followed by a single space, repeated once
 * per token; each token's offset is the start of its copy in that text.
 * Only indexable {@link TokenType}s are exercised.
 */
@Test
public void requireThatMaxTermOccurencesIsHonored() {
    final String inputTerm = "foo";
    // Completely different from inputTerm for a safer test.
    final String stemmedInputTerm = "bar";
    final String paddedInputTerm = inputTerm + " ";
    final int stride = paddedInputTerm.length();
    final int maxOccurrences = AnnotatorConfig.DEFAULT_MAX_TERM_OCCURRENCES;
    final int tokenCount = maxOccurrences * 2;

    // Expected tree: one annotated span per occurrence, up to the maximum.
    final SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
    int expectedOffset = 0;
    for (int n = 0; n < maxOccurrences; n++) {
        Annotation term = new Annotation(AnnotationTypes.TERM, new StringFieldValue(stemmedInputTerm));
        expected.spanList().span(expectedOffset, inputTerm.length()).annotate(term);
        expectedOffset += stride;
    }

    for (TokenType tokenType : TokenType.values()) {
        if (tokenType.isIndexable()) {
            StringBuilder text = new StringBuilder();
            Token[] tokens = new Token[tokenCount];
            int tokenOffset = 0;
            for (int n = 0; n < tokenCount; n++) {
                SimpleToken current = newToken(inputTerm, stemmedInputTerm, tokenType);
                current.setOffset(tokenOffset);
                tokens[n] = current;
                text.append(paddedInputTerm);
                tokenOffset += stride;
            }
            assertAnnotations(expected, text.toString(), tokens);
        }
    }
}
Aggregations