use of com.yahoo.document.annotation.Annotation in project vespa by vespa-engine.
the class LinguisticsAnnotatorTestCase method requireThatMaxTermOccurencesIsHonored.
/**
 * Feeds twice {@code AnnotatorConfig.DEFAULT_MAX_TERM_OCCURRENCES} identical tokens through
 * {@code assertAnnotations} for every indexable token type, while the expected span tree only
 * carries annotations for the first {@code DEFAULT_MAX_TERM_OCCURRENCES} of them.
 */
@Test
public void requireThatMaxTermOccurencesIsHonored() {
    final String inputTerm = "foo";
    // Deliberately completely different from inputTerm, for a safer test.
    final String stemmedInputTerm = "bar";
    final String paddedInputTerm = inputTerm + " ";
    final int stride = paddedInputTerm.length();
    final int cap = AnnotatorConfig.DEFAULT_MAX_TERM_OCCURRENCES;
    final int inputTermOccurence = cap * 2;

    // Expected tree: one TERM annotation per occurrence, but only up to the cap.
    final SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
    for (int k = 0, from = 0; k < cap; k++, from += stride) {
        Annotation term = new Annotation(AnnotationTypes.TERM, new StringFieldValue(stemmedInputTerm));
        expected.spanList().span(from, inputTerm.length()).annotate(term);
    }

    for (TokenType type : TokenType.values()) {
        if (type.isIndexable()) {
            Token[] tokens = new Token[inputTermOccurence];
            StringBuilder input = new StringBuilder();
            for (int k = 0; k < tokens.length; k++) {
                SimpleToken token = newToken(inputTerm, stemmedInputTerm, type);
                // The text built so far is exactly k padded terms long, i.e. this token's offset.
                token.setOffset(input.length());
                tokens[k] = token;
                input.append(paddedInputTerm);
            }
            assertAnnotations(expected, input.toString(), tokens);
        }
    }
}
use of com.yahoo.document.annotation.Annotation in project vespa by vespa-engine.
the class LinguisticsAnnotatorTestCase method requireThatTermReplacementsAreApplied.
/**
 * Runs the token "foo" through a linguistics instance created with the replacement map
 * {@code foo -> bar} and expects a single TERM annotation "bar" over span (0, 3).
 * Every token type is tried as a special token; as a non-special token only indexable
 * types are tried.
 */
@Test
public void requireThatTermReplacementsAreApplied() {
    SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
    Annotation replacedTerm = new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar"));
    expected.spanList().span(0, 3).annotate(replacedTerm);

    for (boolean specialToken : new boolean[] {true, false}) {
        for (TokenType type : TokenType.values()) {
            // Non-special tokens of a non-indexable type are skipped.
            if (specialToken || type.isIndexable()) {
                assertAnnotations(
                        expected,
                        "foo",
                        newLinguistics(
                                Arrays.asList(newToken("foo", "foo", type, specialToken)),
                                Collections.singletonMap("foo", "bar")));
            }
        }
    }
}
use of com.yahoo.document.annotation.Annotation in project vespa by vespa-engine.
the class LinguisticsAnnotatorTestCase method requireThatSpecialTokenStringsAreAnnotatedRegardlessOfType.
/**
 * For every {@code TokenType}, passes a special token ("foo" with term "bar") to
 * {@code assertAnnotations} and expects a single TERM annotation "bar" over span (0, 3).
 */
@Test
public void requireThatSpecialTokenStringsAreAnnotatedRegardlessOfType() {
    SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
    Annotation term = new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar"));
    expected.spanList().span(0, 3).annotate(term);

    // No filtering on isIndexable() here: every type is exercised.
    TokenType[] allTypes = TokenType.values();
    for (int idx = 0; idx < allTypes.length; idx++) {
        assertAnnotations(expected, "foo", newToken("foo", "bar", allTypes[idx], true));
    }
}
use of com.yahoo.document.annotation.Annotation in project vespa by vespa-engine.
the class LinguisticsAnnotatorTestCase method requireThatTokenizeCappingWorks.
/**
 * Annotates two values with an annotator configured with {@code setMaxTokenLength(12)}:
 * a 12-character string, which must yield exactly the expected span tree, and a longer
 * string, whose span tree must still report the full original string as its field value.
 */
@Test
public void requireThatTokenizeCappingWorks() {
    String shortString = "short string";
    String longString = shortString + " a longer string";

    // Expected tree for the short input: "short" at (0, 5) and "string" at (6, 6).
    SpanTree expectedTree = new SpanTree(SpanTrees.LINGUISTICS);
    expectedTree.setStringFieldValue(new StringFieldValue(shortString));
    expectedTree.spanList().span(0, 5).annotate(new Annotation(AnnotationTypes.TERM));
    expectedTree.spanList().span(6, 6).annotate(new Annotation(AnnotationTypes.TERM));

    Linguistics linguistics = new SimpleLinguistics();
    AnnotatorConfig config = new AnnotatorConfig().setMaxTokenLength(12);
    LinguisticsAnnotator annotator = new LinguisticsAnnotator(linguistics, config);

    // Input that fits within the configured length.
    StringFieldValue shortValue = new StringFieldValue(shortString);
    assertTrue(annotator.annotate(shortValue));
    SpanTree shortTree = shortValue.getSpanTree(SpanTrees.LINGUISTICS);
    assertEquals(expectedTree, shortTree);
    assertEquals(shortString, shortTree.getStringFieldValue().getString());

    // Input longer than the configured length: only the stored string value is checked.
    StringFieldValue cappedValue = new StringFieldValue(longString);
    assertTrue(annotator.annotate(cappedValue));
    SpanTree cappedTree = cappedValue.getSpanTree(SpanTrees.LINGUISTICS);
    assertEquals(longString, cappedTree.getStringFieldValue().getString());
}
use of com.yahoo.document.annotation.Annotation in project vespa by vespa-engine.
the class VespaDocumentDeserializer42 method readSpanTree.
/**
 * Reads one span tree from {@code buf} into the given {@code tree}: optionally its name,
 * then its root span node, then its annotations.
 *
 * While the read is in progress the fields {@code spanNodes} and {@code annotations} hold
 * the nodes and annotations of the tree being read; both are reset to {@code null} when
 * the method exits, whether normally or by exception.
 *
 * @param tree     the tree that receives the name (if read), the root node and the annotations
 * @param readName if true, a string value is deserialized first and set as the tree's name
 * @throws SerializationException if {@code spanNodes} or {@code annotations} is already
 *                                non-null on entry, i.e. another span tree read is in progress
 */
private void readSpanTree(SpanTree tree, boolean readName) {
// we don't support deserialization of nested span trees; non-null state means a read is already in progress:
if (spanNodes != null || annotations != null) {
throw new SerializationException("Deserialization of nested SpanTrees is not supported.");
}
// we're going to read a new SpanTree, so create fresh lists for its nodes and annotations:
spanNodes = new ArrayList<SpanNode>();
annotations = new ArrayList<Annotation>();
try {
if (readName) {
StringFieldValue treeName = new StringFieldValue();
treeName.deserialize(this);
tree.setName(treeName.getString());
}
// NOTE(review): spanNodes is presumably filled in by readSpanNode() - confirm in that method.
SpanNode root = readSpanNode();
tree.setRoot(root);
int numAnnotations = buf.getInt1_2_4Bytes();
// first pass: create every Annotation instance before reading any of them.
// NOTE(review): presumably so read(...) can resolve references to other annotations by index,
// including ones that appear later in the stream - confirm against read(Annotation).
for (int i = 0; i < numAnnotations; i++) {
Annotation a = new Annotation();
annotations.add(a);
}
// second pass: populate each pre-created annotation, in order.
for (int i = 0; i < numAnnotations; i++) {
read(annotations.get(i));
}
// only after all annotations have been read are they attached to the tree.
for (Annotation a : annotations) {
tree.annotate(a);
}
// post-process plain Span nodes; correctIndexes is defined elsewhere and its effect is not visible here.
for (SpanNode node : spanNodes) {
if (node instanceof Span) {
correctIndexes((Span) node);
}
}
} finally {
// we're done, let's set this to null to save memory and prevent madness
// (it also re-arms the nested-read check at the top of this method):
spanNodes = null;
annotations = null;
}
}
Aggregations