Search in sources :

Example 1 with Annotation

use of com.yahoo.document.annotation.Annotation in project vespa by vespa-engine.

the class LinguisticsAnnotatorTestCase method requireThatCompositeTokensAreFlattened.

/**
 * Builds a nested composite token for "foobarbaz" (FOO + BARBAZ, where BARBAZ is
 * itself BAR + BAZ) and asserts that the resulting annotations are one TERM per
 * leaf component, at offsets 0, 3 and 6.
 */
@Test
public void requireThatCompositeTokensAreFlattened() {
    // Expected tree: three consecutive spans of length 3, one per leaf term.
    SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
    String[] leafTerms = { "foo", "bar", "baz" };
    for (int i = 0; i < leafTerms.length; i++) {
        expected.spanList().span(i * 3, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue(leafTerms[i])));
    }

    // Leaf and intermediate components, built bottom-up.
    SimpleToken foo = newToken("FOO", "foo", TokenType.ALPHABETIC).setOffset(0);
    SimpleToken bar = newToken("BAR", "bar", TokenType.ALPHABETIC).setOffset(3);
    SimpleToken baz = newToken("BAZ", "baz", TokenType.ALPHABETIC).setOffset(6);
    SimpleToken barbaz = newToken("BARBAZ", "barbaz", TokenType.ALPHABETIC).setOffset(3)
            .addComponent(bar)
            .addComponent(baz);

    SimpleToken token = newToken("FOOBARBAZ", "foobarbaz", TokenType.ALPHABETIC)
            .addComponent(foo)
            .addComponent(barbaz);

    assertAnnotations(expected, "foobarbaz", token);
}
Also used : StringFieldValue(com.yahoo.document.datatypes.StringFieldValue) SimpleToken(com.yahoo.language.simple.SimpleToken) Annotation(com.yahoo.document.annotation.Annotation) SpanTree(com.yahoo.document.annotation.SpanTree) Test(org.junit.Test)

Example 2 with Annotation

use of com.yahoo.document.annotation.Annotation in project vespa by vespa-engine.

the class LinguisticsAnnotatorTestCase method requireThatExistingAnnotationsAreKept.

/**
 * Attaches a pre-built LINGUISTICS span tree to a string value, runs the annotator
 * over it twice (each time with a fresh annotator instance), and asserts that the
 * second run returns true and that the value's LINGUISTICS tree still equals the
 * tree that was attached up front.
 */
@Test
public void requireThatExistingAnnotationsAreKept() {
    // Tree that is already present on the value before any annotator runs.
    SpanTree existingTree = new SpanTree(SpanTrees.LINGUISTICS);
    existingTree.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM, new StringFieldValue("baz")));

    StringFieldValue value = new StringFieldValue("foo");
    value.setSpanTree(existingTree);

    Linguistics linguistics = newLinguistics(
            Arrays.asList(newToken("foo", "bar", TokenType.ALPHABETIC, false)),
            Collections.<String, String>emptyMap());

    // First pass: result intentionally ignored.
    LinguisticsAnnotator firstPass = new LinguisticsAnnotator(linguistics, CONFIG);
    firstPass.annotate(value);

    // Second pass with a new annotator: its result is what the test checks.
    boolean secondPassResult = new LinguisticsAnnotator(linguistics, CONFIG).annotate(value);
    assertTrue(secondPassResult);
    assertEquals(existingTree, value.getSpanTree(SpanTrees.LINGUISTICS));
}
Also used : StringFieldValue(com.yahoo.document.datatypes.StringFieldValue) Linguistics(com.yahoo.language.Linguistics) SimpleLinguistics(com.yahoo.language.simple.SimpleLinguistics) Annotation(com.yahoo.document.annotation.Annotation) SpanTree(com.yahoo.document.annotation.SpanTree) Test(org.junit.Test)

Example 3 with Annotation

use of com.yahoo.document.annotation.Annotation in project vespa by vespa-engine.

the class LinguisticsAnnotatorTestCase method requireThatCompositeSpecialTokensAreNotFlattened.

/**
 * Builds the same nested composite token as the flattening test, but marks the
 * root as a special token, and asserts that a single TERM annotation covering
 * the whole 9-character span ("foobarbaz") is produced.
 */
@Test
public void requireThatCompositeSpecialTokensAreNotFlattened() {
    // Expected tree: one span for the entire token.
    SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
    Annotation wholeTerm = new Annotation(AnnotationTypes.TERM, new StringFieldValue("foobarbaz"));
    expected.spanList().span(0, 9).annotate(wholeTerm);

    // Components, built bottom-up.
    SimpleToken foo = newToken("FOO", "foo", TokenType.ALPHABETIC).setOffset(0);
    SimpleToken bar = newToken("BAR", "bar", TokenType.ALPHABETIC).setOffset(3);
    SimpleToken baz = newToken("BAZ", "baz", TokenType.ALPHABETIC).setOffset(6);
    SimpleToken barbaz = newToken("BARBAZ", "barbaz", TokenType.ALPHABETIC).setOffset(3)
            .addComponent(bar)
            .addComponent(baz);

    // Root is flagged as a special token before its components are attached.
    SimpleToken token = newToken("FOOBARBAZ", "foobarbaz", TokenType.ALPHABETIC)
            .setSpecialToken(true)
            .addComponent(foo)
            .addComponent(barbaz);

    assertAnnotations(expected, "foobarbaz", token);
}
Also used : StringFieldValue(com.yahoo.document.datatypes.StringFieldValue) SimpleToken(com.yahoo.language.simple.SimpleToken) Annotation(com.yahoo.document.annotation.Annotation) SpanTree(com.yahoo.document.annotation.SpanTree) Test(org.junit.Test)

Example 4 with Annotation

use of com.yahoo.document.annotation.Annotation in project vespa by vespa-engine.

the class LinguisticsAnnotatorTestCase method requireThatTermAnnotationsAreEmptyIfOrigIsLowerCase.

/**
 * For a token whose original and processed forms are both "foo", asserts that the
 * TERM annotation carries no value. Checked for every token type when the token
 * is special, and for indexable token types only when it is not.
 */
@Test
public void requireThatTermAnnotationsAreEmptyIfOrigIsLowerCase() {
    SpanTree expected = new SpanTree(SpanTrees.LINGUISTICS);
    expected.spanList().span(0, 3).annotate(new Annotation(AnnotationTypes.TERM));

    boolean[] specialFlags = { true, false };
    for (boolean special : specialFlags) {
        for (TokenType tokenType : TokenType.values()) {
            // Non-special tokens of a non-indexable type are not checked.
            if (special || tokenType.isIndexable()) {
                assertAnnotations(expected, "foo", newToken("foo", "foo", tokenType, special));
            }
        }
    }
}
Also used : TokenType(com.yahoo.language.process.TokenType) Annotation(com.yahoo.document.annotation.Annotation) SpanTree(com.yahoo.document.annotation.SpanTree) Test(org.junit.Test)

Example 5 with Annotation

use of com.yahoo.document.annotation.Annotation in project vespa by vespa-engine.

the class SchemaMappingAndAccessesTest method getDoc.

/**
 * Creates the "album" test document: string fields title/artist/guitarist, an int
 * field year, a string array labels and a struct array listeners. The artist value
 * carries a span tree named "mytree" holding a single "person" annotation; the
 * guitarist field is declared on the type but no value is set for it.
 *
 * @return the populated document with id "doc:map:test:1"
 */
private Document getDoc() {
    // --- document type: simple fields ---
    DocumentType type = new DocumentType("album");
    for (String stringField : new String[] { "title", "artist", "guitarist" }) {
        type.addField(stringField, DataType.STRING);
    }
    type.addField("year", DataType.INT);
    type.addField("labels", DataType.getArray(DataType.STRING));

    Document doc = new Document(type, new DocumentId("doc:map:test:1"));

    // --- scalar values ---
    doc.setFieldValue("title", new StringFieldValue("Black Rock"));

    StringFieldValue artist = new StringFieldValue("Joe Bonamassa");
    Annotation personAnnotation = new Annotation(new AnnotationType("person"));
    artist.setSpanTree(new SpanTree("mytree").annotate(personAnnotation));
    doc.setFieldValue("artist", artist);

    doc.setFieldValue("year", new IntegerFieldValue(2010));

    // --- labels array ---
    Array<StringFieldValue> labelValues = new Array<>(type.getField("labels").getDataType());
    labelValues.add(new StringFieldValue("audun"));
    labelValues.add(new StringFieldValue("tylden"));
    doc.setFieldValue("labels", labelValues);

    // --- listeners: struct type is registered on the document type after the document exists ---
    StructDataType listenerType = new StructDataType("artist");
    listenerType.addField(new com.yahoo.document.Field("firstname", DataType.STRING));
    listenerType.addField(new com.yahoo.document.Field("lastname", DataType.STRING));
    type.addField("listeners", DataType.getArray(listenerType));

    Array<Struct> listenerValues = new Array<>(type.getField("listeners").getDataType());

    Struct first = new Struct(listenerType);
    first.setFieldValue("firstname", new StringFieldValue("per"));
    first.setFieldValue("lastname", new StringFieldValue("olsen"));

    Struct second = new Struct(listenerType);
    second.setFieldValue("firstname", new StringFieldValue("anders"));
    second.setFieldValue("lastname", new StringFieldValue("and"));

    listenerValues.add(first);
    listenerValues.add(second);
    doc.setFieldValue("listeners", listenerValues);

    return doc;
}
Also used : DocumentId(com.yahoo.document.DocumentId) DocumentType(com.yahoo.document.DocumentType) IntegerFieldValue(com.yahoo.document.datatypes.IntegerFieldValue) Document(com.yahoo.document.Document) AnnotationType(com.yahoo.document.annotation.AnnotationType) Annotation(com.yahoo.document.annotation.Annotation) Struct(com.yahoo.document.datatypes.Struct) Array(com.yahoo.document.datatypes.Array) StringFieldValue(com.yahoo.document.datatypes.StringFieldValue) StructDataType(com.yahoo.document.StructDataType) SpanTree(com.yahoo.document.annotation.SpanTree)

Aggregations

Annotation (com.yahoo.document.annotation.Annotation): 20; SpanTree (com.yahoo.document.annotation.SpanTree): 17; Test (org.junit.Test): 13; StringFieldValue (com.yahoo.document.datatypes.StringFieldValue): 12; AnnotationType (com.yahoo.document.annotation.AnnotationType): 6; TokenType (com.yahoo.language.process.TokenType): 6; Span (com.yahoo.document.annotation.Span): 3; SimpleToken (com.yahoo.language.simple.SimpleToken): 3; ProxyDocument (com.yahoo.docproc.proxy.ProxyDocument): 2; Document (com.yahoo.document.Document): 2; SpanList (com.yahoo.document.annotation.SpanList): 2; SpanNode (com.yahoo.document.annotation.SpanNode): 2; Linguistics (com.yahoo.language.Linguistics): 2; SimpleLinguistics (com.yahoo.language.simple.SimpleLinguistics): 2; TestDocumentProcessor1 (com.yahoo.docproc.DocumentProcessingAbstractTestCase.TestDocumentProcessor1): 1; DocumentId (com.yahoo.document.DocumentId): 1; DocumentType (com.yahoo.document.DocumentType): 1; Field (com.yahoo.document.Field): 1; StructDataType (com.yahoo.document.StructDataType): 1; AbstractTypesTest (com.yahoo.document.annotation.AbstractTypesTest): 1