Search in sources:

Example 16 with SpanTree

use of com.yahoo.document.annotation.SpanTree in project vespa by vespa-engine.

From the class StringTestCase, method consume.

/**
 * Prints a short report for every field declared by the "blog" document type and, for each
 * field whose value in {@code document} is a {@link StringFieldValue}, passes every attached
 * span tree to {@code consumeAnnotations}.
 *
 * @param document   the document whose field values are read
 * @param docTypeMgr the manager used to look up the "blog" document type
 * @return the {@code document} instance that was passed in
 */
public Document consume(Document document, DocumentTypeManager docTypeMgr) {
    DocumentType blogType = docTypeMgr.getDocumentType("blog");
    for (Field field : blogType.getFields()) {
        System.out.println("\n\nField Name: " + field.getName());
        System.out.println("DataType: " + field.getDataType());
        System.out.println("isHeader? " + field.isHeader());

        FieldValue value = document.getFieldValue(field);
        if (!(value instanceof StringFieldValue)) {
            // Only string fields are inspected for span trees; also skips absent (null) values.
            continue;
        }

        StringFieldValue stringValue = (StringFieldValue) value;
        System.out.println(field.getName() + " is a StringField. Field Value: " + stringValue.getString());
        for (SpanTree spanTree : stringValue.getSpanTrees()) {
            System.out.println(field.getName() + " has annotations");
            // NOTE(review): the cast assumes every tree root is a SpanList - confirm.
            consumeAnnotations(spanTree, (SpanList) spanTree.getRoot());
        }
    }
    return document;
}
Also used : Field(com.yahoo.document.Field) DocumentType(com.yahoo.document.DocumentType) SpanTree(com.yahoo.document.annotation.SpanTree)

Example 17 with SpanTree

use of com.yahoo.document.annotation.SpanTree in project vespa by vespa-engine.

From the class DocumentGenPluginTest, method newBookGeneric.

/**
 * Builds a generic "book" document populated with an author, an annotated title, a year,
 * a nested struct, a weighted set of floats and a triple-nested integer array.
 *
 * @param bookT the document type to instantiate
 * @param i     suffix appended to {@code "doc:book:"} to form the document id
 * @param mgr   the type manager used to resolve the "date" annotation type and the
 *              "ss0" / "ss1" struct data types
 * @return the newly created and populated document
 */
private Document newBookGeneric(DocumentType bookT, int i, DocumentTypeManager mgr) {
    Document bookGeneric = new Document(bookT, new DocumentId("doc:book:" + i));
    bookGeneric.setFieldValue("author", new StringFieldValue("Melville"));

    // Title carrying a span tree with a single "date" annotation.
    StringFieldValue title = new StringFieldValue("Moby Dick");
    SpanTree titleTree = new SpanTree();
    title.setSpanTree(titleTree);
    AnnotationType dateType = mgr.getAnnotationTypeRegistry().getType("date");
    // Reuse the annotation type resolved above rather than looking it up a second time.
    Struct dateStruct = new Struct(dateType.getDataType());
    dateStruct.setFieldValue("exacttime", new LongFieldValue(99L));
    Annotation date = new Annotation(dateType);
    date.setFieldValue(dateStruct);
    titleTree.annotate(date);
    bookGeneric.setFieldValue("title", title);

    bookGeneric.setFieldValue("year", new IntegerFieldValue(1851));

    // Struct "ss1" that embeds a string array and a nested "ss0" struct.
    Struct myS0 = new Struct(mgr.getDataType("ss0"));
    myS0.setFieldValue("s0", new StringFieldValue("My s0"));
    myS0.setFieldValue("d0", new DoubleFieldValue(99));
    Struct myS1 = new Struct(mgr.getDataType("ss1"));
    myS1.setFieldValue("s1", new StringFieldValue("My s1"));
    myS1.setFieldValue("l1", new LongFieldValue(89));
    Array<StringFieldValue> myAs1 = new Array<>(DataType.getArray(DataType.STRING));
    myAs1.add(new StringFieldValue("as1_1"));
    myAs1.add(new StringFieldValue("as1_2"));
    myS1.setFieldValue("as1", myAs1);
    myS1.setFieldValue("ss01", myS0);
    bookGeneric.setFieldValue("mystruct", myS1);

    WeightedSet<FloatFieldValue> wsFloat = new WeightedSet<>(DataType.getWeightedSet(DataType.FLOAT));
    wsFloat.put(new FloatFieldValue(56f), 55);
    wsFloat.put(new FloatFieldValue(57f), 54);
    bookGeneric.setFieldValue("mywsfloat", wsFloat);

    // [[[1, 2, 3]]] built from the innermost array outwards.
    Array<IntegerFieldValue> intArr1 = new Array<>(DataType.getArray(DataType.INT));
    intArr1.add(new IntegerFieldValue(1));
    intArr1.add(new IntegerFieldValue(2));
    intArr1.add(new IntegerFieldValue(3));
    Array<Array<IntegerFieldValue>> intArr1Arr = new Array<>(DataType.getArray(intArr1.getDataType()));
    intArr1Arr.add(intArr1);
    Array<Array<Array<IntegerFieldValue>>> intArr1ArrArr = new Array<>(DataType.getArray(intArr1Arr.getDataType()));
    intArr1ArrArr.add(intArr1Arr);
    bookGeneric.setFieldValue("mytriplearray", intArr1ArrArr);

    return bookGeneric;
}
Also used : ProxyDocument(com.yahoo.docproc.proxy.ProxyDocument) AnnotationType(com.yahoo.document.annotation.AnnotationType) Annotation(com.yahoo.document.annotation.Annotation) SpanTree(com.yahoo.document.annotation.SpanTree)

Example 18 with SpanTree

use of com.yahoo.document.annotation.SpanTree in project vespa by vespa-engine.

From the class DocumentGenPluginTest, method testPackComplex.

/**
 * Populates a generic "book" document, wraps it in a {@code Book}, and then wraps that
 * {@code Book} in a second {@code Book}, checking after each step that the field values and
 * the title's "date" annotation can be read back unchanged.
 *
 * <p>All assertions follow JUnit's {@code assertEquals(expected, actual)} argument order so
 * that failure messages report the expected and actual values correctly.
 */
@Test
public void testPackComplex() {
    final DocumentTypeManager mgr = typeManagerForBookType();
    DocumentType bookT = mgr.getDocumentType("book");
    Document bookGeneric = new Document(bookT, new DocumentId("doc:book:0"));
    bookGeneric.setFieldValue("author", new StringFieldValue("Melville"));

    // Title carrying a span tree with a single "date" annotation.
    StringFieldValue title = new StringFieldValue("Moby Dick");
    SpanTree titleTree = new SpanTree();
    title.setSpanTree(titleTree);
    AnnotationType dateType = mgr.getAnnotationTypeRegistry().getType("date");
    // Reuse the annotation type resolved above rather than looking it up a second time.
    Struct dateStruct = new Struct(dateType.getDataType());
    dateStruct.setFieldValue("exacttime", new LongFieldValue(99L));
    Annotation date = new Annotation(dateType);
    date.setFieldValue(dateStruct);
    titleTree.annotate(date);
    bookGeneric.setFieldValue("title", title);

    // The title and its annotation must be readable from the generic document.
    StringFieldValue titleCheck = (StringFieldValue) bookGeneric.getFieldValue("title");
    assertEquals("Moby Dick", titleCheck.getWrappedValue());
    SpanTree treeCheck = titleCheck.getSpanTrees().iterator().next();
    Annotation titleAnnCheck = treeCheck.iterator().next();
    assertEquals(99L, ((StructuredFieldValue) titleAnnCheck.getFieldValue()).getFieldValue("exacttime").getWrappedValue());

    bookGeneric.setFieldValue("year", new IntegerFieldValue(1851));

    // Struct "ss1" that embeds a string array and a nested "ss0" struct.
    Struct myS0 = new Struct(mgr.getDataType("ss0"));
    myS0.setFieldValue("s0", new StringFieldValue("My s0"));
    myS0.setFieldValue("d0", new DoubleFieldValue(99));
    Struct myS1 = new Struct(mgr.getDataType("ss1"));
    myS1.setFieldValue("s1", new StringFieldValue("My s1"));
    myS1.setFieldValue("l1", new LongFieldValue(89));
    Array<StringFieldValue> myAs1 = new Array<>(DataType.getArray(DataType.STRING));
    myAs1.add(new StringFieldValue("as1_1"));
    myAs1.add(new StringFieldValue("as1_2"));
    myS1.setFieldValue("as1", myAs1);
    myS1.setFieldValue("ss01", myS0);
    bookGeneric.setFieldValue("mystruct", myS1);
    assertEquals("My s1", ((StructuredFieldValue) bookGeneric.getFieldValue("mystruct")).getFieldValue("s1").getWrappedValue());

    WeightedSet<FloatFieldValue> wsFloat = new WeightedSet<>(DataType.getWeightedSet(DataType.FLOAT));
    wsFloat.put(new FloatFieldValue(56f), 55);
    wsFloat.put(new FloatFieldValue(57f), 54);
    bookGeneric.setFieldValue("mywsfloat", wsFloat);

    // [[[1, 2, 3]]] built from the innermost array outwards.
    Array<IntegerFieldValue> intArr1 = new Array<>(DataType.getArray(DataType.INT));
    intArr1.add(new IntegerFieldValue(1));
    intArr1.add(new IntegerFieldValue(2));
    intArr1.add(new IntegerFieldValue(3));
    Array<Array<IntegerFieldValue>> intArr1Arr = new Array<>(DataType.getArray(intArr1.getDataType()));
    intArr1Arr.add(intArr1);
    Array<Array<Array<IntegerFieldValue>>> intArr1ArrArr = new Array<>(DataType.getArray(intArr1Arr.getDataType()));
    intArr1ArrArr.add(intArr1Arr);
    bookGeneric.setFieldValue("mytriplearray", intArr1ArrArr);

    // First wrap: Book constructed from the generic document.
    Book book = new Book(bookGeneric, bookGeneric.getId());
    assertEquals("Melville", book.getAuthor());
    assertEquals("My s1", book.getMystruct().getS1());
    assertEquals("My s0", book.getMystruct().getSs01().getS0());
    assertEquals((Integer) 1, book.getMytriplearray().get(0).get(0).get(0));
    assertEquals((Integer) 2, book.getMytriplearray().get(0).get(0).get(1));
    assertEquals((Integer) 3, book.getMytriplearray().get(0).get(0).get(2));
    assertEquals((Integer) 54, book.getMywsfloat().get(57f));
    assertEquals("as1_2", book.getMystruct().getAs1().get(1));
    treeCheck = book.titleSpanTrees().values().iterator().next();
    titleAnnCheck = treeCheck.iterator().next();
    assertEquals(99L, ((StructuredFieldValue) titleAnnCheck.getFieldValue()).getFieldValue("exacttime").getWrappedValue());

    // Second wrap: Book constructed from the first Book.
    Book book2 = new Book(book, book.getId());
    assertEquals(bookGeneric.getId(), book2.getId());
    assertEquals("Melville", book2.getAuthor());
    assertEquals("My s1", book2.getMystruct().getS1());
    assertEquals("My s0", book2.getMystruct().getSs01().getS0());
    assertEquals((Integer) 1, book2.getMytriplearray().get(0).get(0).get(0));
    assertEquals((Integer) 2, book2.getMytriplearray().get(0).get(0).get(1));
    assertEquals((Integer) 3, book2.getMytriplearray().get(0).get(0).get(2));
    assertEquals((Integer) 54, book2.getMywsfloat().get(57f));
    assertEquals("as1_2", book2.getMystruct().getAs1().get(1));
    treeCheck = book2.titleSpanTrees().values().iterator().next();
    titleAnnCheck = treeCheck.iterator().next();
    assertEquals(99L, ((StructuredFieldValue) titleAnnCheck.getFieldValue()).getFieldValue("exacttime").getWrappedValue());
}
Also used : ProxyDocument(com.yahoo.docproc.proxy.ProxyDocument) AnnotationType(com.yahoo.document.annotation.AnnotationType) Annotation(com.yahoo.document.annotation.Annotation) SpanTree(com.yahoo.document.annotation.SpanTree) Test(org.junit.Test)

Example 19 with SpanTree

use of com.yahoo.document.annotation.SpanTree in project vespa by vespa-engine.

From the class LinguisticsAnnotatorTestCase, method requireThatTermAnnotationsAreLowerCased.

/**
 * For a token with original text "foo" and token string "BAR", checks that the expected
 * annotation is a TERM annotation with the value "bar" over span (0, 3). The check runs for
 * every token type when the special-token flag is set, and only for indexable token types
 * when it is not.
 */
@Test
public void requireThatTermAnnotationsAreLowerCased() {
    SpanTree expectedTree = new SpanTree(SpanTrees.LINGUISTICS);
    Annotation lowerCasedTerm = new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar"));
    expectedTree.spanList().span(0, 3).annotate(lowerCasedTerm);

    for (boolean specialToken : new boolean[] {true, false}) {
        for (TokenType tokenType : TokenType.values()) {
            // Non-special tokens are only checked when their type is indexable.
            if (specialToken || tokenType.isIndexable()) {
                assertAnnotations(expectedTree, "foo", newToken("foo", "BAR", tokenType, specialToken));
            }
        }
    }
}
Also used : TokenType(com.yahoo.language.process.TokenType) StringFieldValue(com.yahoo.document.datatypes.StringFieldValue) Annotation(com.yahoo.document.annotation.Annotation) SpanTree(com.yahoo.document.annotation.SpanTree) Test(org.junit.Test)

Example 20 with SpanTree

use of com.yahoo.document.annotation.SpanTree in project vespa by vespa-engine.

From the class LinguisticsAnnotatorTestCase, method requireThatIndexableTokenStringsAreAnnotated.

/**
 * For every indexable token type, checks that a token with original text "foo" and token
 * string "bar" is expected to yield a TERM annotation with the value "bar" over span (0, 3).
 */
@Test
public void requireThatIndexableTokenStringsAreAnnotated() {
    SpanTree expectedTree = new SpanTree(SpanTrees.LINGUISTICS);
    Annotation term = new Annotation(AnnotationTypes.TERM, new StringFieldValue("bar"));
    expectedTree.spanList().span(0, 3).annotate(term);

    for (TokenType tokenType : TokenType.values()) {
        // Non-indexable token types are not checked here.
        if (tokenType.isIndexable()) {
            assertAnnotations(expectedTree, "foo", newToken("foo", "bar", tokenType));
        }
    }
}
Also used : TokenType(com.yahoo.language.process.TokenType) StringFieldValue(com.yahoo.document.datatypes.StringFieldValue) Annotation(com.yahoo.document.annotation.Annotation) SpanTree(com.yahoo.document.annotation.SpanTree) Test(org.junit.Test)

Aggregations

SpanTree (com.yahoo.document.annotation.SpanTree) 25; Annotation (com.yahoo.document.annotation.Annotation) 17; StringFieldValue (com.yahoo.document.datatypes.StringFieldValue) 15; Test (org.junit.Test) 14; AnnotationType (com.yahoo.document.annotation.AnnotationType) 6; TokenType (com.yahoo.language.process.TokenType) 6; Document (com.yahoo.document.Document) 3; SimpleToken (com.yahoo.language.simple.SimpleToken) 3; TestDocumentProcessor1 (com.yahoo.docproc.DocumentProcessingAbstractTestCase.TestDocumentProcessor1) 2; ProxyDocument (com.yahoo.docproc.proxy.ProxyDocument) 2; DocumentType (com.yahoo.document.DocumentType) 2; Span (com.yahoo.document.annotation.Span) 2; SpanList (com.yahoo.document.annotation.SpanList) 2; Linguistics (com.yahoo.language.Linguistics) 2; Token (com.yahoo.language.process.Token) 2; SimpleLinguistics (com.yahoo.language.simple.SimpleLinguistics) 2; Ss1 (com.yahoo.vespa.documentgen.test.Book.Ss1) 2; Date (com.yahoo.vespa.documentgen.test.annotation.Date) 2; HashMap (java.util.HashMap) 2; DocumentId (com.yahoo.document.DocumentId) 1