Search in sources :

Example 6 with SpanTree

use of com.yahoo.document.annotation.SpanTree in project vespa by vespa-engine.

the class SchemaMappingAndAccessesTest method testMappedDoc.

/**
 * Verifies that a {@code ProxyDocument} created with a field map ("t" -> "title", "a" -> "artist")
 * reads, writes and removes field values of the wrapped document through the mapped names,
 * and that span trees set on a string value are visible through repeated lookups of the same field.
 */
public void testMappedDoc() {
    Document doc = getDoc();
    Map<String, String> fieldMap = new HashMap<>();
    fieldMap.put("t", "title");
    fieldMap.put("a", "artist");
    ProxyDocument mapped = new ProxyDocument(new TestDocumentProcessor1(), doc, fieldMap);

    // Reads through the mapped names.
    assertEquals(new StringFieldValue("Black Rock"), mapped.getFieldValue("t"));
    assertEquals(new StringFieldValue("Joe Bonamassa").getWrappedValue(), mapped.getFieldValue("a").getWrappedValue());

    // Writes through the mapped names must be visible under both the mapped and the real names.
    mapped.setFieldValue("t", new StringFieldValue("The Ballad Of John Henry"));
    StringFieldValue bona = new StringFieldValue("Bonamassa");
    mapped.setFieldValue("a", bona);
    assertEquals(new StringFieldValue("The Ballad Of John Henry"), doc.getFieldValue("title"));
    assertEquals(new StringFieldValue("The Ballad Of John Henry"), mapped.getFieldValue("t"));
    assertEquals(new StringFieldValue("Bonamassa"), doc.getFieldValue("artist"));
    assertEquals(new StringFieldValue("Bonamassa"), mapped.getFieldValue("a"));

    // The concatenation goes through the field value's toString(), so a plain String is written here.
    mapped.setFieldValue("a", mapped.getFieldValue("a") + "Hughes");
    assertEquals(new StringFieldValue("BonamassaHughes"), mapped.getFieldValue("a"));

    // Verify consistency when using string values to manipulate annotation span trees
    StringFieldValue unmapped1 = (StringFieldValue) doc.getFieldValue("artist");
    StringFieldValue unmapped2 = (StringFieldValue) doc.getFieldValue("artist");
    // Identity, not equality: both lookups must hand back the very same instance.
    assertTrue(unmapped1 == unmapped2);
    unmapped1.setSpanTree(new SpanTree("test"));
    assertEquals("test", unmapped2.getSpanTree("test").getName());
    StringFieldValue mapped1 = (StringFieldValue) mapped.getFieldValue("a");
    mapped1.setSpanTree(new SpanTree("test2"));
    StringFieldValue mapped2 = (StringFieldValue) mapped.getFieldValue("a");
    assertTrue(mapped1 == mapped2);
    assertEquals("test2", mapped2.getSpanTree("test2").getName());

    // Removal by mapped name and by Field object.
    mapped.removeFieldValue("a");
    assertEquals(null, mapped.getFieldValue("a"));
    mapped.removeFieldValue(mapped.getField("t"));
    assertEquals(null, mapped.getFieldValue("t"));

    // A removed field can be set again and removed once more.
    mapped.setFieldValue("a", new StringFieldValue("Bonamassa"));
    assertEquals(new StringFieldValue("Bonamassa"), doc.getFieldValue("artist"));
    mapped.removeFieldValue("a");
    assertEquals(null, mapped.getFieldValue("a"));
}
Also used : HashMap(java.util.HashMap) StringFieldValue(com.yahoo.document.datatypes.StringFieldValue) TestDocumentProcessor1(com.yahoo.docproc.DocumentProcessingAbstractTestCase.TestDocumentProcessor1) Document(com.yahoo.document.Document) SpanTree(com.yahoo.document.annotation.SpanTree)

Example 7 with SpanTree

use of com.yahoo.document.annotation.SpanTree in project vespa by vespa-engine.

the class LinguisticsAnnotator method annotate.

/**
 * Tokenizes the given string and attaches the resulting {@code SpanTrees.LINGUISTICS} span tree to it.
 *
 * <p>A string that already carries a linguistics span tree is left untouched. At most the first
 * {@code config.getMaxTokenizeLength()} characters of the string are handed to the tokenizer.</p>
 *
 * @param text the text to annotate
 * @return {@code true} if the text already had a linguistics tree or at least one annotation was
 *         produced and attached; {@code false} if tokenizing produced no annotations
 */
public boolean annotate(StringFieldValue text) {
    if (text.getSpanTree(SpanTrees.LINGUISTICS) != null) {
        // Already annotated with LINGUISTICS.
        return true;
    }

    Tokenizer tokenizer = factory.getTokenizer();
    String fullText = text.getString();
    int tokenizeLimit = config.getMaxTokenizeLength();

    // Only the leading part of an over-long string is tokenized.
    String input = fullText;
    if (fullText.length() > tokenizeLimit) {
        input = fullText.substring(0, tokenizeLimit);
    }

    Iterable<Token> tokens = tokenizer.tokenize(input, config.getLanguage(), config.getStemMode(), config.getRemoveAccents());
    TermOccurrences termOccurrences = new TermOccurrences(config.getMaxTermOccurrences());
    SpanTree tree = new SpanTree(SpanTrees.LINGUISTICS);
    for (Token token : tokens) {
        addAnnotationSpan(fullText, tree.spanList(), tokenizer, token, config.getStemMode(), termOccurrences);
    }

    // The tree is attached only when it actually holds annotations.
    boolean annotated = tree.numAnnotations() != 0;
    if (annotated) {
        text.setSpanTree(tree);
    }
    return annotated;
}
Also used : Token(com.yahoo.language.process.Token) Tokenizer(com.yahoo.language.process.Tokenizer) SpanTree(com.yahoo.document.annotation.SpanTree)

Example 8 with SpanTree

use of com.yahoo.document.annotation.SpanTree in project vespa by vespa-engine.

the class StringTestCase method annotate.

/**
 * Builds a span tree named "testannotations", annotates its nodes with company, person and
 * location annotations (plus further annotations on a separate branch), and attaches the tree
 * to the "body" field of the given document.
 *
 * @param document the document whose "body" string field receives the span tree
 * @param manager  the type manager supplying the annotation type registry and the
 *                 "annotation.*" struct data types
 * @return the same document instance that was passed in, after its "body" field has been set again
 */
public Document annotate(Document document, DocumentTypeManager manager) {
    AnnotationTypeRegistry registry = manager.getAnnotationTypeRegistry();
    AnnotationType company = registry.getType("company");
    AnnotationType industry = registry.getType("industry");
    AnnotationType person = registry.getType("person");
    AnnotationType location = registry.getType("location");
    // Print the name of every registered annotation type and pass each type to parseAnnotationType.
    Map<String, AnnotationType> m = registry.getTypes();
    for (String key : m.keySet()) {
        System.out.println("Key: " + key);
        AnnotationType val = m.get(key);
        parseAnnotationType(val);
    }
    // Create the tree and add four spans directly under its root.
    SpanTree tree = new SpanTree("testannotations");
    SpanList root = (SpanList) tree.getRoot();
    SpanNode companySpan = new Span(0, 5);
    SpanNode industrySpan = new Span(5, 10);
    SpanNode personSpan = new Span(10, 15);
    SpanNode locationSpan = new Span(15, 20);
    root.add(companySpan);
    root.add(industrySpan);
    root.add(personSpan);
    root.add(locationSpan);
    // Company annotation on companySpan; the struct comes from the annotation type's own data type.
    Struct companyValue = (Struct) company.getDataType().createFieldValue();
    companyValue.setFieldValue("name", new StringFieldValue("Sun"));
    companyValue.setFieldValue("ceo", new StringFieldValue("Scott Mcnealy"));
    companyValue.setFieldValue("lat", new DoubleFieldValue(37.7));
    companyValue.setFieldValue("lon", new DoubleFieldValue(-122.44));
    companyValue.setFieldValue("vertical", new StringFieldValue("software"));
    Annotation compAn = new Annotation(company, companyValue);
    tree.annotate(companySpan, compAn);
    // Person annotation on personSpan; here the struct type is looked up by name in the manager.
    Struct personValue = new Struct(manager.getDataType("annotation.person"));
    personValue.setFieldValue("name", new StringFieldValue("Richard Bair"));
    Annotation personAn = new Annotation(person, personValue);
    tree.annotate(personSpan, personAn);
    // locationSpan receives two separate location annotations.
    Struct locValue = new Struct(manager.getDataType("annotation.location"));
    locValue.setFieldValue("name", new StringFieldValue("Prinsens Gate"));
    Annotation loc = new Annotation(location, locValue);
    tree.annotate(locationSpan, loc);
    Struct locValue2 = new Struct(manager.getDataType("annotation.location"));
    locValue2.setFieldValue("name", new StringFieldValue("Kongens Gate"));
    Annotation locAn = new Annotation(location, locValue2);
    tree.annotate(locationSpan, locAn);
    // A second span list, not yet part of the tree; note span3 is added before span2.
    SpanList branch = new SpanList();
    SpanNode span1 = new Span(0, 3);
    SpanNode span2 = new Span(1, 9);
    SpanNode span3 = new Span(12, 10);
    branch.add(span1);
    branch.add(span3);
    branch.add(span2);
    // The industry annotation goes on span1 of the branch; industrySpan under the root is never annotated here.
    Struct industryValue = new Struct(manager.getDataType("annotation.industry"));
    industryValue.setFieldValue("vertical", new StringFieldValue("Manufacturing"));
    Annotation ind = new Annotation(industry, industryValue);
    tree.annotate(span1, ind);
    Struct pValue = new Struct(manager.getDataType("annotation.person"));
    pValue.setFieldValue("name", new StringFieldValue("Praveen Mohan"));
    Annotation pAn = new Annotation(person, pValue);
    tree.annotate(span2, pAn);
    Struct lValue = new Struct(manager.getDataType("annotation.location"));
    lValue.setFieldValue("name", new StringFieldValue("Embassy Golf Links"));
    Annotation locn = new Annotation(location, lValue);
    tree.annotate(span3, locn);
    // Company annotation on the branch list itself rather than on one of its spans.
    Struct cValue = (Struct) company.getDataType().createFieldValue();
    cValue.setFieldValue("name", new StringFieldValue("Yahoo"));
    cValue.setFieldValue("ceo", new StringFieldValue("Carol Bartz"));
    cValue.setFieldValue("lat", new DoubleFieldValue(127.7));
    cValue.setFieldValue("lon", new DoubleFieldValue(-42.44));
    cValue.setFieldValue("vertical", new StringFieldValue("search"));
    Annotation cAn = new Annotation(company, cValue);
    tree.annotate(branch, cAn);
    // Person annotation on the root list.
    Struct pVal = new Struct(manager.getDataType("annotation.person"));
    pVal.setFieldValue("name", new StringFieldValue("Kim Omar"));
    Annotation an = new Annotation(person, pVal);
    tree.annotate(root, an);
    // The branch is attached to the root and detached again before cleanup() runs.
    // NOTE(review): presumably cleanup() then discards the annotations that still reference the
    // detached branch and its spans - confirm against SpanTree.cleanup().
    root.add(branch);
    StringFieldValue body = (StringFieldValue) document.getFieldValue(document.getDataType().getField("body"));
    root.remove(branch);
    tree.cleanup();
    System.out.println("No. Of Annotations: " + tree.numAnnotations());
    // Attach the tree to the body string and write the value back into the document.
    body.setSpanTree(tree);
    document.setFieldValue(document.getField("body"), body);
    return document;
}
Also used : SpanNode(com.yahoo.document.annotation.SpanNode) SpanList(com.yahoo.document.annotation.SpanList) Span(com.yahoo.document.annotation.Span) AnnotationTypeRegistry(com.yahoo.document.annotation.AnnotationTypeRegistry) AnnotationType(com.yahoo.document.annotation.AnnotationType) Annotation(com.yahoo.document.annotation.Annotation) SpanTree(com.yahoo.document.annotation.SpanTree)

Example 9 with SpanTree

use of com.yahoo.document.annotation.SpanTree in project vespa by vespa-engine.

the class StringTestCase method testNestedSpanTreeBug4187377.

/**
 * Regression test for bug 4187377: a string whose span tree holds an annotation whose value is
 * itself a string with a span tree must be rejected by the serializer with a
 * {@code SerializationException}.
 */
@Test
public void testNestedSpanTreeBug4187377() {
    AnnotationType annotationType = new AnnotationType("ann", DataType.STRING);

    // Outer string: one span under the root of its own tree.
    StringFieldValue outerValue = new StringFieldValue("Ballooo");
    SpanTree outer = new SpanTree("outer");
    outerValue.setSpanTree(outer);
    SpanList outerSpans = (SpanList) outer.getRoot();
    Span outerFirst = new Span(0, 1);
    outerSpans.add(outerFirst);

    // Inner string: used as the annotation value on the outer span, and given a tree of its own.
    StringFieldValue innerValue = new StringFieldValue("innerBalloooo");
    outer.annotate(outerFirst, new Annotation(annotationType, innerValue));
    SpanTree inner = new SpanTree("inner");
    innerValue.setSpanTree(inner);
    SpanList innerSpans = (SpanList) inner.getRoot();
    Span innerFirst = new Span(0, 1);
    innerSpans.add(innerFirst);
    inner.annotate(innerFirst, new Annotation(annotationType));

    GrowableByteBuffer target = new GrowableByteBuffer(1024);
    DocumentSerializer writer = DocumentSerializerFactory.create42(target);
    try {
        writer.write(null, outerValue);
        fail("Should have failed, nested span trees are not supported.");
    } catch (SerializationException expected) {
        // OK! Rejecting the nested tree is exactly what this test asks for.
    }
}
Also used : GrowableByteBuffer(com.yahoo.io.GrowableByteBuffer) SpanList(com.yahoo.document.annotation.SpanList) Span(com.yahoo.document.annotation.Span) AnnotationType(com.yahoo.document.annotation.AnnotationType) Annotation(com.yahoo.document.annotation.Annotation) SpanTree(com.yahoo.document.annotation.SpanTree) Test(org.junit.Test) AbstractTypesTest(com.yahoo.document.annotation.AbstractTypesTest)

Example 10 with SpanTree

use of com.yahoo.document.annotation.SpanTree in project vespa by vespa-engine.

the class DocumentGenPluginTest method testBaseAnnotations.

/**
 * Exercises span trees on the generated {@code Book} type: a tree set through the generic field
 * value API is visible through {@code authorSpanTrees()}, a tree map set through
 * {@code setDescriptionSpanTrees} is visible on the "description" string value and disappears when
 * that field is removed, and the generated {@code Artist} annotation is a {@code Person} whose
 * struct type has the expected field data types.
 */
@Test
public void testBaseAnnotations() {
    Book book = getBook();

    // Author: attach a tree holding a single Person annotation via the string field value.
    SpanTree authorTree = new SpanTree();
    Person p = new Person();
    p.setName("Melville");
    authorTree.annotate(p);
    StringFieldValue sfv = ((StringFieldValue) book.getFieldValue("author"));
    sfv.setSpanTree(authorTree);
    book.setFieldValue("author", sfv);
    assertEquals(p, book.authorSpanTrees().values().iterator().next().iterator().next());

    // Description: hand over a name -> tree map built as a plain HashMap.
    SpanTree descTree = new SpanTree();
    Person p2 = new Person();
    p2.setName("H. Melville");
    descTree.annotate(p2);
    HashMap<String, SpanTree> descTrees = new HashMap<String, SpanTree>();
    descTrees.put(descTree.getName(), descTree);
    book.setDescriptionSpanTrees(descTrees);
    assertEquals("H. Melville", ((Person) ((StringFieldValue) book.getFieldValue(book.getField("description"))).getSpanTrees().iterator().next().iterator().next()).getName());
    // removeFieldValue returns the removed value, which must still carry the tree.
    assertEquals("H. Melville", ((Person) ((StringFieldValue) book.removeFieldValue("description")).getSpanTrees().iterator().next().iterator().next()).getName());
    assertEquals(null, book.descriptionSpanTrees());
    assertEquals(null, book.getFieldValue("description"));

    // Artist must be a Person, with "name" as a string and "instrument" as an int.
    Artist a = new Artist();
    assertTrue(a instanceof Person);
    assertEquals(DataType.STRING, ((StructDataType) a.getType().getDataType()).getField("name").getDataType());
    assertEquals(DataType.INT, ((StructDataType) a.getType().getDataType()).getField("instrument").getDataType());
    assertEquals(DataType.STRING, ((Struct) a.getFieldValue()).getField("name").getDataType());
}
Also used : Artist(com.yahoo.vespa.documentgen.test.annotation.Artist) Person(com.yahoo.vespa.documentgen.test.annotation.Person) SpanTree(com.yahoo.document.annotation.SpanTree) Test(org.junit.Test)

Aggregations

SpanTree (com.yahoo.document.annotation.SpanTree)25 Annotation (com.yahoo.document.annotation.Annotation)17 StringFieldValue (com.yahoo.document.datatypes.StringFieldValue)15 Test (org.junit.Test)14 AnnotationType (com.yahoo.document.annotation.AnnotationType)6 TokenType (com.yahoo.language.process.TokenType)6 Document (com.yahoo.document.Document)3 SimpleToken (com.yahoo.language.simple.SimpleToken)3 TestDocumentProcessor1 (com.yahoo.docproc.DocumentProcessingAbstractTestCase.TestDocumentProcessor1)2 ProxyDocument (com.yahoo.docproc.proxy.ProxyDocument)2 DocumentType (com.yahoo.document.DocumentType)2 Span (com.yahoo.document.annotation.Span)2 SpanList (com.yahoo.document.annotation.SpanList)2 Linguistics (com.yahoo.language.Linguistics)2 Token (com.yahoo.language.process.Token)2 SimpleLinguistics (com.yahoo.language.simple.SimpleLinguistics)2 Ss1 (com.yahoo.vespa.documentgen.test.Book.Ss1)2 Date (com.yahoo.vespa.documentgen.test.annotation.Date)2 HashMap (java.util.HashMap)2 DocumentId (com.yahoo.document.DocumentId)1