use of com.yahoo.document.annotation.SpanTree in project vespa by vespa-engine.
the class SchemaMappingAndAccessesTest method testMappedDoc.
/**
 * Verifies that a {@code ProxyDocument} created with the field map
 * {@code "t" -> "title"}, {@code "a" -> "artist"} reads, writes and removes
 * field values of the wrapped document through the mapped names, and that
 * span trees set on a returned string value are seen on subsequent lookups.
 */
public void testMappedDoc() {
    Document doc = getDoc();
    Map<String, String> fieldMap = new HashMap<>();
    fieldMap.put("t", "title");
    fieldMap.put("a", "artist");
    ProxyDocument mapped = new ProxyDocument(new TestDocumentProcessor1(), doc, fieldMap);

    // Reads through the mapped names.
    assertEquals(new StringFieldValue("Black Rock"), mapped.getFieldValue("t"));
    assertEquals(new StringFieldValue("Joe Bonamassa").getWrappedValue(), mapped.getFieldValue("a").getWrappedValue());

    // Writes through the mapped names must be visible under both the mapped
    // name (on the proxy) and the real name (on the wrapped document).
    mapped.setFieldValue("t", new StringFieldValue("The Ballad Of John Henry"));
    StringFieldValue bona = new StringFieldValue("Bonamassa");
    mapped.setFieldValue("a", bona);
    assertEquals(new StringFieldValue("The Ballad Of John Henry"), doc.getFieldValue("title"));
    assertEquals(new StringFieldValue("The Ballad Of John Henry"), mapped.getFieldValue("t"));
    assertEquals(new StringFieldValue("Bonamassa"), doc.getFieldValue("artist"));
    assertEquals(new StringFieldValue("Bonamassa"), mapped.getFieldValue("a"));

    // Here the value is passed as a plain String: the concatenation uses the
    // current field value's toString(), which the next assertion expects to
    // be the bare string content.
    mapped.setFieldValue("a", mapped.getFieldValue("a") + "Hughes");
    assertEquals(new StringFieldValue("BonamassaHughes"), mapped.getFieldValue("a"));

    // Verify consistency when using string values to manipulate annotation span trees:
    // repeated lookups must hand out the same instance, so a span tree set through
    // one reference is visible through the other.
    StringFieldValue unmapped1 = (StringFieldValue) doc.getFieldValue("artist");
    StringFieldValue unmapped2 = (StringFieldValue) doc.getFieldValue("artist");
    assertTrue(unmapped1 == unmapped2);
    unmapped1.setSpanTree(new SpanTree("test"));
    assertEquals("test", unmapped2.getSpanTree("test").getName());

    StringFieldValue mapped1 = (StringFieldValue) mapped.getFieldValue("a");
    mapped1.setSpanTree(new SpanTree("test2"));
    StringFieldValue mapped2 = (StringFieldValue) mapped.getFieldValue("a");
    assertTrue(mapped1 == mapped2);
    assertEquals("test2", mapped2.getSpanTree("test2").getName());

    // Removal by mapped name and by Field object.
    mapped.removeFieldValue("a");
    assertEquals(null, mapped.getFieldValue("a"));
    mapped.removeFieldValue(mapped.getField("t"));
    assertEquals(null, mapped.getFieldValue("t"));

    // A removed field can be set again and removed once more.
    mapped.setFieldValue("a", new StringFieldValue("Bonamassa"));
    assertEquals(new StringFieldValue("Bonamassa"), doc.getFieldValue("artist"));
    mapped.removeFieldValue("a");
    assertEquals(null, mapped.getFieldValue("a"));
}
use of com.yahoo.document.annotation.SpanTree in project vespa by vespa-engine.
the class LinguisticsAnnotator method annotate.
/**
 * Tokenizes the given string and attaches the resulting linguistics span tree to it.
 * <p>
 * Only the first {@code config.getMaxTokenizeLength()} characters are tokenized.
 * A value that already carries a linguistics span tree is left untouched.
 *
 * @param text the text to annotate
 * @return {@code true} if the value already had a linguistics span tree or one
 *         was attached by this call; {@code false} if no annotations were produced
 */
public boolean annotate(StringFieldValue text) {
    if (text.getSpanTree(SpanTrees.LINGUISTICS) != null) {
        // Already annotated with LINGUISTICS.
        return true;
    }

    Tokenizer tokenizer = factory.getTokenizer();

    // Cap the tokenizer input at the configured maximum length.
    String toTokenize;
    if (text.getString().length() > config.getMaxTokenizeLength()) {
        toTokenize = text.getString().substring(0, config.getMaxTokenizeLength());
    } else {
        toTokenize = text.getString();
    }

    Iterable<Token> tokenStream = tokenizer.tokenize(toTokenize,
                                                     config.getLanguage(),
                                                     config.getStemMode(),
                                                     config.getRemoveAccents());
    TermOccurrences occurrences = new TermOccurrences(config.getMaxTermOccurrences());
    SpanTree linguistics = new SpanTree(SpanTrees.LINGUISTICS);
    for (Token current : tokenStream) {
        addAnnotationSpan(text.getString(), linguistics.spanList(), tokenizer, current, config.getStemMode(), occurrences);
    }

    // Only attach the tree when tokenization actually produced annotations.
    boolean annotated = linguistics.numAnnotations() != 0;
    if (annotated) {
        text.setSpanTree(linguistics);
    }
    return annotated;
}
use of com.yahoo.document.annotation.SpanTree in project vespa by vespa-engine.
the class StringTestCase method annotate.
/**
 * Builds a span tree named "testannotations" with company, person, location
 * and industry annotations and attaches it to the "body" field of the document.
 * <p>
 * A separate branch of three spans is annotated and temporarily added to the
 * root, then removed again before {@code tree.cleanup()} is called. The
 * registered annotation types and the final annotation count are printed to
 * standard out.
 *
 * @param document the document whose "body" string field receives the span tree
 * @param manager  supplies the annotation type registry and the struct data types
 * @return the same document instance, with the "body" field set again
 */
public Document annotate(Document document, DocumentTypeManager manager) {
    AnnotationTypeRegistry registry = manager.getAnnotationTypeRegistry();
    AnnotationType company = registry.getType("company");
    AnnotationType industry = registry.getType("industry");
    AnnotationType person = registry.getType("person");
    AnnotationType location = registry.getType("location");

    // Dump every registered annotation type.
    for (Map.Entry<String, AnnotationType> registered : registry.getTypes().entrySet()) {
        System.out.println("Key: " + registered.getKey());
        parseAnnotationType(registered.getValue());
    }

    // Four adjacent five-character spans directly under the root.
    SpanTree tree = new SpanTree("testannotations");
    SpanList root = (SpanList) tree.getRoot();
    SpanNode companySpan = new Span(0, 5);
    SpanNode industrySpan = new Span(5, 10);
    SpanNode personSpan = new Span(10, 15);
    SpanNode locationSpan = new Span(15, 20);
    root.add(companySpan);
    root.add(industrySpan);
    root.add(personSpan);
    root.add(locationSpan);

    Struct sun = (Struct) company.getDataType().createFieldValue();
    sun.setFieldValue("name", new StringFieldValue("Sun"));
    sun.setFieldValue("ceo", new StringFieldValue("Scott Mcnealy"));
    sun.setFieldValue("lat", new DoubleFieldValue(37.7));
    sun.setFieldValue("lon", new DoubleFieldValue(-122.44));
    sun.setFieldValue("vertical", new StringFieldValue("software"));
    tree.annotate(companySpan, new Annotation(company, sun));

    Struct richard = new Struct(manager.getDataType("annotation.person"));
    richard.setFieldValue("name", new StringFieldValue("Richard Bair"));
    tree.annotate(personSpan, new Annotation(person, richard));

    // The location span carries two annotations of the same type.
    Struct prinsensGate = new Struct(manager.getDataType("annotation.location"));
    prinsensGate.setFieldValue("name", new StringFieldValue("Prinsens Gate"));
    tree.annotate(locationSpan, new Annotation(location, prinsensGate));

    Struct kongensGate = new Struct(manager.getDataType("annotation.location"));
    kongensGate.setFieldValue("name", new StringFieldValue("Kongens Gate"));
    tree.annotate(locationSpan, new Annotation(location, kongensGate));

    // A separate branch; note that the spans are added as first, third, second.
    SpanList branch = new SpanList();
    SpanNode firstBranchSpan = new Span(0, 3);
    SpanNode secondBranchSpan = new Span(1, 9);
    SpanNode thirdBranchSpan = new Span(12, 10);
    branch.add(firstBranchSpan);
    branch.add(thirdBranchSpan);
    branch.add(secondBranchSpan);

    Struct manufacturing = new Struct(manager.getDataType("annotation.industry"));
    manufacturing.setFieldValue("vertical", new StringFieldValue("Manufacturing"));
    tree.annotate(firstBranchSpan, new Annotation(industry, manufacturing));

    Struct praveen = new Struct(manager.getDataType("annotation.person"));
    praveen.setFieldValue("name", new StringFieldValue("Praveen Mohan"));
    tree.annotate(secondBranchSpan, new Annotation(person, praveen));

    Struct golfLinks = new Struct(manager.getDataType("annotation.location"));
    golfLinks.setFieldValue("name", new StringFieldValue("Embassy Golf Links"));
    tree.annotate(thirdBranchSpan, new Annotation(location, golfLinks));

    // Annotations on the branch list itself and on the root list.
    Struct yahoo = (Struct) company.getDataType().createFieldValue();
    yahoo.setFieldValue("name", new StringFieldValue("Yahoo"));
    yahoo.setFieldValue("ceo", new StringFieldValue("Carol Bartz"));
    yahoo.setFieldValue("lat", new DoubleFieldValue(127.7));
    yahoo.setFieldValue("lon", new DoubleFieldValue(-42.44));
    yahoo.setFieldValue("vertical", new StringFieldValue("search"));
    tree.annotate(branch, new Annotation(company, yahoo));

    Struct kim = new Struct(manager.getDataType("annotation.person"));
    kim.setFieldValue("name", new StringFieldValue("Kim Omar"));
    tree.annotate(root, new Annotation(person, kim));

    // Attach the branch, then detach it again before cleaning up the tree.
    root.add(branch);
    StringFieldValue body = (StringFieldValue) document.getFieldValue(document.getDataType().getField("body"));
    root.remove(branch);
    tree.cleanup();
    System.out.println("No. Of Annotations: " + tree.numAnnotations());

    body.setSpanTree(tree);
    document.setFieldValue(document.getField("body"), body);
    return document;
}
use of com.yahoo.document.annotation.SpanTree in project vespa by vespa-engine.
the class StringTestCase method testNestedSpanTreeBug4187377.
/**
 * Checks that serializing a string whose span tree holds an annotation value
 * that itself carries a span tree is rejected with a {@code SerializationException}.
 */
@Test
public void testNestedSpanTreeBug4187377() {
    AnnotationType annotationType = new AnnotationType("ann", DataType.STRING);

    // Outer string with its own span tree and a single span.
    StringFieldValue outer = new StringFieldValue("Ballooo");
    SpanTree treeOfOuter = new SpanTree("outer");
    outer.setSpanTree(treeOfOuter);
    SpanList rootOfOuter = (SpanList) treeOfOuter.getRoot();
    Span spanInOuter = new Span(0, 1);
    rootOfOuter.add(spanInOuter);

    // The outer annotation's value is a string that gets a span tree of its own.
    StringFieldValue inner = new StringFieldValue("innerBalloooo");
    treeOfOuter.annotate(spanInOuter, new Annotation(annotationType, inner));
    SpanTree treeOfInner = new SpanTree("inner");
    inner.setSpanTree(treeOfInner);
    SpanList rootOfInner = (SpanList) treeOfInner.getRoot();
    Span spanInInner = new Span(0, 1);
    rootOfInner.add(spanInInner);
    treeOfInner.annotate(spanInInner, new Annotation(annotationType));

    DocumentSerializer serializer = DocumentSerializerFactory.create42(new GrowableByteBuffer(1024));
    try {
        serializer.write(null, outer);
        fail("Should have failed, nested span trees are not supported.");
    } catch (SerializationException expected) {
        // OK! Serialization is expected to refuse the nested tree.
    }
}
use of com.yahoo.document.annotation.SpanTree in project vespa by vespa-engine.
the class DocumentGenPluginTest method testBaseAnnotations.
/**
 * Exercises span tree access on a {@code Book}: setting a tree through the
 * "author" field value, setting trees through {@code setDescriptionSpanTrees},
 * reading them back through the field value, and removing the field. Finally
 * checks the struct field types exposed by an {@code Artist} annotation.
 */
@Test
public void testBaseAnnotations() {
    Book book = getBook();

    // Attach a tree to "author" via the field value and read it back via the accessor.
    SpanTree authorTree = new SpanTree();
    Person p = new Person();
    p.setName("Melville");
    authorTree.annotate(p);
    StringFieldValue sfv = ((StringFieldValue) book.getFieldValue("author"));
    sfv.setSpanTree(authorTree);
    book.setFieldValue("author", sfv);
    assertEquals(p, book.authorSpanTrees().values().iterator().next().iterator().next());

    // Attach a tree to "description" via the span tree setter and read it back via the field value.
    SpanTree descTree = new SpanTree();
    Person p2 = new Person();
    p2.setName("H. Melville");
    descTree.annotate(p2);
    // A plain map instead of double-brace initialisation, which would create
    // an anonymous HashMap subclass just to hold one entry.
    HashMap<String, SpanTree> descTrees = new HashMap<String, SpanTree>();
    descTrees.put(descTree.getName(), descTree);
    book.setDescriptionSpanTrees(descTrees);

    StringFieldValue description = (StringFieldValue) book.getFieldValue(book.getField("description"));
    assertEquals("H. Melville", ((Person) description.getSpanTrees().iterator().next().iterator().next()).getName());

    // The value returned by the removal still carries the tree; afterwards the field is gone.
    StringFieldValue removed = (StringFieldValue) book.removeFieldValue("description");
    assertEquals("H. Melville", ((Person) removed.getSpanTrees().iterator().next().iterator().next()).getName());
    assertEquals(null, book.descriptionSpanTrees());
    assertEquals(null, book.getFieldValue("description"));

    // An Artist is expected to be a Person, with "name" and "instrument" struct fields.
    Artist a = new Artist();
    assertTrue(Person.class.isInstance(a));
    StructDataType artistStructType = (StructDataType) a.getType().getDataType();
    assertEquals(DataType.STRING, artistStructType.getField("name").getDataType());
    assertEquals(DataType.INT, artistStructType.getField("instrument").getDataType());
    assertEquals(DataType.STRING, ((Struct) a.getFieldValue()).getField("name").getDataType());
}
Aggregations