Search in sources :

Example 1 with TextAnnotation

use of org.apache.stanbol.enhancer.rdfentities.fise.TextAnnotation in project stanbol by apache.

the class TestLocationEnhancementEngine method getTextAnnotation.

/**
 * Writes a test {@link TextAnnotation} for {@code name} into the metadata of {@code ci}.
 *
 * <p>The annotation proxy is obtained from an {@link RdfEntityFactory} created over
 * {@code ci.getMetadata()}. Despite the "get" prefix nothing is returned to the caller.
 *
 * <p>NOTE(review): the annotation IRI is a fixed literal, so every call addresses the same
 * resource - confirm that callers invoking this more than once per content item intend that.
 *
 * @param ci content item whose stream is read as UTF-8 to locate {@code name} and whose
 *     metadata backs the created annotation
 * @param name text stored as the selected text; its first occurrence in the content gives
 *     the start offset
 * @param context text stored as the selection context
 * @param type value added to the collection returned by {@code getDcType()}
 */
public static void getTextAnnotation(ContentItem ci, String name, String context, IRI type) {
    String content;
    // IOUtils.toString does not close the stream it is handed, so close it here.
    // (Fully qualified type name: no additional import is required.)
    try (java.io.InputStream in = ci.getStream()) {
        content = IOUtils.toString(in, "UTF-8");
    } catch (IOException e) {
        // Best effort, as before: unreadable content is treated as empty.
        content = "";
    }
    RdfEntityFactory factory = RdfEntityFactory.createInstance(ci.getMetadata());
    TextAnnotation testAnnotation = factory.getProxy(new IRI("urn:org.apache:stanbol.enhancer:test:text-annotation:person"), TextAnnotation.class);
    testAnnotation.setCreator(new IRI("urn:org.apache:stanbol.enhancer:test:dummyEngine"));
    testAnnotation.setCreated(new Date());
    testAnnotation.setSelectedText(name);
    testAnnotation.setSelectionContext(context);
    testAnnotation.getDcType().add(type);
    // primitive int: avoids boxing/unboxing for the comparison and the arithmetic below
    int start = content.indexOf(name);
    if (start < 0) {
        // name does not occur in the content: fall back to an arbitrary offset in [0, 100)
        start = (int) (Math.random() * 100);
    }
    testAnnotation.setStart(start);
    testAnnotation.setEnd(start + name.length());
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) RdfEntityFactory(org.apache.stanbol.enhancer.rdfentities.RdfEntityFactory) IOException(java.io.IOException) TextAnnotation(org.apache.stanbol.enhancer.rdfentities.fise.TextAnnotation) Date(java.util.Date)

Example 2 with TextAnnotation

use of org.apache.stanbol.enhancer.rdfentities.fise.TextAnnotation in project stanbol by apache.

the class TestEnhancementInterfaces method testEnhancementInterfaces.

/**
 * Creates three text annotations (person, location, organisation) and three entity
 * annotations through {@link RdfEntityFactory} proxies over the content item's metadata,
 * then asserts that the check helpers find exactly three annotations of each kind.
 *
 * @throws Exception declared by the original signature; which of the fixture or check
 *     helpers may throw is not visible from this method
 */
@Test
public void testEnhancementInterfaces() throws Exception {
    ContentItem ci = createContentItem(SINGLE_SENTENCE_SOURCE);
    IRI ciUri = new IRI(ci.getUri().getUnicodeString());
    RdfEntityFactory factory = RdfEntityFactory.createInstance(ci.getMetadata());
    long start = System.currentTimeMillis();
    //create an Text Annotation representing an extracted Person
    TextAnnotation personAnnotation = addSentenceTextAnnotation(factory, ciUri, "http://www.example.org/cv/annotatation-types/text#Person", 0.8, "Patrick Marshall");
    //create an Text Annotation representing an extracted Location
    TextAnnotation locationAnnotation = addSentenceTextAnnotation(factory, ciUri, "http://www.example.org/cv/annotatation-types/text#Location", 0.78, "New Zealand");
    //create an Text Annotation representing an extracted Organisation
    // (no entity annotation refers to it, so the returned proxy is not kept)
    addSentenceTextAnnotation(factory, ciUri, "http://www.example.org/cv/annotatation-types/text#Organisation", 0.78, "University of Otago");
    // create an Entity Annotation for the person TextAnnotation
    addSentenceEntityAnnotation(factory, ciUri, 0.56, personAnnotation, "Patrick Marshall", "http://rdf.freebase.com/rdf/en/patrick_marshall", new IRI("http://rdf.freebase.com/ns/people.person"), new IRI("http://rdf.freebase.com/ns/common.topic"), new IRI("http://rdf.freebase.com/ns/education.academic"));
    // and an other for New Zealand
    addSentenceEntityAnnotation(factory, ciUri, 0.98, locationAnnotation, "New Zealand", "http://rdf.freebase.com/rdf/en/new_zealand", new IRI("http://rdf.freebase.com/ns/location.location"), new IRI("http://rdf.freebase.com/ns/common.topic"), new IRI("http://rdf.freebase.com/ns/location.country"));
    // and an other option for New Zealand (same label, different entity reference)
    addSentenceEntityAnnotation(factory, ciUri, 0.36, locationAnnotation, "New Zealand", "http://rdf.freebase.com/rdf/en/air_new_zealand", new IRI("http://rdf.freebase.com/ns/business.sponsor"), new IRI("http://rdf.freebase.com/ns/common.topic"), new IRI("http://rdf.freebase.com/ns/travel.transport_operator"), new IRI("http://rdf.freebase.com/ns/aviation.airline"), new IRI("http://rdf.freebase.com/ns/aviation.aircraft_owner"), new IRI("http://rdf.freebase.com/ns/business.employer"), new IRI("http://rdf.freebase.com/ns/freebase.apps.hosts.com.appspot.acre.juggle.juggle"), new IRI("http://rdf.freebase.com/ns/business.company"));
    System.out.println("creation time " + (System.currentTimeMillis() - start) + "ms");
    //now test the enhancement
    int numberOfTextAnnotations = checkAllTextAnnotations(ci.getMetadata());
    assertEquals(3, numberOfTextAnnotations);
    int numberOfEntityAnnotations = checkAllEntityAnnotations(ci.getMetadata());
    assertEquals(3, numberOfEntityAnnotations);
}

/**
 * Creates a {@link TextAnnotation} proxy selecting {@code selectedText} within
 * {@code SINGLE_SENTENCE}; start is the text's first index in that sentence and end is
 * start plus the text's length.
 */
private TextAnnotation addSentenceTextAnnotation(RdfEntityFactory factory, IRI ciUri, String dcType, double confidence, String selectedText) {
    TextAnnotation annotation = factory.getProxy(createEnhancementURI(), TextAnnotation.class);
    annotation.setCreator(TEST_ENHANCEMENT_ENGINE_URI);
    annotation.setCreated(new Date());
    annotation.setExtractedFrom(ciUri);
    annotation.getDcType().add(new IRI(dcType));
    annotation.setConfidence(confidence);
    annotation.setSelectedText(selectedText);
    // values are read back through the proxy getters, exactly as the inline code did
    annotation.setStart(SINGLE_SENTENCE.indexOf(annotation.getSelectedText()));
    annotation.setEnd(annotation.getStart() + annotation.getSelectedText().length());
    annotation.setSelectionContext(SINGLE_SENTENCE);
    return annotation;
}

/**
 * Creates an {@link EntityAnnotation} proxy that is related to {@code relation} and carries
 * the given label, entity reference and entity types.
 */
private void addSentenceEntityAnnotation(RdfEntityFactory factory, IRI ciUri, double confidence, TextAnnotation relation, String label, String reference, IRI... entityTypes) {
    EntityAnnotation annotation = factory.getProxy(createEnhancementURI(), EntityAnnotation.class);
    annotation.setCreator(TEST_ENHANCEMENT_ENGINE_URI);
    annotation.setCreated(new Date());
    annotation.setExtractedFrom(ciUri);
    annotation.getDcType().add(new IRI("http://www.example.org/cv/annotatation-types/entity#Entity"));
    annotation.setConfidence(confidence);
    annotation.getRelations().add(relation);
    annotation.setEntityLabel(label);
    annotation.setEntityReference(new IRI(reference));
    annotation.getEntityTypes().addAll(Arrays.asList(entityTypes));
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) RdfEntityFactory(org.apache.stanbol.enhancer.rdfentities.RdfEntityFactory) TextAnnotation(org.apache.stanbol.enhancer.rdfentities.fise.TextAnnotation) EntityAnnotation(org.apache.stanbol.enhancer.rdfentities.fise.EntityAnnotation) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) InMemoryContentItem(org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItem) Date(java.util.Date) Test(org.junit.Test)

Example 3 with TextAnnotation

use of org.apache.stanbol.enhancer.rdfentities.fise.TextAnnotation in project stanbol by apache.

the class TestEntityLinkingEnhancementEngine method getTextAnnotation.

/**
 * Writes a test {@link TextAnnotation} for {@code name} into the metadata of {@code ci}.
 *
 * <p>The annotation proxy is obtained from an {@link RdfEntityFactory} created over
 * {@code ci.getMetadata()} under an IRI that ends in the value of {@code randomUUID()}.
 * Despite the "get" prefix nothing is returned to the caller.
 *
 * @param ci content item whose stream is read as UTF-8 to locate {@code name} and whose
 *     metadata backs the created annotation
 * @param name text stored as the selected text; its first occurrence in the content gives
 *     the start offset
 * @param context text stored as the selection context
 * @param type value added to the collection returned by {@code getDcType()}
 */
public static void getTextAnnotation(ContentItem ci, String name, String context, IRI type) {
    String content;
    // IOUtils.toString does not close the stream it is handed, so close it here.
    // (Fully qualified type name: no additional import is required.)
    try (java.io.InputStream in = ci.getStream()) {
        content = IOUtils.toString(in, "UTF-8");
    } catch (IOException e) {
        // Best effort, as before: unreadable content is treated as empty.
        content = "";
    }
    RdfEntityFactory factory = RdfEntityFactory.createInstance(ci.getMetadata());
    TextAnnotation textAnnotation = factory.getProxy(new IRI("urn:iks-project:enhancer:test:text-annotation:" + randomUUID()), TextAnnotation.class);
    textAnnotation.setCreator(new IRI("urn:iks-project:enhancer:test:dummyEngine"));
    textAnnotation.setCreated(new Date());
    textAnnotation.setSelectedText(name);
    textAnnotation.setSelectionContext(context);
    textAnnotation.getDcType().add(type);
    // primitive int: avoids boxing/unboxing for the comparison and the arithmetic below
    int start = content.indexOf(name);
    if (start < 0) {
        // name does not occur in the content: fall back to an arbitrary offset in [0, 100)
        start = (int) (Math.random() * 100);
    }
    textAnnotation.setStart(start);
    textAnnotation.setEnd(start + name.length());
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) RdfEntityFactory(org.apache.stanbol.enhancer.rdfentities.RdfEntityFactory) IOException(java.io.IOException) TextAnnotation(org.apache.stanbol.enhancer.rdfentities.fise.TextAnnotation) Date(java.util.Date)

Aggregations

Date (java.util.Date): 3, IRI (org.apache.clerezza.commons.rdf.IRI): 3, RdfEntityFactory (org.apache.stanbol.enhancer.rdfentities.RdfEntityFactory): 3, TextAnnotation (org.apache.stanbol.enhancer.rdfentities.fise.TextAnnotation): 3, IOException (java.io.IOException): 2, InMemoryContentItem (org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItem): 1, EntityAnnotation (org.apache.stanbol.enhancer.rdfentities.fise.EntityAnnotation): 1, ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem): 1, Test (org.junit.Test): 1