Search in sources :

Example 11 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class CeliNamedEntityExtractionEnhancementEngine method computeEnhancements.

@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    String text = "";
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    if (text.trim().length() == 0) {
        log.info("No text contained in ContentPart {" + contentPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
        return;
    }
    String language = EnhancementEngineHelper.getLanguage(ci);
    if (language == null) {
        throw new IllegalStateException("Unable to extract Language for " + "ContentItem " + ci.getUri() + ": This is also checked in the canEnhance " + "method! -> This indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    // used for the palin literals in TextAnnotations
    Language lang = new Language(language);
    try {
        List<NamedEntity> lista = this.client.extractEntities(text, language);
        LiteralFactory literalFactory = LiteralFactory.getInstance();
        Graph g = ci.getMetadata();
        for (NamedEntity ne : lista) {
            try {
                IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                // add selected text as PlainLiteral in the language extracted from the text
                g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(ne.getFormKind(), lang)));
                g.add(new TripleImpl(textAnnotation, DC_TYPE, getEntityRefForType(ne.type)));
                if (ne.getFrom() != null && ne.getTo() != null) {
                    g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(ne.getFrom().intValue())));
                    g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(ne.getTo().intValue())));
                    g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(getSelectionContext(text, ne.getFormKind(), ne.getFrom().intValue()), lang)));
                }
            } catch (NoConvertorException e) {
                log.error(e.getMessage(), e);
            }
        }
    } catch (IOException e) {
        throw new EngineException("Error while calling the CELI NER (Named Entity Recognition)" + " service (configured URL: " + serviceURL + ")!", e);
    } catch (SOAPException e) {
        throw new EngineException("Error wile encoding/decoding the request/" + "response to the CELI NER (Named Entity Recognition) service!", e);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) IOException(java.io.IOException) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) InvalidContentException(org.apache.stanbol.enhancer.servicesapi.InvalidContentException) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) NoConvertorException(org.apache.clerezza.rdf.core.NoConvertorException) SOAPException(javax.xml.soap.SOAPException) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 12 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class DBPSpotlightAnnotateEnhancementTest method initTest.

@Before
public void initTest() throws IOException {
    // create the contentItem for testing
    ci = ciFactory.createContentItem(new StringSource(TEST_TEXT));
    assertNotNull(ci);
    textContentPart = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
    assertNotNull(textContentPart);
    // add the language of the text
    ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, new PlainLiteralImpl("en")));
    assertEquals("en", EnhancementEngineHelper.getLanguage(ci));
}
Also used : PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Before(org.junit.Before)

Example 13 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class SpotlightEngineUtils method createEntityAnnotation.

/**
 * Creates a fise:EntityAnnotation for the parsed parameters and
 * adds it the the {@link ContentItem#getMetadata()}. <p>
 * This method assumes a write lock on the parsed content item.
 * @param resource the candidate resource
 * @param engine the engine
 * @param ci the content item
 * @param textAnnotation the fise:TextAnnotation to dc:relate the
 * created fise:EntityAnnotation
 * @return the URI of the created fise:TextAnnotation
 */
public static IRI createEntityAnnotation(CandidateResource resource, EnhancementEngine engine, ContentItem ci, IRI textAnnotation) {
    IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, engine);
    Graph model = ci.getMetadata();
    Literal label = new PlainLiteralImpl(resource.label, new Language("en"));
    model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
    model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
    model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, resource.getUri()));
    model.add(new TripleImpl(entityAnnotation, PROPERTY_CONTEXTUAL_SCORE, literalFactory.createTypedLiteral(resource.contextualScore)));
    model.add(new TripleImpl(entityAnnotation, PROPERTY_PERCENTAGE_OF_SECOND_RANK, literalFactory.createTypedLiteral(resource.percentageOfSecondRank)));
    model.add(new TripleImpl(entityAnnotation, PROPERTY_SUPPORT, literalFactory.createTypedLiteral(resource.support)));
    model.add(new TripleImpl(entityAnnotation, PROPERTY_PRIOR_SCORE, literalFactory.createTypedLiteral(resource.priorScore)));
    model.add(new TripleImpl(entityAnnotation, PROPERTY_FINAL_SCORE, literalFactory.createTypedLiteral(resource.finalScore)));
    return entityAnnotation;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Literal(org.apache.clerezza.commons.rdf.Literal) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 14 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class DBPSpotlightCandidatesEnhancementTest method initTest.

@Before
public void initTest() throws IOException {
    // create the contentItem for testing
    ci = ciFactory.createContentItem(new StringSource(TEST_TEXT));
    assertNotNull(ci);
    textContentPart = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
    assertNotNull(textContentPart);
    // add the language of the text
    ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, new PlainLiteralImpl("en")));
    assertEquals("en", EnhancementEngineHelper.getLanguage(ci));
}
Also used : PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Before(org.junit.Before)

Example 15 with TripleImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TripleImpl in project stanbol by apache.

the class DBPSpotlightSpotEnhancementEngine method createEnhancements.

/**
 * The method adds the returned DBpedia Spotlight surface forms to the
 * content item's metadata. For each one an TextAnnotation is created.
 *
 * @param occs
 *            a Collection of entity information
 * @param ci
 *            the content item
 */
protected void createEnhancements(Collection<SurfaceForm> occs, ContentItem ci, String content, Language lang) {
    HashMap<String, IRI> entityAnnotationMap = new HashMap<String, IRI>();
    Graph model = ci.getMetadata();
    for (SurfaceForm occ : occs) {
        IRI textAnnotation = SpotlightEngineUtils.createTextEnhancement(occ, this, ci, content, lang);
        if (entityAnnotationMap.containsKey(occ.name)) {
            model.add(new TripleImpl(entityAnnotationMap.get(occ.name), DC_RELATION, textAnnotation));
        } else {
            entityAnnotationMap.put(occ.name, textAnnotation);
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) HashMap(java.util.HashMap) SurfaceForm(org.apache.stanbol.enhancer.engines.dbpspotlight.model.SurfaceForm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Aggregations

TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)143 IRI (org.apache.clerezza.commons.rdf.IRI)104 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)69 Graph (org.apache.clerezza.commons.rdf.Graph)66 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)49 Triple (org.apache.clerezza.commons.rdf.Triple)41 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)26 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)23 HashMap (java.util.HashMap)20 Language (org.apache.clerezza.commons.rdf.Language)20 Literal (org.apache.clerezza.commons.rdf.Literal)20 LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)20 IOException (java.io.IOException)18 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)17 Test (org.junit.Test)16 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)15 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)14 HashSet (java.util.HashSet)13 StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource)13 BlankNode (org.apache.clerezza.commons.rdf.BlankNode)11