Search in sources :

Example 21 with PlainLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.

the class DBPSpotlightAnnotateEnhancementTest method initTest.

@Before
public void initTest() throws IOException {
    //create the contentItem for testing
    ci = ciFactory.createContentItem(new StringSource(TEST_TEXT));
    assertNotNull(ci);
    textContentPart = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
    assertNotNull(textContentPart);
    //add the language of the text
    ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, new PlainLiteralImpl("en")));
    assertEquals("en", EnhancementEngineHelper.getLanguage(ci));
}
Also used : PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Before(org.junit.Before)

Example 22 with PlainLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.

the class DBPSpotlightCandidatesEnhancementTest method initTest.

@Before
public void initTest() throws IOException {
    //create the contentItem for testing
    ci = ciFactory.createContentItem(new StringSource(TEST_TEXT));
    assertNotNull(ci);
    textContentPart = ContentItemHelper.getBlob(ci, Collections.singleton("text/plain"));
    assertNotNull(textContentPart);
    //add the language of the text
    ci.getMetadata().add(new TripleImpl(ci.getUri(), Properties.DC_LANGUAGE, new PlainLiteralImpl("en")));
    assertEquals("en", EnhancementEngineHelper.getLanguage(ci));
}
Also used : PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Before(org.junit.Before)

Example 23 with PlainLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.

the class CeliLanguageIdentifierEnhancementEngine method computeEnhancements.

@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, SUPPORTED_MIMTYPES);
    if (contentPart == null) {
        throw new IllegalStateException("No ContentPart with Mimetype '" + TEXT_PLAIN_MIMETYPE + "' found for ContentItem " + ci.getUri() + ": This is also checked in the canEnhance method! -> This " + "indicated an Bug in the implementation of the " + "EnhancementJobManager!");
    }
    String text = "";
    try {
        text = ContentItemHelper.getText(contentPart.getValue());
    } catch (IOException e) {
        throw new InvalidContentException(this, ci, e);
    }
    if (text.trim().length() == 0) {
        log.info("No text contained in ContentPart {" + contentPart.getKey() + "} of ContentItem {" + ci.getUri() + "}");
        return;
    }
    try {
        String[] tmps = text.split(" ");
        List<GuessedLanguage> lista = null;
        if (tmps.length > 5)
            lista = this.client.guessLanguage(text);
        else
            lista = this.client.guessQueryLanguage(text);
        Graph g = ci.getMetadata();
        //in ENHANCE_ASYNC we need to use read/write locks on the ContentItem
        ci.getLock().writeLock().lock();
        try {
            GuessedLanguage gl = lista.get(0);
            IRI textEnhancement = EnhancementEngineHelper.createTextEnhancement(ci, this);
            g.add(new TripleImpl(textEnhancement, DC_LANGUAGE, new PlainLiteralImpl(gl.getLang())));
            g.add(new TripleImpl(textEnhancement, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(gl.getConfidence())));
            g.add(new TripleImpl(textEnhancement, DC_TYPE, DCTERMS_LINGUISTIC_SYSTEM));
        } finally {
            ci.getLock().writeLock().unlock();
        }
    } catch (IOException e) {
        throw new EngineException("Error while calling the CELI language" + " identifier service (configured URL: " + serviceURL + ")!", e);
    } catch (SOAPException e) {
        throw new EngineException("Error wile encoding/decoding the request/" + "response to the CELI language identifier service!", e);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) IOException(java.io.IOException) InvalidContentException(org.apache.stanbol.enhancer.servicesapi.InvalidContentException) Graph(org.apache.clerezza.commons.rdf.Graph) SOAPException(javax.xml.soap.SOAPException) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 24 with PlainLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.

the class DBPSpotlightDisambiguateEnhancementEngine method createEnhancements.

/**
	 * The method adds the returned DBpedia Spotlight annotations to the content
	 * item's metadata. For each DBpedia resource an EntityAnnotation is created
	 * and linked to the according TextAnnotation.
	 * 
	 * @param occs
	 *            a Collection of entity information
	 * @param ci
	 *            the content item
	 */
public void createEnhancements(Collection<Annotation> occs, ContentItem ci, Language language) {
    HashMap<RDFTerm, IRI> entityAnnotationMap = new HashMap<RDFTerm, IRI>();
    for (Annotation occ : occs) {
        if (textAnnotationsMap.get(occ.surfaceForm) != null) {
            IRI textAnnotation = textAnnotationsMap.get(occ.surfaceForm);
            Graph model = ci.getMetadata();
            IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            entityAnnotationMap.put(occ.uri, entityAnnotation);
            Literal label = new PlainLiteralImpl(occ.surfaceForm.name, language);
            model.add(new TripleImpl(entityAnnotation, DC_RELATION, textAnnotation));
            model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_LABEL, label));
            Collection<String> t = occ.getTypeNames();
            if (t != null) {
                Iterator<String> it = t.iterator();
                while (it.hasNext()) model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_TYPE, new IRI(it.next())));
            }
            model.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, occ.uri));
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Graph(org.apache.clerezza.commons.rdf.Graph) HashMap(java.util.HashMap) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Annotation(org.apache.stanbol.enhancer.engines.dbpspotlight.model.Annotation)

Example 25 with PlainLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.

the class NEREngineCore method findNamedEntities.

protected void findNamedEntities(final ContentItem ci, final AnalysedText at, final String text, final String lang, final TokenNameFinderModel nameFinderModel) {
    if (ci == null) {
        throw new IllegalArgumentException("Parsed ContentItem MUST NOT be NULL");
    }
    if (at == null && text == null) {
        log.warn("NULL was parsed as AnalysedText AND Text for content item " + ci.getUri() + ". One of the two MUST BE present! -> call ignored");
        return;
    }
    final Language language;
    if (lang != null && !lang.isEmpty()) {
        language = new Language(lang);
    } else {
        language = null;
    }
    if (log.isDebugEnabled()) {
        log.debug("findNamedEntities model={},  language={}, text=", new Object[] { nameFinderModel, language, StringUtils.abbreviate(at != null ? at.getSpan() : text, 100) });
    }
    LiteralFactory literalFactory = LiteralFactory.getInstance();
    Graph g = ci.getMetadata();
    Map<String, List<NameOccurrence>> entityNames;
    if (at != null) {
        entityNames = extractNameOccurrences(nameFinderModel, at, lang);
    } else {
        entityNames = extractNameOccurrences(nameFinderModel, text, lang);
    }
    //lock the ContentItem while writing the RDF data for found Named Entities
    ci.getLock().writeLock().lock();
    try {
        Map<String, IRI> previousAnnotations = new LinkedHashMap<String, IRI>();
        for (Map.Entry<String, List<NameOccurrence>> nameInContext : entityNames.entrySet()) {
            String name = nameInContext.getKey();
            List<NameOccurrence> occurrences = nameInContext.getValue();
            IRI firstOccurrenceAnnotation = null;
            for (NameOccurrence occurrence : occurrences) {
                IRI textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(name, language)));
                g.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occurrence.context, language)));
                if (occurrence.type != null) {
                    g.add(new TripleImpl(textAnnotation, DC_TYPE, occurrence.type));
                }
                if (occurrence.confidence != null) {
                    g.add(new TripleImpl(textAnnotation, ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(occurrence.confidence)));
                }
                if (occurrence.start != null && occurrence.end != null) {
                    g.add(new TripleImpl(textAnnotation, ENHANCER_START, literalFactory.createTypedLiteral(occurrence.start)));
                    g.add(new TripleImpl(textAnnotation, ENHANCER_END, literalFactory.createTypedLiteral(occurrence.end)));
                }
                // name
                if (firstOccurrenceAnnotation == null) {
                    // specific occurrence
                    for (Map.Entry<String, IRI> entry : previousAnnotations.entrySet()) {
                        if (entry.getKey().contains(name)) {
                            // we have found a most specific previous
                            // occurrence, use it as subsumption target
                            firstOccurrenceAnnotation = entry.getValue();
                            g.add(new TripleImpl(textAnnotation, DC_RELATION, firstOccurrenceAnnotation));
                            break;
                        }
                    }
                    if (firstOccurrenceAnnotation == null) {
                        // no most specific previous occurrence, I am the first,
                        // most specific occurrence to be later used as a target
                        firstOccurrenceAnnotation = textAnnotation;
                        previousAnnotations.put(name, textAnnotation);
                    }
                } else {
                    // I am referring to a most specific first occurrence of the
                    // same name
                    g.add(new TripleImpl(textAnnotation, DC_RELATION, firstOccurrenceAnnotation));
                }
            }
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) LiteralFactory(org.apache.clerezza.rdf.core.LiteralFactory) LinkedHashMap(java.util.LinkedHashMap) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) List(java.util.List) ArrayList(java.util.ArrayList) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)

Aggregations

PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)82 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)69 IRI (org.apache.clerezza.commons.rdf.IRI)58 Graph (org.apache.clerezza.commons.rdf.Graph)34 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)20 Language (org.apache.clerezza.commons.rdf.Language)19 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)18 Literal (org.apache.clerezza.commons.rdf.Literal)16 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)16 IOException (java.io.IOException)14 HashMap (java.util.HashMap)13 Triple (org.apache.clerezza.commons.rdf.Triple)12 StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource)12 ArrayList (java.util.ArrayList)11 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)11 LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory)10 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)10 Test (org.junit.Test)10 HashSet (java.util.HashSet)8 SOAPException (javax.xml.soap.SOAPException)6