Search in sources :

Example 1 with TypedLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TypedLiteralImpl in project stanbol by apache.

the class Mapping method toResource.

/**
 * Converts the parsed value to an RDF {@link RDFTerm} based on the
 * {@link Mapping#ontType ontology type} of this mapping.
 * <ul>
 * <li> no ontology type: a plain literal is created
 * <li> <code>rdfs:Resource</code>: an {@link IRI} is created; if validation
 * is enabled and the value is not a valid {@link URI} a plain literal is
 * created instead
 * <li> any other type: a typed literal is created. Values of types mapped
 * to {@link Date} are parsed via the Tika {@link Metadata} date parser and
 * fall back to <code>xsd:string</code> if they can not be parsed.
 * </ul>
 * If a converter is configured the created term is passed through it
 * before it is returned.
 * @param value the value
 * @param validate if <code>true</code> URIs are checked for syntactic
 * validity and typed literals (other than dates) are checked against the
 * Java type registered for the ontology type. Typed literals that fail
 * this check are created with the type <code>xsd:string</code> instead.
 * @return the {@link RDFTerm} or <code>null</code> if the parsed value is
 * <code>null</code> or {@link String#isEmpty() empty}.
 */
protected RDFTerm toResource(String value, boolean validate) {
    if (value == null || value.isEmpty()) {
        //ignore null and empty values
        return null;
    }
    RDFTerm object;
    if (ontType == null) {
        object = new PlainLiteralImpl(value);
    } else if (RDFS.Resource.equals(ontType)) {
        //compare by value: an equal IRI instance that is not the RDFS
        //constant itself must also be treated as a resource
        try {
            if (validate) {
                //only used to check the syntax of the parsed value
                new URI(value);
            }
            object = new IRI(value);
        } catch (URISyntaxException e) {
            log.warn("Unable to create Reference for value {} (not a valid URI) -> create a literal instead", value);
            object = new PlainLiteralImpl(value);
        }
    } else {
        //typed literal
        Class<?> clazz = Mapping.ONT_TYPE_MAP.get(ontType);
        //null-safe: clazz is null if no Java type is registered for ontType
        boolean isDate = Date.class.equals(clazz);
        if (isDate) {
            //use a dummy Metadata instance to get access to its
            //parseDate(..) method
            Metadata dummy = new Metadata();
            //any Property with the Date type could be used here
            dummy.add(DATE.getName(), value);
            //access parseDate(..)
            Date date = dummy.getDate(DublinCore.DATE);
            if (date != null) {
                //now use the Clerezza Literal factory
                object = lf.createTypedLiteral(date);
            } else {
                //fall back to xsd:string
                object = new TypedLiteralImpl(value, XSD.string);
            }
        } else {
            object = new TypedLiteralImpl(value, ontType);
        }
        //dates need no validation as they were already parsed above
        if (validate && clazz != null && !isDate) {
            try {
                lf.createObject(clazz, (Literal) object);
            } catch (NoConvertorException e) {
                //can not validate -> keep the literal as created
                log.info("Unable to validate typed literals of type {} because there is no converter for Class {} registered with Clerezza", ontType, clazz);
            } catch (InvalidLiteralTypeException e) {
                log.info("The value '{}' is not valid for dataType {}! create literal with type 'xsd:string' instead", value, ontType);
                object = new TypedLiteralImpl(value, XSD.string);
            }
        }
    }
    if (converter != null) {
        object = converter.convert(object);
    }
    return object;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Literal(org.apache.clerezza.commons.rdf.Literal) NoConvertorException(org.apache.clerezza.rdf.core.NoConvertorException) Metadata(org.apache.tika.metadata.Metadata) InvalidLiteralTypeException(org.apache.clerezza.rdf.core.InvalidLiteralTypeException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TypedLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.TypedLiteralImpl) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI) Date(java.util.Date)

Example 2 with TypedLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TypedLiteralImpl in project stanbol by apache.

the class EntityLinkingEngine method writeEnhancements.

/**
 * Adds the enhancements for the passed {@link LinkedEntity LinkedEntities}
 * to the metadata graph of the parsed ContentItem.
 * <p>
 * For every occurrence of a linked entity an existing TextAnnotation with
 * the same start and end is reused (this engine is then added as
 * contributor) or a new one is created. For every suggestion an
 * EntityAnnotation is created and related to all TextAnnotations of the
 * linked entity.
 * @param ci the content item providing the metadata graph
 * @param linkedEntities the linked entities to write
 * @param language the language used for the selection context and the
 * selected text literals as well as for the label lookup. If
 * <code>null</code> or empty those literals are created without language
 * @param writeRankings if <code>true</code> the entity ranking (if
 * available) is written for each suggestion
 */
private void writeEnhancements(ContentItem ci, Collection<LinkedEntity> linkedEntities, String language, boolean writeRankings) {
    final Language languageObject = (language == null || language.isEmpty()) ? null : new Language(language);
    //URIs of entities whose data were already copied to the metadata
    final Set<IRI> alreadyDereferenced = new HashSet<IRI>();
    final Graph metadata = ci.getMetadata();
    for (LinkedEntity linkedEntity : linkedEntities) {
        Collection<IRI> textAnnotations = new ArrayList<IRI>(linkedEntity.getOccurrences().size());
        //step 1: one TextAnnotation per Occurrence
        for (Occurrence occurrence : linkedEntity.getOccurrences()) {
            Literal startLiteral = literalFactory.createTypedLiteral(occurrence.getStart());
            Literal endLiteral = literalFactory.createTypedLiteral(occurrence.getEnd());
            //look for a TextAnnotation with the same start and end
            IRI textAnnotation = null;
            for (Iterator<Triple> candidates = metadata.filter(null, ENHANCER_START, startLiteral); candidates.hasNext(); ) {
                Triple candidate = candidates.next();
                if (!metadata.filter(candidate.getSubject(), ENHANCER_END, endLiteral).hasNext()) {
                    continue;
                }
                if (!metadata.filter(candidate.getSubject(), RDF_TYPE, ENHANCER_TEXTANNOTATION).hasNext()) {
                    continue;
                }
                textAnnotation = (IRI) candidate.getSubject();
                break;
            }
            if (textAnnotation != null) {
                //reuse the existing one and register this engine as contributor
                metadata.add(new TripleImpl(textAnnotation, DC_CONTRIBUTOR, new PlainLiteralImpl(this.getClass().getName())));
            } else {
                //none present ... create and describe a new one
                textAnnotation = EnhancementEngineHelper.createTextEnhancement(ci, this);
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_START, startLiteral));
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_END, endLiteral));
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(occurrence.getContext(), languageObject)));
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(occurrence.getSelectedText(), languageObject)));
                metadata.add(new TripleImpl(textAnnotation, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(linkedEntity.getScore())));
            }
            //dc:types are added to new and to existing TextAnnotations
            for (IRI type : linkedEntity.getTypes()) {
                metadata.add(new TripleImpl(textAnnotation, Properties.DC_TYPE, type));
            }
            textAnnotations.add(textAnnotation);
        }
        //step 2: one EntityAnnotation per Suggestion
        for (Suggestion suggestion : linkedEntity.getSuggestions()) {
            IRI entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(ci, this);
            //currently the label that was matched is written (and not the
            //best label available in the representation)
            Literal label = suggestion.getBestLabel(linkerConfig.getNameField(), language);
            Entity entity = suggestion.getEntity();
            metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_LABEL, label));
            metadata.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_REFERENCE, entity.getUri()));
            for (Iterator<IRI> types = entity.getReferences(linkerConfig.getTypeField()); types.hasNext(); ) {
                metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_TYPE, types.next()));
            }
            metadata.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(suggestion.getScore())));
            //relate the EntityAnnotation to all TextAnnotations of the linked entity
            for (IRI related : textAnnotations) {
                metadata.add(new TripleImpl(entityAnnotation, Properties.DC_RELATION, related));
            }
            //origin information provided by the EntitySearcher
            for (Entry<IRI, Collection<RDFTerm>> origin : entitySearcher.getOriginInformation().entrySet()) {
                IRI property = origin.getKey();
                for (RDFTerm value : origin.getValue()) {
                    metadata.add(new TripleImpl(entityAnnotation, property, value));
                }
            }
            //the ranking is only looked up if it is requested
            Float ranking = writeRankings ? suggestion.getEntity().getEntityRanking() : null;
            if (ranking != null) {
                //the float is written as double
                metadata.add(new TripleImpl(entityAnnotation, ENHANCER_ENTITY_RANKING, new TypedLiteralImpl(ranking.toString(), XSD_DOUBLE)));
            }
            //copy the RDF data of the entity (only once per entity)
            if (linkerConfig.isDereferenceEntitiesEnabled() && alreadyDereferenced.add(entity.getUri())) {
                //NOTE: only triples with the entity as subject are copied as
                //there might be other data in the graph
                Iterator<Triple> entityData = entity.getData().filter(entity.getUri(), null, null);
                while (entityData.hasNext()) {
                    metadata.add(entityData.next());
                }
            }
        }
    }
}
Also used : LinkedEntity(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity) IRI(org.apache.clerezza.commons.rdf.IRI) LinkedEntity(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity) Entity(org.apache.stanbol.enhancer.engines.entitylinking.Entity) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) ArrayList(java.util.ArrayList) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TypedLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.TypedLiteralImpl) Triple(org.apache.clerezza.commons.rdf.Triple) Suggestion(org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion) Graph(org.apache.clerezza.commons.rdf.Graph) Language(org.apache.clerezza.commons.rdf.Language) NlpEngineHelper.getLanguage(org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage) Literal(org.apache.clerezza.commons.rdf.Literal) Collection(java.util.Collection) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) Occurrence(org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity.Occurrence) HashSet(java.util.HashSet)

Example 3 with TypedLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.TypedLiteralImpl in project stanbol by apache.

the class ClerezzaTripleCallback method triple.

/**
 * Creates a triple for the parsed subject, predicate and literal value and
 * adds it to the graph.
 * <p>
 * The object is a plain literal with language if a language is parsed, a
 * plain literal without language if no datatype is parsed or the datatype
 * equals <code>RDF_LANG_STRING</code>, and a typed literal otherwise.
 * @param s the subject
 * @param p the predicate
 * @param value the lexical value of the literal
 * @param datatype the datatype of the literal or <code>null</code>
 * @param language the language of the literal or <code>null</code>
 * @param graph not used by this method
 */
private void triple(String s, String p, String value, String datatype, String language, String graph) {
    final BlankNodeOrIRI subject = getBlankNodeOrIRI(s);
    final IRI predicate = new IRI(p);
    //a language takes precedence over any datatype
    final RDFTerm object = language != null
            ? new PlainLiteralImpl(value, new Language(language))
            : (datatype == null || RDF_LANG_STRING.equals(datatype))
                    ? new PlainLiteralImpl(value)
                    : new TypedLiteralImpl(value, new IRI(datatype));
    mGraph.add(new TripleImpl(subject, predicate, object));
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Language(org.apache.clerezza.commons.rdf.Language) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TypedLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.TypedLiteralImpl) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Aggregations

IRI (org.apache.clerezza.commons.rdf.IRI)3 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)3 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)3 TypedLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.TypedLiteralImpl)3 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)2 Language (org.apache.clerezza.commons.rdf.Language)2 Literal (org.apache.clerezza.commons.rdf.Literal)2 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)2 URI (java.net.URI)1 URISyntaxException (java.net.URISyntaxException)1 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1 Date (java.util.Date)1 HashSet (java.util.HashSet)1 Graph (org.apache.clerezza.commons.rdf.Graph)1 Triple (org.apache.clerezza.commons.rdf.Triple)1 InvalidLiteralTypeException (org.apache.clerezza.rdf.core.InvalidLiteralTypeException)1 NoConvertorException (org.apache.clerezza.rdf.core.NoConvertorException)1 Entity (org.apache.stanbol.enhancer.engines.entitylinking.Entity)1 LinkedEntity (org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity)1