Search in sources :

Example 1 with InvalidLiteralTypeException

use of org.apache.clerezza.rdf.core.InvalidLiteralTypeException in project stanbol by apache.

The method toResource of the class Mapping.

/**
 * Converts the parsed value to an RDF {@link RDFTerm} based on the
 * {@link Mapping#ontType ontology type} of this mapping. Optionally also
 * validates that the parsed value is valid for that ontology type.
 * <ul>
 * <li>No ontology type: a plain literal is created.
 * <li><code>rdfs:Resource</code>: an {@link IRI} is created; if validation
 * is enabled and the value is not a valid URI, a plain literal is created
 * instead.
 * <li>Any other type: a typed literal is created. Values of types mapped
 * to {@link Date} are parsed and fall back to <code>xsd:string</code> if
 * they can not be parsed. Other values are (if requested) validated and
 * fall back to <code>xsd:string</code> if they are not valid for the type.
 * </ul>
 * If a converter is set for this mapping, the created term is passed
 * through it before it is returned.
 * @param value the value
 * @param validate if <code>true</code> the value is validated against the
 * ontology type of this mapping
 * @return the {@link RDFTerm} or <code>null</code> if the parsed value is
 * <code>null</code> or {@link String#isEmpty() empty}.
 */
protected RDFTerm toResource(String value, boolean validate) {
    if (value == null || value.isEmpty()) {
        //ignore null and empty values
        return null;
    }
    RDFTerm object;
    if (ontType == null) {
        object = new PlainLiteralImpl(value);
    } else if (ontType == RDFS.Resource) {
        try {
            if (validate) {
                //only used to check the syntax; the result is not needed
                new URI(value);
            }
            object = new IRI(value);
        } catch (URISyntaxException e) {
            log.warn("Unable to create Reference for value {} (not a valid URI)" + " -> create a literal instead", value);
            object = new PlainLiteralImpl(value);
        }
    } else {
        //typed literal
        //NOTE: clazz is null if no Java type is mapped to the ontType.
        //Date.class.equals(clazz) is used so that this case does not
        //cause a NullPointerException.
        Class<?> clazz = Mapping.ONT_TYPE_MAP.get(ontType);
        if (Date.class.equals(clazz)) {
            //a Metadata instance is only used to get access to its date
            //parsing; any Property with the Date type could be used here
            Metadata dummy = new Metadata();
            dummy.add(DATE.getName(), value);
            Date date = dummy.getDate(DublinCore.DATE);
            if (date != null) {
                //now use the Clerezza Literal factory
                object = lf.createTypedLiteral(date);
            } else {
                //fall back to xsd:string
                object = new TypedLiteralImpl(value, XSD.string);
            }
            //dates need no further validation
        } else {
            object = new TypedLiteralImpl(value, ontType);
            if (validate && clazz != null) {
                try {
                    //the created object is ignored; only the exceptions matter
                    lf.createObject(clazz, (Literal) object);
                } catch (NoConvertorException e) {
                    log.info("Unable to validate typed literals of type {} because " + "there is no converter for Class {} registered with Clerezza", ontType, clazz);
                } catch (InvalidLiteralTypeException e) {
                    log.info("The value '{}' is not valid for dataType {}! " + "create literal with type 'xsd:string' instead", value, ontType);
                    object = new TypedLiteralImpl(value, XSD.string);
                }
            }
            //else no validation needed
        }
    }
    if (converter != null) {
        object = converter.convert(object);
    }
    return object;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Literal(org.apache.clerezza.commons.rdf.Literal) NoConvertorException(org.apache.clerezza.rdf.core.NoConvertorException) Metadata(org.apache.tika.metadata.Metadata) InvalidLiteralTypeException(org.apache.clerezza.rdf.core.InvalidLiteralTypeException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TypedLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.TypedLiteralImpl) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI) Date(java.util.Date)

Example 2 with InvalidLiteralTypeException

use of org.apache.clerezza.rdf.core.InvalidLiteralTypeException in project stanbol by apache.

The method getLanguageAnnotations of the class EnhancementEngineHelper.

/**
 * Collects the subjects of all fise:TextAnnotations of the parsed graph
 * that have a value for the dc:language property. The result is ordered by
 * descending 'fise:confidence'; annotations for which no confidence could
 * be read are placed at the end.<p>
 * NOTE that the result will likely contain several annotations for the same
 * language if multiple language identification engines are used in the same
 * {@link Chain}.
 * @param graph the graph with the enhancements.
 * Typically {@link ContentItem#getMetadata()}
 * @return the sorted list of language annotations or an empty list if none.
 * @throws IllegalArgumentException if <code>null</code> is parsed as graph
 */
public static List<BlankNodeOrIRI> getLanguageAnnotations(Graph graph) {
    if (graph == null) {
        throw new IllegalArgumentException("The parsed graph MUST NOT be NULL!");
    }
    // The graph is filtered directly so that no QueryEngine is needed
    final Map<BlankNodeOrIRI, Double> scoreByAnnotation = new HashMap<BlankNodeOrIRI, Double>();
    List<BlankNodeOrIRI> result = new ArrayList<BlankNodeOrIRI>();
    for (Iterator<Triple> it = graph.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION); it.hasNext(); ) {
        BlankNodeOrIRI annotation = it.next().getSubject();
        if (getString(graph, annotation, DC_LANGUAGE) == null) {
            // not a language annotation
            continue;
        }
        Double score = null;
        try {
            score = get(graph, annotation, ENHANCER_CONFIDENCE, Double.class, lf);
        } catch (InvalidLiteralTypeException notADouble) {
            // STANBOL-1417: the confidence is not a double value ...
            try {
                // ... so retry with float and widen the result
                Float floatScore = get(graph, annotation, ENHANCER_CONFIDENCE, Float.class, lf);
                if (floatScore != null) {
                    score = Double.valueOf(floatScore.doubleValue());
                }
            } catch (InvalidLiteralTypeException notAFloat) {
                log.warn("Unable to parse confidence for language annotation " + annotation, notADouble);
            }
        }
        // a null score is stored on purpose: it marks a missing confidence
        scoreByAnnotation.put(annotation, score);
        result.add(annotation);
    }
    if (result.size() > 1) {
        Comparator<BlankNodeOrIRI> byDescendingScore = new Comparator<BlankNodeOrIRI>() {

            @Override
            public int compare(BlankNodeOrIRI left, BlankNodeOrIRI right) {
                Double leftScore = scoreByAnnotation.get(left);
                Double rightScore = scoreByAnnotation.get(right);
                if (leftScore != null && rightScore != null) {
                    // higher scores first
                    return rightScore.compareTo(leftScore);
                }
                if (leftScore == null && rightScore == null) {
                    return 0;
                }
                // the annotation without a score is ranked last
                return leftScore == null ? 1 : -1;
            }
        };
        Collections.sort(result, byDescendingScore);
    }
    return result;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) InvalidLiteralTypeException(org.apache.clerezza.rdf.core.InvalidLiteralTypeException) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Triple(org.apache.clerezza.commons.rdf.Triple)

Aggregations

BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)2 InvalidLiteralTypeException (org.apache.clerezza.rdf.core.InvalidLiteralTypeException)2 URI (java.net.URI)1 URISyntaxException (java.net.URISyntaxException)1 ArrayList (java.util.ArrayList)1 Date (java.util.Date)1 HashMap (java.util.HashMap)1 IRI (org.apache.clerezza.commons.rdf.IRI)1 Literal (org.apache.clerezza.commons.rdf.Literal)1 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)1 Triple (org.apache.clerezza.commons.rdf.Triple)1 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)1 TypedLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.TypedLiteralImpl)1 NoConvertorException (org.apache.clerezza.rdf.core.NoConvertorException)1 Metadata (org.apache.tika.metadata.Metadata)1