Search in sources :

Example 51 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class Mapping method toResource.

/**
 * Converts the parsed value to an RDF {@link RDFTerm} based on the
 * {@link Mapping#ontType ontology type} of this mapping. Optionally also
 * validates that the parsed value is valid for that ontology type.
 * <ul>
 * <li>no ontology type: a plain literal is created</li>
 * <li><code>rdfs:Resource</code>: an {@link IRI} is created; if validation
 * is requested and the value is not a valid URI a plain literal is created
 * instead</li>
 * <li>any other type: a typed literal is created. Values of types mapped to
 * {@link Date} are parsed via the Tika {@link Metadata} date parsing and
 * fall back to <code>xsd:string</code> if they cannot be parsed</li>
 * </ul>
 * If a converter is configured, the created term is passed through it
 * before it is returned.
 * @param value the value
 * @param validate if <code>true</code> the value is validated against the
 * ontology type; invalid values are downgraded (plain literal for
 * references, <code>xsd:string</code> for typed literals)
 * @return the {@link RDFTerm} or <code>null</code> if the parsed value is
 * <code>null</code> or {@link String#isEmpty() empty}.
 */
protected RDFTerm toResource(String value, boolean validate) {
    if (value == null || value.isEmpty()) {
        //ignore null and empty values
        return null;
    }
    RDFTerm object;
    if (ontType == null) {
        object = new PlainLiteralImpl(value);
    } else if (RDFS.Resource.equals(ontType)) {
        //compare by value: an equal IRI need not be the same instance
        try {
            if (validate) {
                //only used to check the syntax of the parsed value
                new URI(value);
            }
            object = new IRI(value);
        } catch (URISyntaxException e) {
            log.warn("Unable to create Reference for value {} (not a valid URI)" + " -> create a literal instead", value);
            object = new PlainLiteralImpl(value);
        }
    } else {
        //typed literal
        Class<?> clazz = Mapping.ONT_TYPE_MAP.get(ontType);
        //null-safe: clazz is null if no Java type is mapped to the ontType
        boolean isDate = Date.class.equals(clazz);
        if (isDate) {
            //a Metadata instance is only needed to access its
            //parseDate(..) functionality via getDate(..)
            Metadata dummy = new Metadata();
            //any Property with the Date type could be used here
            dummy.add(DATE.getName(), value);
            Date date = dummy.getDate(DublinCore.DATE);
            if (date != null) {
                //now use the Clerezza Literal factory
                object = lf.createTypedLiteral(date);
            } else {
                //fall back to xsd:string
                object = new TypedLiteralImpl(value, XSD.string);
            }
        } else {
            object = new TypedLiteralImpl(value, ontType);
        }
        //dates need no validation as they were already parsed above
        if (validate && clazz != null && !isDate) {
            try {
                lf.createObject(clazz, (Literal) object);
            } catch (NoConvertorException e) {
                //validation is not possible -> keep the literal as is
                log.info("Unable to validate typed literals of type {} because" + " there is no converter for Class {} registered with Clerezza", ontType, clazz);
            } catch (InvalidLiteralTypeException e) {
                log.info("The value '{}' is not valid for dataType {}!" + " create literal with type 'xsd:string' instead", value, ontType);
                object = new TypedLiteralImpl(value, XSD.string);
            }
        }
    }
    if (converter != null) {
        object = converter.convert(object);
    }
    return object;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Literal(org.apache.clerezza.commons.rdf.Literal) NoConvertorException(org.apache.clerezza.rdf.core.NoConvertorException) Metadata(org.apache.tika.metadata.Metadata) InvalidLiteralTypeException(org.apache.clerezza.rdf.core.InvalidLiteralTypeException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TypedLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.TypedLiteralImpl) URISyntaxException(java.net.URISyntaxException) URI(java.net.URI) Date(java.util.Date)

Example 52 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class TikaEngineTest method verifyValues.

/**
 * Asserts that the metadata of the content item contains at least one value
 * for the given subject and property, that every such value is a
 * {@link Literal} (with the given data type, if one is parsed) and that the
 * lexical form of every value is one of the expected ones. Each expected
 * lexical form may be matched at most once.
 * @param ci the content item providing the metadata
 * @param subject the subject of the triples to check
 * @param property the property of the triples to check
 * @param dataType the expected data type or <code>null</code> to skip the
 * data type check
 * @param lexValues the expected lexical forms
 * @return the literals found for the subject and property
 */
private static Set<Literal> verifyValues(ContentItem ci, BlankNodeOrIRI subject, IRI property, IRI dataType, String... lexValues) {
    Iterator<Triple> triples = ci.getMetadata().filter(subject, property, null);
    assertTrue(triples.hasNext());
    Set<String> remaining = new HashSet<String>(Arrays.asList(lexValues));
    Set<Literal> literals = new HashSet<Literal>(remaining.size());
    while (triples.hasNext()) {
        RDFTerm term = triples.next().getObject();
        //values need to be literals regardless of the expected data type
        assertTrue(term instanceof Literal);
        Literal literal = (Literal) term;
        if (dataType != null) {
            assertEquals(dataType, literal.getDataType());
        }
        //removal fails for unexpected as well as duplicated values
        assertTrue(remaining.remove(literal.getLexicalForm()));
        literals.add(literal);
    }
    return literals;
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) HashSet(java.util.HashSet)

Example 53 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class TikaEngineTest method testGEOMetadata.

/**
 * Enhances the <code>testJPEG_GEO.jpg</code> test file and verifies the
 * extracted geo coordinates, both as location of the Media Resource
 * Ontology and as <code>lat</code>/<code>long</code> values of the GEO
 * ontology.
 */
@Test
public void testGEOMetadata() throws EngineException, IOException, ParseException {
    log.info(">>> testGEOMetadata <<<");
    //properties of the Media Resource Ontology
    IRI mediaHasLocation = new IRI(NamespaceEnum.media + "hasLocation");
    IRI mediaLatitude = new IRI(NamespaceEnum.media + "locationLatitude");
    IRI mediaLongitude = new IRI(NamespaceEnum.media + "locationLongitude");
    ContentItem ci = createContentItem("testJPEG_GEO.jpg", OCTET_STREAM.toString());
    assertTrue(engine.canEnhance(ci) != CANNOT_ENHANCE);
    engine.computeEnhancements(ci);
    //exactly one location is expected and it needs to be a BlankNodeOrIRI
    Iterator<Triple> locations = ci.getMetadata().filter(ci.getUri(), mediaHasLocation, null);
    assertTrue(locations.hasNext());
    RDFTerm locationTerm = locations.next().getObject();
    assertFalse(locations.hasNext());
    assertTrue(locationTerm instanceof BlankNodeOrIRI);
    BlankNodeOrIRI location = verifyBlankNodeOrIRI(ci, mediaHasLocation);
    //first the coordinates attached to the media location
    verifyValue(ci, location, mediaLatitude, XSD.double_, "12.54321");
    verifyValue(ci, location, mediaLongitude, XSD.double_, "-54.1234");
    //second the coordinates using the GEO ontology
    IRI geoLat = new IRI(NamespaceEnum.geo + "lat");
    IRI geoLong = new IRI(NamespaceEnum.geo + "long");
    verifyValue(ci, geoLat, XSD.double_, "12.54321");
    verifyValue(ci, geoLong, XSD.double_, "-54.1234");
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 54 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class TikaEngineTest method verifyValue.

/**
 * Asserts that the metadata of the content item contains exactly one value
 * for the given subject and property and that this value is a literal with
 * the expected data type and lexical value.
 * <p>
 * <code>xsd:dateTime</code> values that do not end with <code>'Z'</code>
 * are compared as {@link Date} objects, parsed with the default time zone
 * date format, instead of by their lexical form.
 * @param ci the content item providing the metadata
 * @param subject the subject of the triple to check
 * @param property the property of the triple to check
 * @param dataType the expected data type or <code>null</code> to skip the
 * data type check
 * @param lexValue the expected lexical value
 * @return the verified literal
 * @throws ParseException if the expected date value can not be parsed
 */
private static Literal verifyValue(ContentItem ci, BlankNodeOrIRI subject, IRI property, IRI dataType, String lexValue) throws ParseException {
    Iterator<Triple> triples = ci.getMetadata().filter(subject, property, null);
    assertTrue(triples.hasNext());
    RDFTerm term = triples.next().getObject();
    assertFalse(triples.hasNext());
    Literal literal = (Literal) term;
    if (dataType != null) {
        assertEquals(dataType, literal.getDataType());
    }
    boolean hostTimeZoneDate = XSD.dateTime.equals(dataType) && lexValue.charAt(lexValue.length() - 1) != 'Z';
    if (!hostTimeZoneDate) {
        assertEquals(lexValue, literal.getLexicalForm());
        return literal;
    }
    //consider the time zone of the host running this test
    Date expectedDate = dateDefaultTimezone.parse(lexValue);
    assertEquals(expectedDate, lf.createObject(Date.class, literal));
    return literal;
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Date(java.util.Date)

Example 55 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class RdfRepresentation method remove.

/**
 * Removes the parsed value from the given field of this representation.
 * The parsed value is first passed to
 * <code>ModelUtils.checkValues(..)</code>; every value collected by that
 * call is then removed based on its type.
 * @param field the field (used as IRI of the property)
 * @param parsedValue the value to remove. <code>null</code> values are
 * ignored (a warning is logged)
 * @throws IllegalArgumentException if the parsed field is <code>null</code>
 * or empty
 */
@Override
public void remove(String field, Object parsedValue) {
    if (field == null) {
        throw new IllegalArgumentException("The parsed field MUST NOT be NULL");
    }
    if (field.isEmpty()) {
        throw new IllegalArgumentException("The parsed field MUST NOT be Empty");
    }
    if (parsedValue == null) {
        log.warn("NULL parsed as value in remove method for symbol " + getId() + " and field " + field + " -> call ignored");
        return;
    }
    IRI property = new IRI(field);
    Collection<Object> toRemove = new ArrayList<Object>();
    ModelUtils.checkValues(valueFactory, parsedValue, toRemove);
    //NOTE: the order of the checks matters, the more specific RDF
    //implementations are tested before the generic model types
    for (Object value : toRemove) {
        if (value instanceof RDFTerm) {
            //Clerezza types can be removed directly
            graphNode.deleteProperty(property, (RDFTerm) value);
        } else if (value instanceof RdfReference) {
            //use the wrapped IRI instead of creating a new instance
            graphNode.deleteProperty(property, ((RdfReference) value).getIRI());
        } else if (value instanceof Reference) {
            Reference reference = (Reference) value;
            graphNode.deleteProperty(property, new IRI(reference.getReference()));
        } else if (value instanceof RdfText) {
            //use the wrapped literal instead of creating a new instance
            graphNode.deleteProperty(property, ((RdfText) value).getLiteral());
        } else if (value instanceof Text) {
            Text text = (Text) value;
            removeNaturalText(field, text.getText(), text.getLanguage());
        } else {
            //everything else is handled as a typed literal
            removeTypedLiteral(property, value);
        }
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Reference(org.apache.stanbol.entityhub.servicesapi.model.Reference) ArrayList(java.util.ArrayList) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Text(org.apache.stanbol.entityhub.servicesapi.model.Text)

Aggregations

RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)126 IRI (org.apache.clerezza.commons.rdf.IRI)84 Triple (org.apache.clerezza.commons.rdf.Triple)70 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)48 Literal (org.apache.clerezza.commons.rdf.Literal)35 Test (org.junit.Test)35 HashSet (java.util.HashSet)30 HashMap (java.util.HashMap)28 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)26 Graph (org.apache.clerezza.commons.rdf.Graph)24 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)18 ArrayList (java.util.ArrayList)17 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)16 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)13 OWLOntologyID (org.semanticweb.owlapi.model.OWLOntologyID)13 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)12 Collection (java.util.Collection)10 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)10 Lock (java.util.concurrent.locks.Lock)9 IOException (java.io.IOException)5