Search in sources:

Example 66 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class ContentItemReader method readFrom.

/**
 * Reads a {@link ContentItem} from the request entity stream.
 * <p>
 * Two request layouts are supported:
 * <ul>
 *   <li><b>multipart/form-data</b>: an optional 'metadata' MIME part (MUST be
 *   first if present), followed by the required 'content' part, optionally
 *   followed by a 'properties' part (JSON encoded Request Properties) and any
 *   number of additional parts holding serialised RDF content parts.</li>
 *   <li><b>any other media type</b>: the entity stream is used directly as the
 *   single content of the created ContentItem.</li>
 * </ul>
 * The URIs of all parsed content parts are stored under
 * {@code PARSED_CONTENT_URIS} in the Request Properties content part.
 * @throws WebApplicationException with BAD_REQUEST status on any violation of
 * the expected multipart layout or on unparsable part payloads
 */
@Override
public ContentItem readFrom(Class<ContentItem> type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap<String, String> httpHeaders, InputStream entityStream) throws IOException, WebApplicationException {
    // boolean withMetadata = withMetadata(httpHeaders);
    ContentItem contentItem = null;
    // may be null - in that case the 'metadata' multipart name (or the
    // ContentItemFactory) provides the id
    IRI contentItemId = getContentItemId();
    if (log.isTraceEnabled()) {
        // NOTE: enabling TRACE level logging will copy the parsed content
        // into a BYTE array
        log.trace("Parse ContentItem from");
        log.trace("  - MediaType: {}", mediaType);
        log.trace("  - Headers:");
        for (Entry<String, List<String>> header : httpHeaders.entrySet()) {
            log.trace("      {}: {}", header.getKey(), header.getValue());
        }
        byte[] content = IOUtils.toByteArray(entityStream);
        log.trace("content: \n{}", new String(content, "UTF-8"));
        IOUtils.closeQuietly(entityStream);
        // the original stream was consumed for logging - continue parsing
        // from the buffered copy
        entityStream = new ByteArrayInputStream(content);
    }
    Set<String> parsedContentIds = new HashSet<String>();
    if (mediaType.isCompatible(MULTIPART)) {
        log.debug(" - parse Multipart MIME ContentItem");
        // try to read ContentItem from "multipart/form-data"
        Graph metadata = null;
        FileItemIterator fileItemIterator;
        try {
            fileItemIterator = fu.getItemIterator(new MessageBodyReaderContext(entityStream, mediaType));
            while (fileItemIterator.hasNext()) {
                FileItemStream fis = fileItemIterator.next();
                if (fis.getFieldName().equals("metadata")) {
                    // metadata MUST come before content (the content part
                    // consumes the metadata graph on creation)
                    if (contentItem != null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("The Multipart MIME part with the 'metadata' " + "MUST BE before the MIME part containing the " + "'content'!").build());
                    }
                    // only used if not parsed as query param
                    if (contentItemId == null && fis.getName() != null && !fis.getName().isEmpty()) {
                        contentItemId = new IRI(fis.getName());
                    }
                    metadata = new IndexedGraph();
                    try {
                        getParser().parse(metadata, fis.openStream(), fis.getContentType());
                    } catch (Exception e) {
                        // NOTE(review): fixed missing space in the formatted message
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity(String.format("Unable to parse Metadata " + "from Multipart MIME part '%s' (" + "contentItem: %s | contentType: %s)", fis.getFieldName(), fis.getName(), fis.getContentType())).build());
                    }
                } else if (fis.getFieldName().equals("content")) {
                    contentItem = createContentItem(contentItemId, metadata, fis, parsedContentIds);
                } else if (fis.getFieldName().equals("properties") || fis.getFieldName().equals(REQUEST_PROPERTIES_URI.getUnicodeString())) {
                    // parse the RequestProperties
                    if (contentItem == null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts for " + "Request Properties MUST BE after the " + "MIME parts for 'metadata' AND 'content'").build());
                    }
                    MediaType propMediaType = MediaType.valueOf(fis.getContentType());
                    if (!APPLICATION_JSON_TYPE.isCompatible(propMediaType)) {
                        // NOTE(review): fixed 'appicaltion/json' typo and the
                        // missing spaces at the concatenation boundaries
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Request Properties (Multipart MIME parts " + "with the name '" + fis.getFieldName() + "') MUST " + "BE encoded as 'application/json' (encountered: '" + fis.getContentType() + "')!").build());
                    }
                    String propCharset = propMediaType.getParameters().get("charset");
                    if (propCharset == null) {
                        // fall back to UTF-8 if no charset parameter is present
                        propCharset = "UTF-8";
                    }
                    Map<String, Object> reqProp = ContentItemHelper.initRequestPropertiesContentPart(contentItem);
                    try {
                        reqProp.putAll(toMap(new JSONObject(IOUtils.toString(fis.openStream(), propCharset))));
                    } catch (JSONException e) {
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity("Unable to parse Request Properties from " + "Multipart MIME parts with the name 'properties'!").build());
                    }
                } else {
                    // additional metadata as serialised RDF
                    if (contentItem == null) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts for additional " + "contentParts MUST BE after the MIME " + "parts for 'metadata' AND 'content'").build());
                    }
                    if (fis.getFieldName() == null || fis.getFieldName().isEmpty()) {
                        throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("Multipart MIME parts representing " + "ContentParts for additional RDF metadata " + "MUST define the contentParts URI as " + "'name' of the MIME part!").build());
                    }
                    Graph graph = new IndexedGraph();
                    try {
                        getParser().parse(graph, fis.openStream(), fis.getContentType());
                    } catch (Exception e) {
                        throw new WebApplicationException(e, Response.status(Response.Status.BAD_REQUEST).entity(String.format("Unable to parse RDF " + "for ContentPart '%s' ( contentType: %s)", fis.getName(), fis.getContentType())).build());
                    }
                    // the field name of the MIME part is the URI of the content part
                    IRI contentPartId = new IRI(fis.getFieldName());
                    contentItem.addPart(contentPartId, graph);
                }
            }
            if (contentItem == null) {
                throw new WebApplicationException(Response.status(Response.Status.BAD_REQUEST).entity("The parsed multipart content item does not contain " + "any content. The content is expected to be contained " + "in a MIME part with the name 'content'. This part can " + "be also a 'multipart/alternate' if multiple content " + "parts need to be included in requests.").build());
            }
        } catch (FileUploadException e) {
            throw new WebApplicationException(e, Response.Status.BAD_REQUEST);
        }
    } else {
        // normal content
        ContentItemFactory ciFactory = getContentItemFactory();
        contentItem = ciFactory.createContentItem(contentItemId, new StreamSource(entityStream, mediaType.toString()));
        // add the URI of the main content
        parsedContentIds.add(contentItem.getPartUri(0).getUnicodeString());
    }
    // set the parsed contentIDs to the EnhancementProperties
    Map<String, Object> ep = ContentItemHelper.initRequestPropertiesContentPart(contentItem);
    parseEnhancementPropertiesFromParameters(ep);
    ep.put(PARSED_CONTENT_URIS, Collections.unmodifiableSet(parsedContentIds));
    // STANBOL-660: set the language of the content if explicitly parsed in the request
    String contentLanguage = getContentLanguage();
    if (!StringUtils.isBlank(contentLanguage)) {
        // language codes are case insensitive ... so we convert to lower case
        contentLanguage = contentLanguage.toLowerCase(Locale.ROOT);
        createParsedLanguageAnnotation(contentItem, contentLanguage);
    // previously only the dc:language property was set to the contentItem. However this
    // information is only used as fallback if no Language annotation is present. However
    // if a user explicitly parses the language he expects this language to be used
    // so this was change with STANBOL-1417
    // EnhancementEngineHelper.set(contentItem.getMetadata(), contentItem.getUri(),
    // DC_LANGUAGE, new PlainLiteralImpl(contentLanguage));
    }
    return contentItem;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) ContentItemFactory(org.apache.stanbol.enhancer.servicesapi.ContentItemFactory) WebApplicationException(javax.ws.rs.WebApplicationException) StreamSource(org.apache.stanbol.enhancer.servicesapi.impl.StreamSource) JSONException(org.codehaus.jettison.json.JSONException) URISyntaxException(java.net.URISyntaxException) WebApplicationException(javax.ws.rs.WebApplicationException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) JSONException(org.codehaus.jettison.json.JSONException) FileUploadException(org.apache.commons.fileupload.FileUploadException) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) Graph(org.apache.clerezza.commons.rdf.Graph) JSONObject(org.codehaus.jettison.json.JSONObject) ByteArrayInputStream(java.io.ByteArrayInputStream) FileItemStream(org.apache.commons.fileupload.FileItemStream) MediaType(javax.ws.rs.core.MediaType) List(java.util.List) ArrayList(java.util.ArrayList) JSONObject(org.codehaus.jettison.json.JSONObject) IndexedGraph(org.apache.stanbol.commons.indexedgraph.IndexedGraph) FileItemIterator(org.apache.commons.fileupload.FileItemIterator) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) FileUploadException(org.apache.commons.fileupload.FileUploadException) HashSet(java.util.HashSet)

Example 67 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class ContentItemTest method removeRemoveByUri.

/**
 * Tests that a content part added under a given URI is retrievable and that
 * it can no longer be retrieved after {@code removePart(uri)} was called.
 */
@Test
public void removeRemoveByUri() throws IOException {
    ContentItem contentItem = createContentItem(contentSource);
    IRI partUri = new IRI("urn:content.part:remove.test");
    contentItem.addPart(partUri, new Date());
    // after adding, the lookup MUST NOT throw NoSuchPartException
    try {
        contentItem.getPart(partUri, Date.class);
    } catch (NoSuchPartException e) {
        assertFalse("The part with the uri " + partUri + " was not added correctly", true);
    }
    contentItem.removePart(partUri);
    // after removal, the lookup MUST throw NoSuchPartException
    try {
        contentItem.getPart(partUri, Date.class);
        assertFalse("The part with the uri " + partUri + " was not removed correctly", true);
    } catch (NoSuchPartException e) {
        // expected
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) NoSuchPartException(org.apache.stanbol.enhancer.servicesapi.NoSuchPartException) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Date(java.util.Date) Test(org.junit.Test)

Example 68 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class EnhancementStructureHelper method validateEnhancement.

/**
 * Validates all fise:Enhancement related properties and values. NOTE that
 * this method is called by {@link #validateEntityAnnotation(Graph, IRI, Map)}
 * and {@link #validateTextAnnotation(Graph, IRI, String)}.
 * @param enhancements the enhancements graph
 * @param enhancement the fise:Enhancement to validate
 * @param expectedValues expected values (properties for the values are used as keys)
 */
public static void validateEnhancement(Graph enhancements, IRI enhancement, Map<IRI, RDFTerm> expectedValues) {
    // validate the rdf:type
    Iterator<Triple> rdfTypeIterator = enhancements.filter(enhancement, RDF_TYPE, ENHANCER_ENHANCEMENT);
    assertTrue("Parsed Enhancement " + enhancement + " is missing the fise:Enhancement type ", rdfTypeIterator.hasNext());
    // validate the creator (required, single value, Literal or IRI)
    Iterator<Triple> creatorIterator = enhancements.filter(enhancement, Properties.DC_CREATOR, null);
    assertTrue("Enhancements MUST HAVE a creator", creatorIterator.hasNext());
    RDFTerm creatorResource = creatorIterator.next().getObject();
    assertTrue("Creator MUST BE a Literal or an IRI (found '" + creatorResource.getClass().getSimpleName() + "')!", creatorResource instanceof Literal || creatorResource instanceof IRI);
    if (creatorResource instanceof Literal) {
        assertEquals("The dc:creator value MUST be of dataType xsd:string", XSD.string, ((Literal) creatorResource).getDataType());
    }
    RDFTerm expectedCreator = expectedValues.get(Properties.DC_CREATOR);
    if (expectedCreator != null) {
        assertEquals("Creator is not the expected value!", expectedCreator, creatorResource);
    }
    assertFalse("only a single creator MUST BE present for an Enhancement", creatorIterator.hasNext());
    // validate the optional contributor
    RDFTerm expectedContributor = expectedValues.get(DCTERMS.contributor);
    Iterator<Triple> contributorIterator = enhancements.filter(enhancement, DCTERMS.contributor, null);
    while (contributorIterator.hasNext()) {
        RDFTerm contributorResource = contributorIterator.next().getObject();
        assertTrue("Contributor MUST BE a Literal or an IRI (found '" + contributorResource.getClass().getSimpleName() + "')!", contributorResource instanceof Literal || contributorResource instanceof IRI);
        if (contributorResource instanceof Literal) {
            assertEquals("The dc:contributor value MUST be of dataType xsd:string", XSD.string, ((Literal) contributorResource).getDataType());
        }
        // BUGFIX: compare the expectation against the actual contributor value.
        // The original compared expectedContributor with itself, which is always
        // true and cleared the expectation on the first contributor found.
        if (expectedContributor != null && expectedContributor.equals(contributorResource)) {
            // found
            expectedContributor = null;
        }
    }
    assertNull("The expected contributor '" + expectedContributor + "' was not present in the Enhancement", expectedContributor);
    // validate creation date (required, single xsd:dateTime value, not in the future)
    Iterator<Triple> createdIterator = enhancements.filter(enhancement, Properties.DC_CREATED, null);
    assertTrue("The creation date MUST BE present for an Enhancement", createdIterator.hasNext());
    RDFTerm createdResource = createdIterator.next().getObject();
    assertTrue("Creation date MUST be a typed Literal", createdResource instanceof Literal);
    assertTrue("Creation date MUST have the dataType xsd:dateTime", XSD.dateTime.equals(((Literal) createdResource).getDataType()));
    Date creationDate = LiteralFactory.getInstance().createObject(Date.class, (Literal) createdResource);
    assertNotNull("Unable to convert " + createdResource + " to a Java Date object", creationDate);
    Date now = new Date();
    assertTrue("CreationDate MUST NOT be in the Future", now.after(creationDate) || now.equals(creationDate));
    assertFalse("Only a single creation date MUST BE present", createdIterator.hasNext());
    // validate optional modification date if present
    Iterator<Triple> modDateIterator = enhancements.filter(enhancement, DCTERMS.modified, null);
    while (modDateIterator.hasNext()) {
        RDFTerm modDateResource = modDateIterator.next().getObject();
        assertTrue("Modification date MUST be a typed Literal", modDateResource instanceof Literal);
        assertTrue("Modification date MUST have the dataType xsd:dateTime", XSD.dateTime.equals(((Literal) modDateResource).getDataType()));
        Date modDate = LiteralFactory.getInstance().createObject(Date.class, (Literal) modDateResource);
        assertNotNull("Unable to convert " + modDateResource + " to a Java Date object", modDate);
        assertTrue("Modification date MUST NOT be in the Future", new Date().after(modDate));
    }
    // validate the fise:extracted-from (required, single IRI value)
    Iterator<Triple> extractedIterator = enhancements.filter(enhancement, Properties.ENHANCER_EXTRACTED_FROM, null);
    assertTrue("The fise:extracted-from property MUST BE present for an Enhancement", extractedIterator.hasNext());
    RDFTerm extractedResource = extractedIterator.next().getObject();
    assertTrue("fise:extracted-from MUST BE an IRI (found '" + extractedResource.getClass().getSimpleName() + "')!", extractedResource instanceof IRI);
    RDFTerm expectedExtractedFrom = expectedValues.get(Properties.ENHANCER_EXTRACTED_FROM);
    if (expectedExtractedFrom != null) {
        assertEquals("fise:extracted-from has not the expected value!", expectedExtractedFrom, extractedResource);
    }
    assertFalse("only a single fise:extracted-from value MUST BE present for an Enhancement", extractedIterator.hasNext());
    // validate that all dc:requires and dc:relation link to resources of type fise:Enhancement
    Iterator<Triple> relatedIterator = enhancements.filter(enhancement, Properties.DC_RELATION, null);
    while (relatedIterator.hasNext()) {
        RDFTerm relatedResource = relatedIterator.next().getObject();
        assertTrue("dc:relation values MUST BE URIs", relatedResource instanceof IRI);
        Iterator<Triple> relatedTypes = enhancements.filter((IRI) relatedResource, RDF_TYPE, TechnicalClasses.ENHANCER_ENHANCEMENT);
        assertTrue("dc:relation Resources MUST BE of rdf:type fise:Enhancement", relatedTypes.hasNext());
    }
    Iterator<Triple> requiresIterator = enhancements.filter(enhancement, Properties.DC_REQUIRES, null);
    while (requiresIterator.hasNext()) {
        RDFTerm requiredResource = requiresIterator.next().getObject();
        assertTrue("dc:requires values MUST BE URIs", requiredResource instanceof IRI);
        Iterator<Triple> relatedTypes = enhancements.filter((IRI) requiredResource, RDF_TYPE, TechnicalClasses.ENHANCER_ENHANCEMENT);
        assertTrue("dc:requires Resources MUST BE of rdf:type fise:Enhancement", relatedTypes.hasNext());
    }
    // validate that fise:confidence has [0..1] values and are of type xsd:float
    Iterator<Triple> confidenceIterator = enhancements.filter(enhancement, Properties.ENHANCER_CONFIDENCE, null);
    boolean confidenceRequired = expectedValues.containsKey(Properties.ENHANCER_CONFIDENCE);
    if (confidenceIterator.hasNext()) {
        // confidence is optional
        RDFTerm confidenceResource = confidenceIterator.next().getObject();
        assertTrue("fise:confidence value MUST BE a TypedLiteral", confidenceResource instanceof Literal);
        assertTrue("fise:confidence MUST BE xsd:double", XSD.double_.equals(((Literal) confidenceResource).getDataType()));
        Double confidence = LiteralFactory.getInstance().createObject(Double.class, (Literal) confidenceResource);
        assertNotNull("Unable to convert TypedLiteral '" + confidenceResource + "' to a Java Double value", confidence);
        assertFalse("fise:confidence MUST HAVE [0..1] values", confidenceIterator.hasNext());
        // STANBOL-630: confidence [0..1]
        assertTrue("fise:confidence MUST BE <= 1 (value= '" + confidence + "',enhancement " + enhancement + ")", 1.0 >= confidence.doubleValue());
        assertTrue("fise:confidence MUST BE >= 0 (value= '" + confidence + "',enhancement " + enhancement + ")", 0.0 <= confidence.doubleValue());
        RDFTerm expectedConfidence = expectedValues.get(Properties.ENHANCER_CONFIDENCE);
        if (expectedConfidence != null) {
            assertEquals("The fise:confidence for enhancement " + enhancement + " does not have the expected value", expectedConfidence, confidenceResource);
        }
    } else {
        assertFalse("The required fise:confidence value is missing for enhancement " + enhancement, confidenceRequired);
    }
    // validate that the (optional) dc:type is an URI and that there are not multiple values
    Iterator<Triple> dcTypeIterator = enhancements.filter(enhancement, Properties.DC_TYPE, null);
    RDFTerm expectedDcType = expectedValues.get(Properties.DC_TYPE);
    if (dcTypeIterator.hasNext()) {
        // dc:type is optional
        RDFTerm dcTypeResource = dcTypeIterator.next().getObject();
        assertTrue("dc:type values MUST BE URIs", dcTypeResource instanceof IRI);
        if (expectedDcType != null) {
            assertEquals("The dc:type value is not the expected " + expectedDcType + "!", expectedDcType, dcTypeResource);
        }
        assertFalse("Only a single dc:type value is allowed!", dcTypeIterator.hasNext());
    }
    // validate the fise:confidence-value introduced by STANBOL-631
    Iterator<Triple> confidenceLevelIterator = enhancements.filter(enhancement, Properties.ENHANCER_CONFIDENCE_LEVEL, null);
    RDFTerm expectedConfidenceValue = expectedValues.get(Properties.ENHANCER_CONFIDENCE_LEVEL);
    if (confidenceLevelIterator.hasNext()) {
        RDFTerm confidenceLevelResource = confidenceLevelIterator.next().getObject();
        assertTrue("fise:confidence-level values MUST BE URIs but found " + confidenceLevelResource, confidenceLevelResource instanceof IRI);
        assertNotNull("The fise:confidence-level value MUST BE one of the four " + "values defined in the ontology! (found: " + confidenceLevelResource + " | enhancement " + enhancement + ")", CONFIDENCE_LEVEL_ENUM.getConfidenceLevel((IRI) confidenceLevelResource));
        assertFalse("The fise:confidence-level property is functional and MUST " + "HAVE only a single value (enhancement " + enhancement + ")!", confidenceLevelIterator.hasNext());
    } else {
        assertNull("fise:confidence-level " + expectedConfidenceValue + " expected for Enhancement " + enhancement + " but no 'fise:confidence-level' value present!", expectedConfidenceValue);
    }
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) Date(java.util.Date)

Example 69 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class EnhancementStructureHelper method validateTopicAnnotation.

/**
 * Checks if a fise:TopicAnnotation is valid as defined by
 * <a href="https://issues.apache.org/jira/browse/STANBOL-617">STANBOL-617</a>.
 * NOTE that this also validates all fise:Enhancement related requirements by
 * calling {@link #validateEnhancement(Graph, IRI, Map)}
 * @param enhancements the enhancements graph
 * @param topicAnnotation the topic annotation to validate
 * @param expectedValues expected values (properties for the values are used as keys)
 */
public static void validateTopicAnnotation(Graph enhancements, IRI topicAnnotation, Map<IRI, RDFTerm> expectedValues) {
    // validate the rdf:type
    Iterator<Triple> rdfTypeIterator = enhancements.filter(topicAnnotation, RDF_TYPE, ENHANCER_TOPICANNOTATION);
    assertTrue("Parsed Enhancement " + topicAnnotation + " is missing the fise:TopicAnnotation type ", rdfTypeIterator.hasNext());
    // TopicAnnotations need to be linked to TextAnnotations describing the
    // section of the text that has a specific Topic.
    // If the topic is for the whole text the TextAnnotation will have no
    // selected-text value
    Iterator<Triple> relationToTextAnnotationIterator = enhancements.filter(topicAnnotation, DC_RELATION, null);
    // check if the relation to the text annotation is set
    assertTrue(relationToTextAnnotationIterator.hasNext());
    while (relationToTextAnnotationIterator.hasNext()) {
        // test if the referred annotations are text annotations
        IRI referredTextAnnotation = (IRI) relationToTextAnnotationIterator.next().getObject();
        assertTrue(enhancements.filter(referredTextAnnotation, RDF_TYPE, ENHANCER_TEXTANNOTATION).hasNext());
    }
    // test if an entity (the topic) is referred (NOTE: in contrast to
    // fise:EntityAnnotations this property is NOT required - cardinality [0..*]
    Iterator<Triple> entityReferenceIterator = enhancements.filter(topicAnnotation, ENHANCER_ENTITY_REFERENCE, null);
    RDFTerm expectedReferencedEntity = expectedValues.get(ENHANCER_ENTITY_REFERENCE);
    while (entityReferenceIterator.hasNext()) {
        // check possible multiple references
        RDFTerm entityReferenceResource = entityReferenceIterator.next().getObject();
        // test if the reference is an URI
        assertTrue("fise:entity-reference values MUST BE URIs", entityReferenceResource instanceof IRI);
        if (expectedReferencedEntity != null && expectedReferencedEntity.equals(entityReferenceResource)) {
            // found
            expectedReferencedEntity = null;
        }
    }
    assertNull("EntityAnnotation " + topicAnnotation + " fise:entity-reference has not the expected value " + expectedReferencedEntity + "!", expectedReferencedEntity);
    // test if the entity label is set (required)
    Iterator<Triple> entityLabelIterator = enhancements.filter(topicAnnotation, ENHANCER_ENTITY_LABEL, null);
    assertTrue(entityLabelIterator.hasNext());
    RDFTerm expectedEntityLabel = expectedValues.get(ENHANCER_ENTITY_LABEL);
    while (entityLabelIterator.hasNext()) {
        RDFTerm entityLabelResource = entityLabelIterator.next().getObject();
        assertTrue("fise:entity-label values MUST BE PlainLiterals (EntityAnnotation: " + topicAnnotation + ")!", entityLabelResource instanceof Literal);
        if (expectedEntityLabel != null && expectedEntityLabel.equals(entityLabelResource)) {
            expectedEntityLabel = null;
        }
    }
    assertNull("The expected EntityLabel " + expectedEntityLabel + " was not found", expectedEntityLabel);
    // test fise:entity-type(s). NOTE: this is not required - cardinality [0..*]
    Iterator<Triple> entityTypeIterator = enhancements.filter(topicAnnotation, Properties.ENHANCER_ENTITY_TYPE, null);
    RDFTerm expectedEntityType = expectedValues.get(Properties.ENHANCER_ENTITY_TYPE);
    // BUGFIX: iterate over ALL entity-type values (cardinality [0..*], mirroring
    // the fise:entity-reference handling above) instead of only inspecting the
    // first one - otherwise an expected type present as a later value was never found
    while (entityTypeIterator.hasNext()) {
        RDFTerm entityTypeResource = entityTypeIterator.next().getObject();
        assertTrue("fise:entity-type values MUST BE URIs", entityTypeResource instanceof IRI);
        if (expectedEntityType != null && expectedEntityType.equals(entityTypeResource)) {
            // found
            expectedEntityType = null;
        }
    }
    assertNull("The expected fise:entity-type value " + expectedEntityType + " was not found!", expectedEntityType);
    // test all properties required by fise:Enhancement
    validateEnhancement(enhancements, topicAnnotation, expectedValues);
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm)

Example 70 with IRI

use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.

the class EnhancementStructureHelper method validateAllTextAnnotations.

/**
 * Validates all TextAnnotations contained in the parsed enhancement graph.
 * If <code>validatePrefixSuffix</code> is
 * enabled the fise:selection-prefix and fise:selection-suffix (as defined by
 * <a href="https://issues.apache.org/jira/browse/STANBOL-987">STANBOL-987</a>
 * are enforced and validated. If disabled those properties are not enforced but still
 * validated when present.
 * @param enhancements the enhancement graph
 * @param content the enhanced content
 * @param expectedValues the expected values of all validated EntityAnnotations.
 * Properties are used as keys. Typical example would be fise:extracted-from
 * with the id of the ContentItem as value; dc-terms:creator with the
 * {@link Class#getName()} as value.
 * @param validatePrefixSuffix enforce the presence of fise:selection-prefix and
 * fise:selection-suffix if fise:start and fise:end are set.
 * @return the number of found TextAnnotations
 */
public static int validateAllTextAnnotations(Graph enhancements, String content, Map<IRI, RDFTerm> expectedValues, boolean validatePrefixSuffix) {
    // use the typed Collections.emptyMap() (same singleton as the raw EMPTY_MAP)
    // so no unchecked conversion - and no @SuppressWarnings - is needed
    expectedValues = expectedValues == null ? Collections.<IRI, RDFTerm>emptyMap() : expectedValues;
    Iterator<Triple> textAnnotationIterator = enhancements.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
    // test if a textAnnotation is present
    // assertTrue(textAnnotationIterator.hasNext());
    // -> this might be used to test that there are no TextAnnotations
    int textAnnotationCount = 0;
    while (textAnnotationIterator.hasNext()) {
        // NOTE(review): assumes all fise:TextAnnotation subjects are IRIs (not
        // BlankNodes) - the cast would fail otherwise, as in the original
        IRI textAnnotation = (IRI) textAnnotationIterator.next().getSubject();
        // test if selected Text is added
        validateTextAnnotation(enhancements, textAnnotation, content, expectedValues, validatePrefixSuffix);
        textAnnotationCount++;
    }
    return textAnnotationCount;
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI)

Aggregations

IRI (org.apache.clerezza.commons.rdf.IRI)346 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)113 Graph (org.apache.clerezza.commons.rdf.Graph)109 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)104 Triple (org.apache.clerezza.commons.rdf.Triple)88 RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)84 Test (org.junit.Test)78 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)58 HashSet (java.util.HashSet)50 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)46 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)39 HashMap (java.util.HashMap)38 IOException (java.io.IOException)37 ArrayList (java.util.ArrayList)37 Blob (org.apache.stanbol.enhancer.servicesapi.Blob)36 Literal (org.apache.clerezza.commons.rdf.Literal)35 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)31 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)29 Recipe (org.apache.stanbol.rules.base.api.Recipe)29 Language (org.apache.clerezza.commons.rdf.Language)24