Search in sources :

Example 96 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class EnhancementStructureHelper method validateTextAnnotation.

/**
 * Validates a fise:TextAnnotation contained in the parsed enhancements graph.
 * Every violated rule is reported as an assertion failure.
 * <p>
 * The following aspects are checked (in this order):
 * <ul>
 * <li> the rdf:type fise:TextAnnotation is present
 * <li> fise:selected-text is a single Literal contained in the content
 * <li> fise:selection-head and fise:selection-tail are either both present
 *      or both absent, are single Literals contained in the content and are
 *      not used together with fise:selected-text
 * <li> fise:selection-context is a Literal contained in the content that
 *      itself contains the selected text (or selection head/tail)
 * <li> fise:start and fise:end are single xsd:int Literals and the
 *      substring they select equals fise:selected-text (if present)
 * <li> fise:selection-prefix and fise:selection-suffix (as defined by
 *      <a href="https://issues.apache.org/jira/browse/STANBOL-987">STANBOL-987</a>)
 * <li> the fise:Enhancement rules, language annotation rules and NER
 *      annotation rules (delegated to the other validate methods)
 * </ul>
 * If <code>validatePrefixSuffix</code> is enabled the fise:selection-prefix
 * and fise:selection-suffix are enforced and validated. If disabled those
 * properties are not enforced but still validated when present.
 *
 * @param enhancements the enhancements graph containing the text annotation
 * @param textAnnotation the TextAnnotation to validate
 * @param content the enhanced content (MUST NOT be <code>null</code>)
 * @param expectedValues expected values (properties for the values are used
 * as keys). MUST NOT be <code>null</code>; properties without an entry are
 * not compared against an expected value.
 * @param validatePrefixSuffix enforce the presence of fise:selection-prefix and
 * fise:selection-suffix if fise:start and fise:end are set.
 */
public static void validateTextAnnotation(Graph enhancements, IRI textAnnotation, String content, Map<IRI, RDFTerm> expectedValues, boolean validatePrefixSuffix) {
    //validate the rdf:type
    Iterator<Triple> rdfTypeIterator = enhancements.filter(textAnnotation, RDF_TYPE, ENHANCER_TEXTANNOTATION);
    assertTrue("Parsed Enhancement " + textAnnotation + " is missing the fise:TextAnnotation type ", rdfTypeIterator.hasNext());
    Iterator<Triple> selectedTextIterator = enhancements.filter(textAnnotation, ENHANCER_SELECTED_TEXT, null);
    // check if the selected text is added (or not)
    RDFTerm selectedTextResource;
    if (selectedTextIterator.hasNext()) {
        // the selected text MUST BE a single literal that is part of the content
        selectedTextResource = selectedTextIterator.next().getObject();
        assertTrue("fise:selected-text MUST BE of type PlainLiteral (uri: " + textAnnotation + ")", selectedTextResource instanceof Literal);
        Literal selectedText = (Literal) selectedTextResource;
        assertTrue("The parsed content MUST contain the fise:selected-text value '" + selectedText.getLexicalForm() + "' (uri: " + textAnnotation + ")!", content.contains(selectedText.getLexicalForm()));
        Assert.assertFalse("fise:selected-text MUST be single valued (uri: " + textAnnotation + ")", selectedTextIterator.hasNext());
    } else {
        //no selected text
        selectedTextResource = null;
    }
    //check against an expected value
    RDFTerm expectedSelectedText = expectedValues.get(ENHANCER_SELECTED_TEXT);
    if (expectedSelectedText != null) {
        assertEquals("The fise:selected-text is not the expected value " + expectedSelectedText + " (uri: " + textAnnotation + ")!", expectedSelectedText, selectedTextResource);
    }
    //check for fise:selection-head and fise:selection-tail (STANBOL-987)
    Iterator<Triple> selectionHeadIterator = enhancements.filter(textAnnotation, Properties.ENHANCER_SELECTION_HEAD, null);
    if (selectedTextResource != null) {
        Assert.assertFalse("If fise:selected-text is present fise:selection-head MUST NOT be present", selectionHeadIterator.hasNext());
    }
    RDFTerm selectionHeadResource;
    if (selectionHeadIterator.hasNext()) {
        // the selection head MUST BE a single literal that is part of the content
        selectionHeadResource = selectionHeadIterator.next().getObject();
        assertTrue("fise:selection-head MUST BE of type PlainLiteral (uri: " + textAnnotation + ")", selectionHeadResource instanceof Literal);
        Literal selectionHeadText = (Literal) selectionHeadResource;
        assertTrue("The parsed content MUST contain the fise:selection-head value '" + selectionHeadText.getLexicalForm() + "' (uri: " + textAnnotation + ")!", content.contains(selectionHeadText.getLexicalForm()));
        Assert.assertFalse("fise:selection-head MUST be single valued (uri: " + textAnnotation + ")", selectionHeadIterator.hasNext());
    } else {
        selectionHeadResource = null;
    }
    Iterator<Triple> selectionTailIterator = enhancements.filter(textAnnotation, Properties.ENHANCER_SELECTION_TAIL, null);
    if (selectedTextResource != null) {
        Assert.assertFalse("If fise:selected-text is present fise:selection-tail MUST NOT be present", selectionTailIterator.hasNext());
    }
    RDFTerm selectionTailResource;
    if (selectionTailIterator.hasNext()) {
        // the selection tail MUST BE a single literal that is part of the content
        selectionTailResource = selectionTailIterator.next().getObject();
        assertTrue("fise:selection-tail MUST BE of type PlainLiteral (uri: " + textAnnotation + ")", selectionTailResource instanceof Literal);
        Literal selectionTailText = (Literal) selectionTailResource;
        assertTrue("The parsed content MUST contain the fise:selection-tail value '" + selectionTailText.getLexicalForm() + "' (uri: " + textAnnotation + ")!", content.contains(selectionTailText.getLexicalForm()));
        Assert.assertFalse("fise:selection-tail MUST be single valued (uri: " + textAnnotation + ")", selectionTailIterator.hasNext());
    } else {
        selectionTailResource = null;
    }
    //head and tail are only valid as a pair
    Assert.assertTrue("Both fise:selection-tail AND fise:selection-head MUST BE defined " + "(if one of them is present) (uri: " + textAnnotation + ")", (selectionHeadResource != null && selectionTailResource != null) || (selectionHeadResource == null && selectionTailResource == null));
    RDFTerm selectionContextResource;
    // test if context is added
    Iterator<Triple> selectionContextIterator = enhancements.filter(textAnnotation, ENHANCER_SELECTION_CONTEXT, null);
    if (selectionContextIterator.hasNext()) {
        //context is optional
        //selection context is not allowed without selected-text (or head and tail)
        assertTrue("If fise:selection-context is present also fise:selected-text or fise:selection-head and fise:selection-tail MUST BE present (uri: " + textAnnotation + ")", selectedTextResource != null || (selectionHeadResource != null && selectionTailResource != null));
        // only the first context value is read and validated
        selectionContextResource = selectionContextIterator.next().getObject();
        assertTrue("The fise:selection-context MUST BE of type PlainLiteral (uri: " + textAnnotation + ")", selectionContextResource instanceof Literal);
        //check that the content contains the context
        assertTrue("The fise:selection-context MUST BE contained in the Content | context= " + selectionContextResource, content.contains(((Literal) selectionContextResource).getLexicalForm()));
        //check that the context contains the selected text
        if (selectedTextResource != null) {
            assertTrue("The fise:selected-text value MUST BE containted within the fise:selection-context value", ((Literal) selectionContextResource).getLexicalForm().contains(((Literal) selectedTextResource).getLexicalForm()));
        }
        //check that the context contains the selection head and tail
        if (selectionHeadResource != null) {
            assertTrue("The fise:selection-head value MUST BE containted within the fise:selection-context value", ((Literal) selectionContextResource).getLexicalForm().contains(((Literal) selectionHeadResource).getLexicalForm()));
        }
        if (selectionTailResource != null) {
            assertTrue("The fise:selection-tail value MUST BE containted within the fise:selection-context value", ((Literal) selectionContextResource).getLexicalForm().contains(((Literal) selectionTailResource).getLexicalForm()));
        }
    } else {
        assertNull("If no fise:selection-context is present also fise:selected-text MUST BE NOT present!", selectedTextResource);
        selectionContextResource = null;
    }
    RDFTerm expectedSelectionContext = expectedValues.get(ENHANCER_SELECTION_CONTEXT);
    if (expectedSelectionContext != null) {
        assertEquals("The value of fise:selection-context has not the expected value " + expectedSelectionContext, expectedSelectionContext, selectionContextResource);
    }
    //test start/end if present
    Iterator<Triple> startPosIterator = enhancements.filter(textAnnotation, ENHANCER_START, null);
    Iterator<Triple> endPosIterator = enhancements.filter(textAnnotation, ENHANCER_END, null);
    //start end is optional, but if start is present, that also end needs to be set
    Literal startPosLiteral;
    Literal endPosLiteral;
    if (startPosIterator.hasNext()) {
        //NOTE: TextAnnotations might be use to select whole sections of a text
        //      (e.g. see STANBOL-617) in those cases adding the text of the
        //      whole section is not feasible. Because of that the presence of
        //      fise:selection-context is NOT enforced if fise:start is present.
        RDFTerm resource = startPosIterator.next().getObject();
        //only a single start position is supported
        assertFalse("fise:start MUST HAVE only a single value (uri: " + textAnnotation + ")!", startPosIterator.hasNext());
        assertTrue("fise:start MUST be a typed Literal (uri: " + textAnnotation + ")!", resource instanceof Literal);
        startPosLiteral = (Literal) resource;
        assertEquals("fise:start MUST use xsd:int as data type (uri: " + textAnnotation + ")", XSD.int_, startPosLiteral.getDataType());
        Integer start = LiteralFactory.getInstance().createObject(Integer.class, startPosLiteral);
        assertNotNull("Unable to parse Integer from TypedLiteral " + startPosLiteral, start);
        //now get the end
        //end must be defined if start is present
        assertTrue("If fise:start is present also fise:end MUST BE defined (uri: " + textAnnotation + ")!", endPosIterator.hasNext());
        resource = endPosIterator.next().getObject();
        //only a single end position is supported
        assertFalse("fise:end MUST HAVE only a single value (uri: " + textAnnotation + ")!", endPosIterator.hasNext());
        assertTrue("fise:end values MUST BE TypedLiterals (uri: " + textAnnotation + ")", resource instanceof Literal);
        endPosLiteral = (Literal) resource;
        assertEquals("fise:end MUST use xsd:int as data type (uri: " + textAnnotation + ")", XSD.int_, endPosLiteral.getDataType());
        Integer end = LiteralFactory.getInstance().createObject(Integer.class, endPosLiteral);
        assertNotNull("Unable to parse Integer from TypedLiteral " + endPosLiteral, end);
        if (selectedTextResource != null) {
            //the section of the content selected by [start,end] MUST BE the selected text
            assertEquals("the substring [fise:start,fise:end] does not correspond to " + "the fise:selected-text value '" + ((Literal) selectedTextResource).getLexicalForm() + "' of this TextAnnotation!", content.substring(start, end), ((Literal) selectedTextResource).getLexicalForm());
        }
    // else no selected-text present ... unable to test this
    } else {
        //without fise:start none of the position dependent properties is allowed
        assertNull("if fise:selected-text is present also fise:start AND fise:end MUST BE present!", selectedTextResource);
        assertNull("If fise:selection-context is present also fise:start AND fise:end MUST BE present!", selectionContextResource);
        assertFalse("if fise:end is presnet also fise:start AND fise:selection-context MUST BE present!", endPosIterator.hasNext());
        startPosLiteral = null;
        endPosLiteral = null;
    }
    RDFTerm expectedStartPos = expectedValues.get(ENHANCER_START);
    if (expectedStartPos != null) {
        assertEquals("The fise:start value is not the expected " + expectedStartPos, expectedStartPos, startPosLiteral);
    }
    RDFTerm expectedEndPos = expectedValues.get(ENHANCER_END);
    if (expectedEndPos != null) {
        assertEquals("The fise:end value is not the expected " + expectedEndPos, expectedEndPos, endPosLiteral);
    }
    //fise:selection-prefix and fise:selection-suffix (STANBOL-987)
    Literal prefixLiteral;
    Iterator<Triple> selectionPrefixIterator = enhancements.filter(textAnnotation, Properties.ENHANCER_SELECTION_PREFIX, null);
    if (startPosLiteral != null) {
        // the prefix is only required if validatePrefixSuffix is enabled
        assertTrue("fise:selection-prefix property is missing for fise:TextAnnotation " + textAnnotation, selectionPrefixIterator.hasNext() || //to support old and new fise:TextAnnotation model
        !validatePrefixSuffix);
        // if present the prefix MUST BE a single literal that is part of the content
        if (selectionPrefixIterator.hasNext()) {
            RDFTerm selectionPrefixResource = selectionPrefixIterator.next().getObject();
            assertTrue("fise:selection-prefix MUST BE of type PlainLiteral (uri: " + textAnnotation + ")", selectionPrefixResource instanceof Literal);
            prefixLiteral = (Literal) selectionPrefixResource;
            assertTrue("The parsed content MUST contain the fise:selection-prefix value '" + prefixLiteral.getLexicalForm() + "' (uri: " + textAnnotation + ")!", content.contains(prefixLiteral.getLexicalForm()));
            assertFalse("fise:selection-prefix MUST BE single valued (uri: " + textAnnotation + ")!", selectionPrefixIterator.hasNext());
        } else {
            prefixLiteral = null;
        }
    } else {
        prefixLiteral = null;
    }
    Literal suffixLiteral;
    Iterator<Triple> selectionSuffixIterator = enhancements.filter(textAnnotation, Properties.ENHANCER_SELECTION_SUFFIX, null);
    if (endPosLiteral != null) {
        // the suffix is only required if validatePrefixSuffix is enabled
        assertTrue("fise:selection-suffix property is missing for fise:TextAnnotation " + textAnnotation, selectionSuffixIterator.hasNext() || //to support old and new fise:TextAnnotation model
        !validatePrefixSuffix);
        if (selectionSuffixIterator.hasNext()) {
            // if present the suffix MUST BE a single literal that is part of the content
            RDFTerm selectionSuffixResource = selectionSuffixIterator.next().getObject();
            assertTrue("fise:selection-suffix MUST BE of type PlainLiteral (uri: " + textAnnotation + ")", selectionSuffixResource instanceof Literal);
            suffixLiteral = (Literal) selectionSuffixResource;
            assertTrue("The parsed content MUST contain the fise:selection-suffix value '" + suffixLiteral.getLexicalForm() + "' (uri: " + textAnnotation + ")!", content.contains(suffixLiteral.getLexicalForm()));
            assertFalse("fise:selection-suffix MUST BE single valued (uri: " + textAnnotation + ")!", selectionSuffixIterator.hasNext());
        } else {
            suffixLiteral = null;
        }
    } else {
        suffixLiteral = null;
    }
    //prefix and suffix are only valid as a pair. NOTE: the code below relies
    //on this assertion when it dereferences suffixLiteral after only
    //checking prefixLiteral for null
    Assert.assertTrue("Both fise:selection-prefix AND fise:selection-suffix need to be present " + "(if one of them is present) (uri: " + textAnnotation + ")", (suffixLiteral != null && prefixLiteral != null) || (suffixLiteral == null && prefixLiteral == null));
    if (prefixLiteral != null && selectedTextResource != null) {
        String occurrence = prefixLiteral.getLexicalForm() + ((Literal) selectedTextResource).getLexicalForm() + suffixLiteral.getLexicalForm();
        assertTrue("The parsed content MUST contain the concated value of fise:selection-prefix," + "fise:selected-text and fise:selection-suffix (value: '" + occurrence + "' (uri: " + textAnnotation + ")!", content.contains(occurrence));
    }
    if (prefixLiteral != null && selectionHeadResource != null) {
        //selectionTailResource is not null here as head and tail are validated
        //to be present as a pair (see above)
        String occurrence = prefixLiteral.getLexicalForm() + ((Literal) selectionHeadResource).getLexicalForm();
        assertTrue("The parsed content MUST contain the concated value of fise:selection-prefix," + "fise:selection-head (value: '" + occurrence + "' (uri: " + textAnnotation + ")!", content.contains(occurrence));
        occurrence = ((Literal) selectionTailResource).getLexicalForm() + suffixLiteral.getLexicalForm();
        assertTrue("The parsed content MUST contain the concated value of fise:selection-tail " + "and fise:selection-suffix (value: '" + occurrence + "' (uri: " + textAnnotation + ")!", content.contains(occurrence));
    }
    //validate fise:Enhancement specific rules
    validateEnhancement(enhancements, textAnnotation, expectedValues);
    //validate for special TextAnnotations
    validateLanguageAnnotations(enhancements, textAnnotation);
    validateNERAnnotations(enhancements, textAnnotation, selectedTextResource);
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm)

Example 97 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class EnhancementStructureHelper method validateEntityAnnotation.

/**
 * Checks if a fise:EntityAnnotation is valid. NOTE that this also validates
 * all fise:Enhancement related requirements by calling
 * {@link #validateEnhancement(Graph, IRI, Map)}
 * <p>
 * Validated are: <ul>
 * <li> at least one dc:relation is present and every value refers to a
 *      fise:TextAnnotation, or to a fise:EntityAnnotation that is also
 *      linked by dc:requires (STANBOL-766)
 * <li> at least one fise:entity-reference is present and all values are URIs
 * <li> at least one fise:entity-label is present and all values are Literals
 * <li> all (optional) fise:entity-type values are URIs
 * </ul>
 * For fise:entity-reference, fise:entity-label and fise:entity-type an
 * expected value is satisfied if ANY of the values of the property equals it.
 * Violations are reported as assertion failures.
 *
 * @param enhancements the enhancements graph
 * @param entityAnnotation the entity annotation to validate
 * @param expectedValues expected values (properties for the values are used
 * as keys). MUST NOT be <code>null</code>
 */
public static void validateEntityAnnotation(Graph enhancements, IRI entityAnnotation, Map<IRI, RDFTerm> expectedValues) {
    Iterator<Triple> relationToTextAnnotationIterator = enhancements.filter(entityAnnotation, DC_RELATION, null);
    // check if the relation to the text annotation is set
    assertTrue("fise:EntityAnnotations MUST define at least a single dc:relation (EntityAnnotation: " + entityAnnotation + ")!", relationToTextAnnotationIterator.hasNext());
    while (relationToTextAnnotationIterator.hasNext()) {
        // test if the referred annotations are text annotations or
        // the referenced annotations is a fise:EntityAnnotation AND also a
        // dc:requires link is defined (STANBOL-766)
        RDFTerm relatedResource = relationToTextAnnotationIterator.next().getObject();
        // report a non-IRI value as validation failure instead of a ClassCastException
        assertTrue("dc:relation values of fise:EntityAnnotations MUST BE URIs (EntityAnnotation: " + entityAnnotation + ")!", relatedResource instanceof IRI);
        IRI referredTextAnnotation = (IRI) relatedResource;
        assertTrue("fise:EntityAnnotations MUST BE dc:related to a fise:TextAnnotation OR dc:requires and dc:related to the same fise:EntityAnnotation", enhancements.filter(referredTextAnnotation, RDF_TYPE, ENHANCER_TEXTANNOTATION).hasNext() || (enhancements.filter(referredTextAnnotation, RDF_TYPE, ENHANCER_ENTITYANNOTATION).hasNext() && enhancements.filter(entityAnnotation, Properties.DC_REQUIRES, referredTextAnnotation).hasNext()));
    }
    // test if an entity is referred
    Iterator<Triple> entityReferenceIterator = enhancements.filter(entityAnnotation, ENHANCER_ENTITY_REFERENCE, null);
    assertTrue("fise:entity-reference MUST BE present! (EntityAnnotation: '" + entityAnnotation + "')'", entityReferenceIterator.hasNext());
    RDFTerm expectedReferencedEntity = expectedValues.get(ENHANCER_ENTITY_REFERENCE);
    while (entityReferenceIterator.hasNext()) {
        //check possible multiple references
        RDFTerm entityReferenceResource = entityReferenceIterator.next().getObject();
        // test if the reference is an URI
        assertTrue("fise:entity-reference value MUST BE of URIs", entityReferenceResource instanceof IRI);
        if (expectedReferencedEntity != null && expectedReferencedEntity.equals(entityReferenceResource)) {
            //found: reset to null so that the assertNull below succeeds
            expectedReferencedEntity = null;
        }
    }
    assertNull("EntityAnnotation " + entityAnnotation + " fise:entity-reference has not the expected value " + expectedReferencedEntity + "!", expectedReferencedEntity);
    //test if the entity label is set
    Iterator<Triple> entityLabelIterator = enhancements.filter(entityAnnotation, ENHANCER_ENTITY_LABEL, null);
    assertTrue("fise:entity-label MUST BE present! (EntityAnnotation: " + entityAnnotation + ")", entityLabelIterator.hasNext());
    RDFTerm expectedEntityLabel = expectedValues.get(ENHANCER_ENTITY_LABEL);
    while (entityLabelIterator.hasNext()) {
        RDFTerm entityLabelResource = entityLabelIterator.next().getObject();
        assertTrue("fise:entity-label values MUST BE PlainLiterals (EntityAnnotation: " + entityAnnotation + ")!", entityLabelResource instanceof Literal);
        if (expectedEntityLabel != null && expectedEntityLabel.equals(entityLabelResource)) {
            //found
            expectedEntityLabel = null;
        }
    }
    assertNull("The expected EntityLabel " + expectedEntityLabel + " was not found", expectedEntityLabel);
    //test the optional entity types
    Iterator<Triple> entityTypeIterator = enhancements.filter(entityAnnotation, Properties.ENHANCER_ENTITY_TYPE, null);
    RDFTerm expectedEntityType = expectedValues.get(Properties.ENHANCER_ENTITY_TYPE);
    //iterate over ALL values: an EntityAnnotation may define multiple types
    //and the expected type need not be the first one returned by the graph
    while (entityTypeIterator.hasNext()) {
        RDFTerm entityTypeResource = entityTypeIterator.next().getObject();
        assertTrue("fise:entity-type values MUST BE URIs", entityTypeResource instanceof IRI);
        if (expectedEntityType != null && expectedEntityType.equals(entityTypeResource)) {
            //found
            expectedEntityType = null;
        }
    }
    assertNull("The expected fise:entity-type value " + expectedEntityType + " was not found!", expectedEntityType);
    //test all properties required by fise:Enhancement
    validateEnhancement(enhancements, entityAnnotation, expectedValues);
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) Literal(org.apache.clerezza.commons.rdf.Literal) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm)

Example 98 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class EnhancementStructureHelper method validateLanguageAnnotations.

/**
 * Validates the language specific rules for a fise:TextAnnotation as defined by
 * <a href="https://issues.apache.org/jira/browse/STANBOL-613">STANBOL-613</a><p>
 * A TextAnnotation with a dc:language value MUST have exactly one such
 * value (a Literal with at least two chars), MUST use dc:LinguisticSystem
 * as its only dc:type and MUST be returned by
 * <code>EnhancementEngineHelper.getLanguageAnnotations(..)</code>. A
 * TextAnnotation without a dc:language value MUST NOT use
 * dc:LinguisticSystem as dc:type.<p>
 * Called by {@link #validateTextAnnotation(Graph, IRI, String, Map, boolean)}
 * @param enhancements the graph containing the text annotation
 * @param textAnnotation the text annotation to validate
 */
private static void validateLanguageAnnotations(Graph enhancements, IRI textAnnotation) {
    Iterator<Triple> languages = enhancements.filter(textAnnotation, DC_LANGUAGE, null);
    if (!languages.hasNext()) {
        //not a language annotation: none of the dc:type values is allowed
        //to be dc:LinguisticSystem
        for (Iterator<Triple> types = enhancements.filter(textAnnotation, DC_TYPE, null); types.hasNext(); ) {
            RDFTerm type = types.next().getObject();
            assertFalse("Only fise:TextAnnotations without a dc:language value MUST NOT use the " + "dc:type value dc:LinguisticSystem (uri " + textAnnotation + ")", DCTERMS_LINGUISTIC_SYSTEM.equals(type));
        }
        return;
    }
    //a language annotation: validate the (single) dc:language value
    RDFTerm language = languages.next().getObject();
    assertTrue("The dc:language value MUST BE a PlainLiteral", language instanceof Literal);
    String tooShortMessage = "The dc:language value '" + language + "'MUST BE at least two chars long";
    assertTrue(tooShortMessage, ((Literal) language).getLexicalForm().length() >= 2);
    assertFalse("TextAnnotations with the dc:language property MUST only have a single dc:language value (uri " + textAnnotation + ")", languages.hasNext());
    //the one and only dc:type MUST BE dc:LinguisticSystem
    Iterator<Triple> types = enhancements.filter(textAnnotation, DC_TYPE, null);
    String wrongTypeMessage = "TextAnnotations with the dc:language property MUST use dc:type dc:LinguisticSystem (uri " + textAnnotation + ")";
    assertTrue(wrongTypeMessage, types.hasNext());
    assertEquals(wrongTypeMessage, DCTERMS_LINGUISTIC_SYSTEM, types.next().getObject());
    assertFalse("TextAnnotations with the dc:language property MUST only have a single dc:type value (uri " + textAnnotation + ")", types.hasNext());
    //finally check that the helper method used to look up language
    //annotations does return the validated TextAnnotation
    List<BlankNodeOrIRI> knownLanguageAnnotations = EnhancementEngineHelper.getLanguageAnnotations(enhancements);
    boolean returnedByHelper = knownLanguageAnnotations.contains(textAnnotation);
    assertTrue("Language annotation " + textAnnotation + " was not returned by " + "EnhancementEngineHelper.getLanguageAnnotations(..)!", returnedByHelper);
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) Literal(org.apache.clerezza.commons.rdf.Literal) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm)

Example 99 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class EnhancerLDPath method getConfig.

/**
 * The LDPath configuration including the <ul>
 * <li> Namespaces defined by the {@link NamespaceEnum}
 * <li> the LDPath functions for the Stanbol Enhancement Structure
 * </ul>
 * The configuration is created on the first call and cached in
 * <code>CONFIG</code>. It is built on a local instance and only stored in
 * the static field once completely initialised. Because of that a failure
 * during initialisation can not leave a partially configured instance
 * behind that would be returned by later calls.<p>
 * NOTE: this method does not synchronise the lazy initialisation.
 * @return the LDPath configuration for the Stanbol Enhancer
 * @throws IllegalStateException if one of the LDPath selectors used by the
 * registered functions can not be parsed
 */
public static final Configuration<RDFTerm> getConfig() {
    if (CONFIG == null) {
        DefaultConfiguration<RDFTerm> config = new DefaultConfiguration<RDFTerm>();
        //add the namespaces
        for (NamespaceEnum ns : NamespaceEnum.values()) {
            config.addNamespace(ns.getPrefix(), ns.getNamespace());
        }
        //now add the functions
        addFunction(config, new ContentFunction());
        //TextAnnotations
        addFunction(config, new PathFunction<RDFTerm>("textAnnotation", parseSelectorFor(
            String.format("^%s[%s is %s]", ENHANCER_EXTRACTED_FROM, RDF_TYPE, ENHANCER_TEXTANNOTATION),
            "all TextAnnotations of a contentItem!")));
        //EntityAnnotations
        addFunction(config, new PathFunction<RDFTerm>("entityAnnotation", parseSelectorFor(
            String.format("^%s[%s is %s]", ENHANCER_EXTRACTED_FROM, RDF_TYPE, ENHANCER_ENTITYANNOTATION),
            "all EntityAnnotations of a contentItem!")));
        //TopicAnnotations
        addFunction(config, new PathFunction<RDFTerm>("topicAnnotation", parseSelectorFor(
            String.format("^%s[%s is %s]", ENHANCER_EXTRACTED_FROM, RDF_TYPE, ENHANCER_TOPICANNOTATION),
            "all TopicAnnotations of a contentItem!")));
        //Enhancements
        addFunction(config, new PathFunction<RDFTerm>("enhancement", parseSelectorFor(
            String.format("^%s[%s is %s]", ENHANCER_EXTRACTED_FROM, RDF_TYPE, ENHANCER_ENHANCEMENT),
            "all Enhancements of a contentItem!")));
        //Suggested EntityAnnotations for Text/TopicAnnotations
        //(1) to select the suggestions
        NodeSelector<RDFTerm> linkedEntityAnnotations = parseSelectorFor(
            String.format("^%s[%s is %s]", DC_RELATION, RDF_TYPE, ENHANCER_ENTITYANNOTATION),
            "all entity suggestions for an Enhancement!");
        //(2) to select the confidence value of Enhancements
        NodeSelector<RDFTerm> confidenceSelector = parseSelectorFor(
            ENHANCER_CONFIDENCE.toString(), "the confidence of suggestions!");
        //The resultSelector is NULL because this directly returns the EntityAnnotations
        addFunction(config, new SuggestionFunction("suggestion", linkedEntityAnnotations, confidenceSelector, null));
        //Suggested Entities for Text/TopicAnnotations
        //The suggestion and confidence selectors can be the same as above,
        //but we need an additional result selector
        NodeSelector<RDFTerm> entityReferenceSelector = parseSelectorFor(
            ENHANCER_ENTITY_REFERENCE.toString(), "the entity referenced by a EntityAnnotation!");
        addFunction(config, new SuggestionFunction("suggestedEntity", linkedEntityAnnotations, confidenceSelector, entityReferenceSelector));
        //publish the configuration only after it is completely initialised
        CONFIG = config;
    }
    return CONFIG;
}

/**
 * Parses the LDPath selector for the parsed path and converts parse errors
 * to {@link IllegalStateException}s.
 * @param path the LDPath selector to parse
 * @param selects describes what the selector is used to select. Only used
 * as the final part of the message of the exception
 * @return the parsed selector
 * @throws IllegalStateException if the path can not be parsed
 */
private static NodeSelector<RDFTerm> parseSelectorFor(String path, String selects) {
    try {
        return Utils.parseSelector(path);
    } catch (ParseException e) {
        throw new IllegalStateException("Unable to parse the ld-path selector '" + path + "'used to select " + selects, e);
    }
}
Also used : SuggestionFunction(org.apache.stanbol.enhancer.ldpath.function.SuggestionFunction) ContentFunction(org.apache.stanbol.enhancer.ldpath.function.ContentFunction) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ParseException(org.apache.marmotta.ldpath.parser.ParseException) NamespaceEnum(org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum)

Example 100 with RDFTerm

use of org.apache.clerezza.commons.rdf.RDFTerm in project stanbol by apache.

the class ContentFunction method apply.

/**
 * Returns the content of a Blob of the ContentItem of the parsed backend.
 * <p>
 * If no arguments are parsed (or none of them is a valid media type) the
 * Blob returned by <code>ContentItem.getBlob()</code> is used. Otherwise
 * the values of all arguments are interpreted as media types and the Blob
 * is looked up by <code>ContentItemHelper.getBlob(..)</code>. Invalid media
 * types are logged and ignored.
 * <p>
 * If the selected Blob defines a "charset" parameter its content is
 * returned as a single literal created from the decoded text. Blobs without
 * a charset are treated as binary content and returned as a single typed
 * literal created from the bytes. The stream of the Blob is closed after
 * reading.
 *
 * @param backend the backend providing the ContentItem
 * @param context the context node (not used by this function)
 * @param args the (optional) media types of the Blob to return
 * @return a collection with the single literal holding the content or an
 * empty collection if no Blob was found
 * @throws IllegalStateException if reading the content of the Blob fails
 */
@Override
public Collection<RDFTerm> apply(ContentItemBackend backend, RDFTerm context, Collection<RDFTerm>... args) throws IllegalArgumentException {
    ContentItem ci = backend.getContentItem();
    Set<String> mimeTypes;
    if (args == null || args.length < 1) {
        mimeTypes = null;
    } else {
        mimeTypes = new HashSet<String>();
        for (Iterator<RDFTerm> params = Collections.concat(args).iterator(); params.hasNext(); ) {
            RDFTerm param = params.next();
            String mediaTypeString = backend.stringValue(param);
            try {
                //the value stored under the null key is added as media type
                mimeTypes.add(parseMimeType(mediaTypeString).get(null));
            } catch (IllegalArgumentException e) {
                //best effort: ignore invalid media types
                log.warn(String.format("Invalid mediaType '%s' (based on RFC 2046) parsed!", mediaTypeString), e);
            }
        }
    }
    Collection<RDFTerm> result;
    Blob blob;
    if (mimeTypes == null || mimeTypes.isEmpty()) {
        blob = ci.getBlob();
    } else {
        Entry<IRI, Blob> entry = ContentItemHelper.getBlob(ci, mimeTypes);
        blob = entry != null ? entry.getValue() : null;
    }
    if (blob == null) {
        result = java.util.Collections.emptySet();
    } else {
        String charset = blob.getParameter().get("charset");
        java.io.InputStream in = null;
        try {
            in = blob.getStream();
            if (charset != null) {
                //textual content
                result = java.util.Collections.singleton(backend.createLiteral(IOUtils.toString(in, charset)));
            } else {
                //binary content
                byte[] data = IOUtils.toByteArray(in);
                result = java.util.Collections.singleton((RDFTerm) lf.createTypedLiteral(data));
            }
        } catch (IOException e) {
            throw new IllegalStateException("Unable to read contents from Blob '" + blob.getMimeType() + "' of ContentItem " + ci.getUri(), e);
        } finally {
            //neither IOUtils.toString(..) nor IOUtils.toByteArray(..) close
            //the parsed stream, so release it here (null-safe)
            IOUtils.closeQuietly(in);
        }
    }
    return result;
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) Blob(org.apache.stanbol.enhancer.servicesapi.Blob) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) IOException(java.io.IOException) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem)

Aggregations

RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm)126 IRI (org.apache.clerezza.commons.rdf.IRI)84 Triple (org.apache.clerezza.commons.rdf.Triple)70 BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI)48 Literal (org.apache.clerezza.commons.rdf.Literal)35 Test (org.junit.Test)35 HashSet (java.util.HashSet)30 HashMap (java.util.HashMap)28 TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)26 Graph (org.apache.clerezza.commons.rdf.Graph)24 ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem)18 ArrayList (java.util.ArrayList)17 PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl)16 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)13 OWLOntologyID (org.semanticweb.owlapi.model.OWLOntologyID)13 SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph)12 Collection (java.util.Collection)10 IndexedGraph (org.apache.stanbol.commons.indexedgraph.IndexedGraph)10 Lock (java.util.concurrent.locks.Lock)9 IOException (java.io.IOException)5