Search in sources :

Example 51 with PlainLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.

the class MultipartRequestTest method testUploadWithMetadata.

/**
 * Stanbol also supports uploading pre-existing metadata together with the
 * content. This unit test sends fise:TextAnnotations for free text tags
 * provided by a user and expects that those tags are then linked to
 * entities in DBpedia.
 * @throws IOException declared for the I/O performed by this test
 */
@Test
public void testUploadWithMetadata() throws IOException {
    //the URI shared by the uploaded metadata and the created ContentItem
    final IRI contentItemId = new IRI("http://www.example.com/test.html");
    //build the metadata: one TextAnnotation for every user provided tag
    RDFTerm tagCreator = new PlainLiteralImpl("Rupert Westenthaler");
    Graph tagGraph = new SimpleGraph();
    addTagAsTextAnnotation(tagGraph, contentItemId, "Germany", DBPEDIA_PLACE, tagCreator);
    addTagAsTextAnnotation(tagGraph, contentItemId, "Europe", DBPEDIA_PLACE, tagCreator);
    addTagAsTextAnnotation(tagGraph, contentItemId, "NATO", DBPEDIA_ORGANISATION, tagCreator);
    addTagAsTextAnnotation(tagGraph, contentItemId, "Silvio Berlusconi", DBPEDIA_PERSON, tagCreator);
    //serialize the metadata as RDF/XML
    String rdfMimeType = SupportedFormat.RDF_XML;
    ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    serializer.serialize(buffer, tagGraph, rdfMimeType);
    String serializedMetadata = new String(buffer.toByteArray(), UTF8);
    /*
     * NOTE: getFilename is overridden because the file name of this part
     *       MUST BE the URI of the ContentItem. The metadata contain
     *       triples about that ContentItem, so the URI of the ContentItem
     *       created by the Stanbol Enhancer has to be the same as the URI
     *       used within the metadata.
     */
    StringBody metadataPart = new StringBody(serializedMetadata, ContentType.create(rdfMimeType).withCharset(UTF8)) {

        @Override
        public String getFilename() {
            //uri of the ContentItem
            return contentItemId.getUnicodeString();
        }
    };
    //assemble the multipart request: the metadata first, the content second
    MultipartEntityBuilder multipart = MultipartEntityBuilder.create();
    multipart.addPart("metadata", metadataPart);
    multipart.addTextBody("content", HTML_CONTENT, ContentType.TEXT_HTML.withCharset(UTF8));
    //send the request and validate the response
    String responseBody = executor.execute(
            builder.buildPostRequest(getEndpoint())
                .withHeader("Accept", "text/rdf+nt")
                .withEntity(multipart.build()))
        .assertStatus(200)
        .assertContentRegexp(
            //the expected enhancements based on the parsed content
            "http://purl.org/dc/terms/creator.*LanguageDetectionEnhancementEngine",
            "http://purl.org/dc/terms/language.*en",
            "http://fise.iks-project.eu/ontology/entity-label.*Paris",
            "http://purl.org/dc/terms/creator.*org.apache.stanbol.enhancer.engines.opennlp.*NamedEntityExtractionEnhancementEngine",
            "http://fise.iks-project.eu/ontology/entity-label.*Bob Marley",
            //additional enhancements based on the parsed metadata
            "http://fise.iks-project.eu/ontology/entity-reference.*http://dbpedia.org/resource/Germany.*",
            "http://fise.iks-project.eu/ontology/entity-reference.*http://dbpedia.org/resource/NATO.*",
            "http://fise.iks-project.eu/ontology/entity-reference.*http://dbpedia.org/resource/Silvio_Berlusconi.*",
            "http://fise.iks-project.eu/ontology/entity-reference.*http://dbpedia.org/resource/Europe.*")
        .getContent();
    log.debug("Content:\n{}\n", responseBody);
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) Graph(org.apache.clerezza.commons.rdf.Graph) MultipartEntityBuilder(org.apache.http.entity.mime.MultipartEntityBuilder) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) StringBody(org.apache.http.entity.mime.content.StringBody) SimpleGraph(org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Test(org.junit.Test)

Example 52 with PlainLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.

the class EntityLinkingEngineTest method testEngine.

/**
 * Tests whether the enhancements created by the engine conform to the
 * rules defined for the Stanbol Enhancement Structure.
 * @throws IOException
 * @throws EngineException
 */
@Test
public void testEngine() throws IOException, EngineException {
    //configure the linker; a minimum of two found tokens is assumed by this test
    EntityLinkerConfig config = new EntityLinkerConfig();
    config.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
    config.setMinFoundTokens(2);
    TextProcessingConfig textConfig = new TextProcessingConfig();
    EntityLinkingEngine linkingEngine = new EntityLinkingEngine("dummy", searcher, textConfig, config, labelTokenizer);
    //prepare the ContentItem
    ContentItem contentItem = ciFactory.createContentItem(new StringSource(TEST_TEXT));
    //the language triple tells the engine that this is an English text
    contentItem.getMetadata().add(new TripleImpl(contentItem.getUri(), DC_LANGUAGE, new PlainLiteralImpl("en")));
    //the AnalysedText instance used for this test
    contentItem.addPart(AnalysedText.ANALYSED_TEXT_URI, TEST_ANALYSED_TEXT);
    //run the engine
    linkingEngine.computeEnhancements(contentItem);
    //collect the values every enhancement is expected to have
    String engineClassName = linkingEngine.getClass().getName();
    Map<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
    expected.put(ENHANCER_EXTRACTED_FROM, contentItem.getUri());
    expected.put(DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(engineClassName));
    //a null value marks the confidence as a required property
    expected.put(Properties.ENHANCER_CONFIDENCE, null);
    //validate the created fise:TextAnnotations
    int textAnnotationCount = validateAllTextAnnotations(contentItem.getMetadata(), TEST_TEXT, expected);
    assertEquals("Four fise:TextAnnotations are expected by this Test", 4, textAnnotationCount);
    //validate the created fise:EntityAnnotations
    int entityAnnotationCount = validateAllEntityAnnotations(contentItem, expected);
    assertEquals("Five fise:EntityAnnotations are expected by this Test", 5, entityAnnotationCount);
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) EntityLinkerConfig(org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig) TextProcessingConfig(org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 53 with PlainLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.

the class EnhancementEngineHelper method setOccurrence.

/**
 * Sets the fise:start, fise:end, fise:selection-prefix and
 * fise:selection-suffix properties for the parsed fise:TextAnnotation and,
 * depending on the length of the selection, either fise:selected-text or
 * the pair fise:selection-head / fise:selection-tail.<p>
 * While intended for TextAnnotations, this method can also be used to add
 * the mentioned properties to {@link IRI}s with a different type.<p>
 * <b>NOTE</b> on <code>allowSelectionHeadTail</code>: This parameter allows
 * to deactivate the usage of fise:selection-head and fise:selection-tail.
 * Typically users should parse <code>false</code> for 'named entities'
 * and <code>true</code> if sections of the text (e.g. phrases, sentences,
 * chapters ...) are selected.
 * @param metadata the RDF graph the triples are added to
 * @param textAnnotation the IRI of the fise:TextAnnotation
 * @param content the plain text content as String
 * @param start the start index of the occurrence. Must not be
 * <code>null</code> as the value is unboxed
 * @param end the end index of the occurrence. Must not be
 * <code>null</code> as the value is unboxed
 * @param lang the language of the content or <code>null</code> if not known
 * @param prefixSuffixSize the size of the prefix and suffix. If the parsed
 * value is lower than <code>MIN_PREFIX_SUFFIX_SIZE</code> then
 * <code>DEFAULT_PREFIX_SUFFIX_LENGTH</code> is used instead.
 * @param allowSelectionHeadTail if <code>true</code> the fise:selection-head
 * and fise:selection-tail properties (each of the effective prefix/suffix
 * size) are used instead of fise:selected-text if the selected text is longer
 * than <code>Math.max(MIN_SELECTEN_HEAD_TAIL_USAGE_LENGTH, prefixSuffixSize*5)</code>.
 * If <code>false</code> the fise:selected-text is added regardless of the
 * size of the selected area.
 * @since 0.11.0
 */
public static void setOccurrence(Graph metadata, IRI textAnnotation, String content, Integer start, Integer end, Language lang, int prefixSuffixSize, boolean allowSelectionHeadTail) {
    //fise:start and fise:end
    metadata.add(new TripleImpl(textAnnotation, ENHANCER_START, lf.createTypedLiteral(start)));
    metadata.add(new TripleImpl(textAnnotation, ENHANCER_END, lf.createTypedLiteral(end)));
    //the effective size of prefix/suffix (and of head/tail): fall back to
    //the default if the parsed size is too small
    final int contextSize;
    if (prefixSuffixSize < MIN_PREFIX_SUFFIX_SIZE) {
        contextSize = DEFAULT_PREFIX_SUFFIX_LENGTH;
    } else {
        contextSize = prefixSuffixSize;
    }
    //fise:selection-prefix: the text directly before the occurrence
    //(clipped at the begin of the content)
    int prefixStart = Math.max(0, start - contextSize);
    String prefix = content.substring(prefixStart, start);
    metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_PREFIX, new PlainLiteralImpl(prefix, lang)));
    //fise:selection-suffix: the text directly after the occurrence
    //(clipped at the end of the content)
    int suffixEnd = Math.min(content.length(), end + contextSize);
    String suffix = content.substring(end, suffixEnd);
    metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_SUFFIX, new PlainLiteralImpl(suffix, lang)));
    //decide between fise:selected-text and head/tail
    int headTailThreshold = Math.max(MIN_SELECTEN_HEAD_TAIL_USAGE_LENGTH, contextSize * 5);
    boolean useHeadTail = allowSelectionHeadTail && end - start > headTailThreshold;
    if (useHeadTail) {
        //selected area too long for fise:selected-text:
        //use fise:selection-head and fise:selection-tail instead
        String head = content.substring(start, start + contextSize);
        metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_HEAD, new PlainLiteralImpl(head, lang)));
        String tail = content.substring(end - contextSize, end);
        metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTION_TAIL, new PlainLiteralImpl(tail, lang)));
    } else {
        String selectedText = content.substring(start, end);
        metadata.add(new TripleImpl(textAnnotation, ENHANCER_SELECTED_TEXT, new PlainLiteralImpl(selectedText, lang)));
    }
}
Also used : PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 54 with PlainLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.

the class ExecutionPlanHelper method writeEnhancementProperty.

/**
 * Writes the enhancement property value(s) for the parsed node and property
 * to the execution plan graph. {@link String} values are written as plain
 * literals, all other values as typed literals. If no typed literal
 * converter is available for a value, a warning is logged and its
 * <code>toString()</code> value is written as plain literal instead.
 * @param ep the RDF graph holding the execution plan
 * @param epNode the execution node
 * @param property the property
 * @param value the value(s). {@link Collection} and <code>Object[]</code> are
 * supported for multiple values. <code>null</code> values (and
 * <code>null</code> elements) are skipped without writing a triple.
 * @throws NullPointerException if <code>ep</code> is <code>null</code> and
 * at least one non-null value needs to be written
 */
@SuppressWarnings("unchecked")
private static void writeEnhancementProperty(Graph ep, BlankNodeOrIRI epNode, IRI property, Object value) {
    //normalise the parsed value to a collection of values
    final Collection<Object> candidates;
    if (value instanceof Object[]) {
        candidates = Arrays.asList((Object[]) value);
    } else if (value instanceof Collection<?>) {
        candidates = (Collection<Object>) value;
    } else {
        candidates = Collections.singleton(value);
    }
    for (Object candidate : candidates) {
        if (candidate == null) {
            //nothing to write for null values
            continue;
        }
        Literal literal;
        if (candidate instanceof String) {
            literal = new PlainLiteralImpl((String) candidate);
        } else {
            try {
                literal = lf.createTypedLiteral(candidate);
            } catch (NoConvertorException e) {
                //fall back to the String representation of the value
                log.warn("Use toString() value '{}' for EnhancementProperty "
                        + "'{}' as no TypedLiteral converter is registered for "
                        + "class {}", new Object[] { candidate, property, candidate.getClass().getName() });
                literal = new PlainLiteralImpl(candidate.toString());
            }
        }
        ep.add(new TripleImpl(epNode, property, literal));
    }
}
Also used : PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) Literal(org.apache.clerezza.commons.rdf.Literal) NoConvertorException(org.apache.clerezza.rdf.core.NoConvertorException) Collection(java.util.Collection) EnhancementEngineHelper.getString(org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper.getString) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Example 55 with PlainLiteralImpl

use of org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl in project stanbol by apache.

the class ExecutionPlanHelper method writeExecutionNode.

/**
 * Writes all triples for an ep:ExecutionNode to the parsed {@link Graph}.
 * A new {@link BlankNode} is used to represent the execution node resource.
 * @param graph the graph to write the triples to. MUST NOT be <code>null</code>
 * @param epNode the BlankNodeOrIRI representing the ep:ExecutionPlan.
 * MUST NOT be <code>null</code>
 * @param engineName the name of the engine. MUST NOT be <code>null</code> nor empty
 * @param optional if the execution of this node is optional or required
 * @param dependsOn other nodes that MUST BE executed before this one. Parse
 * <code>null</code> or an empty set if none. <code>null</code> elements
 * are ignored.
 * @param enhProps the EnhancementProperties for this ExecutionNode or
 * <code>null</code> if none
 * @return the resource representing the added ep:ExecutionNode.
 * @throws IllegalArgumentException if the parsed graph or epNode is
 * <code>null</code> or if the engine name is <code>null</code> or empty
 * @since 0.12.1
 */
public static BlankNodeOrIRI writeExecutionNode(Graph graph, BlankNodeOrIRI epNode, String engineName, boolean optional, Set<BlankNodeOrIRI> dependsOn, Map<String, Object> enhProps) {
    //validate the parsed parameters
    if (graph == null) {
        throw new IllegalArgumentException("The parsed Graph MUST NOT be NULL!");
    }
    if (engineName == null || engineName.isEmpty()) {
        throw new IllegalArgumentException("The parsed Engine name MUST NOT be NULL nor empty!");
    }
    if (epNode == null) {
        throw new IllegalArgumentException("The ep:ExecutionPlan instance MUST NOT be NULL!");
    }
    //create the execution node and link it with the execution plan
    BlankNodeOrIRI executionNode = new BlankNode();
    graph.add(new TripleImpl(epNode, HAS_EXECUTION_NODE, executionNode));
    graph.add(new TripleImpl(executionNode, RDF_TYPE, EXECUTION_NODE));
    PlainLiteralImpl engineLiteral = new PlainLiteralImpl(engineName);
    graph.add(new TripleImpl(executionNode, ENGINE, engineLiteral));
    //the nodes that need to be executed before this one
    if (dependsOn != null) {
        for (BlankNodeOrIRI dependency : dependsOn) {
            if (dependency == null) {
                continue;
            }
            graph.add(new TripleImpl(executionNode, DEPENDS_ON, dependency));
        }
    }
    graph.add(new TripleImpl(executionNode, OPTIONAL, lf.createTypedLiteral(optional)));
    //finally the enhancement properties of the node
    writeEnhancementProperties(graph, executionNode, engineName, enhProps);
    return executionNode;
}
Also used : PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) BlankNode(org.apache.clerezza.commons.rdf.BlankNode) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl)

Aggregations

PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl): 82, TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl): 69, IRI (org.apache.clerezza.commons.rdf.IRI): 58, Graph (org.apache.clerezza.commons.rdf.Graph): 34, EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException): 20, Language (org.apache.clerezza.commons.rdf.Language): 19, BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI): 18, Literal (org.apache.clerezza.commons.rdf.Literal): 16, RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm): 16, IOException (java.io.IOException): 14, HashMap (java.util.HashMap): 13, Triple (org.apache.clerezza.commons.rdf.Triple): 12, StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource): 12, ArrayList (java.util.ArrayList): 11, Blob (org.apache.stanbol.enhancer.servicesapi.Blob): 11, LiteralFactory (org.apache.clerezza.rdf.core.LiteralFactory): 10, ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem): 10, Test (org.junit.Test): 10, HashSet (java.util.HashSet): 8, SOAPException (javax.xml.soap.SOAPException): 6