Search in sources :

Example 11 with ContentItem

use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.

the class TikaEngineTest method testText.

/**
 * Checks that a {@link ContentItem} created from a plain-text
 * {@link StringSource} exposes exactly one {@link Blob} content part.
 * This method only builds the content item and inspects its parts; it
 * does not invoke the engine.
 */
@Test
public void testText() throws EngineException, IOException {
    log.info(">>> testText <<<");
    final String plainText = "The Stanbol enhancer can detect famous cities such as "
            + "Paris and people such as Bob Marley.";
    final ContentItem item = ciFactory.createContentItem(new StringSource(plainText));
    // a freshly created text item is expected to carry a single Blob part
    final int blobCount = ContentItemHelper.getContentParts(item, Blob.class).size();
    Assert.assertEquals(1, blobCount);
}
Also used : StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 12 with ContentItem

use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.

the class TikaEngineTest method testGEOMetadata.

/**
 * Runs the engine on {@code testJPEG_GEO.jpg} (submitted with the
 * {@code OCTET_STREAM} media type) and verifies the extracted location:
 * first through the {@code NamespaceEnum.media} properties hanging off a
 * single {@code hasLocation} node, then through the
 * {@code NamespaceEnum.geo} {@code lat}/{@code long} properties.
 */
@Test
public void testGEOMetadata() throws EngineException, IOException, ParseException {
    log.info(">>> testGEOMetadata <<<");
    // both vocabularies are expected to report the same coordinates
    final String expectedLatitude = "12.54321";
    final String expectedLongitude = "-54.1234";

    // properties of the media namespace; altitude (locationAltitude) is not verified here
    final IRI mediaHasLocation = new IRI(NamespaceEnum.media + "hasLocation");
    final IRI mediaLatitude = new IRI(NamespaceEnum.media + "locationLatitude");
    final IRI mediaLongitude = new IRI(NamespaceEnum.media + "locationLongitude");

    final ContentItem item = createContentItem("testJPEG_GEO.jpg", OCTET_STREAM.toString());
    assertTrue(engine.canEnhance(item) != CANNOT_ENHANCE);
    engine.computeEnhancements(item);

    // exactly one hasLocation value must exist and it must be a node, not a literal
    final Iterator<Triple> locationTriples = item.getMetadata().filter(item.getUri(), mediaHasLocation, null);
    assertTrue(locationTriples.hasNext());
    final RDFTerm locationTerm = locationTriples.next().getObject();
    assertFalse(locationTriples.hasNext());
    assertTrue(locationTerm instanceof BlankNodeOrIRI);

    final BlankNodeOrIRI locationNode = verifyBlankNodeOrIRI(item, mediaHasLocation);
    verifyValue(item, locationNode, mediaLatitude, XSD.double_, expectedLatitude);
    verifyValue(item, locationNode, mediaLongitude, XSD.double_, expectedLongitude);

    // second pass: the geo namespace properties
    final IRI geoLat = new IRI(NamespaceEnum.geo + "lat");
    final IRI geoLong = new IRI(NamespaceEnum.geo + "long");
    verifyValue(item, geoLat, XSD.double_, expectedLatitude);
    verifyValue(item, geoLong, XSD.double_, expectedLongitude);
}
Also used : Triple(org.apache.clerezza.commons.rdf.Triple) IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) Test(org.junit.Test)

Example 13 with ContentItem

use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.

the class TikaEngineTest method testMetadata.

/**
 * Runs the engine on {@code testMP3id3v24.mp3} (media type
 * {@code audio/mpeg}) and verifies the values written for a set of
 * {@code NamespaceEnum.dc} and {@code NamespaceEnum.media} properties.
 * <p>
 * NOTE(review): unlike the other test methods shown for this class, this
 * method has no {@code @Test} annotation, so a JUnit 4 runner would
 * presumably not execute it - confirm whether that is intentional.
 */
public void testMetadata() throws EngineException, ParseException, IOException {
    log.info(">>> testMetadata <<<");
    // expected values shared by several of the checks below
    final String mimeType = "audio/mpeg";
    final String artist = "Test Artist";
    final String album = "Test Album";

    final ContentItem item = createContentItem("testMP3id3v24.mp3", mimeType);
    assertTrue(engine.canEnhance(item) != CANNOT_ENHANCE);
    engine.computeEnhancements(item);

    // dc properties
    verifyValue(item, new IRI(NamespaceEnum.dc + "creator"), null, artist);
    verifyValue(item, new IRI(NamespaceEnum.dc + "title"), null, album);
    verifyValue(item, new IRI(NamespaceEnum.dc + "format"), null, mimeType);

    // media properties
    verifyValue(item, new IRI(NamespaceEnum.media + "hasFormat"), null, mimeType);
    verifyValue(item, new IRI(NamespaceEnum.media + "mainOriginalTitle"), null, album);
    verifyValue(item, new IRI(NamespaceEnum.media + "hasContributor"), null, artist);
    verifyValue(item, new IRI(NamespaceEnum.media + "releaseDate"), XSD.string, "2008");
    verifyValue(item, new IRI(NamespaceEnum.media + "hasGenre"), null, "Rock");
    verifyValue(item, new IRI(NamespaceEnum.media + "hasCreator"), null, artist);
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) BlankNodeOrIRI(org.apache.clerezza.commons.rdf.BlankNodeOrIRI) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem)

Example 14 with ContentItem

use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.

the class CeliAnalyzedTextSentimentAnalysisEngineTest method testEngine.

/**
 * Builds a content item from the {@code text} fixture, tags it as Italian
 * ({@code "it"}), runs the engine, and then counts the tokens of the
 * {@link AnalysedText} that carry at least one
 * {@code NlpAnnotations.SENTIMENT_ANNOTATION}; exactly two are expected.
 * <p>
 * If the engine throws an {@link EngineException}, it is passed to
 * {@code RemoteServiceHelper.checkServiceUnavailable}; when that call
 * returns normally the test ends without validating anything.
 */
@Test
public void testEngine() throws IOException, EngineException {
    final ContentItem item = ciFactory.createContentItem(new StringSource(text));
    Assert.assertNotNull(item);
    final AnalysedText analysedText = atFactory.createAnalysedText(item, item.getBlob());
    Assert.assertNotNull(analysedText);

    // statically declare the language of the test content
    item.getMetadata().add(new TripleImpl(item.getUri(), DC_LANGUAGE, new PlainLiteralImpl("it")));
    Assert.assertEquals("it", EnhancementEngineHelper.getLanguage(item));
    Assert.assertEquals("Can not enhance Test ContentItem", EnhancementEngine.ENHANCE_ASYNC, engine.canEnhance(item));

    try {
        engine.computeEnhancements(item);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
        // the helper returned normally: skip the validation below
        return;
    }

    // count the tokens that received at least one sentiment annotation
    int annotatedTokens = 0;
    final Iterator<Token> tokenIterator = analysedText.getTokens();
    while (tokenIterator.hasNext()) {
        final Token current = tokenIterator.next();
        log.info("Token: {}", current);
        final List<Value<Double>> sentiments = current.getAnnotations(NlpAnnotations.SENTIMENT_ANNOTATION);
        final boolean hasSentiment = sentiments != null && !sentiments.isEmpty();
        if (hasSentiment) {
            annotatedTokens++;
        }
    }
    Assert.assertTrue("2 sentiment expressions should be recognized in: " + text, annotatedTokens == 2);
}
Also used : AnalysedText(org.apache.stanbol.enhancer.nlp.model.AnalysedText) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) Value(org.apache.stanbol.enhancer.nlp.model.annotation.Value) Token(org.apache.stanbol.enhancer.nlp.model.Token) StringSource(org.apache.stanbol.enhancer.servicesapi.impl.StringSource) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem) CeliAnalyzedTextLemmatizerEngineTest(org.apache.stanbol.enhancer.engines.celi.lemmatizer.impl.CeliAnalyzedTextLemmatizerEngineTest) Test(org.junit.Test)

Example 15 with ContentItem

use of org.apache.stanbol.enhancer.servicesapi.ContentItem in project stanbol by apache.

the class CeliSentimentAnalysisEngineTest method testInput.

/**
 * Wraps {@code txt} as a content item tagged with language {@code lang},
 * runs the sentiment analysis engine on it and validates the result:
 * exactly two text annotations and no entity annotations are expected.
 * <p>
 * An {@link EngineException} raised anywhere in that sequence is passed to
 * {@code RemoteServiceHelper.checkServiceUnavailable} rather than being
 * rethrown directly from here.
 *
 * @param txt  the text to analyse
 * @param lang the language code stored as the item's {@code DC_LANGUAGE} value
 */
private void testInput(String txt, String lang) throws EngineException, IOException {
    final ContentItem item = wrapAsContentItem(txt);
    try {
        // statically define the language of the test content
        item.getMetadata().add(new TripleImpl(item.getUri(), DC_LANGUAGE, new PlainLiteralImpl(lang)));
        sentimentAnalysisEngine.computeEnhancements(item);
        TestUtils.logEnhancements(item);

        // property values every produced annotation is validated against
        final HashMap<IRI, RDFTerm> expected = new HashMap<IRI, RDFTerm>();
        expected.put(Properties.ENHANCER_EXTRACTED_FROM, item.getUri());
        final String engineClassName = sentimentAnalysisEngine.getClass().getName();
        expected.put(Properties.DC_CREATOR, LiteralFactory.getInstance().createTypedLiteral(engineClassName));
        expected.put(DC_TYPE, CeliConstants.SENTIMENT_EXPRESSION);

        final int textAnnotationCount = validateAllTextAnnotations(item.getMetadata(), txt, expected);
        log.info(textAnnotationCount + " TextAnnotations found ...");
        assertTrue("2 sentiment expressions should be recognized in: " + txt, textAnnotationCount == 2);

        final int entityAnnotationCount = EnhancementStructureHelper.validateAllEntityAnnotations(item.getMetadata(), expected);
        assertTrue("0 entity annotations should be recognized in: " + txt, entityAnnotationCount == 0);
    } catch (EngineException e) {
        RemoteServiceHelper.checkServiceUnavailable(e);
    }
}
Also used : IRI(org.apache.clerezza.commons.rdf.IRI) PlainLiteralImpl(org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl) HashMap(java.util.HashMap) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) RDFTerm(org.apache.clerezza.commons.rdf.RDFTerm) TripleImpl(org.apache.clerezza.commons.rdf.impl.utils.TripleImpl) ContentItem(org.apache.stanbol.enhancer.servicesapi.ContentItem)

Aggregations

ContentItem (org.apache.stanbol.enhancer.servicesapi.ContentItem): 73, Test (org.junit.Test): 62, IRI (org.apache.clerezza.commons.rdf.IRI): 46, BlankNodeOrIRI (org.apache.clerezza.commons.rdf.BlankNodeOrIRI): 18, RDFTerm (org.apache.clerezza.commons.rdf.RDFTerm): 18, HashMap (java.util.HashMap): 15, TripleImpl (org.apache.clerezza.commons.rdf.impl.utils.TripleImpl): 15, Blob (org.apache.stanbol.enhancer.servicesapi.Blob): 15, StringSource (org.apache.stanbol.enhancer.servicesapi.impl.StringSource): 13, EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException): 12, PlainLiteralImpl (org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl): 11, Graph (org.apache.clerezza.commons.rdf.Graph): 8, Date (java.util.Date): 6, SimpleGraph (org.apache.clerezza.commons.rdf.impl.utils.simple.SimpleGraph): 6, Hashtable (java.util.Hashtable): 5, AnalysedText (org.apache.stanbol.enhancer.nlp.model.AnalysedText): 4, IOException (java.io.IOException): 3, InputStream (java.io.InputStream): 3, MediaType (javax.ws.rs.core.MediaType): 3, Triple (org.apache.clerezza.commons.rdf.Triple): 3