Use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
The class TikaEngineTest, method testMp4.
/**
* Tests mappings for the Mp4 metadata extraction capabilities added to
* Tika 1.1 (STANBOL-627)
* @throws EngineException
* @throws IOException
* @throws ParseException
*/
@Test
public void testMp4() throws EngineException, IOException, ParseException {
    log.info(">>> testMp4 <<<");
    ContentItem ci = createContentItem("testMP4.m4a", "audio/mp4");
    assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
    engine.computeEnhancements(ci);
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, singleton("text/plain"));
    assertNotNull(contentPart);
    Blob plainTextBlob = contentPart.getValue();
    assertNotNull(plainTextBlob);
    assertContentRegexp(plainTextBlob, "Test Title", "Test Artist", "Test Album");
    // validate XHTML results
    contentPart = ContentItemHelper.getBlob(ci, singleton("application/xhtml+xml"));
    assertNotNull(contentPart);
    Blob xhtmlBlob = contentPart.getValue();
    assertNotNull(xhtmlBlob);
    // test AudioTrack metadata
    BlankNodeOrIRI audioTrack = verifyBlankNodeOrIRI(ci, new IRI(NamespaceEnum.media + "hasTrack"));
    // types
    verifyValues(ci, audioTrack, RDF.type,
        new IRI(NamespaceEnum.media + "MediaFragment"),
        new IRI(NamespaceEnum.media + "Track"),
        new IRI(NamespaceEnum.media + "AudioTrack"));
    // properties
    verifyValue(ci, audioTrack, new IRI(NamespaceEnum.media + "hasFormat"), XSD.string, "Stereo");
    verifyValue(ci, audioTrack, new IRI(NamespaceEnum.media + "samplingRate"), XSD.int_, "44100");
    verifyValue(ci, audioTrack, new IRI(NamespaceEnum.media + "hasCompression"), XSD.string, "M4A");
}
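The verifyBlankNodeOrIRI and verifyValue helpers used above are not part of this excerpt. The following is a minimal sketch of what such helpers could look like against the Clerezza commons-rdf API; the names are taken from the calls above, but the signatures and bodies are assumptions, not copied from the Stanbol sources:

// Sketch only: assumed implementations of the test helpers used in the snippets above.
private BlankNodeOrIRI verifyBlankNodeOrIRI(ContentItem ci, IRI property) {
    // expect at least one value of the property on the ContentItem itself,
    // and require it to be a BlankNodeOrIRI (not a Literal)
    Iterator<Triple> it = ci.getMetadata().filter(ci.getUri(), property, null);
    assertTrue("missing value for " + property, it.hasNext());
    RDFTerm value = it.next().getObject();
    assertTrue(value instanceof BlankNodeOrIRI);
    return (BlankNodeOrIRI) value;
}

private RDFTerm verifyValue(ContentItem ci, BlankNodeOrIRI subject, IRI property,
        IRI expectedDataType, String expectedLexicalForm) {
    // expect a Literal value with the given datatype and lexical form
    Iterator<Triple> it = ci.getMetadata().filter(subject, property, null);
    assertTrue("missing value for " + property, it.hasNext());
    RDFTerm value = it.next().getObject();
    assertTrue(value instanceof Literal);
    Literal literal = (Literal) value;
    assertEquals(expectedLexicalForm, literal.getLexicalForm());
    if (expectedDataType != null) {
        assertEquals(expectedDataType, literal.getDataType());
    }
    return value;
}

Calls such as verifyValue(ci, property, dataType, value) that pass no explicit subject would then simply delegate to the variant above with ci.getUri() as the subject.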
Use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
The class TikaEngineTest, method testGEOMetadata.
@Test
public void testGEOMetadata() throws EngineException, IOException, ParseException {
    log.info(">>> testGEOMetadata <<<");
    // first validate the Media RDFTerm Ontology
    IRI hasLocation = new IRI(NamespaceEnum.media + "hasLocation");
    IRI locationLatitude = new IRI(NamespaceEnum.media + "locationLatitude");
    IRI locationLongitude = new IRI(NamespaceEnum.media + "locationLongitude");
    // IRI locationAltitude = new IRI(NamespaceEnum.media + "locationAltitude");
    // "video/x-ms-asf");
    ContentItem ci = createContentItem("testJPEG_GEO.jpg", OCTET_STREAM.toString());
    assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
    engine.computeEnhancements(ci);
    Iterator<Triple> it = ci.getMetadata().filter(ci.getUri(), hasLocation, null);
    assertTrue(it.hasNext());
    RDFTerm r = it.next().getObject();
    assertFalse(it.hasNext());
    assertTrue(r instanceof BlankNodeOrIRI);
    BlankNodeOrIRI location = verifyBlankNodeOrIRI(ci, hasLocation);
    // lat
    verifyValue(ci, location, locationLatitude, XSD.double_, "12.54321");
    // long
    verifyValue(ci, location, locationLongitude, XSD.double_, "-54.1234");
    // second the GEO ontology
    IRI lat = new IRI(NamespaceEnum.geo + "lat");
    IRI lon = new IRI(NamespaceEnum.geo + "long");
    // lat
    verifyValue(ci, lat, XSD.double_, "12.54321");
    // long
    verifyValue(ci, lon, XSD.double_, "-54.1234");
}
Use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
The class TikaEngineTest, method testExifMetadata.
@Test
public void testExifMetadata() throws EngineException, ParseException, IOException {
    log.info(">>> testExifMetadata <<<");
    String exif = "http://www.semanticdesktop.org/ontologies/2007/05/10/nexif#";
    ContentItem ci = createContentItem("testJPEG_EXIF.jpg", "image/jpeg");
    assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
    engine.computeEnhancements(ci);
    verifyValue(ci, new IRI(exif + "make"), null, "Canon");
    verifyValue(ci, new IRI(exif + "software"), null, "Adobe Photoshop CS3 Macintosh");
    verifyValue(ci, new IRI(exif + "dateTimeOriginal"), XSD.dateTime, "2009-08-11T09:09:45");
    verifyValue(ci, new IRI(exif + "relatedImageWidth"), XSD.int_, "100");
    verifyValue(ci, new IRI(exif + "fNumber"), XSD.double_, "5.6");
    verifyValue(ci, new IRI(exif + "model"), null, "Canon EOS 40D");
    verifyValue(ci, new IRI(exif + "isoSpeedRatings"), XSD.int_, "400");
    verifyValue(ci, new IRI(exif + "xResolution"), XSD.double_, "240.0");
    verifyValue(ci, new IRI(exif + "flash"), XSD.boolean_, "false");
    verifyValue(ci, new IRI(exif + "exposureTime"), XSD.double_, "6.25E-4");
    verifyValue(ci, new IRI(exif + "yResolution"), XSD.double_, "240.0");
    verifyValue(ci, new IRI(exif + "resolutionUnit"), XSD.string, "Inch");
    verifyValue(ci, new IRI(exif + "focalLength"), XSD.double_, "194.0");
    verifyValue(ci, new IRI(exif + "relatedImageLength"), XSD.int_, "68");
    verifyValue(ci, new IRI(exif + "bitsPerSample"), XSD.int_, "8");
    // also Media Ontology mappings for Exif
    verifyValue(ci, new IRI(NamespaceEnum.media + "frameHeight"), XSD.int_, "68");
    verifyValue(ci, new IRI(NamespaceEnum.media + "frameWidth"), XSD.int_, "100");
    verifyValue(ci, new IRI(NamespaceEnum.media + "hasFormat"), null, "image/jpeg");
    verifyValue(ci, new IRI(NamespaceEnum.media + "creationDate"), XSD.dateTime, "2009-08-11T09:09:45");
    verifyValues(ci, new IRI(NamespaceEnum.media + "hasKeyword"), null, "serbor", "moscow-birds", "canon-55-250");
    // and finally the mapped DC properties
    verifyValue(ci, new IRI(NamespaceEnum.dc + "format"), null, "image/jpeg");
    verifyValue(ci, new IRI(NamespaceEnum.dc + "created"), XSD.dateTime, "2009-08-11T09:09:45");
    verifyValue(ci, new IRI(NamespaceEnum.dc + "modified"), XSD.dateTime, "2009-10-02T23:02:49");
    verifyValues(ci, new IRI(NamespaceEnum.dc + "subject"), null, "serbor", "moscow-birds", "canon-55-250");
}
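verifyValues (plural) checks multi-valued properties such as media:hasKeyword and dc:subject. A possible sketch of the String-based variant, assuming it collects all literal values of the property and compares them as a set (java.util imports assumed; the RDF.type usage in testMp4 would use an analogous overload that compares RDFTerm values instead of lexical forms):

// Sketch only: assumed implementation of the multi-value check used above.
private void verifyValues(ContentItem ci, IRI property, IRI expectedDataType, String... expectedValues) {
    // collect the lexical forms of all literal values of the property on the ContentItem
    Set<String> actual = new HashSet<String>();
    Iterator<Triple> it = ci.getMetadata().filter(ci.getUri(), property, null);
    while (it.hasNext()) {
        RDFTerm value = it.next().getObject();
        assertTrue(value instanceof Literal);
        Literal literal = (Literal) value;
        if (expectedDataType != null) {
            assertEquals(expectedDataType, literal.getDataType());
        }
        actual.add(literal.getLexicalForm());
    }
    assertEquals(new HashSet<String>(Arrays.asList(expectedValues)), actual);
}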
Use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
The class TikaEngineTest, method testRtf.
@Test
public void testRtf() throws EngineException, IOException {
    log.info(">>> testRtf <<<");
    ContentItem ci = createContentItem("test.rtf", "application/rtf");
    assertFalse(engine.canEnhance(ci) == CANNOT_ENHANCE);
    engine.computeEnhancements(ci);
    Entry<IRI, Blob> contentPart = ContentItemHelper.getBlob(ci, singleton("text/plain"));
    assertNotNull(contentPart);
    Blob plainTextBlob = contentPart.getValue();
    assertNotNull(plainTextBlob);
    assertContentRegexp(plainTextBlob,
        "The Apache Stanbol Enhancer",
        "The Stanbol enhancer can detect famous cities such as Paris and people such as Bob Marley.");
    // validate XHTML results
    contentPart = ContentItemHelper.getBlob(ci, singleton("application/xhtml+xml"));
    assertNotNull(contentPart);
    Blob xhtmlBlob = contentPart.getValue();
    assertNotNull(xhtmlBlob);
    assertContentRegexp(xhtmlBlob,
        "<html xmlns=\"http://www.w3.org/1999/xhtml\">",
        "<head>",
        "<meta name=",
        "<title>",
        "The Apache Stanbol Enhancer",
        "The Stanbol enhancer can detect famous cities",
        "</body></html>");
}
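assertContentRegexp is likewise a test helper that is not shown in this excerpt. A minimal sketch under the assumption that it reads the Blob as text (here via ContentItemHelper.getText from the Stanbol enhancer services API) and requires every pattern to match somewhere in the content using java.util.regex:

// Sketch only: assumed implementation of the content assertion used above.
private void assertContentRegexp(Blob blob, String... patterns) throws IOException {
    String text = ContentItemHelper.getText(blob);
    for (String pattern : patterns) {
        assertTrue("content does not match '" + pattern + "'",
            Pattern.compile(pattern).matcher(text).find());
    }
}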
Use of org.apache.clerezza.commons.rdf.IRI in project stanbol by apache.
The class SentimentSummarizationEngine, method writeSentiment.
private void writeSentiment(ContentItem ci, Section section, List<SentimentPhrase> sectionPhrases) {
    if (section == null || sectionPhrases == null || sectionPhrases.isEmpty()) {
        // nothing to do
        return;
    }
    IRI enh = createTextEnhancement(ci, this);
    Graph metadata = ci.getMetadata();
    if (section.getType() == SpanTypeEnum.Sentence) {
        // TODO use the fise:TextAnnotation new model for
        // add start/end positions
        metadata.add(new TripleImpl(enh, ENHANCER_START, lf.createTypedLiteral(section.getStart())));
        metadata.add(new TripleImpl(enh, ENHANCER_END, lf.createTypedLiteral(section.getEnd())));
    }
    // TODO: Summarize the sentiments of this section
    // add the sentiment information
    double positiveSent = 0.0;
    int positiveCount = 0;
    double negativeSent = 0.0;
    int negativeCount = 0;
    for (SentimentPhrase sentPhrase : sectionPhrases) {
        if (sentPhrase.getNegativeSentiment() != null) {
            double neg = sentPhrase.getNegativeSentiment();
            negativeSent = negativeSent + (neg * neg);
            negativeCount++;
        }
        if (sentPhrase.getPositiveSentiment() != null) {
            double pos = sentPhrase.getPositiveSentiment();
            positiveSent = positiveSent + (pos * pos);
            positiveCount++;
        }
    }
    if (positiveCount > 0) {
        positiveSent = Math.sqrt(positiveSent / (double) positiveCount);
        metadata.add(new TripleImpl(enh, POSITIVE_SENTIMENT_PROPERTY, lf.createTypedLiteral(Double.valueOf(positiveSent))));
    }
    if (negativeCount > 0) {
        negativeSent = Math.sqrt(negativeSent / (double) negativeCount) * -1;
        metadata.add(new TripleImpl(enh, NEGATIVE_SENTIMENT_PROPERTY, lf.createTypedLiteral(Double.valueOf(negativeSent))));
    }
    metadata.add(new TripleImpl(enh, SENTIMENT_PROPERTY, lf.createTypedLiteral(Double.valueOf(negativeSent + positiveSent))));
    // add the Sentiment type as well as the type of the SSO Ontology
    metadata.add(new TripleImpl(enh, DC_TYPE, SENTIMENT_TYPE));
    IRI ssoType = NIFHelper.SPAN_TYPE_TO_SSO_TYPE.get(section.getType());
    if (ssoType != null) {
        metadata.add(new TripleImpl(enh, DC_TYPE, ssoType));
    }
    if (section.getType() == SpanTypeEnum.Text) {
        metadata.add(new TripleImpl(enh, DC_TYPE, DOCUMENT_SENTIMENT_TYPE));
    }
}
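The aggregation above combines phrase-level sentiments into a section-level value as a root mean square: each value is squared, the squares are averaged, and the square root is taken (negated again on the negative side), so stronger phrases weigh in more than a plain average would allow. A tiny illustration of the same calculation on sample values (values chosen here only for illustration):

// Illustration only: root-mean-square aggregation as used in writeSentiment above.
double[] positives = { 0.6, 0.8 };
double sumOfSquares = 0.0;
for (double p : positives) {
    sumOfSquares += p * p;
}
double sectionPositive = Math.sqrt(sumOfSquares / positives.length); // ~0.707, vs. a plain average of 0.7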