use of org.apache.http.entity.mime.MultipartEntityBuilder in project stanbol by apache.
the class MultipartRequestTest method testUploadMultipleContents.
/**
 * Sends the HTML and the plain text version of the same content within a
 * single request.
 * <p>
 * The scenario is a CMS that already holds alternate versions of a content
 * and passes them along: the original content stays available (e.g. for
 * metadata extraction), while engines that need an alternate version (e.g.
 * plain text) are expected to use the supplied one directly.
 * <p>
 * To check this, a "secret" sentence is appended to the plain text version
 * only. The test then expects the two entities mentioned in that sentence
 * (Berlin and Germany) among the entity labels of the response.
 *
 * @throws IOException on I/O errors raised by the calls made in this test
 */
@Test
public void testUploadMultipleContents() throws IOException {
    // The "secret": Berlin is the capital of Germany. Only the plain text
    // alternate carries this sentence.
    String plainTextWithSecret = TEXT_CONTENT + "\nIt is a secret, that the city of Berlin is the capital of Germany since 1990.";

    // Alternate versions of the content, keyed by their URI; a
    // LinkedHashMap keeps HTML first and plain text second.
    Map<String, ContentBody> contentVersions = new LinkedHashMap<String, ContentBody>();
    contentVersions.put(
        "http://www.example.com/test.html",
        new StringBody(HTML_CONTENT, ContentType.TEXT_HTML.withCharset(UTF8)));
    contentVersions.put(
        "http://www.example.com/test.txt",
        new StringBody(plainTextWithSecret, ContentType.TEXT_PLAIN.withCharset(UTF8)));

    // Multipart builder for the content item; MultipartContentBody is the
    // small extension used to nest the multipart/alternate part.
    MultipartEntityBuilder ciBuilder = MultipartEntityBuilder.create();
    ciBuilder.setBoundary("contentItem-47jjksnbue73fnis");
    ciBuilder.addPart(
        "content",
        new MultipartContentBody(contentVersions, "contentParts", ContentType.create("multipart/alternate")));

    String[] expectedPatterns = {
        // enhancements expected in the metadata
        "http://purl.org/dc/terms/creator.*LanguageDetectionEnhancementEngine",
        "http://purl.org/dc/terms/language.*en",
        "http://fise.iks-project.eu/ontology/entity-label.*Paris",
        "http://purl.org/dc/terms/creator.*org.apache.stanbol.enhancer.engines.opennlp.*NamedEntityExtractionEnhancementEngine",
        "http://fise.iks-project.eu/ontology/entity-label.*Bob Marley",
        // entities that only occur in the secret sentence of the text part
        "http://fise.iks-project.eu/ontology/entity-label.*Berlin",
        "http://fise.iks-project.eu/ontology/entity-label.*Germany"
    };

    String responseBody = executor
        .execute(builder.buildPostRequest(getEndpoint())
            .withHeader("Accept", "text/rdf+nt")
            .withEntity(ciBuilder.build()))
        .assertStatus(200)
        .assertContentRegexp(expectedPatterns)
        .getContent();
    log.debug("Content:\n{}\n", responseBody);
}
use of org.apache.http.entity.mime.MultipartEntityBuilder in project stanbol by apache.
the class MultipartRequestTest method testUploadWithMetadata.
/**
 * Uploads pre-existing metadata together with the content.
 * <p>
 * The metadata of this test are TextAnnotations for free text tags, as a
 * user might provide them ("Germany", "Europe", "NATO" and
 * "Silvio Berlusconi"). The response is then expected to contain entity
 * references to the matching DBpedia resources in addition to the
 * enhancements extracted from the HTML content.
 *
 * @throws IOException on I/O errors raised by the calls made in this test
 */
@Test
public void testUploadWithMetadata() throws IOException {
    final IRI contentItemId = new IRI("http://www.example.com/test.html");
    RDFTerm tagCreator = new PlainLiteralImpl("Rupert Westenthaler");

    // build the metadata graph: one TextAnnotation per user tag
    Graph tagGraph = new SimpleGraph();
    addTagAsTextAnnotation(tagGraph, contentItemId, "Germany", DBPEDIA_PLACE, tagCreator);
    addTagAsTextAnnotation(tagGraph, contentItemId, "Europe", DBPEDIA_PLACE, tagCreator);
    addTagAsTextAnnotation(tagGraph, contentItemId, "NATO", DBPEDIA_ORGANISATION, tagCreator);
    addTagAsTextAnnotation(tagGraph, contentItemId, "Silvio Berlusconi", DBPEDIA_PERSON, tagCreator);

    // serialize the graph as RDF/XML
    String metadataFormat = SupportedFormat.RDF_XML;
    ByteArrayOutputStream serialized = new ByteArrayOutputStream();
    serializer.serialize(serialized, tagGraph, metadataFormat);
    String metadataRdf = new String(serialized.toByteArray(), UTF8);

    /*
     * NOTE: getFilename is overridden because the file name of the
     * metadata part MUST BE the URI of the ContentItem. The metadata
     * contain triples about that ContentItem, so the URI assigned by the
     * Stanbol Enhancer has to be the very same URI as used in the
     * metadata.
     */
    StringBody metadataPart = new StringBody(metadataRdf, ContentType.create(metadataFormat).withCharset(UTF8)) {
        @Override
        public String getFilename() {
            return contentItemId.getUnicodeString();
        }
    };

    // metadata part first, content part second
    MultipartEntityBuilder ciBuilder = MultipartEntityBuilder.create();
    ciBuilder.addPart("metadata", metadataPart);
    ciBuilder.addTextBody("content", HTML_CONTENT, ContentType.TEXT_HTML.withCharset(UTF8));

    String[] expectedPatterns = {
        // enhancements expected from the parsed content
        "http://purl.org/dc/terms/creator.*LanguageDetectionEnhancementEngine",
        "http://purl.org/dc/terms/language.*en",
        "http://fise.iks-project.eu/ontology/entity-label.*Paris",
        "http://purl.org/dc/terms/creator.*org.apache.stanbol.enhancer.engines.opennlp.*NamedEntityExtractionEnhancementEngine",
        "http://fise.iks-project.eu/ontology/entity-label.*Bob Marley",
        // additional enhancements expected from the uploaded metadata
        "http://fise.iks-project.eu/ontology/entity-reference.*http://dbpedia.org/resource/Germany.*",
        "http://fise.iks-project.eu/ontology/entity-reference.*http://dbpedia.org/resource/NATO.*",
        "http://fise.iks-project.eu/ontology/entity-reference.*http://dbpedia.org/resource/Silvio_Berlusconi.*",
        "http://fise.iks-project.eu/ontology/entity-reference.*http://dbpedia.org/resource/Europe.*"
    };

    // send the request and verify the response
    String responseBody = executor
        .execute(builder.buildPostRequest(getEndpoint())
            .withHeader("Accept", "text/rdf+nt")
            .withEntity(ciBuilder.build()))
        .assertStatus(200)
        .assertContentRegexp(expectedPatterns)
        .getContent();
    log.debug("Content:\n{}\n", responseBody);
}
use of org.apache.http.entity.mime.MultipartEntityBuilder in project stanbol by apache.
the class MultipartRequestTest method testMissingContent.
/**
 * Posts a multipart content item that consists of a "metadata" part only
 * and expects the request to be rejected with status 400 and an error
 * message stating that the content is missing.
 *
 * @throws IOException on I/O errors raised by the calls made in this test
 */
@Test
public void testMissingContent() throws IOException {
    final IRI contentItemId = new IRI("http://www.example.com/test.html");
    String metadataFormat = SupportedFormat.RDF_XML;
    String metadataRdf = getDummyRdfMetadata(contentItemId, metadataFormat);

    // only the metadata part is added; the "content" part is left out on purpose
    MultipartEntityBuilder ciBuilder = MultipartEntityBuilder.create();
    ciBuilder.addTextBody("metadata", metadataRdf, ContentType.create(metadataFormat, UTF8));

    String responseBody = executor
        .execute(builder.buildPostRequest(getEndpoint())
            .withHeader("Accept", "text/rdf+nt")
            .withEntity(ciBuilder.build()))
        .assertStatus(400) // BAD request
        .getContent();

    // the response body has to name the reason for the rejection
    String expectedMessage = "The parsed multipart content item does not contain any content.";
    Assert.assertTrue(responseBody.contains(expectedMessage));
}
use of org.apache.http.entity.mime.MultipartEntityBuilder in project stanbol by apache.
the class MultipartRequestTest method testContentBeforeMetadata.
/**
 * Posts a multipart content item whose "content" part is added before the
 * "metadata" part and expects the request to be rejected with status 400
 * and an error message about the required order of the two parts.
 *
 * @throws IOException on I/O errors raised by the calls made in this test
 */
@Test
public void testContentBeforeMetadata() throws IOException {
    final IRI contentItemId = new IRI("http://www.example.com/test.html");
    String metadataFormat = SupportedFormat.RDF_XML;
    String metadataRdf = getDummyRdfMetadata(contentItemId, metadataFormat);

    // deliberately wrong order: "content" is added ahead of "metadata"
    MultipartEntityBuilder ciBuilder = MultipartEntityBuilder.create();
    ciBuilder.addTextBody("content", HTML_CONTENT, ContentType.TEXT_HTML.withCharset(UTF8));
    ciBuilder.addTextBody("metadata", metadataRdf, ContentType.create(metadataFormat, UTF8));

    String responseBody = executor
        .execute(builder.buildPostRequest(getEndpoint())
            .withHeader("Accept", "text/rdf+nt")
            .withEntity(ciBuilder.build()))
        .assertStatus(400) // BAD request
        .getContent();

    // the response body has to name the reason for the rejection
    String expectedMessage = "The Multipart MIME part with the 'metadata' MUST BE before the " + "MIME part containing the 'content'";
    Assert.assertTrue(responseBody.contains(expectedMessage));
}
Aggregations