use of org.apache.http.entity.InputStreamEntity in project stanbol by apache.
the class RestfulLangidentEngine method computeEnhancements.
/**
 * Compute enhancements for supplied ContentItem. The results of the process
 * are stored in the metadata of the content item.
 * <p/>
 * The plain text Blob of the content item is sent via HTTP POST to the
 * RESTful Language Identification Service at {@code serviceUrl}. For every
 * language suggestion returned by the service a text enhancement is created
 * in the metadata carrying the language, the linguistic-system type and -
 * if the service provided one - the probability as confidence.
 * <p/>
 * The metadata are only modified while holding the write lock of the
 * content item; the remote request is executed before the lock is acquired.
 *
 * @param ci the content item to detect the language(s) for
 * @throws org.apache.stanbol.enhancer.servicesapi.EngineException
 * if the request to the language identification service failed with an
 * {@link IOException} (this includes {@link ClientProtocolException})
 */
@Override
public void computeEnhancements(final ContentItem ci) throws EngineException {
    //get the plain text Blob
    Map.Entry<IRI, Blob> textBlob = getPlainText(this, ci, false);
    Blob blob = textBlob.getValue();
    //send the text to the server
    final HttpPost request = new HttpPost(serviceUrl);
    request.setEntity(new InputStreamEntity(blob.getStream(), blob.getContentLength(),
            ContentType.create(blob.getMimeType(), blob.getParameter().get("charset"))));
    //execute the request
    List<LangSuggestion> detected;
    try {
        detected = AccessController.doPrivileged(new PrivilegedExceptionAction<List<LangSuggestion>>() {
            public List<LangSuggestion> run() throws ClientProtocolException, IOException {
                return httpClient.execute(request, new LangIdentResponseHandler(ci, objectMapper));
            }
        });
    } catch (PrivilegedActionException pae) {
        Exception e = pae.getException();
        //ClientProtocolException is a subclass of IOException, so a single
        //check covers both checked exceptions declared by run()
        if (e instanceof IOException) {
            throw new EngineException(this, ci, "Exception while executing Request "
                    + "on RESTful Language Identification Service at " + serviceUrl, e);
        }
        //run() declares no other checked exceptions, so anything else
        //has to be a RuntimeException
        throw RuntimeException.class.cast(e);
    }
    Graph metadata = ci.getMetadata();
    //the placeholders need their arguments, otherwise "{}" is logged literally
    log.debug("Detected Languages for ContentItem {} and Blob {}", ci.getUri(), textBlob.getKey());
    ci.getLock().writeLock().lock();
    try {
        //write TextAnnotations for the detected languages
        for (LangSuggestion suggestion : detected) {
            // add a hypothesis
            log.debug(" > {}@{}", suggestion.getLanguage(),
                    suggestion.hasProbability() ? suggestion.getProbability() : "-,--");
            IRI textEnhancement = EnhancementEngineHelper.createTextEnhancement(ci, this);
            metadata.add(new TripleImpl(textEnhancement, DC_LANGUAGE,
                    new PlainLiteralImpl(suggestion.getLanguage())));
            metadata.add(new TripleImpl(textEnhancement, DC_TYPE, DCTERMS_LINGUISTIC_SYSTEM));
            if (suggestion.hasProbability()) {
                metadata.add(new TripleImpl(textEnhancement, ENHANCER_CONFIDENCE,
                        literalFactory.createTypedLiteral(suggestion.getProbability())));
            }
        }
    } finally {
        ci.getLock().writeLock().unlock();
    }
}
use of org.apache.http.entity.InputStreamEntity in project stanbol by apache.
the class TikaXmlDocumentOsgiTest method testDocx2.
/**
 * Posts the docx test resource 'Vorlage_Protokoll.docx' to the tika engine
 * endpoint and checks that the request is answered with status 200.
 * @throws Exception
 */
@Test
public void testDocx2() throws Exception {
    ClassLoader resourceLoader = EngineEnhancementRequestTest.class.getClassLoader();
    InputStream docx = resourceLoader.getResourceAsStream("Vorlage_Protokoll.docx");
    Assert.assertNotNull("Unable to find test resource 'Vorlage_Protokoll.docx'", docx);
    String tikaEngineUrl = getEndpoint() + "/engine/tika?omitMetadata=true";
    //length -1: the size of the streamed document is not known up front
    InputStreamEntity body = new InputStreamEntity(docx, -1);
    executor.execute(
            builder.buildPostRequest(tikaEngineUrl)
                    .withHeader("Accept", "text/plain")
                    .withHeader("Content-Type", "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
                    .withEntity(body))
            //not interested in the results, just that it worked
            .assertStatus(200);
}
use of org.apache.http.entity.InputStreamEntity in project stanbol by apache.
the class EntityhubTest method buildImportRdfData.
/**
 * Builds the request for importing/updating RDF data to the entityhub with
 * the possibility to restrict imports/updates to the parsed uri
 * @param in the stream providing the RDF data (asserted to be not
 * <code>null</code>)
 * @param contentType the content type of the provided data. Set as
 * "Content-Type" header of the request (asserted to be not <code>null</code>)
 * @param create if <code>true</code> the data are created (POST) otherwise
 * updated (PUT).
 * @param uri if not <code>null</code> only data of this URI are imported by
 * specifying the id parameter
 * @return the request with the entity and the content type header set
 */
protected Request buildImportRdfData(InputStream in, String contentType, boolean create, String uri) {
    Assert.assertNotNull(in);
    Assert.assertNotNull(contentType);
    //add the id parameter only if the import is restricted to an uri
    final String path = uri == null
            ? builder.buildUrl("/entityhub/entity")
            : builder.buildUrl("/entityhub/entity", "id", uri);
    final Request request = create
            ? builder.buildOtherRequest(new HttpPost(path))
            : builder.buildOtherRequest(new HttpPut(path));
    //both PUT and POST are HttpEntityEnclosingRequests, so the cast is safe
    HttpEntityEnclosingRequest enclosing = (HttpEntityEnclosingRequest) request.getRequest();
    enclosing.setEntity(new InputStreamEntity(in, -1));
    //finally set the content-type of the provided data as parsed by the caller
    request.getRequest().setHeader("Content-Type", contentType);
    return request;
}
use of org.apache.http.entity.InputStreamEntity in project stanbol by apache.
the class EngineEnhancementRequestTest method testPlainTextExtraction.
/**
 * Sends the 'test.pdf' resource directly to the tika engine endpoint,
 * requests text/plain and checks both the status and that the response
 * content matches the expected text snippets.
 * @throws Exception
 */
@Test
public void testPlainTextExtraction() throws Exception {
    ClassLoader resourceLoader = EngineEnhancementRequestTest.class.getClassLoader();
    InputStream pdf = resourceLoader.getResourceAsStream("test.pdf");
    Assert.assertNotNull("Unable to find test resource 'test.pdf'", pdf);
    String tikaEngineUrl = getEndpoint() + "/engine/tika?omitMetadata=true";
    //length -1: the size of the streamed document is not known up front
    InputStreamEntity body = new InputStreamEntity(pdf, -1);
    executor.execute(
            builder.buildPostRequest(tikaEngineUrl)
                    .withHeader("Accept", "text/plain")
                    .withEntity(body))
            .assertStatus(200)
            //only that the Enhancer REST API works also with engines!
            .assertContentRegexp(
                    "The Apache Stanbol Enhancer",
                    "The Stanbol enhancer can detect famous cities such as Paris");
}
use of org.apache.http.entity.InputStreamEntity in project stanbol by apache.
the class TikaXmlDocumentOsgiTest method testDocx.
/**
 * Posts the docx test resource 'testWORD.docx' to the tika engine endpoint
 * and checks that the request is answered with status 200.
 * @throws Exception
 */
@Test
public void testDocx() throws Exception {
    ClassLoader resourceLoader = EngineEnhancementRequestTest.class.getClassLoader();
    InputStream docx = resourceLoader.getResourceAsStream("testWORD.docx");
    Assert.assertNotNull("Unable to find test resource 'testWORD.docx'", docx);
    String tikaEngineUrl = getEndpoint() + "/engine/tika?omitMetadata=true";
    //length -1: the size of the streamed document is not known up front
    InputStreamEntity body = new InputStreamEntity(docx, -1);
    executor.execute(
            builder.buildPostRequest(tikaEngineUrl)
                    .withHeader("Accept", "text/plain")
                    .withHeader("Content-Type", "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
                    .withEntity(body))
            //not interested in the results, just that it worked
            .assertStatus(200);
}
Aggregations