Use of org.semanticdesktop.aperture.extractor.Extractor in the Apache Stanbol project.
Class MetaxaCore, method extract.
/**
 * Returns a model containing all the metadata that could be extracted
 * by reading the given input stream using the given MIME type.
 *
 * <p>Only the first extractor factory registered for the MIME type is used.
 * When such a factory exists, the input stream is closed by this method once
 * the extraction attempt has finished, whether it succeeded or threw. When no
 * factory is registered, the stream is neither read nor closed and remains
 * the caller's responsibility.
 *
 * @param in
 *            an {@link InputStream} where to read the document from
 * @param docId
 *            a {@link URIImpl} with the document URI
 * @param mimeType
 *            a {@link String} with the MIME type
 * @return a {@link Model} containing the metadata or {@code null} if no
 *         extractor is available for the given MIME type
 * @throws ExtractorException
 *             if there is an error when extracting the metadata
 * @throws IOException
 *             if there is an error when reading or closing the input stream
 */
public Model extract(InputStream in, URIImpl docId, String mimeType) throws ExtractorException, IOException {
    @SuppressWarnings("rawtypes")
    Set factories = this.extractorRegistry.getExtractorFactories(mimeType);
    if (factories == null || factories.isEmpty()) {
        // no extractor registered for this MIME type: leave the stream untouched
        return null;
    }

    // get extractor from the first available factory
    ExtractorFactory factory = (ExtractorFactory) factories.iterator().next();
    Extractor extractor = factory.get();

    RDFContainerFactory containerFactory = new RDFContainerFactoryImpl();
    RDFContainer container = containerFactory.getRDFContainer(docId);
    try {
        // NOTE(review): the third argument is passed as null; presumably an optional
        // charset hint of the Aperture Extractor API - confirm against its Javadoc.
        extractor.extract(container.getDescribedUri(), new BufferedInputStream(in, 8192), null, mimeType, container);
    } finally {
        // release the stream even when extraction fails, so it is not leaked
        in.close();
    }
    return container.getModel();
}
Aggregations