Use of org.apache.tika.language.detect.LanguageHandler in the project tika by Apache.
From the class Language, method languageDetectionWithHandler.
/**
 * Parses standard input with an {@code AutoDetectParser}, sending the parse
 * events to a {@code LanguageHandler}, and prints the language that the
 * handler's result reports to standard output.
 *
 * @throws Exception propagated from handler construction, parsing, or the
 *                   result lookup
 */
public static void languageDetectionWithHandler() throws Exception {
    LanguageHandler languageHandler = new LanguageHandler();
    AutoDetectParser autoParser = new AutoDetectParser();
    autoParser.parse(System.in, languageHandler, new Metadata(), new ParseContext());

    // The handler is queried only after the parse call has returned.
    String detectedLanguage = languageHandler.getLanguage().getLanguage();
    System.out.println(detectedLanguage);
}
Use of org.apache.tika.language.detect.LanguageHandler in the project tika by Apache.
From the class MetadataResource, method parseMetadata.
/**
 * Parses the given stream and returns the {@code Metadata} object that was
 * populated along the way.
 *
 * <p>A fresh {@code Metadata} and {@code ParseContext} are created, handed to
 * {@code TikaResource.fillMetadata} / {@code TikaResource.fillParseContext}
 * together with the HTTP headers, and then passed to
 * {@code TikaResource.parse}. The content handler used for the parse is an
 * anonymous {@code LanguageHandler} whose {@code endDocument()} stores the
 * handler's detected language under the {@code "language"} metadata key.
 *
 * @param is          stream to parse
 * @param httpHeaders request headers forwarded to the {@code TikaResource} helpers
 * @param info        request URI information; its path is passed to
 *                    {@code TikaResource.parse}
 * @return the metadata object filled during this call
 * @throws IOException declared by this method; propagated from the calls it makes
 */
private Metadata parseMetadata(InputStream is, MultivaluedMap<String, String> httpHeaders, UriInfo info) throws IOException {
    final Metadata metadata = new Metadata();
    final ParseContext context = new ParseContext();
    Parser parser = TikaResource.createParser();
    TikaResource.fillMetadata(parser, metadata, context, httpHeaders);
    // no need to pass parser for embedded document parsing
    TikaResource.fillParseContext(context, httpHeaders, null);
    TikaResource.logRequest(LOG, info, metadata);
    TikaResource.parse(parser, LOG, info.getPath(), is, new LanguageHandler() {
        // @Override makes the compiler verify that this really replaces the
        // handler's endDocument(); without it a signature drift would silently
        // stop the language from being recorded.
        // NOTE(review): super.endDocument() is not called here - confirm that
        // is intentional.
        @Override
        public void endDocument() {
            metadata.set("language", getLanguage().getLanguage());
        }
    }, metadata, context);
    return metadata;
}
Use of org.apache.tika.language.detect.LanguageHandler in the project tika by Apache.
From the class LanguageDetectingParser, method parse.
/**
 * Runs the superclass parse while also feeding the same content events to a
 * {@code LanguageHandler}. After the parse, the detected language is written
 * to {@code TikaCoreProperties.LANGUAGE} in the metadata, but only when the
 * detection result reports itself as reasonably certain.
 *
 * @param stream   document stream to parse
 * @param handler  caller's content handler; receives events alongside the
 *                 language handler through a {@code TeeContentHandler}
 * @param metadata metadata passed to the superclass parse and updated with the
 *                 language when detection is reasonably certain
 * @param context  parse context passed through to the superclass parse
 * @throws SAXException  declared by this method; propagated from the calls it makes
 * @throws IOException   declared by this method; propagated from the calls it makes
 * @throws TikaException declared by this method; propagated from the calls it makes
 */
public void parse(InputStream stream, ContentHandler handler, final Metadata metadata, ParseContext context) throws SAXException, IOException, TikaException {
    LanguageHandler detector = new LanguageHandler();
    super.parse(stream, new TeeContentHandler(handler, detector), metadata, context);

    LanguageResult detected = detector.getLanguage();
    if (!detected.isReasonablyCertain()) {
        // Leave the metadata untouched when the result is not reasonably certain.
        return;
    }
    metadata.set(TikaCoreProperties.LANGUAGE, detected.getLanguage());
}
Use of org.apache.tika.language.detect.LanguageHandler in the project tika by Apache.
From the class RecursiveMetadataResource, method parseMetadata.
/**
 * Parses the given stream through a {@code RecursiveParserWrapper} and returns
 * the metadata the wrapper collected, wrapped in a {@code MetadataList}.
 *
 * <p>The handler type is resolved from {@code handlerTypeName} via
 * {@code BasicContentHandlerFactory.parseHandlerType}, with
 * {@code DEFAULT_HANDLER_TYPE} passed as the second argument. The content
 * handler used for the parse is an anonymous {@code LanguageHandler} whose
 * {@code endDocument()} stores the handler's detected language under the
 * {@code "language"} key of the local {@code Metadata} object.
 *
 * <p>NOTE(review): the return value is built from {@code wrapper.getMetadata()},
 * not from the local {@code metadata} object; whether the {@code "language"}
 * entry reaches the returned list cannot be told from this method alone -
 * verify against {@code RecursiveParserWrapper}.
 *
 * @param is              stream to parse
 * @param httpHeaders     request headers forwarded to the {@code TikaResource} helpers
 * @param info            request URI information; its path is passed to
 *                        {@code TikaResource.parse}
 * @param handlerTypeName name used to select the content handler type
 * @return a {@code MetadataList} built from the wrapper's metadata
 * @throws Exception declared by this method; propagated from the calls it makes
 */
private MetadataList parseMetadata(InputStream is, MultivaluedMap<String, String> httpHeaders, UriInfo info, String handlerTypeName) throws Exception {
    final Metadata metadata = new Metadata();
    final ParseContext context = new ParseContext();
    Parser parser = TikaResource.createParser();
    // TODO: parameterize choice of max chars/max embedded attachments
    BasicContentHandlerFactory.HANDLER_TYPE type = BasicContentHandlerFactory.parseHandlerType(handlerTypeName, DEFAULT_HANDLER_TYPE);
    // presumably -1 means "no write limit" - confirm against BasicContentHandlerFactory
    RecursiveParserWrapper wrapper = new RecursiveParserWrapper(parser, new BasicContentHandlerFactory(type, -1));
    TikaResource.fillMetadata(parser, metadata, context, httpHeaders);
    // no need to add parser to parse recursively
    TikaResource.fillParseContext(context, httpHeaders, null);
    TikaResource.logRequest(LOG, info, metadata);
    TikaResource.parse(wrapper, LOG, info.getPath(), is, new LanguageHandler() {
        // @Override makes the compiler verify that this really replaces the
        // handler's endDocument(); without it a signature drift would silently
        // stop the language from being recorded.
        // NOTE(review): super.endDocument() is not called here - confirm that
        // is intentional.
        @Override
        public void endDocument() {
            metadata.set("language", getLanguage().getLanguage());
        }
    }, metadata, context);
    return new MetadataList(wrapper.getMetadata());
}
Aggregations