Use of org.opensextant.xtext.converters.EmbeddedContentConverter in project Xponents by OpenSextant.
Class Decomposer, method main.
/**
 * Command-line entry point: converts the single file named by {@code -i} with an
 * {@link EmbeddedContentConverter} and prints the converted document's file path.
 *
 * <p>Options (getopt string {@code "hei:o:"}):
 * <ul>
 * <li>{@code -i <file>}: the input file to convert; required.</li>
 * <li>{@code -o <arg>}: accepted, but its value is not used by this method.</li>
 * <li>{@code -e}: prints a notice; it does not otherwise change what this method does.</li>
 * <li>{@code -h} or any unrecognized option: prints usage and exits with status 1.</li>
 * </ul>
 *
 * <p>The process exits with status 1 when the arguments are unusable (including a
 * missing {@code -i}) or when the conversion raises an {@link IOException}.
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {
    gnu.getopt.Getopt opts = new gnu.getopt.Getopt("Decomposer", args, "hei:o:");
    String input = null;
    try {
        int c;
        while ((c = opts.getopt()) != -1) {
            switch (c) {
            case 'i':
                input = opts.getOptarg();
                break;
            case 'o':
                // Accepted so existing command lines keep working; the value is not used here.
                break;
            case 'e':
                System.out.println("Saving conversions to Input folder. Output folder will be ignored.");
                break;
            default:
                // Reached for -h as well as for anything getopt does not recognize.
                Decomposer.usage();
                System.exit(1);
            }
        }
    } catch (Exception err) {
        Decomposer.usage();
        System.exit(1);
    }

    // Without -i there is nothing to convert, and new File(null) would throw a
    // NullPointerException; fail the same way as any other bad argument instead.
    if (input == null) {
        Decomposer.usage();
        System.exit(1);
        return;
    }

    // 0x200000 == 2,097,152. Presumably a buffer size limit in bytes -- TODO confirm
    // against the EmbeddedContentConverter constructor.
    EmbeddedContentConverter conv = new EmbeddedContentConverter(0x200000);
    try {
        ConvertedDocument d = conv.convert(new File(input));
        System.out.println("Found Doc:" + d.getFilepath());
    } catch (IOException e) {
        e.printStackTrace();
        // Report the failed conversion to the calling shell instead of exiting with 0.
        System.exit(1);
    }
}
Use of org.opensextant.xtext.converters.EmbeddedContentConverter in project Xponents by OpenSextant.
Class XText, method setup.
/**
 * Prepares the converters for the file types currently in the requested list.
 *
 * <p>If by this point you have taken items out of the requested types, the
 * converters for those types will not be set up. E.g., if you don't want HTML,
 * email, web-archive or JPEG conversion, those converter objects are not created.
 * The default and embedded-content converters are always created.
 *
 * <p>When "html" is requested, "htm" and "xhtml" are added to the requested types
 * and mapped to the same HTML converter instance. "eml" and "msg" share one
 * converter instance, as do "jpeg" and "jpg".
 *
 * @throws IOException
 *             on err
 */
public void setup() throws IOException {
    defaultConversion = new DefaultConverter(maxBuffer);
    embeddedConversion = new EmbeddedContentConverter(maxBuffer);
    paths.configure();

    // Create converter instances only for the types still in the requested list.
    // If the caller has removed a file type, its converter is never constructed.
    if (requestedFileTypes.contains("txt")) {
        converters.put("txt", new TextTranscodingConverter());
    }

    if (requestedFileTypes.contains("html")) {
        Converter webConv = new TikaHTMLConverter(this.scrubHTML, maxHTMLBuffer);
        converters.put("html", webConv);
        converters.put("htm", webConv);
        converters.put("xhtml", webConv);
        // Asking for "html" implicitly asks for its sibling extensions too.
        requestedFileTypes.add("htm");
        requestedFileTypes.add("xhtml");
    }

    // One email parser serves both extensions; build it only if either is wanted.
    boolean wantEml = requestedFileTypes.contains("eml");
    boolean wantMsg = requestedFileTypes.contains("msg");
    if (wantEml || wantMsg) {
        MessageConverter emailParser = new MessageConverter();
        if (wantEml) {
            converters.put("eml", emailParser);
        }
        if (wantMsg) {
            converters.put("msg", emailParser);
        }
    }

    /* RFC822 */
    if (requestedFileTypes.contains("mht")) {
        WebArchiveConverter webArchiveParser = new WebArchiveConverter();
        converters.put("mht", webArchiveParser);
    }

    // One image-metadata converter serves both JPEG extensions; created on first use.
    ImageMetadataConverter imgConv = null;
    String[] imageTypes = { "jpeg", "jpg" };
    for (String img : imageTypes) {
        if (requestedFileTypes.contains(img)) {
            if (imgConv == null) {
                imgConv = new ImageMetadataConverter();
            }
            converters.put(img, imgConv);
        }
    }

    // For every requested type T, ignore files of type "T.txt". Presumably these are
    // text outputs of earlier conversions (e.g. "pdf.txt") -- TODO confirm against
    // ignoreFileType.
    for (String t : requestedFileTypes) {
        ignoreFileType(t + ".txt");
    }

    fileFilters = requestedFileTypes.toArray(new String[0]);
}
Aggregations