use of org.opensextant.ConfigException in project Xponents by OpenSextant.
the class XText method convertFile.
/**
 * Convert one file and save it off. Hidden files and files in hidden
 * folders (e.g., .cvs_ignore, mycode/.svn/abc.txt) are expected to be
 * rejected by {@code filterOutFile}, which is consulted only for top-level
 * inputs, i.e., when {@code parent} is null.
 *
 * This is the end of the line for the conversion logic: convertFile decides
 * whether to return the cached version or attempt a fresh conversion. It
 * also tries to save children items. As children items may require special
 * attention, callers can register a ConversionListener and deal with
 * children file objects on their end.
 *
 * @param input
 *            child input obj to convert
 * @param parent
 *            parent in which child was found; null for a top-level file
 * @return converted document object, or {@code null} when the file is
 *         filtered out, its extension is ignored or not requested, it is
 *         an archive or PST handled by a dedicated crawler, or it exceeds
 *         the maximum file size
 * @throws IOException
 *             if saving is enabled without an input root, or if the
 *             underlying converter fails (original error is the cause)
 * @throws ConfigException
 *             on err
 */
public ConvertedDocument convertFile(File input, ConvertedDocument parent) throws IOException, ConfigException {
    // Only top-level inputs go through the file filter; children are always considered.
    if (parent == null && filterOutFile(input)) {
        return null;
    }
    if (paths.isSaving() && !paths.isSaveWithInput() && !paths.hasInputRoot()) {
        throw new IOException("Please set an input root; convertFile() was called in save/cache mode without having PathManager setup");
    }

    String fname = input.getName();
    // Locale.ROOT keeps extension matching stable regardless of the JVM default
    // locale (e.g., Turkish dotless-i would turn "GIF" into "gıf").
    String ext = FilenameUtils.getExtension(fname).toLowerCase(java.util.Locale.ROOT);
    if (!allowNoExtension) {
        if (ignoreFileTypes.contains(ext)) {
            return null;
        }
        if (!requestedFileTypes.contains(ext)) {
            return null;
        }
    }
    log.debug("Converting FILE={}", input.getAbsolutePath());

    /*
     * Handle archives or PST files. Or other large compound single file.
     */
    if (isArchive(fname)) {
        convertArchive(input);
        // No document is returned for the archive itself; per the original
        // note, its children are what get converted.
        return null;
    } else if (isPSTExtension(ext) && !useTikaPST) {
        convertOutlookPST(input);
        return null;
    }

    /*
     * Otherwise this is a normal file...
     */
    if (FileUtils.sizeOf(input) > maxFileSize) {
        log.info("Valid File is too large FILE={}", input.getAbsolutePath());
        return null;
    }

    boolean cachable = true;
    Converter converter = converters.get(ext);
    if (converter == null) {
        if (extractEmbedded && EmbeddedContentConverter.isSupported(ext)) {
            converter = embeddedConversion;
            // Such content is processed every time. Oh well...
            cachable = false;
        } else {
            converter = defaultConversion;
        }
    }

    ConvertedDocument textDoc = null;
    // Reuse a cached conversion only when caching applies and overwrite is off.
    if (cachable && !ConvertedDocument.overwrite && paths.isSaving()) {
        textDoc = paths.getCachedConversion(input);
    }

    if (textDoc == null) {
        // Measure how long conversions take.
        long t1 = System.currentTimeMillis();
        try {
            textDoc = converter.convert(input);
        } catch (Exception convErr) {
            throw new IOException("Conversion error FILE=" + input.getPath(), convErr);
        }
        long t2 = System.currentTimeMillis();
        int duration = (int) (t2 - t1);

        if (textDoc != null) {
            if (paths.isSaving() && textDoc.is_converted) {
                // Get Parent info in there.
                if (parent != null) {
                    textDoc.setParent(parent);
                }
                paths.saveConversion(textDoc);

                if (textDoc.hasRawChildren()) {
                    convertChildren(textDoc);
                    // 1. children saved to disk
                    // 2. children converted.
                    // 3. children attached to parent here.
                    // 'textdoc' should now carry all of the children metadata.
                }
            }
        } else {
            // Converter produced nothing: fall back to a document built from the input file.
            textDoc = new ConvertedDocument(input);
        }

        textDoc.conversion_time = duration;
        if (textDoc.filetime == null) {
            textDoc.filetime = textDoc.getFiletime();
        }
    }

    /*
     * Conversion Listeners are called only for parent documents. That is
     * for an email with 4 attachments, this listener is called on the
     * parent email message, but not for the individual 4 attachments. The
     * final parent document here will have all Raw Children (bytes +
     * metadata) and Converted Children (ConvertedDocument obj). Caller will
     * have to detect if returned item via listener is a Parent with
     * Children.
     *
     * Behavior here is TBD.
     */
    if (postProcessor != null && parent == null) {
        postProcessor.handleConversion(textDoc, input.getAbsolutePath());
    }
    trackStatistics(textDoc);
    return textDoc;
}
use of org.opensextant.ConfigException in project Xponents by OpenSextant.
the class XText method convertOutlookPST.
/**
 * Expand an Outlook PST archive with an {@code OutlookPSTCrawler} that
 * feeds its items back to this converter.
 *
 * While the crawl runs, the shared PathManager is pointed at the archive
 * export folder (strip prefix and input root). The previous strip prefix is
 * restored when the method exits, whether it completes normally or throws.
 *
 * @param input
 *            input PST object
 * @throws IOException
 *             on err
 * @throws ConfigException
 *             on err, including any failure raised while collecting the
 *             PST contents (original error is the cause)
 */
public void convertOutlookPST(File input) throws ConfigException, IOException {
    if (!paths.isSaving()) {
        // Logged only; processing still proceeds as in the original behavior.
        log.error("Warning -- PST file found, but save = true is required to parse it. Enable saving and chose a cache folder");
    }

    OutlookPSTCrawler pst = new OutlookPSTCrawler(input);
    pst.setConverter(this);
    pst.overwriteMode = ConvertedDocument.overwrite;
    pst.incrementalMode = true;

    File saveFolder = paths.getArchiveExportDir(input);
    String savePrefix = paths.getStipPrefixPath();
    paths.setStripPrefixPath(saveFolder.getAbsolutePath());
    try {
        // NOTE(review): the input root is redirected here and, as before, is
        // not reset afterwards -- confirm callers expect that.
        paths.setInputRoot(saveFolder);
        pst.setOutputPSTDir(saveFolder);
        pst.configure();
        log.info("\tPST Email Archive Found ({}). Expanding to {}", input, saveFolder);
        try {
            pst.collect();
        } catch (Exception err) {
            throw new ConfigException("Unable to fully digest PST file " + input, err);
        }
    } finally {
        // Always undo the temporary prefix so a failed PST does not leave
        // PathManager in archive mode for subsequent files.
        paths.setStripPrefixPath(savePrefix);
    }
}
use of org.opensextant.ConfigException in project Xponents by OpenSextant.
the class XText method extractText.
/**
 * Convenience entry point for extraction/collection, used when XText runs
 * as a main program. API callers may skip this and call convertFile()
 * directly, without the setup and assumptions made here.
 *
 * The given path is made absolute and normalized, checked for existence,
 * and rejected if it points into an XText cache. It is then dispatched as
 * an archive, a PST file, a single file, or a folder. Start and stop times
 * are recorded around the run and statistics are reported at the end.
 *
 * @param filepath
 *            item from which we extract text
 * @throws IOException
 *             if the path cannot be normalized or does not exist
 * @throws ConfigException
 *             if the path is inside an XText cache
 */
public void extractText(String filepath) throws IOException, ConfigException {
    start_time = System.currentTimeMillis();
    log.info("Conversion. INPUT PATH={}", filepath);

    final String absolutePath = new File(filepath).getAbsolutePath();
    final String normalized = FilenameUtils.normalize(absolutePath, true);
    if (normalized == null) {
        throw new IOException("Failed to normalize the path: " + filepath);
    }

    final File target = new File(normalized);
    if (!target.exists()) {
        throw new IOException("Non existent input FILE=" + normalized);
    }

    /* Filter on absolute path */
    if (PathManager.isXTextCache(normalized)) {
        throw new ConfigException("XText cannot be directed to extract text from its own cache files. " + "Move the cache files out of ./xtext/ folders if you really need to do this.");
    }

    final String targetName = target.getName();
    if (isArchive(targetName)) {
        // Archive will collect originals to "export" and save conversions to "output".
        // PathManager is STATEFUL for as long as this archive is processing.
        // If an archive is uncovered while traversing files, its contents
        // can be dumped to the child export folder.
        convertArchive(target);
    } else if (isPST(targetName) && !useTikaPST) {
        this.convertOutlookPST(target);
    } else {
        final boolean singleFile = target.isFile();
        if (singleFile || target.isDirectory()) {
            // If prefix is not set, then conversion will be dumped flatly to output area.
            paths.setInputRoot(target);
            if (singleFile) {
                convertFile(target);
            } else {
                convertFolder(target);
            }
        }
    }

    stop_time = System.currentTimeMillis();

    final boolean saving = paths.isSaving();
    if (saving && paths.isSaveWithInput()) {
        log.info("Output can be accessed at from the input folder {} in 'xtext' sub-folders", target.getParent());
    } else if (saving) {
        log.info("Output can be accessed at " + paths.getConversionCache());
    }
    reportStatistics();
}
use of org.opensextant.ConfigException in project Xponents by OpenSextant.
the class MailClient method testAvailability.
/**
 * Checks that the currently configured source can be reached by
 * connecting to it and then disconnecting again.
 *
 * @throws ConfigException
 *             if either the connect or the disconnect step fails; the
 *             original error is attached as the cause
 */
public void testAvailability() throws ConfigException {
    try {
        connect();
        disconnect();
    } catch (Exception unavailable) {
        throw new ConfigException(String.format("%s -- failed to collect mail account", getName()), unavailable);
    }
}
use of org.opensextant.ConfigException in project Xponents by OpenSextant.
the class MailConfig method setProperties.
/**
 * Set properties from existing Property sheet, then validate and apply the
 * resulting configuration.
 *
 * Only entries whose key and value are both strings are copied; these are
 * obtained through {@link Properties#stringPropertyNames()}, which also
 * covers entries supplied by the sheet's defaults. A null sheet copies
 * nothing, but validation and configuration still run.
 *
 * @param props javamail props; may be null
 * @throws ConfigException SSL or other property error
 */
public void setProperties(Properties props) throws ConfigException {
    if (props != null) {
        // stringPropertyNames() avoids the unchecked (String) cast on raw keys
        // and never hands a null value to setProperty.
        for (String key : props.stringPropertyNames()) {
            setProperty(key, props.getProperty(key));
        }
    }

    validateBasicSettings();
    setConfiguration();

    // Certificates are validated only when the IMAP SSL flag is enabled.
    isSSL = getFlagProperty(getProperty("mail.imap.ssl.enable"));
    if (isSSL) {
        validateCertificates();
    }

    try {
        setAdvancedSettings();
    } catch (Exception securityErr) {
        throw new ConfigException("Advanced settings failed", securityErr);
    }
}
Aggregations