Use of org.apache.tika.batch.StatusReporter in the Apache Tika project.
From the class BatchProcessBuilder, method build.
/**
 * Assembles a {@link BatchProcess} from the document node of an xml
 * configuration file combined with the arguments supplied at runtime.
 * With the exception of the QueueBuilder, each builder decides for itself
 * how to adjudicate between a runtime argument and the matching element
 * in the configuration file.
 *
 * @param docElement document element of the xml config file
 * @param incomingRuntimeAttributes runtime arguments
 * @return the assembled BatchProcess
 */
public BatchProcess build(Node docElement, Map<String, String> incomingRuntimeAttributes) {
    // Process-level settings; these are read before any component is built.
    long timeoutThresholdMillis = XMLDOMUtil.getLong("timeoutThresholdMillis", incomingRuntimeAttributes, docElement);
    long timeoutCheckPulseMillis = XMLDOMUtil.getLong("timeoutCheckPulseMillis", incomingRuntimeAttributes, docElement);
    long pauseOnEarlyTerminationMillis = XMLDOMUtil.getLong("pauseOnEarlyTerminationMillis", incomingRuntimeAttributes, docElement);
    int maxAliveTimeSeconds = XMLDOMUtil.getInt("maxAliveTimeSeconds", incomingRuntimeAttributes, docElement);

    /*
     * TODO: still a bit smelly. Both the crawler and the consumers need
     * numConsumers, so the incoming attributes are copied and numConsumers
     * is filled in from the commandline (if present) or from the config file.
     * Per the original note, the copy is an unmodifiable defensive copy of
     * incomingRuntimeAttributes.
     */
    Map<String, String> runtimeAttributes = setNumConsumersInRuntimeAttributes(docElement, incomingRuntimeAttributes);

    // The queue is handed to both the consumers and the crawler below.
    ArrayBlockingQueue<FileResource> queue = buildQueue(docElement, runtimeAttributes);

    // Index the element children of the document node by node name.
    // If two elements share a name, the later one replaces the earlier one.
    NodeList childNodes = docElement.getChildNodes();
    Map<String, Node> elementsByName = new HashMap<String, Node>();
    int childCount = childNodes.getLength();
    for (int idx = 0; idx < childCount; idx++) {
        Node candidate = childNodes.item(idx);
        if (candidate.getNodeType() == Node.ELEMENT_NODE) {
            elementsByName.put(candidate.getNodeName(), candidate);
        }
    }

    // Components are built in this order: consumers, crawler, reporter, interrupter.
    // An element missing from the config is passed to its builder as null.
    ConsumersManager consumersManager = buildConsumersManager(elementsByName.get("consumers"), runtimeAttributes, queue);
    FileResourceCrawler crawler = buildCrawler(queue, elementsByName.get("crawler"), runtimeAttributes);
    StatusReporter reporter = buildReporter(crawler, consumersManager, elementsByName.get("reporter"), runtimeAttributes);
    Interrupter interrupter = buildInterrupter(elementsByName.get("interrupter"), runtimeAttributes);

    BatchProcess batchProcess = new BatchProcess(crawler, consumersManager, reporter, interrupter);

    // Negative values are not applied, so the corresponding setter is never called for them.
    if (timeoutThresholdMillis >= 0) {
        batchProcess.setTimeoutThresholdMillis(timeoutThresholdMillis);
    }
    if (pauseOnEarlyTerminationMillis >= 0) {
        batchProcess.setPauseOnEarlyTerminationMillis(pauseOnEarlyTerminationMillis);
    }
    if (timeoutCheckPulseMillis >= 0) {
        batchProcess.setTimeoutCheckPulseMillis(timeoutCheckPulseMillis);
    }
    // maxAliveTimeSeconds is always applied, whatever its value.
    batchProcess.setMaxAliveTimeSeconds(maxAliveTimeSeconds);
    return batchProcess;
}
Use of org.apache.tika.batch.StatusReporter in the Apache Tika project.
From the class SimpleLogReporterBuilder, method build.
/**
 * Creates a {@link StatusReporter} for the given crawler and consumers manager
 * and configures its sleep interval and stale threshold.
 *
 * @param crawler crawler passed to the reporter's constructor
 * @param consumersManager consumers manager passed to the reporter's constructor
 * @param n configuration node handed to XMLDOMUtil.mapifyAttrs
 * @param commandlineArguments commandline arguments handed to XMLDOMUtil.mapifyAttrs
 * @return the configured StatusReporter
 */
@Override
public StatusReporter build(FileResourceCrawler crawler, ConsumersManager consumersManager, Node n, Map<String, String> commandlineArguments) {
    // NOTE(review): mapifyAttrs presumably merges the node's attributes with
    // the commandline arguments -- confirm precedence in XMLDOMUtil.
    final Map<String, String> settings = XMLDOMUtil.mapifyAttrs(n, commandlineArguments);

    // The second argument to PropsUtil.getLong is the fallback value
    // (presumably used when the attribute is absent -- confirm in PropsUtil).
    final String rawSleep = settings.get("reporterSleepMillis");
    final long sleepInterval = PropsUtil.getLong(rawSleep, 1000L);
    final String rawStaleThreshold = settings.get("reporterStaleThresholdMillis");
    final long staleThreshold = PropsUtil.getLong(rawStaleThreshold, 500000L);

    // Both values are parsed before the reporter is constructed.
    final StatusReporter statusReporter = new StatusReporter(crawler, consumersManager);
    statusReporter.setSleepMillis(sleepInterval);
    statusReporter.setStaleThresholdMillis(staleThreshold);
    return statusReporter;
}
Aggregations