Use of org.icij.datashare.extract.RedisUserDocumentQueue in the project datashare by ICIJ.
The method runTaskRunner of the class CliApp.
/**
 * Runs the command-line task runner.
 *
 * <p>The method proceeds in three phases:
 * <ol>
 *   <li>when {@code resume(properties)} is true, exits with status 0 if the document search
 *       returns no hit and the Redis user document queue is empty;</li>
 *   <li>handles the one-shot options (index creation, API key generation / lookup / deletion),
 *       each of which ends the JVM with {@code System.exit(0)};</li>
 *   <li>starts one task per stage present in the pipeline, waits for the task manager to
 *       terminate, then closes the indexer.</li>
 * </ol>
 *
 * @param injector   injector used to obtain the task manager, task factory, indexer,
 *                   pipeline registry and document queue instances
 * @param properties options driving which one-shot action or pipeline stages are run
 * @throws Exception if any of the invoked tasks, the queue or the indexer fails
 */
private static void runTaskRunner(Injector injector, Properties properties) throws Exception {
    TaskManagerMemory taskManager = injector.getInstance(TaskManagerMemory.class);
    TaskFactory taskFactory = injector.getInstance(TaskFactory.class);
    Set<Pipeline.Type> nlpPipelines = parseAll(properties.getProperty(DatashareCliOptions.NLP_PIPELINES_OPT));
    Indexer indexer = injector.getInstance(Indexer.class);

    if (resume(properties)) {
        RedisUserDocumentQueue queue = new RedisUserDocumentQueue(nullUser(), new PropertiesProvider(properties));
        boolean queueIsEmpty;
        try {
            queueIsEmpty = queue.isEmpty();
        } finally {
            // Always release the queue, even when the emptiness check throws.
            queue.close();
        }
        // NOTE(review): the search presumably counts documents not yet processed by the
        // requested NLP pipelines - confirm against the Indexer search API.
        if (indexer.search(properties.getProperty("defaultProject"), Document.class).withSource(false).without(nlpPipelines.toArray(new Pipeline.Type[] {})).execute().count() == 0 && queueIsEmpty) {
            logger.info("nothing to resume, exiting normally");
            System.exit(0);
        }
    }

    // One-shot options: each one performs a single action and then ends the JVM.
    if (properties.getProperty(CREATE_INDEX_OPT) != null) {
        indexer.createIndex(properties.getProperty(CREATE_INDEX_OPT));
        System.exit(0);
    }
    if (properties.getProperty(CRE_API_KEY_OPT) != null) {
        String userName = properties.getProperty(CRE_API_KEY_OPT);
        String secretKey = taskFactory.createGenApiKey(localUser(userName)).call();
        logger.info("generated secret key for user {} (store it somewhere safe, datashare cannot retrieve it later): {}", userName, secretKey);
        System.exit(0);
    }
    if (properties.getProperty(GET_API_KEY_OPT) != null) {
        String userName = properties.getProperty(GET_API_KEY_OPT);
        String hashedKey = taskFactory.createGetApiKey(localUser(userName)).call();
        if (hashedKey == null) {
            logger.info("no user {} exists", userName);
        } else {
            logger.info("hashed key for user {} is {}", userName, hashedKey);
        }
        System.exit(0);
    }
    if (properties.getProperty(DEL_API_KEY_OPT) != null) {
        String userName = properties.getProperty(DEL_API_KEY_OPT);
        taskFactory.createDelApiKey(localUser(userName)).call();
        System.exit(0);
    }

    // Pipeline stages: start a task for every stage present in the pipeline.
    PipelineHelper pipeline = new PipelineHelper(new PropertiesProvider(properties));
    if (pipeline.has(DatashareCli.Stage.DEDUPLICATE)) {
        taskManager.startTask(taskFactory.createDeduplicateTask(nullUser(), pipeline.getQueueNameFor(DatashareCli.Stage.DEDUPLICATE)));
    }
    if (pipeline.has(DatashareCli.Stage.SCANIDX)) {
        TaskView<Long> taskView = taskManager.startTask(taskFactory.createScanIndexTask(nullUser(), ofNullable(properties.getProperty(MAP_NAME_OPTION)).orElse("extract:report")));
        logger.info("scanned {}", taskView.getResult(true));
    }
    // The scan stage is skipped when resuming.
    if (pipeline.has(DatashareCli.Stage.SCAN) && !resume(properties)) {
        // The document queue is looked up from the injector only when the callback runs.
        taskManager.startTask(taskFactory.createScanTask(nullUser(), pipeline.getQueueNameFor(DatashareCli.Stage.SCAN), Paths.get(properties.getProperty(DatashareCliOptions.DATA_DIR_OPT)), properties), () -> closeAndLogException(injector.getInstance(DocumentQueue.class)).run());
    }
    if (pipeline.has(DatashareCli.Stage.INDEX)) {
        taskManager.startTask(taskFactory.createIndexTask(nullUser(), pipeline.getQueueNameFor(DatashareCli.Stage.INDEX), properties), () -> closeAndLogException(injector.getInstance(DocumentQueue.class)).run());
    }
    if (pipeline.has(DatashareCli.Stage.NLP)) {
        // One NLP task per requested pipeline type, plus a resume task when resuming.
        for (Pipeline.Type nlp : nlpPipelines) {
            Pipeline pipelineClass = injector.getInstance(PipelineRegistry.class).get(nlp);
            taskManager.startTask(taskFactory.createNlpTask(nullUser(), pipelineClass));
        }
        if (resume(properties)) {
            taskManager.startTask(taskFactory.createResumeNlpTask(nullUser(), nlpPipelines));
        }
    }

    taskManager.shutdownAndAwaitTermination(Integer.MAX_VALUE, SECONDS);
    indexer.close();
}
Aggregations