Search in sources :

Example 1 with RedisUserDocumentQueue

use of org.icij.datashare.extract.RedisUserDocumentQueue in project datashare by ICIJ.

the class CliApp method runTaskRunner.

/**
 * Runs the command-line actions and pipeline stages selected by {@code properties}.
 *
 * <p>The method first handles the "one-shot" options, each of which terminates the JVM with
 * status 0 once done: nothing to resume, index creation, and API key creation / retrieval /
 * deletion. Otherwise it starts one task per enabled pipeline stage (deduplicate, scan index,
 * scan, index, NLP), waits for the task manager to terminate and closes the indexer.
 *
 * @param injector   source of the {@code TaskManagerMemory}, {@code TaskFactory}, {@code Indexer},
 *                   {@code DocumentQueue} and {@code PipelineRegistry} instances used here
 * @param properties command-line options driving which actions and stages are run
 * @throws Exception if any of the invoked tasks, the queue or the indexer fails
 */
private static void runTaskRunner(Injector injector, Properties properties) throws Exception {
    TaskManagerMemory taskManager = injector.getInstance(TaskManagerMemory.class);
    TaskFactory taskFactory = injector.getInstance(TaskFactory.class);
    Set<Pipeline.Type> nlpPipelines = parseAll(properties.getProperty(DatashareCliOptions.NLP_PIPELINES_OPT));
    Indexer indexer = injector.getInstance(Indexer.class);
    if (resume(properties)) {
        RedisUserDocumentQueue queue = new RedisUserDocumentQueue(nullUser(), new PropertiesProvider(properties));
        boolean queueIsEmpty;
        try {
            queueIsEmpty = queue.isEmpty();
        } finally {
            // always release the queue, even when isEmpty() fails
            queue.close();
        }
        // test the already-known queue state first so the index is only queried when it matters
        if (queueIsEmpty && indexer.search(properties.getProperty("defaultProject"), Document.class).withSource(false).without(nlpPipelines.toArray(new Pipeline.Type[0])).execute().count() == 0) {
            logger.info("nothing to resume, exiting normally");
            System.exit(0);
        }
    }
    if (properties.getProperty(CREATE_INDEX_OPT) != null) {
        indexer.createIndex(properties.getProperty(CREATE_INDEX_OPT));
        System.exit(0);
    }
    if (properties.getProperty(CRE_API_KEY_OPT) != null) {
        String userName = properties.getProperty(CRE_API_KEY_OPT);
        String secretKey = taskFactory.createGenApiKey(localUser(userName)).call();
        // NOTE(review): the generated key is written to the log in clear text; this looks
        // intentional (it is the only place the user can read it) - confirm logs are not persisted.
        logger.info("generated secret key for user {} (store it somewhere safe, datashare cannot retrieve it later): {}", userName, secretKey);
        System.exit(0);
    }
    if (properties.getProperty(GET_API_KEY_OPT) != null) {
        String userName = properties.getProperty(GET_API_KEY_OPT);
        String hashedKey = taskFactory.createGetApiKey(localUser(userName)).call();
        if (hashedKey == null) {
            logger.info("no user {} exists", userName);
        } else {
            logger.info("hashed key for user {} is {}", userName, hashedKey);
        }
        System.exit(0);
    }
    if (properties.getProperty(DEL_API_KEY_OPT) != null) {
        String userName = properties.getProperty(DEL_API_KEY_OPT);
        taskFactory.createDelApiKey(localUser(userName)).call();
        System.exit(0);
    }
    PipelineHelper pipeline = new PipelineHelper(new PropertiesProvider(properties));
    if (pipeline.has(DatashareCli.Stage.DEDUPLICATE)) {
        taskManager.startTask(taskFactory.createDeduplicateTask(nullUser(), pipeline.getQueueNameFor(DatashareCli.Stage.DEDUPLICATE)));
    }
    if (pipeline.has(DatashareCli.Stage.SCANIDX)) {
        TaskView<Long> taskView = taskManager.startTask(taskFactory.createScanIndexTask(nullUser(), ofNullable(properties.getProperty(MAP_NAME_OPTION)).orElse("extract:report")));
        logger.info("scanned {}", taskView.getResult(true));
    }
    // when resuming, the scan stage is skipped
    if (pipeline.has(DatashareCli.Stage.SCAN) && !resume(properties)) {
        taskManager.startTask(taskFactory.createScanTask(nullUser(), pipeline.getQueueNameFor(DatashareCli.Stage.SCAN), Paths.get(properties.getProperty(DatashareCliOptions.DATA_DIR_OPT)), properties), () -> closeAndLogException(injector.getInstance(DocumentQueue.class)).run());
    }
    if (pipeline.has(DatashareCli.Stage.INDEX)) {
        taskManager.startTask(taskFactory.createIndexTask(nullUser(), pipeline.getQueueNameFor(DatashareCli.Stage.INDEX), properties), () -> closeAndLogException(injector.getInstance(DocumentQueue.class)).run());
    }
    if (pipeline.has(DatashareCli.Stage.NLP)) {
        // one NLP task per requested pipeline type
        for (Pipeline.Type nlp : nlpPipelines) {
            Pipeline pipelineClass = injector.getInstance(PipelineRegistry.class).get(nlp);
            taskManager.startTask(taskFactory.createNlpTask(nullUser(), pipelineClass));
        }
        if (resume(properties)) {
            taskManager.startTask(taskFactory.createResumeNlpTask(nullUser(), nlpPipelines));
        }
    }
    taskManager.shutdownAndAwaitTermination(Integer.MAX_VALUE, SECONDS);
    indexer.close();
}
Also used : DocumentQueue(org.icij.extract.queue.DocumentQueue) RedisUserDocumentQueue(org.icij.datashare.extract.RedisUserDocumentQueue) TaskManagerMemory(org.icij.datashare.tasks.TaskManagerMemory) PipelineRegistry(org.icij.datashare.extension.PipelineRegistry) Pipeline(org.icij.datashare.text.nlp.Pipeline) Indexer(org.icij.datashare.text.indexing.Indexer) TaskFactory(org.icij.datashare.tasks.TaskFactory) RedisUserDocumentQueue(org.icij.datashare.extract.RedisUserDocumentQueue)

Aggregations

PipelineRegistry (org.icij.datashare.extension.PipelineRegistry)1 RedisUserDocumentQueue (org.icij.datashare.extract.RedisUserDocumentQueue)1 TaskFactory (org.icij.datashare.tasks.TaskFactory)1 TaskManagerMemory (org.icij.datashare.tasks.TaskManagerMemory)1 Indexer (org.icij.datashare.text.indexing.Indexer)1 Pipeline (org.icij.datashare.text.nlp.Pipeline)1 DocumentQueue (org.icij.extract.queue.DocumentQueue)1