Search in sources :

Example 1 with TaskManagerMemory

use of org.icij.datashare.tasks.TaskManagerMemory in project datashare by ICIJ.

the class LocalMode method configure.

/**
 * Installs the bindings of the parent module, then adds the ones specific to this mode:
 * {@code IndexWaiterFilter} and {@code StatusResource} as eager singletons, a
 * {@code TaskManager} backed by a new {@code TaskManagerMemory} built from
 * {@code propertiesProvider}, and finally whatever {@code configurePersistence()} registers.
 */
@Override
protected void configure() {
    super.configure();

    bind(IndexWaiterFilter.class).asEagerSingleton();
    bind(StatusResource.class).asEagerSingleton();

    // One in-memory task manager instance is created here and exposed under the TaskManager type.
    final TaskManagerMemory inMemoryTaskManager = new TaskManagerMemory(propertiesProvider);
    bind(TaskManager.class).toInstance(inMemoryTaskManager);

    configurePersistence();
}
Also used : TaskManager(org.icij.datashare.tasks.TaskManager) TaskManagerMemory(org.icij.datashare.tasks.TaskManagerMemory)

Example 2 with TaskManagerMemory

use of org.icij.datashare.tasks.TaskManagerMemory in project datashare by ICIJ.

the class CliMode method configure.

/**
 * Installs the bindings of the parent module, then binds a {@code TaskManager} backed by a new
 * {@code TaskManagerMemory} and the two repositories produced by a {@code RepositoryFactoryImpl}
 * built from {@code propertiesProvider}. {@code initDatabase()} is invoked on that factory last,
 * after both repositories have been created.
 */
@Override
protected void configure() {
    super.configure();

    final TaskManagerMemory inMemoryTaskManager = new TaskManagerMemory(propertiesProvider);
    bind(TaskManager.class).toInstance(inMemoryTaskManager);

    final RepositoryFactoryImpl repositories = new RepositoryFactoryImpl(propertiesProvider);

    final BatchSearchRepository batchSearches = repositories.createBatchSearchRepository();
    bind(BatchSearchRepository.class).toInstance(batchSearches);

    final ApiKeyRepository apiKeys = repositories.createApiKeyRepository();
    bind(ApiKeyRepository.class).toInstance(apiKeys);

    repositories.initDatabase();
}
Also used : RepositoryFactoryImpl(org.icij.datashare.db.RepositoryFactoryImpl) TaskManager(org.icij.datashare.tasks.TaskManager) ApiKeyRepository(org.icij.datashare.user.ApiKeyRepository) BatchSearchRepository(org.icij.datashare.batch.BatchSearchRepository) TaskManagerMemory(org.icij.datashare.tasks.TaskManagerMemory)

Example 3 with TaskManagerMemory

use of org.icij.datashare.tasks.TaskManagerMemory in project datashare by ICIJ.

the class CliApp method runTaskRunner.

/**
 * Runs the command-line task pipeline described by {@code properties}.
 *
 * <p>The method first handles a series of "one-shot" options, each of which terminates the JVM
 * with {@code System.exit(0)} once handled:
 * <ul>
 *   <li>resume mode with nothing left to do (no matching document in the index and an empty queue);</li>
 *   <li>{@code CREATE_INDEX_OPT}: creates the named index;</li>
 *   <li>{@code CRE_API_KEY_OPT}: generates an API key for the given user and logs it;</li>
 *   <li>{@code GET_API_KEY_OPT}: logs the hashed key of the given user, or that the user is unknown;</li>
 *   <li>{@code DEL_API_KEY_OPT}: deletes the API key of the given user.</li>
 * </ul>
 * Otherwise it starts one task per stage enabled in the pipeline (DEDUPLICATE, SCANIDX, SCAN,
 * INDEX, NLP), waits for the task manager to terminate and closes the indexer.
 *
 * @param injector   injector used to obtain the task manager, task factory, indexer, document
 *                   queue and pipeline registry
 * @param properties command-line properties driving which options and stages are executed
 * @throws Exception if any of the invoked tasks, the queue or the indexer fails
 */
private static void runTaskRunner(Injector injector, Properties properties) throws Exception {
    TaskManagerMemory taskManager = injector.getInstance(TaskManagerMemory.class);
    TaskFactory taskFactory = injector.getInstance(TaskFactory.class);
    Set<Pipeline.Type> nlpPipelines = parseAll(properties.getProperty(DatashareCliOptions.NLP_PIPELINES_OPT));
    Indexer indexer = injector.getInstance(Indexer.class);
    if (resume(properties)) {
        RedisUserDocumentQueue queue = new RedisUserDocumentQueue(nullUser(), new PropertiesProvider(properties));
        boolean queueIsEmpty;
        try {
            queueIsEmpty = queue.isEmpty();
        } finally {
            // Always release the queue, even when the emptiness check fails.
            queue.close();
        }
        // Nothing to resume when no document is missing the requested NLP pipelines and the queue is empty.
        if (indexer.search(properties.getProperty("defaultProject"), Document.class).withSource(false).without(nlpPipelines.toArray(new Pipeline.Type[0])).execute().count() == 0 && queueIsEmpty) {
            logger.info("nothing to resume, exiting normally");
            System.exit(0);
        }
    }
    if (properties.getProperty(CREATE_INDEX_OPT) != null) {
        indexer.createIndex(properties.getProperty(CREATE_INDEX_OPT));
        System.exit(0);
    }
    if (properties.getProperty(CRE_API_KEY_OPT) != null) {
        String userName = properties.getProperty(CRE_API_KEY_OPT);
        String secretKey = taskFactory.createGenApiKey(localUser(userName)).call();
        // The key is printed on purpose: per the message, it cannot be retrieved afterwards.
        logger.info("generated secret key for user {} (store it somewhere safe, datashare cannot retrieve it later): {}", userName, secretKey);
        System.exit(0);
    }
    if (properties.getProperty(GET_API_KEY_OPT) != null) {
        String userName = properties.getProperty(GET_API_KEY_OPT);
        String hashedKey = taskFactory.createGetApiKey(localUser(userName)).call();
        if (hashedKey == null) {
            logger.info("no user {} exists", userName);
        } else {
            logger.info("hashed key for user {} is {}", userName, hashedKey);
        }
        System.exit(0);
    }
    if (properties.getProperty(DEL_API_KEY_OPT) != null) {
        String userName = properties.getProperty(DEL_API_KEY_OPT);
        taskFactory.createDelApiKey(localUser(userName)).call();
        System.exit(0);
    }
    PipelineHelper pipeline = new PipelineHelper(new PropertiesProvider(properties));
    if (pipeline.has(DatashareCli.Stage.DEDUPLICATE)) {
        taskManager.startTask(taskFactory.createDeduplicateTask(nullUser(), pipeline.getQueueNameFor(DatashareCli.Stage.DEDUPLICATE)));
    }
    if (pipeline.has(DatashareCli.Stage.SCANIDX)) {
        // The report map name falls back to "extract:report" when MAP_NAME_OPTION is not set.
        TaskView<Long> taskView = taskManager.startTask(taskFactory.createScanIndexTask(nullUser(), ofNullable(properties.getProperty(MAP_NAME_OPTION)).orElse("extract:report")));
        // NOTE(review): getResult(true) presumably waits for the task to complete - confirm.
        logger.info("scanned {}", taskView.getResult(true));
    }
    // A fresh scan is skipped in resume mode.
    if (pipeline.has(DatashareCli.Stage.SCAN) && !resume(properties)) {
        taskManager.startTask(taskFactory.createScanTask(nullUser(), pipeline.getQueueNameFor(DatashareCli.Stage.SCAN), Paths.get(properties.getProperty(DatashareCliOptions.DATA_DIR_OPT)), properties), () -> closeAndLogException(injector.getInstance(DocumentQueue.class)).run());
    }
    if (pipeline.has(DatashareCli.Stage.INDEX)) {
        taskManager.startTask(taskFactory.createIndexTask(nullUser(), pipeline.getQueueNameFor(DatashareCli.Stage.INDEX), properties), () -> closeAndLogException(injector.getInstance(DocumentQueue.class)).run());
    }
    if (pipeline.has(DatashareCli.Stage.NLP)) {
        // One NLP task is started per requested pipeline type.
        for (Pipeline.Type nlp : nlpPipelines) {
            Pipeline pipelineClass = injector.getInstance(PipelineRegistry.class).get(nlp);
            taskManager.startTask(taskFactory.createNlpTask(nullUser(), pipelineClass));
        }
        if (resume(properties)) {
            taskManager.startTask(taskFactory.createResumeNlpTask(nullUser(), nlpPipelines));
        }
    }
    // Integer.MAX_VALUE seconds: in practice, wait for as long as the tasks need.
    taskManager.shutdownAndAwaitTermination(Integer.MAX_VALUE, SECONDS);
    indexer.close();
}
Also used : DocumentQueue(org.icij.extract.queue.DocumentQueue) RedisUserDocumentQueue(org.icij.datashare.extract.RedisUserDocumentQueue) TaskManagerMemory(org.icij.datashare.tasks.TaskManagerMemory) PipelineRegistry(org.icij.datashare.extension.PipelineRegistry) Pipeline(org.icij.datashare.text.nlp.Pipeline) Indexer(org.icij.datashare.text.indexing.Indexer) TaskFactory(org.icij.datashare.tasks.TaskFactory)

Example 4 with TaskManagerMemory

use of org.icij.datashare.tasks.TaskManagerMemory in project datashare by ICIJ.

the class CommonMode method configure.

/**
 * Registers the bindings shared by the modes built on this module.
 *
 * <p>In order, it binds: the properties provider, the language guesser, the batch search and
 * batch download blocking queues (implementation chosen by the {@code batchQueueType} property,
 * defaulting to {@code org.icij.datashare.extract.MemoryBlockingQueue}), the Elasticsearch client
 * and indexer, an in-memory task manager, the task factory, the document collection factory
 * (memory-backed when {@code queueType} is {@code "memory"}, Redis-backed otherwise), the data bus
 * (memory-backed when {@code busType} is {@code "memory"}, Redis-backed otherwise) and the
 * pipeline registry.
 */
@Override
protected void configure() {
    bind(PropertiesProvider.class).toInstance(propertiesProvider);
    bind(LanguageGuesser.class).to(OptimaizeLanguageGuesser.class);
    String batchQueueType = propertiesProvider.get("batchQueueType").orElse("org.icij.datashare.extract.MemoryBlockingQueue");
    bind(new TypeLiteral<BlockingQueue<String>>() {
    }).toInstance(getBlockingQueue(propertiesProvider, batchQueueType, "ds:batchsearch:queue"));
    bind(new TypeLiteral<BlockingQueue<BatchDownload>>() {
    }).toInstance(getBlockingQueue(propertiesProvider, batchQueueType, "ds:batchdownload:queue"));
    RestHighLevelClient esClient = createESClient(propertiesProvider);
    bind(RestHighLevelClient.class).toInstance(esClient);
    bind(Indexer.class).to(ElasticsearchIndexer.class).asEagerSingleton();
    bind(TaskManagerMemory.class).toInstance(new TaskManagerMemory(propertiesProvider));
    install(new FactoryModuleBuilder().build(TaskFactory.class));
    if ("memory".equals(propertiesProvider.getProperties().get("queueType"))) {
        bind(DocumentCollectionFactory.class).to(MemoryDocumentCollectionFactory.class).asEagerSingleton();
    } else {
        // Any other queue type gets the Redis-backed queue and report map implementations.
        install(new FactoryModuleBuilder().implement(DocumentQueue.class, RedisUserDocumentQueue.class).implement(ReportMap.class, RedisUserReportMap.class).build(DocumentCollectionFactory.class));
    }
    DataBus dataBus;
    if ("memory".equals(propertiesProvider.getProperties().get("busType"))) {
        dataBus = new MemoryDataBus();
    } else {
        dataBus = new RedisDataBus(propertiesProvider);
    }
    // The same bus instance is exposed under both the DataBus and Publisher types.
    bind(DataBus.class).toInstance(dataBus);
    bind(Publisher.class).toInstance(dataBus);
    PipelineRegistry pipelineRegistry = new PipelineRegistry(propertiesProvider);
    pipelineRegistry.register(EmailPipeline.class);
    pipelineRegistry.register(Pipeline.Type.CORENLP);
    try {
        pipelineRegistry.load();
    } catch (FileNotFoundException e) {
        // A missing extensions directory is not fatal: the pipelines registered above remain available.
        LoggerFactory.getLogger(getClass()).info("extensions dir not found {}", e.getMessage());
    }
    bind(PipelineRegistry.class).toInstance(pipelineRegistry);
}
Also used : BatchDownload(org.icij.datashare.batch.BatchDownload) FactoryModuleBuilder(com.google.inject.assistedinject.FactoryModuleBuilder) FileNotFoundException(java.io.FileNotFoundException) MemoryDataBus(org.icij.datashare.com.MemoryDataBus) RedisUserDocumentQueue(org.icij.datashare.extract.RedisUserDocumentQueue) DocumentQueue(org.icij.extract.queue.DocumentQueue) RestHighLevelClient(org.elasticsearch.client.RestHighLevelClient) DataBus(org.icij.datashare.com.DataBus) RedisDataBus(org.icij.datashare.com.RedisDataBus) Publisher(org.icij.datashare.com.Publisher) TaskManagerMemory(org.icij.datashare.tasks.TaskManagerMemory) PipelineRegistry(org.icij.datashare.extension.PipelineRegistry) PropertiesProvider(org.icij.datashare.PropertiesProvider) MemoryDocumentCollectionFactory(org.icij.datashare.tasks.MemoryDocumentCollectionFactory) TypeLiteral(com.google.inject.TypeLiteral) DocumentCollectionFactory(org.icij.datashare.tasks.DocumentCollectionFactory) TaskFactory(org.icij.datashare.tasks.TaskFactory) OptimaizeLanguageGuesser(org.icij.datashare.nlp.OptimaizeLanguageGuesser) LanguageGuesser(org.icij.datashare.text.indexing.LanguageGuesser) ElasticsearchIndexer(org.icij.datashare.text.indexing.elasticsearch.ElasticsearchIndexer)

Aggregations

TaskManagerMemory (org.icij.datashare.tasks.TaskManagerMemory)4 PipelineRegistry (org.icij.datashare.extension.PipelineRegistry)2 RedisUserDocumentQueue (org.icij.datashare.extract.RedisUserDocumentQueue)2 TaskFactory (org.icij.datashare.tasks.TaskFactory)2 TaskManager (org.icij.datashare.tasks.TaskManager)2 DocumentQueue (org.icij.extract.queue.DocumentQueue)2 TypeLiteral (com.google.inject.TypeLiteral)1 FactoryModuleBuilder (com.google.inject.assistedinject.FactoryModuleBuilder)1 FileNotFoundException (java.io.FileNotFoundException)1 RestHighLevelClient (org.elasticsearch.client.RestHighLevelClient)1 PropertiesProvider (org.icij.datashare.PropertiesProvider)1 BatchDownload (org.icij.datashare.batch.BatchDownload)1 BatchSearchRepository (org.icij.datashare.batch.BatchSearchRepository)1 DataBus (org.icij.datashare.com.DataBus)1 MemoryDataBus (org.icij.datashare.com.MemoryDataBus)1 Publisher (org.icij.datashare.com.Publisher)1 RedisDataBus (org.icij.datashare.com.RedisDataBus)1 RepositoryFactoryImpl (org.icij.datashare.db.RepositoryFactoryImpl)1 OptimaizeLanguageGuesser (org.icij.datashare.nlp.OptimaizeLanguageGuesser)1 DocumentCollectionFactory (org.icij.datashare.tasks.DocumentCollectionFactory)1