Search in sources :

Example 1 with PreprocessingJob

use of com.bakdata.conquery.models.preproc.PreprocessingJob in project conquery by bakdata.

Method `run` of class `PreprocessorCommand`.

/**
 * Collects preprocessing jobs from the command-line arguments, checks that their
 * source files exist, submits the jobs that require processing to the thread pool
 * and finally logs a summary of succeeded, missing and failed jobs.
 *
 * <p>Arguments read from {@code namespace}: {@code fast-fail} (defaults to
 * {@code false}), {@code strict} (defaults to {@code true}), {@code tag},
 * {@code in}, {@code out} and {@code desc}.
 *
 * @param environment supplies the validator handed to {@code findPreprocessingDescriptions}
 * @param namespace   parsed command-line arguments
 * @param config      supplies the preprocessor thread count and is passed on to
 *                    {@code Preprocessor.preprocess}
 * @throws Exception declared by the overridden method; among others, an
 *                   {@link InterruptedException} from waiting on the pool propagates
 */
@Override
protected void run(Environment environment, Namespace namespace, ConqueryConfig config) throws Exception {
    if (pool == null) {
        pool = Executors.newFixedThreadPool(config.getPreprocessor().getNThreads());
    }
    isFailFast = Optional.ofNullable(namespace.getBoolean("fast-fail")).orElse(false);
    isStrict = Optional.ofNullable(namespace.getBoolean("strict")).orElse(true);
    // Tag if present is appended to input-file csvs, output-file cqpp and used as id of cqpps
    final List<String> tags = namespace.<String>getList("tag");
    final File inDir = namespace.get("in");
    final File outDir = namespace.get("out");
    final List<File> descriptionFiles = namespace.<File>getList("desc");
    log.info("Preprocessing from command line config.");
    final Collection<PreprocessingJob> jobs = new ArrayList<>();
    if (tags == null || tags.isEmpty()) {
        // Without tags every description is resolved exactly once, untagged.
        for (File desc : descriptionFiles) {
            final List<PreprocessingJob> descriptions = findPreprocessingDescriptions(desc, inDir, outDir, Optional.empty(), environment.getValidator());
            jobs.addAll(descriptions);
        }
    } else {
        // With tags every description is resolved once per tag.
        for (String tag : tags) {
            for (File desc : descriptionFiles) {
                final List<PreprocessingJob> jobDescriptions = findPreprocessingDescriptions(desc, inDir, outDir, Optional.of(tag), environment.getValidator());
                jobs.addAll(jobDescriptions);
            }
        }
    }
    // Pre-check: a job is recorded once for every input whose source file does not exist.
    List<PreprocessingJob> missing = new ArrayList<>();
    for (PreprocessingJob job : jobs) {
        for (TableInputDescriptor input : job.getDescriptor().getInputs()) {
            final File sourceFile = Preprocessor.resolveSourceFile(input.getSourceFile(), job.getCsvDirectory(), job.getTag());
            if (!sourceFile.exists()) {
                log.error("Did not find file `{}` for Preprocessing[{}].", sourceFile, job);
                missing.add(job);
            }
        }
    }
    // This will halt preprocessing immediately.
    if (isStrict && !missing.isEmpty()) {
        log.error("FAILED Preprocessing, files are missing.");
        doFail();
    }
    jobs.removeIf(Predicate.not(PreprocessorCommand::requiresProcessing));
    final long totalSize = jobs.stream().mapToLong(PreprocessingJob::estimateTotalCsvSizeBytes).sum();
    log.info("Required to preprocess {} in total", BinaryByteUnit.format(totalSize));
    ProgressBar totalProgress = new ProgressBar(totalSize, System.out);
    for (PreprocessingJob job : jobs) {
        pool.submit(() -> {
            ConqueryMDC.setLocation(job.toString());
            try {
                Preprocessor.preprocess(job, totalProgress, config);
                success.add(job.toString());
            } catch (FileNotFoundException e) {
                log.warn("Did not find file `{}` for preprocessing.", e.getMessage());
                // NOTE(review): addMissing presumably records into state outside this method;
                // the local `missing` list reported below only holds the pre-check results - confirm.
                addMissing(job);
            } catch (Exception e) {
                // The trailing throwable is logged with its stack trace.
                log.error("Failed to preprocess {}", LogUtil.printPath(job.getDescriptionFile()), e);
                addFailed(job);
            }
        });
    }
    pool.shutdown();
    // awaitTermination returns false on timeout; surface that instead of silently
    // reporting a summary while jobs may still be running.
    if (!pool.awaitTermination(24, TimeUnit.HOURS)) {
        log.error("Preprocessing did not terminate within 24 hours, the summary below may be incomplete.");
    }
    ConqueryMDC.clearLocation();
    if (!success.isEmpty()) {
        log.info("Successfully Preprocess {} Jobs:", success.size());
        success.forEach(desc -> log.info("\tSucceeded Preprocessing for {}", desc));
    }
    if (!missing.isEmpty()) {
        log.warn("Did not find {} Files", missing.size());
        missing.forEach(desc -> log.warn("\tDid not find file for {}", desc));
    }
    if (isFailed()) {
        log.error("Failed {} Preprocessing Jobs:", failed.size());
        failed.forEach(desc -> log.error("\tFailed Preprocessing for {}", desc));
        doFail();
    }
}
Also used : ArrayList(java.util.ArrayList) FileNotFoundException(java.io.FileNotFoundException) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) PreprocessingJob(com.bakdata.conquery.models.preproc.PreprocessingJob) TableInputDescriptor(com.bakdata.conquery.models.preproc.TableInputDescriptor) File(java.io.File) ProgressBar(com.bakdata.conquery.util.io.ProgressBar)

Example 2 with PreprocessingJob

use of com.bakdata.conquery.models.preproc.PreprocessingJob in project conquery by bakdata.

Method `tryExtractDescriptor` of class `PreprocessorCommand`.

/**
 * Attempts to read a single description file and wrap it in a {@link PreprocessingJob}.
 *
 * <p>The parsed descriptor is validated; any constraint violation is treated like a
 * read failure. On failure the error is logged, {@code doFail()} is invoked when
 * fail-fast is enabled, the description file is recorded in {@code failed} and an
 * empty Optional is returned.
 *
 * @param validator       validator applied to the parsed descriptor
 * @param tag             optional tag; when present it also overrides the descriptor's name
 * @param descriptionFile description file to read
 * @param outputDir       output directory handed to the job
 * @param csvDir          csv input directory handed to the job
 * @return the job, or empty if the description could not be read or is invalid
 * @throws IOException declared for callers; exceptions raised while reading and
 *                     validating are caught and logged inside this method
 */
private Optional<PreprocessingJob> tryExtractDescriptor(Validator validator, Optional<String> tag, File descriptionFile, File outputDir, File csvDir) throws IOException {
    try {
        final TableImportDescriptor descriptor = TableImportDescriptor.read(descriptionFile);
        // Validate the descriptor (not the validator itself) and act on the result:
        // a discarded violation set would make the validation a no-op.
        final var violations = validator.validate(descriptor);
        if (!violations.isEmpty()) {
            throw new IllegalStateException("Invalid preprocessing description: " + violations);
        }
        final PreprocessingJob preprocessingJob = new PreprocessingJob(csvDir.toPath(), descriptionFile, outputDir.toPath(), tag, descriptor);
        // Override name to tag if present
        tag.ifPresent(descriptor::setName);
        return Optional.of(preprocessingJob);
    } catch (Exception e) {
        log.error("Failed to process " + LogUtil.printPath(descriptionFile), e);
        if (isFailFast) {
            doFail();
        }
        failed.add(descriptionFile.toString());
    }
    return Optional.empty();
}
Also used : TableImportDescriptor(com.bakdata.conquery.models.preproc.TableImportDescriptor) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) PreprocessingJob(com.bakdata.conquery.models.preproc.PreprocessingJob)

Example 3 with PreprocessingJob

use of com.bakdata.conquery.models.preproc.PreprocessingJob in project conquery by bakdata.

Method `findPreprocessingDescriptions` of class `PreprocessorCommand`.

/**
 * Resolves the given path to description files and builds a job for each one that
 * can be extracted.
 *
 * <p>A regular file is used as-is; otherwise the path is listed for entries whose
 * name ends with {@code ConqueryConstants.EXTENSION_DESCRIPTION}. If the listing
 * yields {@code null}, an empty list is returned.
 *
 * @param descriptionFiles a single description file, or a directory to search
 * @param inDir            csv input directory passed on to each job
 * @param outputDir        output directory passed on to each job
 * @param tag              optional tag passed on to each job
 * @param validator        validator passed on to {@code tryExtractDescriptor}
 * @return the jobs that could be extracted, possibly empty
 * @throws IOException as declared by {@code tryExtractDescriptor}
 */
public List<PreprocessingJob> findPreprocessingDescriptions(File descriptionFiles, File inDir, File outputDir, Optional<String> tag, Validator validator) throws IOException {
    final File[] candidates;
    if (descriptionFiles.isFile()) {
        candidates = new File[] { descriptionFiles };
    } else {
        candidates = descriptionFiles.listFiles((parent, fileName) -> fileName.endsWith(ConqueryConstants.EXTENSION_DESCRIPTION));
    }

    // listFiles yields null when the path cannot be listed.
    if (candidates == null) {
        return Collections.emptyList();
    }

    final List<PreprocessingJob> extracted = new ArrayList<>();
    for (File candidate : candidates) {
        final Optional<PreprocessingJob> maybeJob = tryExtractDescriptor(validator, tag, candidate, outputDir, inDir);
        if (maybeJob.isPresent()) {
            extracted.add(maybeJob.get());
        }
    }
    return extracted;
}
Also used : ArrayList(java.util.ArrayList) File(java.io.File) PreprocessingJob(com.bakdata.conquery.models.preproc.PreprocessingJob)

Aggregations

PreprocessingJob (com.bakdata.conquery.models.preproc.PreprocessingJob)3 File (java.io.File)2 FileNotFoundException (java.io.FileNotFoundException)2 IOException (java.io.IOException)2 ArrayList (java.util.ArrayList)2 TableImportDescriptor (com.bakdata.conquery.models.preproc.TableImportDescriptor)1 TableInputDescriptor (com.bakdata.conquery.models.preproc.TableInputDescriptor)1 ProgressBar (com.bakdata.conquery.util.io.ProgressBar)1