Search in sources :

Example 1 with TableImportDescriptor

use of com.bakdata.conquery.models.preproc.TableImportDescriptor in project conquery by bakdata.

The method execute of the class ImportUpdateTest.

/**
 * Integration test for updating an already loaded import with newly preprocessed data.
 * <p>
 * The test loads the two tables of {@code SIMPLE_TREECONCEPT_Query.json}, but only imports the CQPP of the
 * first one. It then
 * <ol>
 *     <li>checks storage/worker state and the query result before the update,</li>
 *     <li>verifies that updating an import that was never loaded is answered with a client error,</li>
 *     <li>preprocesses the {@code .update.csv} variant of the first table and updates the import with it,</li>
 *     <li>checks storage/worker state and the query result after the update.</li>
 * </ol>
 *
 * @param name         name of the dataset/support to run the test on
 * @param testConquery the running test instance
 * @throws Exception if any loading, preprocessing or assertion step fails
 */
@Override
public void execute(String name, TestConquery testConquery) throws Exception {
    final StandaloneSupport conquery = testConquery.getSupport(name);
    final String testJson = In.resource("/tests/query/UPDATE_IMPORT_TESTS/SIMPLE_TREECONCEPT_Query.json").withUTF8().readAll();
    final Dataset dataset = conquery.getDataset();
    final Namespace namespace = conquery.getNamespace();
    final ImportId importId1 = ImportId.Parser.INSTANCE.parse(dataset.getName(), "table1", "table1");
    final QueryTest test = (QueryTest) JsonIntegrationTest.readJson(dataset, testJson);
    final List<RequiredTable> tables = test.getContent().getTables();
    assertThat(tables.size()).isEqualTo(2);
    final List<File> cqpps;
    // Manually import data, so we can do our own work.
    {
        ValidatorHelper.failOnError(log, conquery.getValidator().validate(test));
        importSecondaryIds(conquery, test.getContent().getSecondaryIds());
        conquery.waitUntilWorkDone();
        LoadingUtil.importTables(conquery, tables);
        conquery.waitUntilWorkDone();
        LoadingUtil.importConcepts(conquery, test.getRawConcepts());
        conquery.waitUntilWorkDone();
        cqpps = LoadingUtil.generateCqpp(conquery, tables);
        conquery.waitUntilWorkDone();
        assertThat(cqpps.size()).isEqualTo(tables.size());
        // Only the first CQPP is imported; the second one stays unknown to the dataset.
        LoadingUtil.importCqppFiles(conquery, List.of(cqpps.get(0)));
        conquery.waitUntilWorkDone();
    }
    final Query query = IntegrationUtils.parseQuery(conquery, test.getRawQuery());
    // State before update.
    {
        log.info("Checking state before update");
        assertSingleImportPresent(conquery, namespace, dataset, importId1);
        assertThat(namespace.getNumberOfEntities()).isEqualTo(4);
        // assert that the query can be executed after the import
        IntegrationUtils.assertQueryResult(conquery, query, 2L, ExecutionState.DONE, conquery.getTestUser(), 201);
    }
    // Trying to update an import that does not exist should throw a Not-Found WebApplicationException
    LoadingUtil.updateCqppFile(conquery, cqpps.get(1), Response.Status.Family.CLIENT_ERROR, "Not Found");
    conquery.waitUntilWorkDone();
    // Load manually new data for import and update the concerned import
    {
        log.info("Manually loading new data for import");
        final RequiredTable importTable = tables.stream()
                .filter(table -> table.getName().equalsIgnoreCase(importId1.getTable().getTable()))
                .findFirst()
                .orElseThrow();
        final String csvName = importTable.getCsv().getName();
        final String path = importTable.getCsv().getPath();
        // Copy the new content of the importTable into the csv-File used by the preprocessor,
        // to avoid creating multiple files with the same names.
        final String updateResource = path.substring(0, path.lastIndexOf('/')) + "/" + csvName.replace(".csv", ".update.csv");
        FileUtils.copyInputStreamToFile(In.resource(updateResource).asStream(), new File(conquery.getTmpDir(), csvName));
        final File descriptionFile = new File(conquery.getTmpDir(), importTable.getName() + ConqueryConstants.EXTENSION_DESCRIPTION);
        final File newPreprocessedFile = new File(conquery.getTmpDir(), importTable.getName() + ConqueryConstants.EXTENSION_PREPROCESSED);
        // create import descriptor
        {
            final TableImportDescriptor desc = new TableImportDescriptor();
            desc.setName(importTable.getName());
            desc.setTable(importTable.getName());
            final TableInputDescriptor input = new TableInputDescriptor();
            input.setPrimary(importTable.getPrimaryColumn().createOutput());
            input.setSourceFile(csvName);
            input.setOutput(new OutputDescription[importTable.getColumns().length]);
            for (int i = 0; i < importTable.getColumns().length; i++) {
                input.getOutput()[i] = importTable.getColumns()[i].createOutput();
            }
            desc.setInputs(new TableInputDescriptor[] { input });
            Jackson.MAPPER.writeValue(descriptionFile, desc);
        }
        // preprocess
        conquery.preprocessTmp(conquery.getTmpDir(), List.of(descriptionFile));
        log.info("updating import");
        // correct update of the import
        LoadingUtil.updateCqppFile(conquery, newPreprocessedFile, Response.Status.Family.SUCCESSFUL, "No Content");
        conquery.waitUntilWorkDone();
    }
    // State after update.
    {
        log.info("Checking state after update");
        assertSingleImportPresent(conquery, namespace, dataset, importId1);
        assertThat(namespace.getNumberOfEntities()).isEqualTo(9);
        // Issue a query and assert that it has more content.
        IntegrationUtils.assertQueryResult(conquery, query, 4L, ExecutionState.DONE, conquery.getTestUser(), 201);
    }
}

/**
 * Asserts that {@code importId} is the only import of the namespace and that every worker of
 * {@code dataset} holds CBlocks and buckets of that dataset and knows the import.
 * Shared by the "before update" and "after update" checks, which are identical.
 */
private static void assertSingleImportPresent(StandaloneSupport conquery, Namespace namespace, Dataset dataset, ImportId importId) {
    assertThat(namespace.getStorage().getAllImports()).hasSize(1);
    // Must contain the import.
    assertThat(namespace.getStorage().getAllImports()).filteredOn(imp -> imp.getId().equals(importId)).isNotEmpty();
    assertThat(namespace.getStorage().getCentralRegistry().getOptional(importId)).isNotEmpty();
    for (ShardNode node : conquery.getShardNodes()) {
        for (Worker worker : node.getWorkers().getWorkers().values()) {
            // Workers of other datasets are not affected by this test.
            if (!worker.getInfo().getDataset().equals(dataset.getId())) {
                continue;
            }
            final ModificationShieldedWorkerStorage workerStorage = worker.getStorage();
            assertThat(workerStorage.getAllCBlocks())
                    .describedAs("CBlocks for Worker %s", worker.getInfo().getId())
                    .filteredOn(block -> block.getBucket().getId().getDataset().equals(dataset.getId()))
                    .isNotEmpty();
            assertThat(workerStorage.getAllBuckets())
                    .filteredOn(bucket -> bucket.getId().getDataset().equals(dataset.getId()))
                    .describedAs("Buckets for Worker %s", worker.getInfo().getId())
                    .isNotEmpty();
            // Must contain the import.
            assertThat(workerStorage.getImport(importId)).isNotNull();
        }
    }
}
Also used : ExecutionState(com.bakdata.conquery.models.execution.ExecutionState) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) RequiredTable(com.bakdata.conquery.integration.common.RequiredTable) QueryTest(com.bakdata.conquery.integration.json.QueryTest) Worker(com.bakdata.conquery.models.worker.Worker) ModificationShieldedWorkerStorage(com.bakdata.conquery.io.storage.ModificationShieldedWorkerStorage) In(com.github.powerlibraries.io.In) LoadingUtil.importSecondaryIds(com.bakdata.conquery.integration.common.LoadingUtil.importSecondaryIds) ConqueryConstants(com.bakdata.conquery.ConqueryConstants) IntegrationUtils(com.bakdata.conquery.integration.common.IntegrationUtils) TableImportDescriptor(com.bakdata.conquery.models.preproc.TableImportDescriptor) TestConquery(com.bakdata.conquery.util.support.TestConquery) ShardNode(com.bakdata.conquery.commands.ShardNode) FileUtils(org.apache.commons.io.FileUtils) LoadingUtil(com.bakdata.conquery.integration.common.LoadingUtil) File(java.io.File) ImportId(com.bakdata.conquery.models.identifiable.ids.specific.ImportId) StandaloneSupport(com.bakdata.conquery.util.support.StandaloneSupport) Dataset(com.bakdata.conquery.models.datasets.Dataset) ValidatorHelper(com.bakdata.conquery.models.exceptions.ValidatorHelper) OutputDescription(com.bakdata.conquery.models.preproc.outputs.OutputDescription) List(java.util.List) Slf4j(lombok.extern.slf4j.Slf4j) Response(javax.ws.rs.core.Response) TableInputDescriptor(com.bakdata.conquery.models.preproc.TableInputDescriptor) JsonIntegrationTest(com.bakdata.conquery.integration.json.JsonIntegrationTest) Query(com.bakdata.conquery.apiv1.query.Query) Jackson(com.bakdata.conquery.io.jackson.Jackson) MetaStorage(com.bakdata.conquery.io.storage.MetaStorage) Namespace(com.bakdata.conquery.models.worker.Namespace) Query(com.bakdata.conquery.apiv1.query.Query) QueryTest(com.bakdata.conquery.integration.json.QueryTest) Dataset(com.bakdata.conquery.models.datasets.Dataset) 
ImportId(com.bakdata.conquery.models.identifiable.ids.specific.ImportId) Namespace(com.bakdata.conquery.models.worker.Namespace) ModificationShieldedWorkerStorage(com.bakdata.conquery.io.storage.ModificationShieldedWorkerStorage) OutputDescription(com.bakdata.conquery.models.preproc.outputs.OutputDescription) ShardNode(com.bakdata.conquery.commands.ShardNode) MetaStorage(com.bakdata.conquery.io.storage.MetaStorage) Worker(com.bakdata.conquery.models.worker.Worker) TableInputDescriptor(com.bakdata.conquery.models.preproc.TableInputDescriptor) StandaloneSupport(com.bakdata.conquery.util.support.StandaloneSupport) File(java.io.File) TableImportDescriptor(com.bakdata.conquery.models.preproc.TableImportDescriptor) RequiredTable(com.bakdata.conquery.integration.common.RequiredTable)

Example 2 with TableImportDescriptor

use of com.bakdata.conquery.models.preproc.TableImportDescriptor in project conquery by bakdata.

The method tryExtractDescriptor of the class PreprocessorCommand.

/**
 * Reads the import description in {@code descriptionFile}, validates it and wraps it in a
 * {@link PreprocessingJob}.
 * <p>
 * Any failure (unreadable file, validation violations, ...) is logged, triggers {@code doFail()} when
 * {@code isFailFast} is set, and records the file in {@code failed}; in that case an empty Optional is returned.
 *
 * @param validator       validator applied to the parsed descriptor
 * @param tag             optional tag; when present it replaces the descriptor's name
 * @param descriptionFile the description file to read
 * @param outputDir       directory handed to the job as output location
 * @param csvDir          directory handed to the job as CSV location
 * @return the job for the description, or an empty Optional if the description could not be processed
 */
private Optional<PreprocessingJob> tryExtractDescriptor(Validator validator, Optional<String> tag, File descriptionFile, File outputDir, File csvDir) throws IOException {
    try {
        final TableImportDescriptor descriptor = TableImportDescriptor.read(descriptionFile);
        // Validate the parsed descriptor. Previously the validator itself was passed in and the
        // result was discarded, so invalid descriptions were never detected.
        final java.util.Set<?> violations = validator.validate(descriptor);
        if (!violations.isEmpty()) {
            // Handled by the catch block below like every other failure of this description.
            throw new IllegalArgumentException("Invalid import description: " + violations);
        }
        final PreprocessingJob preprocessingJob = new PreprocessingJob(csvDir.toPath(), descriptionFile, outputDir.toPath(), tag, descriptor);
        // Override name to tag if present
        tag.ifPresent(descriptor::setName);
        return Optional.of(preprocessingJob);
    } catch (Exception e) {
        log.error("Failed to process " + LogUtil.printPath(descriptionFile), e);
        if (isFailFast) {
            doFail();
        }
        failed.add(descriptionFile.toString());
    }
    return Optional.empty();
}
Also used : TableImportDescriptor(com.bakdata.conquery.models.preproc.TableImportDescriptor) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) PreprocessingJob(com.bakdata.conquery.models.preproc.PreprocessingJob)

Example 3 with TableImportDescriptor

use of com.bakdata.conquery.models.preproc.TableImportDescriptor in project conquery by bakdata.

The method execute of the class ImportDeletionTest.

/**
 * Integration test for deleting an import and loading it again under the same name.
 * <p>
 * The test loads {@code SIMPLE_TREECONCEPT_Query.test.json} and then
 * <ol>
 *     <li>checks storage/worker state and the query result before the deletion,</li>
 *     <li>deletes the import of {@code test_table2} via the admin REST resource,</li>
 *     <li>checks that neither namespace nor workers reference the import anymore,</li>
 *     <li>preprocesses {@code content2.2.csv} for the same table and imports it again,</li>
 *     <li>restarts conquery and repeats the checks on the re-opened dataset.</li>
 * </ol>
 *
 * @param name         name of the dataset/support to run the test on
 * @param testConquery the running test instance; it is shut down and restarted by this test
 * @throws Exception if any loading, preprocessing or assertion step fails
 */
@Override
public void execute(String name, TestConquery testConquery) throws Exception {
    final StandaloneSupport conquery = testConquery.getSupport(name);
    final String testJson = In.resource("/tests/query/DELETE_IMPORT_TESTS/SIMPLE_TREECONCEPT_Query.test.json").withUTF8().readAll();
    final Dataset dataset = conquery.getDataset();
    final Namespace namespace = conquery.getNamespace();
    final ImportId importId = ImportId.Parser.INSTANCE.parse(dataset.getName(), "test_table2", "test_table2");
    final QueryTest test = (QueryTest) JsonIntegrationTest.readJson(dataset, testJson);
    // Manually import data, so we can do our own work.
    {
        ValidatorHelper.failOnError(log, conquery.getValidator().validate(test));
        importSecondaryIds(conquery, test.getContent().getSecondaryIds());
        conquery.waitUntilWorkDone();
        LoadingUtil.importTables(conquery, test.getContent().getTables());
        conquery.waitUntilWorkDone();
        LoadingUtil.importConcepts(conquery, test.getRawConcepts());
        conquery.waitUntilWorkDone();
        LoadingUtil.importTableContents(conquery, test.getContent().getTables());
        conquery.waitUntilWorkDone();
    }
    final Query query = IntegrationUtils.parseQuery(conquery, test.getRawQuery());
    // Reference count for all later "one less" / "same again" assertions.
    final int nImports = namespace.getStorage().getAllImports().size();
    // State before deletion.
    {
        log.info("Checking state before deletion");
        // Must contain the import.
        assertThat(namespace.getStorage().getAllImports()).filteredOn(imp -> imp.getId().equals(importId)).isNotEmpty();
        assertThat(namespace.getStorage().getCentralRegistry().getOptional(importId)).isNotEmpty();
        for (ShardNode node : conquery.getShardNodes()) {
            for (Worker worker : node.getWorkers().getWorkers().values()) {
                if (!worker.getInfo().getDataset().equals(dataset.getId())) {
                    continue;
                }
                final ModificationShieldedWorkerStorage workerStorage = worker.getStorage();
                assertThat(workerStorage.getAllCBlocks())
                        .describedAs("CBlocks for Worker %s", worker.getInfo().getId())
                        .filteredOn(block -> block.getBucket().getId().getDataset().equals(dataset.getId()))
                        .isNotEmpty();
                assertThat(workerStorage.getAllBuckets())
                        .filteredOn(bucket -> bucket.getId().getDataset().equals(dataset.getId()))
                        .describedAs("Buckets for Worker %s", worker.getInfo().getId())
                        .isNotEmpty();
                // Must contain the import.
                assertThat(workerStorage.getImport(importId)).isNotNull();
            }
        }
        log.info("Executing query before deletion");
        IntegrationUtils.assertQueryResult(conquery, query, 2L, ExecutionState.DONE, conquery.getTestUser(), 201);
    }
    // Delete the import.
    {
        log.info("Issuing deletion of import {}", importId);
        final URI deleteImportUri = HierarchyHelper.hierarchicalPath(conquery.defaultAdminURIBuilder(), AdminTablesResource.class, "deleteImport")
                .buildFromMap(Map.of(ResourceConstants.DATASET, conquery.getDataset().getId(), ResourceConstants.TABLE, importId.getTable(), ResourceConstants.IMPORT_ID, importId));
        final Response delete = conquery.getClient().target(deleteImportUri).request(MediaType.APPLICATION_JSON).delete();
        assertThat(delete.getStatusInfo().getFamily()).isEqualTo(Response.Status.Family.SUCCESSFUL);
        conquery.waitUntilWorkDone();
    }
    // State after deletion.
    {
        log.info("Checking state after deletion");
        // We have deleted an import now there should be one less!
        assertThat(namespace.getStorage().getAllImports().size()).isEqualTo(nImports - 1);
        // The deleted import should not be found.
        assertThat(namespace.getStorage().getAllImports()).filteredOn(imp -> imp.getId().equals(importId)).isEmpty();
        for (ShardNode node : conquery.getShardNodes()) {
            for (Worker worker : node.getWorkers().getWorkers().values()) {
                if (!worker.getInfo().getDataset().equals(dataset.getId())) {
                    continue;
                }
                final ModificationShieldedWorkerStorage workerStorage = worker.getStorage();
                // No bucket should be found referencing the import.
                assertThat(workerStorage.getAllBuckets())
                        .describedAs("Buckets for Worker %s", worker.getInfo().getId())
                        .filteredOn(bucket -> bucket.getImp().getId().equals(importId))
                        .isEmpty();
                // No CBlock associated with import may exist
                assertThat(workerStorage.getAllCBlocks())
                        .describedAs("CBlocks for Worker %s", worker.getInfo().getId())
                        .filteredOn(cBlock -> cBlock.getBucket().getId().getImp().equals(importId))
                        .isEmpty();
                // Import should not exist anymore
                assertThat(workerStorage.getImport(importId)).describedAs("Import for Worker %s", worker.getInfo().getId()).isNull();
            }
        }
        log.info("Executing query after deletion");
        // Issue a query and assert that it has less content.
        IntegrationUtils.assertQueryResult(conquery, query, 1L, ExecutionState.DONE, conquery.getTestUser(), 201);
    }
    conquery.waitUntilWorkDone();
    // Load more data under the same name into the same table, with only the deleted import/table
    {
        // only import the deleted import/table
        final RequiredTable import2Table = test.getContent().getTables().stream()
                .filter(table -> table.getName().equalsIgnoreCase(importId.getTable().getTable()))
                .findFirst()
                .orElseThrow();
        final ResourceFile csv = import2Table.getCsv();
        final String path = csv.getPath();
        // copy csv to tmp folder
        // Content 2.2 contains an extra entry of a value that hasn't been seen before.
        FileUtils.copyInputStreamToFile(In.resource(path.substring(0, path.lastIndexOf('/')) + "/" + "content2.2.csv").asStream(), new File(conquery.getTmpDir(), csv.getName()));
        final File descriptionFile = new File(conquery.getTmpDir(), import2Table.getName() + ConqueryConstants.EXTENSION_DESCRIPTION);
        final File preprocessedFile = new File(conquery.getTmpDir(), import2Table.getName() + ConqueryConstants.EXTENSION_PREPROCESSED);
        // create import descriptor
        final TableImportDescriptor desc = new TableImportDescriptor();
        desc.setName(import2Table.getName());
        desc.setTable(import2Table.getName());
        final TableInputDescriptor input = new TableInputDescriptor();
        input.setPrimary(import2Table.getPrimaryColumn().createOutput());
        input.setSourceFile(csv.getName());
        input.setOutput(new OutputDescription[import2Table.getColumns().length]);
        for (int i = 0; i < import2Table.getColumns().length; i++) {
            input.getOutput()[i] = import2Table.getColumns()[i].createOutput();
        }
        desc.setInputs(new TableInputDescriptor[] { input });
        Jackson.MAPPER.writeValue(descriptionFile, desc);
        // preprocess
        conquery.preprocessTmp(conquery.getTmpDir(), List.of(descriptionFile));
        // import preprocessedFiles
        // The streams are closed once the import has finished; they are kept open until all work is
        // done in case the import reads from them lazily.
        try (FileInputStream fileStream = new FileInputStream(preprocessedFile);
             GZIPInputStream preprocessedStream = new GZIPInputStream(fileStream)) {
            conquery.getDatasetsProcessor().addImport(conquery.getNamespace(), preprocessedStream);
            conquery.waitUntilWorkDone();
        }
    }
    // State after reimport.
    {
        log.info("Checking state after re-import");
        assertThat(namespace.getStorage().getAllImports().size()).isEqualTo(nImports);
        for (ShardNode node : conquery.getShardNodes()) {
            for (Worker worker : node.getWorkers().getWorkers().values()) {
                if (!worker.getInfo().getDataset().equals(dataset.getId())) {
                    continue;
                }
                final ModificationShieldedWorkerStorage workerStorage = worker.getStorage();
                assertThat(workerStorage.getAllBuckets())
                        .describedAs("Buckets for Worker %s", worker.getInfo().getId())
                        .filteredOn(bucket -> bucket.getImp().getId().equals(importId))
                        .filteredOn(bucket -> bucket.getId().getDataset().equals(dataset.getId()))
                        .isNotEmpty();
            }
        }
        log.info("Executing query after re-import");
        // Issue a query and assert that it has the same content as the first time around.
        IntegrationUtils.assertQueryResult(conquery, query, 2L, ExecutionState.DONE, conquery.getTestUser(), 201);
    }
    // Finally, restart conquery and assert again, that the data is correct.
    {
        testConquery.shutdown();
        // restart
        testConquery.beforeAll();
        final StandaloneSupport conquery2 = testConquery.openDataset(dataset.getId());
        log.info("Checking state after re-start");
        // Check the namespace of the re-opened dataset: the `namespace` obtained before the shutdown
        // would only reflect the pre-restart state.
        assertThat(conquery2.getNamespace().getStorage().getAllImports().size()).isEqualTo(nImports);
        for (ShardNode node : conquery2.getShardNodes()) {
            for (Worker worker : node.getWorkers().getWorkers().values()) {
                if (!worker.getInfo().getDataset().equals(dataset.getId())) {
                    continue;
                }
                final ModificationShieldedWorkerStorage workerStorage = worker.getStorage();
                assertThat(workerStorage.getAllBuckets())
                        .describedAs("Buckets for Worker %s", worker.getInfo().getId())
                        .filteredOn(bucket -> bucket.getId().getDataset().equals(dataset.getId()))
                        .filteredOn(bucket -> bucket.getImp().getId().equals(importId))
                        .isNotEmpty();
            }
        }
        log.info("Executing query after re-start");
        // Issue a query and assert that it has the same content as the first time around.
        IntegrationUtils.assertQueryResult(conquery2, query, 2L, ExecutionState.DONE, conquery2.getTestUser(), 201);
    }
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) ExecutionState(com.bakdata.conquery.models.execution.ExecutionState) Assertions.assertThat(org.assertj.core.api.Assertions.assertThat) AdminTablesResource(com.bakdata.conquery.resources.admin.rest.AdminTablesResource) RequiredTable(com.bakdata.conquery.integration.common.RequiredTable) QueryTest(com.bakdata.conquery.integration.json.QueryTest) Worker(com.bakdata.conquery.models.worker.Worker) MediaType(javax.ws.rs.core.MediaType) ModificationShieldedWorkerStorage(com.bakdata.conquery.io.storage.ModificationShieldedWorkerStorage) In(com.github.powerlibraries.io.In) Map(java.util.Map) LoadingUtil.importSecondaryIds(com.bakdata.conquery.integration.common.LoadingUtil.importSecondaryIds) URI(java.net.URI) ConqueryConstants(com.bakdata.conquery.ConqueryConstants) ResourceFile(com.bakdata.conquery.integration.common.ResourceFile) IntegrationUtils(com.bakdata.conquery.integration.common.IntegrationUtils) ProgrammaticIntegrationTest(com.bakdata.conquery.integration.tests.ProgrammaticIntegrationTest) TableImportDescriptor(com.bakdata.conquery.models.preproc.TableImportDescriptor) TestConquery(com.bakdata.conquery.util.support.TestConquery) ShardNode(com.bakdata.conquery.commands.ShardNode) ResourceConstants(com.bakdata.conquery.resources.ResourceConstants) FileUtils(org.apache.commons.io.FileUtils) FileInputStream(java.io.FileInputStream) LoadingUtil(com.bakdata.conquery.integration.common.LoadingUtil) File(java.io.File) ImportId(com.bakdata.conquery.models.identifiable.ids.specific.ImportId) StandaloneSupport(com.bakdata.conquery.util.support.StandaloneSupport) Dataset(com.bakdata.conquery.models.datasets.Dataset) ValidatorHelper(com.bakdata.conquery.models.exceptions.ValidatorHelper) OutputDescription(com.bakdata.conquery.models.preproc.outputs.OutputDescription) List(java.util.List) Slf4j(lombok.extern.slf4j.Slf4j) Response(javax.ws.rs.core.Response) 
TableInputDescriptor(com.bakdata.conquery.models.preproc.TableInputDescriptor) JsonIntegrationTest(com.bakdata.conquery.integration.json.JsonIntegrationTest) Query(com.bakdata.conquery.apiv1.query.Query) Jackson(com.bakdata.conquery.io.jackson.Jackson) MetaStorage(com.bakdata.conquery.io.storage.MetaStorage) HierarchyHelper(com.bakdata.conquery.resources.hierarchies.HierarchyHelper) Namespace(com.bakdata.conquery.models.worker.Namespace) Query(com.bakdata.conquery.apiv1.query.Query) QueryTest(com.bakdata.conquery.integration.json.QueryTest) Dataset(com.bakdata.conquery.models.datasets.Dataset) ImportId(com.bakdata.conquery.models.identifiable.ids.specific.ImportId) URI(java.net.URI) Namespace(com.bakdata.conquery.models.worker.Namespace) FileInputStream(java.io.FileInputStream) ModificationShieldedWorkerStorage(com.bakdata.conquery.io.storage.ModificationShieldedWorkerStorage) Response(javax.ws.rs.core.Response) GZIPInputStream(java.util.zip.GZIPInputStream) ResourceFile(com.bakdata.conquery.integration.common.ResourceFile) ShardNode(com.bakdata.conquery.commands.ShardNode) MetaStorage(com.bakdata.conquery.io.storage.MetaStorage) Worker(com.bakdata.conquery.models.worker.Worker) TableInputDescriptor(com.bakdata.conquery.models.preproc.TableInputDescriptor) StandaloneSupport(com.bakdata.conquery.util.support.StandaloneSupport) ResourceFile(com.bakdata.conquery.integration.common.ResourceFile) File(java.io.File) TableImportDescriptor(com.bakdata.conquery.models.preproc.TableImportDescriptor) RequiredTable(com.bakdata.conquery.integration.common.RequiredTable)

Example 4 with TableImportDescriptor

use of com.bakdata.conquery.models.preproc.TableImportDescriptor in project conquery by bakdata.

The method generateCqpp of the class LoadingUtil.

/**
 * Preprocesses the CSV of every given table inside the support's temporary directory.
 * <p>
 * For each table the CSV is copied into the tmp directory and an import description
 * (name and table set to the table's name, a single input with the table's primary column and
 * all of its columns as outputs) is written next to it. Afterwards all descriptions are
 * preprocessed in one run.
 *
 * @param support the test support providing the tmp directory and the preprocessor
 * @param tables  the tables whose CSVs are preprocessed
 * @return the expected preprocessed files, one per table, in iteration order of {@code tables}
 * @throws Exception if copying, writing a description or preprocessing fails
 */
public static List<File> generateCqpp(StandaloneSupport support, Collection<RequiredTable> tables) throws Exception {
    final List<File> descriptionFiles = new ArrayList<>();
    final List<File> cqppFiles = new ArrayList<>();

    for (RequiredTable requiredTable : tables) {
        final String tableName = requiredTable.getName();

        // Make the CSV available to the preprocessor under its own file name.
        final File csvTarget = new File(support.getTmpDir(), requiredTable.getCsv().getName());
        FileUtils.copyInputStreamToFile(requiredTable.getCsv().stream(), csvTarget);

        // Single input: primary column plus one output per table column.
        final TableInputDescriptor inputDescriptor = new TableInputDescriptor();
        inputDescriptor.setPrimary(requiredTable.getPrimaryColumn().createOutput());
        inputDescriptor.setSourceFile(requiredTable.getCsv().getName());
        final int columnCount = requiredTable.getColumns().length;
        inputDescriptor.setOutput(new OutputDescription[columnCount]);
        for (int column = 0; column < requiredTable.getColumns().length; column++) {
            inputDescriptor.getOutput()[column] = requiredTable.getColumns()[column].createOutput();
        }

        final TableImportDescriptor importDescriptor = new TableImportDescriptor();
        importDescriptor.setName(tableName);
        importDescriptor.setTable(tableName);
        importDescriptor.setInputs(new TableInputDescriptor[] { inputDescriptor });

        // Description and preprocessed file live side by side in the tmp directory.
        final File descriptionFile = support.getTmpDir().toPath().resolve(tableName + ConqueryConstants.EXTENSION_DESCRIPTION).toFile();
        final File cqppFile = support.getTmpDir().toPath().resolve(tableName + EXTENSION_PREPROCESSED).toFile();
        Jackson.MAPPER.writeValue(descriptionFile, importDescriptor);

        descriptionFiles.add(descriptionFile);
        cqppFiles.add(cqppFile);
    }

    // Run the preprocessor once over all descriptions.
    support.preprocessTmp(support.getTmpDir(), descriptionFiles);
    // The preprocessor leaves its location in the MDC; reset it for subsequent log output.
    ConqueryMDC.clearLocation();

    return cqppFiles;
}
Also used : OutputDescription(com.bakdata.conquery.models.preproc.outputs.OutputDescription) ArrayList(java.util.ArrayList) TableInputDescriptor(com.bakdata.conquery.models.preproc.TableInputDescriptor) File(java.io.File) TableImportDescriptor(com.bakdata.conquery.models.preproc.TableImportDescriptor)

Aggregations

TableImportDescriptor (com.bakdata.conquery.models.preproc.TableImportDescriptor)4 TableInputDescriptor (com.bakdata.conquery.models.preproc.TableInputDescriptor)3 OutputDescription (com.bakdata.conquery.models.preproc.outputs.OutputDescription)3 File (java.io.File)3 ConqueryConstants (com.bakdata.conquery.ConqueryConstants)2 Query (com.bakdata.conquery.apiv1.query.Query)2 ShardNode (com.bakdata.conquery.commands.ShardNode)2 IntegrationUtils (com.bakdata.conquery.integration.common.IntegrationUtils)2 LoadingUtil (com.bakdata.conquery.integration.common.LoadingUtil)2 LoadingUtil.importSecondaryIds (com.bakdata.conquery.integration.common.LoadingUtil.importSecondaryIds)2 RequiredTable (com.bakdata.conquery.integration.common.RequiredTable)2 JsonIntegrationTest (com.bakdata.conquery.integration.json.JsonIntegrationTest)2 QueryTest (com.bakdata.conquery.integration.json.QueryTest)2 Jackson (com.bakdata.conquery.io.jackson.Jackson)2 MetaStorage (com.bakdata.conquery.io.storage.MetaStorage)2 ModificationShieldedWorkerStorage (com.bakdata.conquery.io.storage.ModificationShieldedWorkerStorage)2 Dataset (com.bakdata.conquery.models.datasets.Dataset)2 ValidatorHelper (com.bakdata.conquery.models.exceptions.ValidatorHelper)2 ExecutionState (com.bakdata.conquery.models.execution.ExecutionState)2 ImportId (com.bakdata.conquery.models.identifiable.ids.specific.ImportId)2