Search in sources :

Example 11 with TreeConcept

use of com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept in project conquery by bakdata.

Class CBlockTest, method serialize.

/**
 * Builds a small object graph (dataset, tree concept with a single connector, table, import and
 * bucket), creates a {@link CBlock} for the connector/bucket pair and passes it to
 * {@link SerializationTestUtil} together with a registry that holds the graph.
 */
@SneakyThrows
@Test
public void serialize() throws IOException, JSONException {
    // Dataset shared by the concept and the table.
    final Dataset ds = new Dataset();
    ds.setName("dataset");

    // Concept with exactly one connector, linked in both directions.
    final TreeConcept tree = new TreeConcept();
    tree.setDataset(ds);
    tree.setName("concept");

    final ConceptTreeConnector treeConnector = new ConceptTreeConnector();
    treeConnector.setName("connector");
    treeConnector.setConcept(tree);
    tree.setConnectors(List.of(treeConnector));

    // Table and the import that the bucket refers to.
    final Table tbl = new Table();
    tbl.setName("table");
    tbl.setDataset(ds);

    final Import tableImport = new Import(tbl);
    tableImport.setName("import");

    final Bucket emptyBucket = new Bucket(
            0,
            0,
            0,
            new ColumnStore[0],
            Collections.emptySet(),
            new int[10],
            new int[10],
            tableImport
    );

    final CBlock subject = CBlock.createCBlock(treeConnector, emptyBucket, 10);

    // Everything the CBlock references has to be registered before the serialization test runs.
    final CentralRegistry lookup = new CentralRegistry();
    lookup.register(ds)
          .register(tbl)
          .register(tree)
          .register(treeConnector)
          .register(emptyBucket)
          .register(tableImport);

    SerializationTestUtil.forType(CBlock.class)
                         .registry(lookup)
                         .test(subject);
}
Also used : Table(com.bakdata.conquery.models.datasets.Table) Import(com.bakdata.conquery.models.datasets.Import) Dataset(com.bakdata.conquery.models.datasets.Dataset) TreeConcept(com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept) ConceptTreeConnector(com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeConnector) CentralRegistry(com.bakdata.conquery.models.identifiable.CentralRegistry) Test(org.junit.jupiter.api.Test) SneakyThrows(lombok.SneakyThrows)

Example 12 with TreeConcept

use of com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept in project conquery by bakdata.

Class BucketManager, method fullUpdate.

/**
 * Walks over every {@link ConceptTreeConnector} of every stored {@link TreeConcept} and every stored
 * bucket, and collects the connector/bucket pairs that share a table but have no CBlock yet into a
 * single {@link CalculateCBlocksJob}. The job is handed to the job manager only if it is not empty.
 */
@SneakyThrows
public void fullUpdate() {
    final CalculateCBlocksJob recalculation = new CalculateCBlocksJob(storage, this, worker.getJobsExecutorService());

    for (Concept<?> concept : storage.getAllConcepts()) {
        // Only tree concepts are considered here; anything else is skipped.
        if (concept instanceof TreeConcept) {
            final TreeConcept tree = (TreeConcept) concept;

            for (ConceptTreeConnector connector : tree.getConnectors()) {
                for (Bucket candidate : storage.getAllBuckets()) {
                    final CBlockId id = new CBlockId(candidate.getId(), connector.getId());

                    // A connector is only paired with buckets of its own table.
                    if (connector.getTable().equals(candidate.getTable())) {
                        if (hasCBlock(id)) {
                            log.trace("Skip calculation of CBlock[{}], because it was loaded from the storage.", id);
                        } else {
                            log.warn("CBlock[{}] missing in Storage. Queuing recalculation", id);
                            recalculation.addCBlock(candidate, connector);
                        }
                    }
                }
            }
        }
    }

    // Do not enqueue a job that has nothing to do.
    if (recalculation.isEmpty()) {
        return;
    }
    jobManager.addSlowJob(recalculation);
}
Also used : CBlockId(com.bakdata.conquery.models.identifiable.ids.specific.CBlockId) TreeConcept(com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept) ConceptTreeConnector(com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeConnector) CalculateCBlocksJob(com.bakdata.conquery.models.jobs.CalculateCBlocksJob) SneakyThrows(lombok.SneakyThrows)

Example 13 with TreeConcept

use of com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept in project conquery by bakdata.

Class CBlock, method calculateSpecificChildrenPaths.

/**
 * Determines, for each event of the bucket, the prefix of the most specific {@link ConceptTreeChild}
 * of the connector's {@link TreeConcept} (see {@link ConceptTreeChild#getPrefix()}).
 *
 * <p>Every entry is initialised with {@link ConceptTreeConnector#NOT_CONTAINED} and keeps that value when
 * the event has no value in the connector's column, when the connector's condition rejects the event,
 * or when resolving the event raises a {@link ConceptConfigurationException} (which is logged).
 * Events for which no child is found receive the prefix of the concept itself.
 *
 * @param bucket    the bucket whose events are resolved
 * @param connector the connector providing the column, the optional condition and the concept
 * @return an array with one prefix entry per event of the bucket
 * @throws IllegalStateException if the connector has no string-backed column and the concept does not
 *                               consist of exactly one element
 */
private static int[][] calculateSpecificChildrenPaths(Bucket bucket, ConceptTreeConnector connector) {
    final Column valueColumn = connector.getColumn();
    final TreeConcept concept = connector.getConcept();

    final boolean stringBacked = valueColumn != null && bucket.getStores()[valueColumn.getPosition()] instanceof StringStore;

    final StringStore values;
    if (stringBacked) {
        // With a string column available, set up the prefix index and the id cache on the concept.
        values = (StringStore) bucket.getStores()[valueColumn.getPosition()];
        TreeChildPrefixIndex.putIndexInto(concept);
        concept.initializeIdCache(values, bucket.getImp());
    } else {
        // Without such a column the concept is only usable if it consists of a single element.
        if (concept.countElements() != 1) {
            throw new IllegalStateException(String.format("Cannot build tree over Connector[%s] without Column", connector.getId()));
        }
        values = null;
    }

    final int[][] paths = new int[bucket.getNumberOfEvents()][];
    Arrays.fill(paths, ConceptTreeConnector.NOT_CONTAINED);

    final ConceptTreeCache cache = concept.getCache(bucket.getImp());
    final int[] rootPath = concept.getPrefix();

    for (int row = 0; row < bucket.getNumberOfEvents(); row++) {
        try {
            // Rows without a value in the column are not part of this connector; this lets
            // several connectors share a single table.
            if (valueColumn != null && !bucket.has(row, valueColumn)) {
                paths[row] = Connector.NOT_CONTAINED;
                continue;
            }

            final int code;
            final String text;
            if (values == null) {
                code = -1;
                text = "";
            } else {
                code = bucket.getString(row, valueColumn);
                text = values.getElement(code);
            }

            // The row map is only materialised on demand; the lambda needs a final copy of the index.
            final int capturedRow = row;
            final CalculatedValue<Map<String, Object>> lazyRow = new CalculatedValue<>(() -> bucket.calculateMap(capturedRow));

            final boolean rejected = connector.getCondition() != null && !connector.getCondition().matches(text, lazyRow);
            if (rejected) {
                paths[row] = Connector.NOT_CONTAINED;
                continue;
            }

            final ConceptTreeChild match;
            if (cache == null) {
                match = concept.findMostSpecificChild(text, lazyRow);
            } else {
                match = cache.findMostSpecificChild(code, text, lazyRow);
            }

            // Anything that cannot be resolved to a child falls back to the root.
            paths[row] = match == null ? rootPath : match.getPrefix();
        } catch (ConceptConfigurationException ex) {
            log.error("Failed to resolve event {}-{} against concept {}", bucket, row, concept, ex);
        }
    }

    if (cache != null) {
        log.trace(
                "Hits: {}, Misses: {}, Hits/Misses: {}, %Hits: {} (Up to now)",
                cache.getHits(),
                cache.getMisses(),
                (double) cache.getHits() / cache.getMisses(),
                (double) cache.getHits() / (cache.getHits() + cache.getMisses())
        );
    }

    return paths;
}
Also used : ConceptTreeCache(com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeCache) ConceptTreeChild(com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeChild) StringStore(com.bakdata.conquery.models.events.stores.root.StringStore) ConceptConfigurationException(com.bakdata.conquery.models.exceptions.ConceptConfigurationException) Column(com.bakdata.conquery.models.datasets.Column) TreeConcept(com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept) CalculatedValue(com.bakdata.conquery.util.CalculatedValue) Map(java.util.Map)

Example 14 with TreeConcept

use of com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept in project conquery by bakdata.

Class ConceptResolutionTest, method execute.

/**
 * Reads the SIMPLE_TREECONCEPT_QUERY test specification, imports its data, refreshes the filter
 * search and then resolves the codes "A1" and "unknown" against the first concept found in the
 * namespace storage, asserting which of them are resolved and which are reported as unknown.
 *
 * @param conquery the standalone test instance to run against
 * @throws Exception if reading, importing or resolving fails
 */
@Override
public void execute(StandaloneSupport conquery) throws Exception {
    // Read the test specification.
    final String specJson = In.resource("/tests/query/SIMPLE_TREECONCEPT_QUERY/SIMPLE_TREECONCEPT_Query.test.json")
                              .withUTF8()
                              .readAll();
    final DatasetId datasetId = conquery.getDataset().getId();
    final ConqueryTestSpec spec = JsonIntegrationTest.readJson(datasetId, specJson);

    // Validate the specification and load its data.
    ValidatorHelper.failOnError(log, conquery.getValidator().validate(spec));
    spec.importRequiredData(conquery);

    // Refresh the filter search and wait for the triggered work to finish.
    FilterSearch.updateSearch(
            conquery.getNamespace().getNamespaces(),
            Collections.singleton(conquery.getNamespace().getDataset()),
            conquery.getDatasetsProcessor().getJobManager(),
            conquery.getConfig().getCsv()
    );
    conquery.waitUntilWorkDone();

    final ConceptsProcessor conceptsProcessor = new ConceptsProcessor(conquery.getNamespace().getNamespaces());
    final TreeConcept firstConcept = (TreeConcept) conquery.getNamespace()
                                                           .getStorage()
                                                           .getAllConcepts()
                                                           .iterator()
                                                           .next();

    final ResolvedConceptsResult outcome = conceptsProcessor.resolveConceptElements(firstConcept, List.of("A1", "unknown"));

    // Check the resolved values.
    assertThat(outcome).isNotNull();
    assertThat(outcome.getResolvedConcepts().stream().map(IId::toString))
            .containsExactlyInAnyOrder("ConceptResolutionTest.test_tree.test_child1");
    assertThat(outcome.getUnknownCodes()).containsExactlyInAnyOrder("unknown");
}
Also used : IId(com.bakdata.conquery.models.identifiable.ids.IId) TreeConcept(com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept) ConqueryTestSpec(com.bakdata.conquery.integration.json.ConqueryTestSpec) ConceptsProcessor(com.bakdata.conquery.resources.api.ConceptsProcessor) DatasetId(com.bakdata.conquery.models.identifiable.ids.specific.DatasetId) ResolvedConceptsResult(com.bakdata.conquery.resources.api.ConceptsProcessor.ResolvedConceptsResult)

Aggregations

TreeConcept (com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept)14 ConceptTreeConnector (com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeConnector)9 Dataset (com.bakdata.conquery.models.datasets.Dataset)7 Table (com.bakdata.conquery.models.datasets.Table)5 CentralRegistry (com.bakdata.conquery.models.identifiable.CentralRegistry)5 Test (org.junit.jupiter.api.Test)5 CQTable (com.bakdata.conquery.apiv1.query.concept.filter.CQTable)4 Column (com.bakdata.conquery.models.datasets.Column)4 CQConcept (com.bakdata.conquery.apiv1.query.concept.specific.CQConcept)3 ConqueryTestSpec (com.bakdata.conquery.integration.json.ConqueryTestSpec)3 IdMapSerialisationTest (com.bakdata.conquery.models.identifiable.IdMapSerialisationTest)3 CBlockId (com.bakdata.conquery.models.identifiable.ids.specific.CBlockId)3 CalculateCBlocksJob (com.bakdata.conquery.models.jobs.CalculateCBlocksJob)3 SneakyThrows (lombok.SneakyThrows)3 ConceptQuery (com.bakdata.conquery.apiv1.query.ConceptQuery)2 Import (com.bakdata.conquery.models.datasets.Import)2 Connector (com.bakdata.conquery.models.datasets.concepts.Connector)2 ValidityDate (com.bakdata.conquery.models.datasets.concepts.ValidityDate)2 DatasetId (com.bakdata.conquery.models.identifiable.ids.specific.DatasetId)2 IOException (java.io.IOException)2