Search in sources:

Example 1 with DerivativeDataSourceMetadata

Use of org.apache.druid.indexing.materializedview.DerivativeDataSourceMetadata in project druid by druid-io.

In the class DerivativeDataSourceManager, the method updateDerivatives:

private void updateDerivatives() {
    // Load all (dataSource, commit metadata) rows from the metadata store and keep
    // only those whose payload deserializes to DerivativeDataSourceMetadata.
    List<Pair<String, DerivativeDataSourceMetadata>> derivativesInDatabase = connector.retryWithHandle(
        handle -> handle
            .createQuery(StringUtils.format(
                "SELECT DISTINCT dataSource,commit_metadata_payload FROM %1$s",
                dbTables.get().getDataSourceTable()
            ))
            .map((int index, ResultSet r, StatementContext ctx) -> {
                String datasourceName = r.getString("dataSource");
                DataSourceMetadata payload = JacksonUtils.readValue(
                    objectMapper, r.getBytes("commit_metadata_payload"), DataSourceMetadata.class
                );
                if (!(payload instanceof DerivativeDataSourceMetadata)) {
                    return null;
                }
                DerivativeDataSourceMetadata metadata = (DerivativeDataSourceMetadata) payload;
                return new Pair<>(datasourceName, metadata);
            })
            .list()
    );
    // Build DerivativeDataSource entries, dropping derivatives with no loaded segments.
    List<DerivativeDataSource> derivativeDataSources = derivativesInDatabase
        .parallelStream()
        .filter(data -> data != null)
        .map(derivatives -> {
            String name = derivatives.lhs;
            DerivativeDataSourceMetadata metadata = derivatives.rhs;
            String baseDataSource = metadata.getBaseDataSource();
            long avgSizePerGranularity = getAvgSizePerGranularity(name);
            log.info(
                "find derivatives: {bases=%s, derivative=%s, dimensions=%s, metrics=%s, avgSize=%s}",
                baseDataSource, name, metadata.getDimensions(), metadata.getMetrics(), avgSizePerGranularity
            );
            return new DerivativeDataSource(name, baseDataSource, metadata.getColumns(), avgSizePerGranularity);
        })
        .filter(derivatives -> derivatives.getAvgSizeBasedGranularity() > 0)
        .collect(Collectors.toList());
    // Group derivatives by their base datasource, keeping each group sorted.
    ConcurrentHashMap<String, SortedSet<DerivativeDataSource>> newDerivatives = new ConcurrentHashMap<>();
    for (DerivativeDataSource derivative : derivativeDataSources) {
        newDerivatives.computeIfAbsent(derivative.getBaseDataSource(), ds -> new TreeSet<>()).add(derivative);
    }
    // Atomically publish the new snapshot so readers never see a partially built map.
    ConcurrentHashMap<String, SortedSet<DerivativeDataSource>> current;
    do {
        current = DERIVATIVES_REF.get();
    } while (!DERIVATIVES_REF.compareAndSet(current, newDerivatives));
}
Also used : MoreExecutors(com.google.common.util.concurrent.MoreExecutors) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) SortedSet(java.util.SortedSet) Intervals(org.apache.druid.java.util.common.Intervals) Inject(com.google.inject.Inject) Supplier(com.google.common.base.Supplier) Duration(org.joda.time.Duration) LifecycleStart(org.apache.druid.java.util.common.lifecycle.LifecycleStart) AtomicReference(java.util.concurrent.atomic.AtomicReference) StatementContext(org.skife.jdbi.v2.StatementContext) TreeSet(java.util.TreeSet) Pair(org.apache.druid.java.util.common.Pair) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) LifecycleStop(org.apache.druid.java.util.common.lifecycle.LifecycleStop) ResultSet(java.sql.ResultSet) ManageLifecycle(org.apache.druid.guice.ManageLifecycle) ListeningScheduledExecutorService(com.google.common.util.concurrent.ListeningScheduledExecutorService) DateTimes(org.apache.druid.java.util.common.DateTimes) SQLMetadataConnector(org.apache.druid.metadata.SQLMetadataConnector) ImmutableSet(com.google.common.collect.ImmutableSet) Execs(org.apache.druid.java.util.common.concurrent.Execs) JacksonUtils(org.apache.druid.java.util.common.jackson.JacksonUtils) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) MetadataStorageTablesConfig(org.apache.druid.metadata.MetadataStorageTablesConfig) ImmutableMap(com.google.common.collect.ImmutableMap) HandleCallback(org.skife.jdbi.v2.tweak.HandleCallback) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) Collectors(java.util.stream.Collectors) TimeUnit(java.util.concurrent.TimeUnit) List(java.util.List) Handle(org.skife.jdbi.v2.Handle) DerivativeDataSourceMetadata(org.apache.druid.indexing.materializedview.DerivativeDataSourceMetadata) DataSegment(org.apache.druid.timeline.DataSegment)
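The compare-and-set loop at the end of updateDerivatives is a general lock-free publish pattern: build a complete replacement snapshot off to the side, then swap it in atomically so concurrent readers always observe either the old map or the new one, never a partially built mix. Below is a minimal, self-contained sketch of the same pattern using only the JDK; the SnapshotPublisher class, its Map<String, Long> payload, and the rebuild() helper are hypothetical stand-ins for illustration, not Druid APIs.

import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicReference;

public class SnapshotPublisher {
    // Readers always see either the previous snapshot or the new one, never a mix.
    private static final AtomicReference<Map<String, Long>> SNAPSHOT_REF =
            new AtomicReference<>(new TreeMap<>());

    // Hypothetical rebuild step: assemble the full replacement map off to the side.
    static Map<String, Long> rebuild() {
        Map<String, Long> fresh = new TreeMap<>();
        fresh.put("base", 1024L);
        return fresh;
    }

    static void publish(Map<String, Long> fresh) {
        // Same shape as updateDerivatives(): retry the CAS until it succeeds.
        Map<String, Long> current;
        do {
            current = SNAPSHOT_REF.get();
        } while (!SNAPSHOT_REF.compareAndSet(current, fresh));
    }

    public static void main(String[] args) {
        publish(rebuild());
        System.out.println(SNAPSHOT_REF.get()); // prints {base=1024}
    }
}

Because the replacement map here (as in updateDerivatives) does not depend on the previous value, a plain set() would behave identically; the CAS loop simply mirrors the structure of the original method.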

Example 2 with DerivativeDataSourceMetadata

Use of org.apache.druid.indexing.materializedview.DerivativeDataSourceMetadata in project druid by druid-io.

In the class DatasourceOptimizerTest, the method testOptimize:

@Test(timeout = 60_000L)
public void testOptimize() throws InterruptedException {
    // Register "derivative" as a materialized view of "base".
    String dataSource = "derivative";
    String baseDataSource = "base";
    Set<String> dims = Sets.newHashSet("dim1", "dim2", "dim3");
    Set<String> metrics = Sets.newHashSet("cost");
    DerivativeDataSourceMetadata metadata = new DerivativeDataSourceMetadata(baseDataSource, dims, metrics);
    metadataStorageCoordinator.insertDataSourceMetadata(dataSource, metadata);
    // Insert base datasource segments covering 2011-04-01/2011-04-06.
    List<Boolean> baseResult = Lists.transform(
        ImmutableList.of("2011-04-01/2011-04-02", "2011-04-02/2011-04-03", "2011-04-03/2011-04-04", "2011-04-04/2011-04-05", "2011-04-05/2011-04-06"),
        interval -> {
            final DataSegment segment = createDataSegment("base", interval, "v1", Lists.newArrayList("dim1", "dim2", "dim3", "dim4"), 1024 * 1024);
            try {
                metadataStorageCoordinator.announceHistoricalSegments(Sets.newHashSet(segment));
                announceSegmentForServer(druidServer, segment, zkPathsConfig, jsonMapper);
            } catch (IOException e) {
                return false;
            }
            return true;
        }
    );
    // Insert derivative segments covering only 2011-04-01/2011-04-04.
    List<Boolean> derivativeResult = Lists.transform(
        ImmutableList.of("2011-04-01/2011-04-02", "2011-04-02/2011-04-03", "2011-04-03/2011-04-04"),
        interval -> {
            final DataSegment segment = createDataSegment("derivative", interval, "v1", Lists.newArrayList("dim1", "dim2", "dim3"), 1024);
            try {
                metadataStorageCoordinator.announceHistoricalSegments(Sets.newHashSet(segment));
                announceSegmentForServer(druidServer, segment, zkPathsConfig, jsonMapper);
            } catch (IOException e) {
                return false;
            }
            return true;
        }
    );
    Assert.assertFalse(baseResult.contains(false));
    Assert.assertFalse(derivativeResult.contains(false));
    // Wait until the manager has discovered the derivative.
    derivativesManager.start();
    while (DerivativeDataSourceManager.getAllDerivatives().isEmpty()) {
        TimeUnit.SECONDS.sleep(1L);
    }
    // Build a user query spanning both covered and uncovered intervals.
    TopNQuery userQuery = new TopNQueryBuilder()
        .dataSource("base")
        .granularity(QueryRunnerTestHelper.ALL_GRAN)
        .dimension("dim1")
        .metric("cost")
        .threshold(4)
        .intervals("2011-04-01/2011-04-06")
        .aggregators(new LongSumAggregatorFactory("cost", "cost"))
        .build();
    // Expect 04-01/04-04 to be routed to "derivative" and 04-04/04-06 to fall back to "base".
    List<Query> expectedQueryAfterOptimizing = Lists.newArrayList(
        new TopNQueryBuilder()
            .dataSource("derivative")
            .granularity(QueryRunnerTestHelper.ALL_GRAN)
            .dimension("dim1")
            .metric("cost")
            .threshold(4)
            .intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-01/2011-04-04"))))
            .aggregators(new LongSumAggregatorFactory("cost", "cost"))
            .build(),
        new TopNQueryBuilder()
            .dataSource("base")
            .granularity(QueryRunnerTestHelper.ALL_GRAN)
            .dimension("dim1")
            .metric("cost")
            .threshold(4)
            .intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-04/2011-04-06"))))
            .aggregators(new LongSumAggregatorFactory("cost", "cost"))
            .build()
    );
    Assert.assertEquals(expectedQueryAfterOptimizing, optimizer.optimize(userQuery));
    derivativesManager.stop();
}
Also used : TopNQueryBuilder(org.apache.druid.query.topn.TopNQueryBuilder) TopNQuery(org.apache.druid.query.topn.TopNQuery) Query(org.apache.druid.query.Query) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) IOException(java.io.IOException) DataSegment(org.apache.druid.timeline.DataSegment) DerivativeDataSourceMetadata(org.apache.druid.indexing.materializedview.DerivativeDataSourceMetadata) Test(org.junit.Test)
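Stripped of segment creation and announcement, the essential flow of the test above condenses to the sketch below. It is a fragment rather than a runnable class: metadataStorageCoordinator, optimizer, and the query helpers are assumed to be the same fixtures the full test wires up.

// Register "derivative" as a materialized view of "base" over dims {dim1, dim2, dim3} and metric {cost}.
Set<String> dims = Sets.newHashSet("dim1", "dim2", "dim3");
Set<String> metrics = Sets.newHashSet("cost");
metadataStorageCoordinator.insertDataSourceMetadata(
    "derivative",
    new DerivativeDataSourceMetadata("base", dims, metrics)
);

// A user query over 2011-04-01/2011-04-06 against the base datasource...
TopNQuery userQuery = new TopNQueryBuilder()
    .dataSource("base")
    .granularity(QueryRunnerTestHelper.ALL_GRAN)
    .dimension("dim1")
    .metric("cost")
    .threshold(4)
    .intervals("2011-04-01/2011-04-06")
    .aggregators(new LongSumAggregatorFactory("cost", "cost"))
    .build();

// ...comes back as two queries: the derivative-covered interval (through 04-04)
// rewritten to hit "derivative", and the remainder left on "base".
List<Query> rewritten = optimizer.optimize(userQuery);

The rewrite is driven purely by which intervals the derivative's segments cover, which is why the test publishes derivative segments for only the first three days.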

Aggregations

DerivativeDataSourceMetadata (org.apache.druid.indexing.materializedview.DerivativeDataSourceMetadata) 2
DataSegment (org.apache.druid.timeline.DataSegment) 2
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 1
Supplier (com.google.common.base.Supplier) 1
ImmutableMap (com.google.common.collect.ImmutableMap) 1
ImmutableSet (com.google.common.collect.ImmutableSet) 1
ListenableFuture (com.google.common.util.concurrent.ListenableFuture) 1
ListeningScheduledExecutorService (com.google.common.util.concurrent.ListeningScheduledExecutorService) 1
MoreExecutors (com.google.common.util.concurrent.MoreExecutors) 1
Inject (com.google.inject.Inject) 1
IOException (java.io.IOException) 1
ResultSet (java.sql.ResultSet) 1
HashSet (java.util.HashSet) 1
List (java.util.List) 1
Set (java.util.Set) 1
SortedSet (java.util.SortedSet) 1
TreeSet (java.util.TreeSet) 1
ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap) 1
TimeUnit (java.util.concurrent.TimeUnit) 1
AtomicReference (java.util.concurrent.atomic.AtomicReference) 1