Use of org.apache.druid.indexing.materializedview.DerivativeDataSourceMetadata in project druid by druid-io.

Example 1: the class DerivativeDataSourceManager, method updateDerivatives().
private void updateDerivatives()
{
  // Read every datasource's committed metadata payload from the metadata store and
  // keep only the rows whose payload is a DerivativeDataSourceMetadata.
  List<Pair<String, DerivativeDataSourceMetadata>> derivativesInDatabase = connector.retryWithHandle(
      handle -> handle
          .createQuery(StringUtils.format(
              "SELECT DISTINCT dataSource, commit_metadata_payload FROM %1$s",
              dbTables.get().getDataSourceTable()
          ))
          .map((int index, ResultSet r, StatementContext ctx) -> {
            String datasourceName = r.getString("dataSource");
            DataSourceMetadata payload = JacksonUtils.readValue(
                objectMapper,
                r.getBytes("commit_metadata_payload"),
                DataSourceMetadata.class
            );
            if (!(payload instanceof DerivativeDataSourceMetadata)) {
              return null;
            }
            DerivativeDataSourceMetadata metadata = (DerivativeDataSourceMetadata) payload;
            return new Pair<>(datasourceName, metadata);
          })
          .list()
  );

  // Build a DerivativeDataSource per row, dropping nulls and derivatives whose
  // average size per granularity is not yet computable.
  List<DerivativeDataSource> derivativeDataSources = derivativesInDatabase
      .parallelStream()
      .filter(data -> data != null)
      .map(derivatives -> {
        String name = derivatives.lhs;
        DerivativeDataSourceMetadata metadata = derivatives.rhs;
        String baseDataSource = metadata.getBaseDataSource();
        long avgSizePerGranularity = getAvgSizePerGranularity(name);
        log.info(
            "find derivatives: {bases=%s, derivative=%s, dimensions=%s, metrics=%s, avgSize=%s}",
            baseDataSource, name, metadata.getDimensions(), metadata.getMetrics(), avgSizePerGranularity
        );
        return new DerivativeDataSource(name, baseDataSource, metadata.getColumns(), avgSizePerGranularity);
      })
      .filter(derivatives -> derivatives.getAvgSizeBasedGranularity() > 0)
      .collect(Collectors.toList());

  // Group the derivatives by their base datasource, then atomically publish the snapshot.
  ConcurrentHashMap<String, SortedSet<DerivativeDataSource>> newDerivatives = new ConcurrentHashMap<>();
  for (DerivativeDataSource derivative : derivativeDataSources) {
    newDerivatives.computeIfAbsent(derivative.getBaseDataSource(), ds -> new TreeSet<>()).add(derivative);
  }
  ConcurrentHashMap<String, SortedSet<DerivativeDataSource>> current;
  do {
    current = DERIVATIVES_REF.get();
  } while (!DERIVATIVES_REF.compareAndSet(current, newDerivatives));
}
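The closing do/while publishes the freshly built map with a compare-and-set, so readers of DERIVATIVES_REF always observe a complete snapshot, never a partially populated one. Below is a self-contained sketch of the same publication pattern using only JDK types; the names in it (SnapshotPublishSketch, publish, REF) are illustrative, not part of Druid:

import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicReference;

// Sketch of the snapshot-publication pattern used above: the replacement map is
// fully constructed before the swap, so readers never see a half-built one.
public class SnapshotPublishSketch
{
  private static final AtomicReference<ConcurrentHashMap<String, Set<String>>> REF =
      new AtomicReference<>(new ConcurrentHashMap<>());

  static void publish(Map<String, String> derivativeToBase)
  {
    // Group derivative names under their base datasource, as updateDerivatives() does.
    ConcurrentHashMap<String, Set<String>> fresh = new ConcurrentHashMap<>();
    derivativeToBase.forEach(
        (derivative, base) -> fresh.computeIfAbsent(base, b -> new TreeSet<>()).add(derivative)
    );
    // Retry until our fresh snapshot replaces whatever is current, mirroring the
    // DERIVATIVES_REF loop above.
    ConcurrentHashMap<String, Set<String>> current;
    do {
      current = REF.get();
    } while (!REF.compareAndSet(current, fresh));
  }

  public static void main(String[] args)
  {
    publish(Map.of("derivative", "base"));
    System.out.println(REF.get()); // prints {base=[derivative]}
  }
}

Since each call installs a fully built replacement and ignores the previous value, a plain REF.set(fresh) would be equally safe in this sketch; the CAS loop mirrors the original method and becomes essential only when an update must be conditioned on the prior snapshot.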
Example 2: the class DatasourceOptimizerTest, method testOptimize().
@Test(timeout = 60_000L)
public void testOptimize() throws InterruptedException
{
  // insert datasource metadata linking "derivative" to its base datasource "base"
  String dataSource = "derivative";
  String baseDataSource = "base";
  Set<String> dims = Sets.newHashSet("dim1", "dim2", "dim3");
  Set<String> metrics = Sets.newHashSet("cost");
  DerivativeDataSourceMetadata metadata = new DerivativeDataSourceMetadata(baseDataSource, dims, metrics);
  metadataStorageCoordinator.insertDataSourceMetadata(dataSource, metadata);

  // insert base datasource segments covering 2011-04-01 through 2011-04-06
  List<Boolean> baseResult = Lists.transform(
      ImmutableList.of(
          "2011-04-01/2011-04-02", "2011-04-02/2011-04-03", "2011-04-03/2011-04-04",
          "2011-04-04/2011-04-05", "2011-04-05/2011-04-06"
      ),
      interval -> {
        final DataSegment segment =
            createDataSegment("base", interval, "v1", Lists.newArrayList("dim1", "dim2", "dim3", "dim4"), 1024 * 1024);
        try {
          metadataStorageCoordinator.announceHistoricalSegments(Sets.newHashSet(segment));
          announceSegmentForServer(druidServer, segment, zkPathsConfig, jsonMapper);
        }
        catch (IOException e) {
          return false;
        }
        return true;
      }
  );

  // insert derivative segments, which cover only 2011-04-01 through 2011-04-04
  List<Boolean> derivativeResult = Lists.transform(
      ImmutableList.of("2011-04-01/2011-04-02", "2011-04-02/2011-04-03", "2011-04-03/2011-04-04"),
      interval -> {
        final DataSegment segment =
            createDataSegment("derivative", interval, "v1", Lists.newArrayList("dim1", "dim2", "dim3"), 1024);
        try {
          metadataStorageCoordinator.announceHistoricalSegments(Sets.newHashSet(segment));
          announceSegmentForServer(druidServer, segment, zkPathsConfig, jsonMapper);
        }
        catch (IOException e) {
          return false;
        }
        return true;
      }
  );
  Assert.assertFalse(baseResult.contains(false));
  Assert.assertFalse(derivativeResult.contains(false));

  // wait until the manager has loaded the derivative metadata
  derivativesManager.start();
  while (DerivativeDataSourceManager.getAllDerivatives().isEmpty()) {
    TimeUnit.SECONDS.sleep(1L);
  }

  // build a user query against the base datasource over the full interval
  TopNQuery userQuery = new TopNQueryBuilder()
      .dataSource("base")
      .granularity(QueryRunnerTestHelper.ALL_GRAN)
      .dimension("dim1").metric("cost").threshold(4)
      .intervals("2011-04-01/2011-04-06")
      .aggregators(new LongSumAggregatorFactory("cost", "cost"))
      .build();

  // the optimizer should split the query: the derivative serves the interval it
  // covers, and the base datasource serves the remainder
  List<Query> expectedQueryAfterOptimizing = Lists.newArrayList(
      new TopNQueryBuilder()
          .dataSource("derivative")
          .granularity(QueryRunnerTestHelper.ALL_GRAN)
          .dimension("dim1").metric("cost").threshold(4)
          .intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-01/2011-04-04"))))
          .aggregators(new LongSumAggregatorFactory("cost", "cost"))
          .build(),
      new TopNQueryBuilder()
          .dataSource("base")
          .granularity(QueryRunnerTestHelper.ALL_GRAN)
          .dimension("dim1").metric("cost").threshold(4)
          .intervals(new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.of("2011-04-04/2011-04-06"))))
          .aggregators(new LongSumAggregatorFactory("cost", "cost"))
          .build()
  );
  Assert.assertEquals(expectedQueryAfterOptimizing, optimizer.optimize(userQuery));
  derivativesManager.stop();
}
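createDataSegment is a private helper of the test class and is not shown on this page. A plausible reconstruction using Druid's DataSegment.builder(); the pieces the test does not pin down here (the loadSpec values, the shard spec, and the metric list) are assumptions for illustration:

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.NoneShardSpec;
import java.util.List;

// Hypothetical reconstruction of the test's private helper; the loadSpec path,
// NoneShardSpec, and the "cost" metric are illustrative assumptions.
private DataSegment createDataSegment(String name, String intervalStr, String version, List<String> dims, long size)
{
  return DataSegment.builder()
                    .dataSource(name)
                    .interval(Intervals.of(intervalStr))
                    .version(version)
                    .loadSpec(ImmutableMap.of("type", "local", "path", "/tmp/index.zip"))
                    .dimensions(dims)
                    .metrics(ImmutableList.of("cost"))
                    .shardSpec(NoneShardSpec.instance())
                    .size(size)
                    .build();
}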