Use of org.apache.druid.query.SegmentDescriptor in project hive by apache.
Class DruidQueryBasedInputFormat, method distributeScanQuery.
/* New method that distributes the Scan query by creating splits containing
* information about different Druid nodes that have the data for the given
* query. */
private static HiveDruidSplit[] distributeScanQuery(String address, ScanQuery query, Path dummyPath)
    throws IOException {
  // If it has a limit, we use it and we do not distribute the query
  final boolean isFetch = query.getScanRowsLimit() < Long.MAX_VALUE;
  if (isFetch) {
    return new HiveDruidSplit[] {
        new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(query), dummyPath,
            new String[] { address })
    };
  }

  final List<LocatedSegmentDescriptor> segmentDescriptors = fetchLocatedSegmentDescriptors(address, query);

  // Create one input split for each segment
  final int numSplits = segmentDescriptors.size();
  final HiveDruidSplit[] splits = new HiveDruidSplit[numSplits];
  for (int i = 0; i < numSplits; i++) {
    final LocatedSegmentDescriptor locatedSD = segmentDescriptors.get(i);
    final String[] hosts = new String[locatedSD.getLocations().size() + 1];
    for (int j = 0; j < locatedSD.getLocations().size(); j++) {
      hosts[j] = locatedSD.getLocations().get(j).getHost();
    }
    // Default to the broker if all other hosts fail
    hosts[locatedSD.getLocations().size()] = address;
    // Create a partial Scan query restricted to this single segment
    final SegmentDescriptor newSD =
        new SegmentDescriptor(locatedSD.getInterval(), locatedSD.getVersion(), locatedSD.getPartitionNumber());
    final Query partialQuery =
        query.withQuerySegmentSpec(new MultipleSpecificSegmentSpec(Lists.newArrayList(newSD)));
    splits[i] = new HiveDruidSplit(DruidStorageHandlerUtils.JSON_MAPPER.writeValueAsString(partialQuery),
        dummyPath, hosts);
  }
  return splits;
}
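The fetchLocatedSegmentDescriptors helper is not shown above. A minimal sketch of such a lookup is given below, written as if it sat next to distributeScanQuery so that DruidStorageHandlerUtils.JSON_MAPPER is available; it assumes the broker's segment-candidates endpoint (/druid/v2/datasources/{dataSource}/candidates) and plain java.net I/O, so the endpoint path, parameters, and body are an assumption rather than the actual Hive implementation.
// Illustrative sketch only: ask the broker which segments (and the hosts serving them)
// cover the query's intervals, and deserialize the response with the Druid-aware mapper.
private static List<LocatedSegmentDescriptor> fetchLocatedSegmentDescriptors(String address, ScanQuery query)
    throws IOException {
  final String dataSource = query.getDataSource().getTableNames().iterator().next();
  final String intervals = query.getIntervals().stream()
      .map(Object::toString)
      .collect(Collectors.joining(","));
  final URL url = new URL(String.format("http://%s/druid/v2/datasources/%s/candidates?intervals=%s",
      address, dataSource, URLEncoder.encode(intervals, StandardCharsets.UTF_8.name())));
  try (InputStream response = url.openStream()) {
    return DruidStorageHandlerUtils.JSON_MAPPER.readValue(response,
        new TypeReference<List<LocatedSegmentDescriptor>>() {});
  }
}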
Use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
Class ScanBenchmark, method querySingleIncrementalIndex.
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void querySingleIncrementalIndex(Blackhole blackhole, IncrementalIndexState state) {
  QueryRunner<ScanResultValue> runner = QueryBenchmarkUtil.makeQueryRunner(
      factory, SegmentId.dummy("incIndex"),
      new IncrementalIndexSegment(state.incIndex, SegmentId.dummy("incIndex")));
  Query effectiveQuery = query
      .withDataSource(new TableDataSource("incIndex"))
      .withQuerySegmentSpec(new MultipleSpecificSegmentSpec(
          ImmutableList.of(new SegmentDescriptor(Intervals.ETERNITY, "dummy_version", 0))))
      .withOverriddenContext(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false));
  List<ScanResultValue> results = ScanBenchmark.runQuery(factory, runner, effectiveQuery);
  blackhole.consume(results);
}
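The query field reused by these benchmark methods is assembled in the benchmark's setup, which is not shown here. A minimal sketch of building a comparable ScanQuery with Druids.newScanQueryBuilder() follows; the data source name, column list, and limit are illustrative placeholders, not the benchmark's actual parameters.
// Illustrative base query; the benchmark methods then rewrite the data source
// and segment spec per run via withDataSource()/withQuerySegmentSpec().
ScanQuery query = Druids.newScanQueryBuilder()
    .dataSource("incIndex")                                                       // placeholder data source
    .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.ETERNITY)))
    .columns(ImmutableList.of("dimA", "metA"))                                    // hypothetical columns
    .limit(10_000)                                                                // hypothetical limit
    .build();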
Use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
Class ScanBenchmark, method querySingleQueryableIndex.
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void querySingleQueryableIndex(Blackhole blackhole, QueryableIndexState state) {
  final QueryRunner<Result<ScanResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(
      factory, SegmentId.dummy("qIndex"),
      new QueryableIndexSegment(state.qIndexes.get(0), SegmentId.dummy("qIndex")));
  Query effectiveQuery = query
      .withDataSource(new TableDataSource("qIndex"))
      .withQuerySegmentSpec(new MultipleSpecificSegmentSpec(
          ImmutableList.of(new SegmentDescriptor(Intervals.ETERNITY, "dummy_version", 0))))
      .withOverriddenContext(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false));
  List<ScanResultValue> results = ScanBenchmark.runQuery(factory, runner, effectiveQuery);
  blackhole.consume(results);
}
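QueryableIndexState differs from IncrementalIndexState in that its segments are persisted, immutable indexes. A minimal sketch of that persist-and-load step, mirroring the TimeCompareBenchmark setup further below, is shown here; INDEX_MERGER_V9, INDEX_IO, and makeIncIndex() are assumed to be provided by the benchmark state.
// Persist an in-memory incremental index, reload it as an immutable QueryableIndex,
// and wrap it as a segment that QueryBenchmarkUtil.makeQueryRunner can consume.
// INDEX_MERGER_V9, INDEX_IO and makeIncIndex() are assumed to come from the benchmark state.
File tmpDir = FileUtils.createTempDir();
File indexFile = INDEX_MERGER_V9.persist(makeIncIndex(), tmpDir, new IndexSpec(), null);
QueryableIndex qIndex = INDEX_IO.loadIndex(indexFile);
QueryableIndexSegment segment = new QueryableIndexSegment(qIndex, SegmentId.dummy("qIndex"));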
Use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
Class ScanBenchmark, method queryMultiQueryableIndex.
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void queryMultiQueryableIndex(Blackhole blackhole, QueryableIndexState state) {
  List<SegmentDescriptor> segmentDescriptors = new ArrayList<>();
  List<QueryRunner<Result<ScanResultValue>>> runners = new ArrayList<>();
  QueryToolChest toolChest = factory.getToolchest();
  for (int i = 0; i < state.numSegments; i++) {
    String segmentName = "qIndex";
    final QueryRunner<Result<ScanResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(
        factory, SegmentId.dummy(segmentName),
        new QueryableIndexSegment(state.qIndexes.get(i), SegmentId.dummy(segmentName, i)));
    segmentDescriptors.add(new SegmentDescriptor(Intervals.ETERNITY, "dummy_version", i));
    runners.add(toolChest.preMergeQueryDecoration(runner));
  }
  QueryRunner theRunner = toolChest.postMergeQueryDecoration(
      new FinalizeResultsQueryRunner<>(
          toolChest.mergeResults(factory.mergeRunners(state.executorService, runners)), toolChest));
  Query effectiveQuery = query
      .withDataSource(new TableDataSource("qIndex"))
      .withQuerySegmentSpec(new MultipleSpecificSegmentSpec(segmentDescriptors))
      .withOverriddenContext(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false));
  Sequence<Result<ScanResultValue>> queryResult =
      theRunner.run(QueryPlus.wrap(effectiveQuery), ResponseContext.createEmpty());
  List<Result<ScanResultValue>> results = queryResult.toList();
  blackhole.consume(results);
}
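The decoration-and-merge chain above (preMergeQueryDecoration per segment, then mergeRunners, mergeResults, FinalizeResultsQueryRunner, and postMergeQueryDecoration) is not Scan-specific; the TimeCompareBenchmark setup below repeats it for TopN and timeseries. A minimal sketch that factors the pattern into a helper is shown here; the helper name is illustrative and the raw generics follow the benchmark's own style rather than a Druid API.
// Illustrative helper, not a Druid API: composes already pre-merge-decorated
// per-segment runners into a single merged, finalized runner.
@SuppressWarnings({"unchecked", "rawtypes"})
static QueryRunner composeRunners(QueryRunnerFactory factory, ExecutorService executor,
    List<QueryRunner> perSegmentRunners) {
  QueryToolChest toolChest = factory.getToolchest();
  return toolChest.postMergeQueryDecoration(
      new FinalizeResultsQueryRunner(
          toolChest.mergeResults(factory.mergeRunners(executor, perSegmentRunners)),
          toolChest));
}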
Use of org.apache.druid.query.SegmentDescriptor in project druid by druid-io.
Class TimeCompareBenchmark, method setup.
@Setup
public void setup() throws IOException {
  log.info("SETUP CALLED AT " + System.currentTimeMillis());
  ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
  executorService = Execs.multiThreaded(numSegments, "TopNThreadPool");

  setupQueries();

  String schemaName = "basic";
  schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get(schemaName);
  segmentIntervals = new Interval[numSegments];

  // Split the schema's data interval into equally sized, contiguous per-segment intervals
  long startMillis = schemaInfo.getDataInterval().getStartMillis();
  long endMillis = schemaInfo.getDataInterval().getEndMillis();
  long partialIntervalMillis = (endMillis - startMillis) / numSegments;
  for (int i = 0; i < numSegments; i++) {
    long partialEndMillis = startMillis + partialIntervalMillis;
    segmentIntervals[i] = Intervals.utc(startMillis, partialEndMillis);
    log.info("Segment [%d] with interval [%s]", i, segmentIntervals[i]);
    startMillis = partialEndMillis;
  }

  // Generate rows for each segment into in-memory incremental indexes
  incIndexes = new ArrayList<>();
  for (int i = 0; i < numSegments; i++) {
    log.info("Generating rows for segment " + i);
    DataGenerator gen =
        new DataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED + i, segmentIntervals[i], rowsPerSegment);
    IncrementalIndex incIndex = makeIncIndex();
    for (int j = 0; j < rowsPerSegment; j++) {
      InputRow row = gen.nextRow();
      if (j % 10000 == 0) {
        log.info(j + " rows generated.");
      }
      incIndex.add(row);
    }
    incIndexes.add(incIndex);
  }

  // Persist each incremental index and load it back as an immutable queryable index
  tmpDir = FileUtils.createTempDir();
  log.info("Using temp dir: " + tmpDir.getAbsolutePath());
  qIndexes = new ArrayList<>();
  for (int i = 0; i < numSegments; i++) {
    File indexFile = INDEX_MERGER_V9.persist(incIndexes.get(i), tmpDir, new IndexSpec(), null);
    QueryableIndex qIndex = INDEX_IO.loadIndex(indexFile);
    qIndexes.add(qIndex);
  }

  // Compose per-segment TopN runners into a single merged, finalized runner
  List<QueryRunner<Result<TopNResultValue>>> singleSegmentRunners = new ArrayList<>();
  QueryToolChest toolChest = topNFactory.getToolchest();
  for (int i = 0; i < numSegments; i++) {
    SegmentId segmentId = SegmentId.dummy("qIndex " + i);
    QueryRunner<Result<TopNResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(
        topNFactory, segmentId, new QueryableIndexSegment(qIndexes.get(i), segmentId));
    singleSegmentRunners.add(new PerSegmentOptimizingQueryRunner<>(
        toolChest.preMergeQueryDecoration(runner),
        new PerSegmentQueryOptimizationContext(new SegmentDescriptor(segmentIntervals[i], "1", 0))));
  }
  topNRunner = toolChest.postMergeQueryDecoration(
      new FinalizeResultsQueryRunner<>(
          toolChest.mergeResults(topNFactory.mergeRunners(executorService, singleSegmentRunners)),
          toolChest));

  // Same composition for the timeseries side of the comparison
  List<QueryRunner<Result<TimeseriesResultValue>>> singleSegmentRunnersT = new ArrayList<>();
  QueryToolChest toolChestT = timeseriesFactory.getToolchest();
  for (int i = 0; i < numSegments; i++) {
    SegmentId segmentId = SegmentId.dummy("qIndex " + i);
    QueryRunner<Result<TimeseriesResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(
        timeseriesFactory, segmentId, new QueryableIndexSegment(qIndexes.get(i), segmentId));
    singleSegmentRunnersT.add(new PerSegmentOptimizingQueryRunner<>(
        toolChestT.preMergeQueryDecoration(runner),
        new PerSegmentQueryOptimizationContext(new SegmentDescriptor(segmentIntervals[i], "1", 0))));
  }
  timeseriesRunner = toolChestT.postMergeQueryDecoration(
      new FinalizeResultsQueryRunner<>(
          toolChestT.mergeResults(timeseriesFactory.mergeRunners(executorService, singleSegmentRunnersT)),
          toolChestT));
}
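The composed topNRunner and timeseriesRunner are exercised by the benchmark methods of TimeCompareBenchmark, which are not shown here. A minimal sketch of such a method, reusing the QueryPlus/ResponseContext pattern from queryMultiQueryableIndex above, follows; the method name and the topNQuery field (presumably built in setupQueries()) are assumptions.
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void queryTopN(Blackhole blackhole) {
  // topNQuery is assumed to be a TopNQuery prepared in setupQueries().
  Sequence<Result<TopNResultValue>> queryResult =
      topNRunner.run(QueryPlus.wrap(topNQuery), ResponseContext.createEmpty());
  List<Result<TopNResultValue>> results = queryResult.toList();
  blackhole.consume(results);
}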