Use of org.apache.druid.timeline.SegmentId in project druid by druid-io.
The class TopNBenchmark, method queryMultiQueryableIndex.
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void queryMultiQueryableIndex(Blackhole blackhole, QueryableIndexState state) {
  List<QueryRunner<Result<TopNResultValue>>> singleSegmentRunners = new ArrayList<>();
  QueryToolChest toolChest = factory.getToolchest();
  // Build one runner per segment, each decorated for pre-merge processing.
  for (int i = 0; i < state.numSegments; i++) {
    SegmentId segmentId = SegmentId.dummy("qIndex " + i);
    QueryRunner<Result<TopNResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(factory, segmentId, new QueryableIndexSegment(state.qIndexes.get(i), segmentId));
    singleSegmentRunners.add(toolChest.preMergeQueryDecoration(runner));
  }

  // Merge the per-segment runners on the benchmark executor and finalize the results.
  QueryRunner theRunner = toolChest.postMergeQueryDecoration(
      new FinalizeResultsQueryRunner<>(
          toolChest.mergeResults(factory.mergeRunners(state.executorService, singleSegmentRunners)),
          toolChest
      )
  );

  Sequence<Result<TopNResultValue>> queryResult = theRunner.run(QueryPlus.wrap(query), ResponseContext.createEmpty());
  List<Result<TopNResultValue>> results = queryResult.toList();
  blackhole.consume(results);
}
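The query field passed to QueryPlus.wrap(query) is prepared during benchmark setup and is not shown in this snippet. A minimal sketch of how such a TopN query might be built follows; the datasource, dimension, metric, and interval values are illustrative assumptions, not taken from the snippet.

// Hedged sketch: a TopN query similar to what the benchmark setup might build.
// Dimension/metric names below are assumptions for illustration only.
import java.util.Collections;

import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.aggregation.DoubleSumAggregatorFactory;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.topn.TopNQuery;
import org.apache.druid.query.topn.TopNQueryBuilder;

public class TopNQuerySketch
{
  public static TopNQuery buildExampleQuery()
  {
    return new TopNQueryBuilder()
        .dataSource("basic")                       // assumed datasource name
        .dimension("dimSequential")                // assumed dimension
        .metric("sumFloatNormal")                  // rank results by this aggregate
        .threshold(10)
        .intervals(new MultipleIntervalSegmentSpec(
            Collections.singletonList(Intervals.of("2000/2030"))))
        .granularity(Granularities.ALL)
        .aggregators(Collections.singletonList(
            new DoubleSumAggregatorFactory("sumFloatNormal", "metFloatNormal")))
        .build();
  }
}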
Use of org.apache.druid.timeline.SegmentId in project druid by druid-io.
The class TimeseriesBenchmark, method queryMultiQueryableIndex.
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void queryMultiQueryableIndex(Blackhole blackhole, QueryableIndexState state) {
  List<QueryRunner<Result<TimeseriesResultValue>>> singleSegmentRunners = new ArrayList<>();
  QueryToolChest toolChest = factory.getToolchest();
  // Build one runner per segment, each decorated for pre-merge processing.
  for (int i = 0; i < state.numSegments; i++) {
    SegmentId segmentId = SegmentId.dummy("qIndex " + i);
    QueryRunner<Result<TimeseriesResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(factory, segmentId, new QueryableIndexSegment(state.qIndexes.get(i), segmentId));
    singleSegmentRunners.add(toolChest.preMergeQueryDecoration(runner));
  }

  // Merge the per-segment runners on the benchmark executor and finalize the results.
  QueryRunner theRunner = toolChest.postMergeQueryDecoration(
      new FinalizeResultsQueryRunner<>(
          toolChest.mergeResults(factory.mergeRunners(state.executorService, singleSegmentRunners)),
          toolChest
      )
  );

  Sequence<Result<TimeseriesResultValue>> queryResult = theRunner.run(QueryPlus.wrap(query), ResponseContext.createEmpty());
  List<Result<TimeseriesResultValue>> results = queryResult.toList();
  blackhole.consume(results);
}
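As above, the query field is prepared during setup and not shown here. A minimal sketch of a comparable timeseries query follows; the datasource, aggregator, and interval are illustrative assumptions.

// Hedged sketch: a timeseries query similar to what the benchmark setup might build.
// The aggregator and datasource names are assumptions for illustration only.
import java.util.Collections;

import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.Druids;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.timeseries.TimeseriesQuery;

public class TimeseriesQuerySketch
{
  public static TimeseriesQuery buildExampleQuery()
  {
    return Druids.newTimeseriesQueryBuilder()
        .dataSource("basic")                       // assumed datasource name
        .intervals(new MultipleIntervalSegmentSpec(
            Collections.singletonList(Intervals.of("2000/2030"))))
        .granularity(Granularities.ALL)
        .aggregators(Collections.singletonList(
            new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential")))
        .descending(false)
        .build();
  }
}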
Use of org.apache.druid.timeline.SegmentId in project druid by druid-io.
The class TimeCompareBenchmark, method setup.
@Setup
public void setup() throws IOException {
  log.info("SETUP CALLED AT " + System.currentTimeMillis());
  ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
  executorService = Execs.multiThreaded(numSegments, "TopNThreadPool");

  setupQueries();

  String schemaName = "basic";
  schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get(schemaName);
  segmentIntervals = new Interval[numSegments];

  // Split the schema's data interval into numSegments contiguous, equally sized sub-intervals.
  long startMillis = schemaInfo.getDataInterval().getStartMillis();
  long endMillis = schemaInfo.getDataInterval().getEndMillis();
  long partialIntervalMillis = (endMillis - startMillis) / numSegments;
  for (int i = 0; i < numSegments; i++) {
    long partialEndMillis = startMillis + partialIntervalMillis;
    segmentIntervals[i] = Intervals.utc(startMillis, partialEndMillis);
    log.info("Segment [%d] with interval [%s]", i, segmentIntervals[i]);
    startMillis = partialEndMillis;
  }

  // Generate rows for each segment into an in-memory incremental index.
  incIndexes = new ArrayList<>();
  for (int i = 0; i < numSegments; i++) {
    log.info("Generating rows for segment " + i);
    DataGenerator gen = new DataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED + i, segmentIntervals[i], rowsPerSegment);
    IncrementalIndex incIndex = makeIncIndex();
    for (int j = 0; j < rowsPerSegment; j++) {
      InputRow row = gen.nextRow();
      if (j % 10000 == 0) {
        log.info(j + " rows generated.");
      }
      incIndex.add(row);
    }
    incIndexes.add(incIndex);
  }

  // Persist each incremental index to disk and load it back as a QueryableIndex.
  tmpDir = FileUtils.createTempDir();
  log.info("Using temp dir: " + tmpDir.getAbsolutePath());
  qIndexes = new ArrayList<>();
  for (int i = 0; i < numSegments; i++) {
    File indexFile = INDEX_MERGER_V9.persist(incIndexes.get(i), tmpDir, new IndexSpec(), null);
    QueryableIndex qIndex = INDEX_IO.loadIndex(indexFile);
    qIndexes.add(qIndex);
  }

  // Build the merged TopN runner; each per-segment runner is wrapped so it can be
  // optimized for its own segment descriptor.
  List<QueryRunner<Result<TopNResultValue>>> singleSegmentRunners = new ArrayList<>();
  QueryToolChest toolChest = topNFactory.getToolchest();
  for (int i = 0; i < numSegments; i++) {
    SegmentId segmentId = SegmentId.dummy("qIndex " + i);
    QueryRunner<Result<TopNResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(topNFactory, segmentId, new QueryableIndexSegment(qIndexes.get(i), segmentId));
    singleSegmentRunners.add(
        new PerSegmentOptimizingQueryRunner<>(
            toolChest.preMergeQueryDecoration(runner),
            new PerSegmentQueryOptimizationContext(new SegmentDescriptor(segmentIntervals[i], "1", 0))
        )
    );
  }
  topNRunner = toolChest.postMergeQueryDecoration(
      new FinalizeResultsQueryRunner<>(
          toolChest.mergeResults(topNFactory.mergeRunners(executorService, singleSegmentRunners)),
          toolChest
      )
  );

  // Build the merged timeseries runner in the same way.
  List<QueryRunner<Result<TimeseriesResultValue>>> singleSegmentRunnersT = new ArrayList<>();
  QueryToolChest toolChestT = timeseriesFactory.getToolchest();
  for (int i = 0; i < numSegments; i++) {
    SegmentId segmentId = SegmentId.dummy("qIndex " + i);
    QueryRunner<Result<TimeseriesResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(timeseriesFactory, segmentId, new QueryableIndexSegment(qIndexes.get(i), segmentId));
    singleSegmentRunnersT.add(
        new PerSegmentOptimizingQueryRunner<>(
            toolChestT.preMergeQueryDecoration(runner),
            new PerSegmentQueryOptimizationContext(new SegmentDescriptor(segmentIntervals[i], "1", 0))
        )
    );
  }
  timeseriesRunner = toolChestT.postMergeQueryDecoration(
      new FinalizeResultsQueryRunner<>(
          toolChestT.mergeResults(timeseriesFactory.mergeRunners(executorService, singleSegmentRunnersT)),
          toolChestT
      )
  );
}
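The queries themselves are prepared in setupQueries(), which is not shown here. The distinctive part of TimeCompareBenchmark is comparing the same metric over two time windows in a single pass; a minimal sketch of how that can be expressed with filtered aggregators on a TopN query follows. The datasource, dimension, metric, and interval values are illustrative assumptions, not the benchmark's actual query.

// Hedged sketch: a TopN query that aggregates the same metric over two intervals via
// filtered aggregators, in the spirit of a time-compare query. Names are assumptions.
import java.util.Arrays;
import java.util.Collections;

import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.query.aggregation.FilteredAggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.filter.IntervalDimFilter;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.topn.TopNQuery;
import org.apache.druid.query.topn.TopNQueryBuilder;
import org.apache.druid.segment.column.ColumnHolder;

public class TimeCompareQuerySketch
{
  public static TopNQuery buildExampleQuery()
  {
    return new TopNQueryBuilder()
        .dataSource("basic")                       // assumed datasource name
        .dimension("dimUniform")                   // assumed dimension
        .metric("sumCurrent")                      // rank by the "current" window sum
        .threshold(10)
        .intervals(new MultipleIntervalSegmentSpec(
            Collections.singletonList(Intervals.of("2000-01-01/2000-01-03"))))
        .granularity(Granularities.ALL)
        .aggregators(Arrays.asList(
            // Same metric, restricted to the "current" and "previous" time windows.
            new FilteredAggregatorFactory(
                new LongSumAggregatorFactory("sumCurrent", "sumLongSequential"),
                new IntervalDimFilter(
                    ColumnHolder.TIME_COLUMN_NAME,
                    Collections.singletonList(Intervals.of("2000-01-02/2000-01-03")),
                    null
                )
            ),
            new FilteredAggregatorFactory(
                new LongSumAggregatorFactory("sumPrevious", "sumLongSequential"),
                new IntervalDimFilter(
                    ColumnHolder.TIME_COLUMN_NAME,
                    Collections.singletonList(Intervals.of("2000-01-01/2000-01-02")),
                    null
                )
            )
        ))
        .build();
  }
}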
Use of org.apache.druid.timeline.SegmentId in project druid by druid-io.
The class SinglePhaseParallelIndexTaskRunner, method allocateNewSegment.
/**
* Allocate a new segment for the given timestamp locally. This method is called when dynamic partitioning is used
* and {@link org.apache.druid.indexing.common.LockGranularity} is {@code TIME_CHUNK}.
*
* The allocation algorithm is similar to the Overlord-based segment allocation. It keeps the segment allocation
* history per sequenceName. If the prevSegmentId is found in the segment allocation history, this method
* returns the next segmentId right after the prevSegmentId in the history. Since the sequenceName is unique
 * per {@link SubTaskSpec} (it is the ID of the subTaskSpec), this algorithm guarantees that the same set of segmentIds
 * is created in the same order for the same subTaskSpec.
*
* @see org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator#allocatePendingSegmentWithSegmentLineageCheck
*/
public SegmentIdWithShardSpec allocateNewSegment(
    String dataSource,
    DateTime timestamp,
    String sequenceName,
    @Nullable String prevSegmentId
) throws IOException {
  NonnullPair<Interval, String> intervalAndVersion = findIntervalAndVersion(timestamp);
  MutableObject<SegmentIdWithShardSpec> segmentIdHolder = new MutableObject<>();
  sequenceToSegmentIds.compute(sequenceName, (k, v) -> {
    final int prevSegmentIdIndex;
    final List<String> segmentIds;
    if (prevSegmentId == null) {
      prevSegmentIdIndex = -1;
      segmentIds = v == null ? new ArrayList<>() : v;
    } else {
      segmentIds = v;
      if (segmentIds == null) {
        throw new ISE("Can't find previous segmentIds for sequence[%s]", sequenceName);
      }
      prevSegmentIdIndex = segmentIds.indexOf(prevSegmentId);
      if (prevSegmentIdIndex == -1) {
        throw new ISE("Can't find previously allocated segmentId[%s] for sequence[%s]", prevSegmentId, sequenceName);
      }
    }
    final int nextSegmentIdIndex = prevSegmentIdIndex + 1;
    final SegmentIdWithShardSpec newSegmentId;
    if (nextSegmentIdIndex < segmentIds.size()) {
      // Replay a previously allocated segmentId from the per-sequence history.
      SegmentId segmentId = SegmentId.tryParse(dataSource, segmentIds.get(nextSegmentIdIndex));
      if (segmentId == null) {
        throw new ISE("Illegal segmentId format [%s]", segmentIds.get(nextSegmentIdIndex));
      }
      newSegmentId = new SegmentIdWithShardSpec(
          segmentId.getDataSource(),
          segmentId.getInterval(),
          segmentId.getVersion(),
          new BuildingNumberedShardSpec(segmentId.getPartitionNum())
      );
    } else {
      // Allocate a fresh segmentId with the next partition number for this interval and record it.
      final int partitionNum = Counters.getAndIncrementInt(partitionNumCountersPerInterval, intervalAndVersion.lhs);
      newSegmentId = new SegmentIdWithShardSpec(
          dataSource,
          intervalAndVersion.lhs,
          intervalAndVersion.rhs,
          new BuildingNumberedShardSpec(partitionNum)
      );
      segmentIds.add(newSegmentId.toString());
    }
    segmentIdHolder.setValue(newSegmentId);
    return segmentIds;
  });
  return segmentIdHolder.getValue();
}
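The replay branch above stores each allocated id as newSegmentId.toString() and later re-parses it with SegmentId.tryParse, so the string form must round-trip cleanly. A minimal, self-contained sketch of that round trip follows; the datasource, interval, version, and partition number are illustrative assumptions.

// Hedged sketch: round-tripping a SegmentId through its string form, which is what the
// per-sequence history above relies on (ids are stored as strings and re-parsed on replay).
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.timeline.SegmentId;

public class SegmentIdRoundTripSketch
{
  public static void main(String[] args)
  {
    SegmentId original = SegmentId.of(
        "wikipedia",                           // assumed datasource name
        Intervals.of("2023-01-01/2023-01-02"), // assumed segment interval
        "2023-01-05T00:00:00.000Z",            // assumed version string
        3                                      // assumed partition number
    );

    // toString() produces the canonical underscore-delimited id form.
    String asString = original.toString();

    // tryParse() takes the datasource as a hint because the datasource itself may contain '_'.
    SegmentId parsed = SegmentId.tryParse("wikipedia", asString);
    if (parsed == null || !parsed.equals(original)) {
      throw new IllegalStateException("Round trip failed for " + asString);
    }
    System.out.println("Round-tripped: " + parsed);
  }
}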
Use of org.apache.druid.timeline.SegmentId in project druid by druid-io.
The class ScanQueryEngine, method process.
public Sequence<ScanResultValue> process(final ScanQuery query, final Segment segment, final ResponseContext responseContext) {
  // "legacy" should be non-null due to toolChest.mergeResults
  final boolean legacy = Preconditions.checkNotNull(query.isLegacy(), "Expected non-null 'legacy' parameter");

  // If earlier segments have already reached the scan row limit (and no time ordering is
  // requested), there is nothing left to return for this segment.
  final Long numScannedRows = responseContext.getRowScanCount();
  if (numScannedRows != null && numScannedRows >= query.getScanRowsLimit() && query.getTimeOrder().equals(ScanQuery.Order.NONE)) {
    return Sequences.empty();
  }

  final boolean hasTimeout = QueryContexts.hasTimeout(query);
  final Long timeoutAt = responseContext.getTimeoutTime();
  final long start = System.currentTimeMillis();
  final StorageAdapter adapter = segment.asStorageAdapter();
  if (adapter == null) {
    throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
  }

  // Determine the output columns: either the explicitly requested ones, or every available
  // time/virtual/dimension/metric column when none were requested.
  final List<String> allColumns = new ArrayList<>();
  if (query.getColumns() != null && !query.getColumns().isEmpty()) {
    if (legacy && !query.getColumns().contains(LEGACY_TIMESTAMP_KEY)) {
      allColumns.add(LEGACY_TIMESTAMP_KEY);
    }
    // Unless we're in legacy mode, allColumns equals query.getColumns() exactly. This is nice since it makes
    // the compactedList form easier to use.
    allColumns.addAll(query.getColumns());
  } else {
    final Set<String> availableColumns = Sets.newLinkedHashSet(
        Iterables.concat(
            Collections.singleton(legacy ? LEGACY_TIMESTAMP_KEY : ColumnHolder.TIME_COLUMN_NAME),
            Iterables.transform(
                Arrays.asList(query.getVirtualColumns().getVirtualColumns()),
                VirtualColumn::getOutputName
            ),
            adapter.getAvailableDimensions(),
            adapter.getAvailableMetrics()
        )
    );
    allColumns.addAll(availableColumns);
    if (legacy) {
      allColumns.remove(ColumnHolder.TIME_COLUMN_NAME);
    }
  }

  final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
  Preconditions.checkArgument(intervals.size() == 1, "Can only handle a single interval, got[%s]", intervals);

  final SegmentId segmentId = segment.getId();
  final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getFilter()));

  // If the row count is not set, set it to 0, else do nothing.
  responseContext.addRowScanCount(0);
  final long limit = calculateRemainingScanRowsLimit(query, responseContext);

  return Sequences.concat(
      adapter
          .makeCursors(
              filter,
              intervals.get(0),
              query.getVirtualColumns(),
              Granularities.ALL,
              query.getTimeOrder().equals(ScanQuery.Order.DESCENDING)
                  || (query.getTimeOrder().equals(ScanQuery.Order.NONE) && query.isDescending()),
              null
          )
          .map(cursor -> new BaseSequence<>(
              new BaseSequence.IteratorMaker<ScanResultValue, Iterator<ScanResultValue>>() {
                @Override
                public Iterator<ScanResultValue> make() {
                  // One value selector per output column, resolved against this cursor.
                  final List<BaseObjectColumnValueSelector> columnSelectors = new ArrayList<>(allColumns.size());
                  for (String column : allColumns) {
                    final BaseObjectColumnValueSelector selector;
                    if (legacy && LEGACY_TIMESTAMP_KEY.equals(column)) {
                      selector = cursor.getColumnSelectorFactory().makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME);
                    } else {
                      selector = cursor.getColumnSelectorFactory().makeColumnValueSelector(column);
                    }
                    columnSelectors.add(selector);
                  }
                  final int batchSize = query.getBatchSize();

                  return new Iterator<ScanResultValue>() {
                    private long offset = 0;

                    @Override
                    public boolean hasNext() {
                      return !cursor.isDone() && offset < limit;
                    }

                    @Override
                    public ScanResultValue next() {
                      if (!hasNext()) {
                        throw new NoSuchElementException();
                      }
                      if (hasTimeout && System.currentTimeMillis() >= timeoutAt) {
                        throw new QueryTimeoutException(StringUtils.nonStrictFormat("Query [%s] timed out", query.getId()));
                      }
                      final long lastOffset = offset;
                      final Object events;
                      final ScanQuery.ResultFormat resultFormat = query.getResultFormat();
                      if (ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST.equals(resultFormat)) {
                        events = rowsToCompactedList();
                      } else if (ScanQuery.ResultFormat.RESULT_FORMAT_LIST.equals(resultFormat)) {
                        events = rowsToList();
                      } else {
                        throw new UOE("resultFormat[%s] is not supported", resultFormat.toString());
                      }
                      responseContext.addRowScanCount(offset - lastOffset);
                      if (hasTimeout) {
                        responseContext.putTimeoutTime(timeoutAt - (System.currentTimeMillis() - start));
                      }
                      return new ScanResultValue(segmentId.toString(), allColumns, events);
                    }

                    @Override
                    public void remove() {
                      throw new UnsupportedOperationException();
                    }

                    private List<List<Object>> rowsToCompactedList() {
                      final List<List<Object>> events = new ArrayList<>(batchSize);
                      final long iterLimit = Math.min(limit, offset + batchSize);
                      for (; !cursor.isDone() && offset < iterLimit; cursor.advance(), offset++) {
                        final List<Object> theEvent = new ArrayList<>(allColumns.size());
                        for (int j = 0; j < allColumns.size(); j++) {
                          theEvent.add(getColumnValue(j));
                        }
                        events.add(theEvent);
                      }
                      return events;
                    }

                    private List<Map<String, Object>> rowsToList() {
                      List<Map<String, Object>> events = Lists.newArrayListWithCapacity(batchSize);
                      final long iterLimit = Math.min(limit, offset + batchSize);
                      for (; !cursor.isDone() && offset < iterLimit; cursor.advance(), offset++) {
                        final Map<String, Object> theEvent = new LinkedHashMap<>();
                        for (int j = 0; j < allColumns.size(); j++) {
                          theEvent.put(allColumns.get(j), getColumnValue(j));
                        }
                        events.add(theEvent);
                      }
                      return events;
                    }

                    private Object getColumnValue(int i) {
                      final BaseObjectColumnValueSelector selector = columnSelectors.get(i);
                      final Object value;
                      if (legacy && allColumns.get(i).equals(LEGACY_TIMESTAMP_KEY)) {
                        value = DateTimes.utc((long) selector.getObject());
                      } else {
                        value = selector == null ? null : selector.getObject();
                      }
                      return value;
                    }
                  };
                }

                @Override
                public void cleanup(Iterator<ScanResultValue> iterFromMake) {
                }
              }
          ))
  );
}
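For context, a ScanQuery that this engine could process might be built as follows. This is a minimal sketch; the datasource, columns, and interval are illustrative assumptions, and note that process() expects exactly one interval and a non-null legacy flag.

// Hedged sketch: building a ScanQuery like the one processed above.
// Datasource, column names, and interval are assumptions for illustration only.
import java.util.Collections;

import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.query.Druids;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;

public class ScanQuerySketch
{
  public static ScanQuery buildExampleQuery()
  {
    return Druids.newScanQueryBuilder()
        .dataSource("wikipedia")                   // assumed datasource
        .intervals(new MultipleIntervalSegmentSpec(  // engine requires exactly one interval
            Collections.singletonList(Intervals.of("2023-01-01/2023-01-02"))))
        .columns("__time", "page", "added")        // assumed columns
        .batchSize(100)                            // rows per ScanResultValue
        .limit(1000)                               // overall scan row limit
        .resultFormat(ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST)
        .legacy(false)                             // non-null, as the engine expects
        .build();
  }
}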