
Example 31 with SegmentId

use of org.apache.druid.timeline.SegmentId in project druid by druid-io.

the class TopNBenchmark method queryMultiQueryableIndex.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void queryMultiQueryableIndex(Blackhole blackhole, QueryableIndexState state) {
    List<QueryRunner<Result<TopNResultValue>>> singleSegmentRunners = new ArrayList<>();
    QueryToolChest toolChest = factory.getToolchest();
    for (int i = 0; i < state.numSegments; i++) {
        SegmentId segmentId = SegmentId.dummy("qIndex " + i);
        QueryRunner<Result<TopNResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(factory, segmentId, new QueryableIndexSegment(state.qIndexes.get(i), segmentId));
        singleSegmentRunners.add(toolChest.preMergeQueryDecoration(runner));
    }
    QueryRunner theRunner = toolChest.postMergeQueryDecoration(new FinalizeResultsQueryRunner<>(toolChest.mergeResults(factory.mergeRunners(state.executorService, singleSegmentRunners)), toolChest));
    Sequence<Result<TopNResultValue>> queryResult = theRunner.run(QueryPlus.wrap(query), ResponseContext.createEmpty());
    List<Result<TopNResultValue>> results = queryResult.toList();
    blackhole.consume(results);
}
Also used : TopNResultValue(org.apache.druid.query.topn.TopNResultValue) QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) SegmentId(org.apache.druid.timeline.SegmentId) ArrayList(java.util.ArrayList) TopNQueryQueryToolChest(org.apache.druid.query.topn.TopNQueryQueryToolChest) QueryToolChest(org.apache.druid.query.QueryToolChest) QueryRunner(org.apache.druid.query.QueryRunner) FinalizeResultsQueryRunner(org.apache.druid.query.FinalizeResultsQueryRunner) Result(org.apache.druid.query.Result) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)
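These benchmark methods are driven by the standard JMH harness via the @Benchmark, @BenchmarkMode, and @OutputTimeUnit annotations. Below is a minimal, hypothetical launcher sketch using the stock JMH Runner; the BenchmarkLauncher class name and the fork/iteration counts are illustrative and not taken from the Druid source.

import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;

// Assumes this launcher lives in the same package as TopNBenchmark.
public class BenchmarkLauncher {

    public static void main(String[] args) throws RunnerException {
        // Select benchmarks by a regex over the fully qualified method name.
        Options opt = new OptionsBuilder()
            .include(TopNBenchmark.class.getSimpleName() + ".queryMultiQueryableIndex")
            .forks(1)
            .warmupIterations(5)
            .measurementIterations(10)
            .build();
        new Runner(opt).run();
    }
}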

Example 32 with SegmentId

use of org.apache.druid.timeline.SegmentId in project druid by druid-io.

the class TimeseriesBenchmark method queryMultiQueryableIndex.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void queryMultiQueryableIndex(Blackhole blackhole, QueryableIndexState state) {
    List<QueryRunner<Result<TimeseriesResultValue>>> singleSegmentRunners = new ArrayList<>();
    QueryToolChest toolChest = factory.getToolchest();
    for (int i = 0; i < state.numSegments; i++) {
        SegmentId segmentId = SegmentId.dummy("qIndex " + i);
        QueryRunner<Result<TimeseriesResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(factory, segmentId, new QueryableIndexSegment(state.qIndexes.get(i), segmentId));
        singleSegmentRunners.add(toolChest.preMergeQueryDecoration(runner));
    }
    QueryRunner theRunner = toolChest.postMergeQueryDecoration(new FinalizeResultsQueryRunner<>(toolChest.mergeResults(factory.mergeRunners(state.executorService, singleSegmentRunners)), toolChest));
    Sequence<Result<TimeseriesResultValue>> queryResult = theRunner.run(QueryPlus.wrap(query), ResponseContext.createEmpty());
    List<Result<TimeseriesResultValue>> results = queryResult.toList();
    blackhole.consume(results);
}
Also used : QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) SegmentId(org.apache.druid.timeline.SegmentId) ArrayList(java.util.ArrayList) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) QueryToolChest(org.apache.druid.query.QueryToolChest) QueryRunner(org.apache.druid.query.QueryRunner) FinalizeResultsQueryRunner(org.apache.druid.query.FinalizeResultsQueryRunner) Result(org.apache.druid.query.Result) BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode) Benchmark(org.openjdk.jmh.annotations.Benchmark) OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)

Example 33 with SegmentId

use of org.apache.druid.timeline.SegmentId in project druid by druid-io.

the class TimeCompareBenchmark method setup.

@Setup
public void setup() throws IOException {
    log.info("SETUP CALLED AT " + System.currentTimeMillis());
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
    executorService = Execs.multiThreaded(numSegments, "TopNThreadPool");
    setupQueries();
    String schemaName = "basic";
    schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get(schemaName);
    segmentIntervals = new Interval[numSegments];
    long startMillis = schemaInfo.getDataInterval().getStartMillis();
    long endMillis = schemaInfo.getDataInterval().getEndMillis();
    long partialIntervalMillis = (endMillis - startMillis) / numSegments;
    for (int i = 0; i < numSegments; i++) {
        long partialEndMillis = startMillis + partialIntervalMillis;
        segmentIntervals[i] = Intervals.utc(startMillis, partialEndMillis);
        log.info("Segment [%d] with interval [%s]", i, segmentIntervals[i]);
        startMillis = partialEndMillis;
    }
    incIndexes = new ArrayList<>();
    for (int i = 0; i < numSegments; i++) {
        log.info("Generating rows for segment " + i);
        DataGenerator gen = new DataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED + i, segmentIntervals[i], rowsPerSegment);
        IncrementalIndex incIndex = makeIncIndex();
        for (int j = 0; j < rowsPerSegment; j++) {
            InputRow row = gen.nextRow();
            if (j % 10000 == 0) {
                log.info(j + " rows generated.");
            }
            incIndex.add(row);
        }
        incIndexes.add(incIndex);
    }
    tmpDir = FileUtils.createTempDir();
    log.info("Using temp dir: " + tmpDir.getAbsolutePath());
    qIndexes = new ArrayList<>();
    for (int i = 0; i < numSegments; i++) {
        File indexFile = INDEX_MERGER_V9.persist(incIndexes.get(i), tmpDir, new IndexSpec(), null);
        QueryableIndex qIndex = INDEX_IO.loadIndex(indexFile);
        qIndexes.add(qIndex);
    }
    List<QueryRunner<Result<TopNResultValue>>> singleSegmentRunners = new ArrayList<>();
    QueryToolChest toolChest = topNFactory.getToolchest();
    for (int i = 0; i < numSegments; i++) {
        SegmentId segmentId = SegmentId.dummy("qIndex " + i);
        QueryRunner<Result<TopNResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(topNFactory, segmentId, new QueryableIndexSegment(qIndexes.get(i), segmentId));
        singleSegmentRunners.add(new PerSegmentOptimizingQueryRunner<>(toolChest.preMergeQueryDecoration(runner), new PerSegmentQueryOptimizationContext(new SegmentDescriptor(segmentIntervals[i], "1", 0))));
    }
    topNRunner = toolChest.postMergeQueryDecoration(new FinalizeResultsQueryRunner<>(toolChest.mergeResults(topNFactory.mergeRunners(executorService, singleSegmentRunners)), toolChest));
    List<QueryRunner<Result<TimeseriesResultValue>>> singleSegmentRunnersT = new ArrayList<>();
    QueryToolChest toolChestT = timeseriesFactory.getToolchest();
    for (int i = 0; i < numSegments; i++) {
        SegmentId segmentId = SegmentId.dummy("qIndex " + i);
        QueryRunner<Result<TimeseriesResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(timeseriesFactory, segmentId, new QueryableIndexSegment(qIndexes.get(i), segmentId));
        singleSegmentRunnersT.add(new PerSegmentOptimizingQueryRunner<>(toolChestT.preMergeQueryDecoration(runner), new PerSegmentQueryOptimizationContext(new SegmentDescriptor(segmentIntervals[i], "1", 0))));
    }
    timeseriesRunner = toolChestT.postMergeQueryDecoration(new FinalizeResultsQueryRunner<>(toolChestT.mergeResults(timeseriesFactory.mergeRunners(executorService, singleSegmentRunnersT)), toolChestT));
}
Also used : QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) TimeseriesResultValue(org.apache.druid.query.timeseries.TimeseriesResultValue) IndexSpec(org.apache.druid.segment.IndexSpec) ArrayList(java.util.ArrayList) HyperUniquesSerde(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde) TopNQueryQueryToolChest(org.apache.druid.query.topn.TopNQueryQueryToolChest) TimeseriesQueryQueryToolChest(org.apache.druid.query.timeseries.TimeseriesQueryQueryToolChest) QueryToolChest(org.apache.druid.query.QueryToolChest) Result(org.apache.druid.query.Result) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) TopNResultValue(org.apache.druid.query.topn.TopNResultValue) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) SegmentId(org.apache.druid.timeline.SegmentId) QueryRunner(org.apache.druid.query.QueryRunner) FinalizeResultsQueryRunner(org.apache.druid.query.FinalizeResultsQueryRunner) PerSegmentOptimizingQueryRunner(org.apache.druid.query.PerSegmentOptimizingQueryRunner) PerSegmentQueryOptimizationContext(org.apache.druid.query.PerSegmentQueryOptimizationContext) QueryableIndex(org.apache.druid.segment.QueryableIndex) DataGenerator(org.apache.druid.segment.generator.DataGenerator) InputRow(org.apache.druid.data.input.InputRow) File(java.io.File) Setup(org.openjdk.jmh.annotations.Setup)
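The setup above splits the generator's data interval into numSegments equal, contiguous chunks, then builds one incremental index per chunk. The following is a standalone sketch of just that partitioning arithmetic; the one-day interval and the segment count of 4 are made-up values for illustration.

import org.apache.druid.java.util.common.Intervals;
import org.joda.time.Interval;

public class IntervalSplitExample {

    public static void main(String[] args) {
        // Hypothetical one-day data interval, split into 4 equal 6-hour chunks.
        Interval dataInterval = Intervals.of("2020-01-01/2020-01-02");
        long startMillis = dataInterval.getStartMillis();
        long endMillis = dataInterval.getEndMillis();
        int numSegments = 4;
        long partialIntervalMillis = (endMillis - startMillis) / numSegments;

        Interval[] segmentIntervals = new Interval[numSegments];
        for (int i = 0; i < numSegments; i++) {
            long partialEndMillis = startMillis + partialIntervalMillis;
            segmentIntervals[i] = Intervals.utc(startMillis, partialEndMillis);
            System.out.println("Segment [" + i + "] with interval [" + segmentIntervals[i] + "]");
            startMillis = partialEndMillis;
        }
    }
}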

Example 34 with SegmentId

use of org.apache.druid.timeline.SegmentId in project druid by druid-io.

the class SinglePhaseParallelIndexTaskRunner method allocateNewSegment.

/**
 * Allocate a new segment for the given timestamp locally. This method is called when dynamic partitioning is used
 * and {@link org.apache.druid.indexing.common.LockGranularity} is {@code TIME_CHUNK}.
 *
 * The allocation algorithm is similar to the Overlord-based segment allocation. It keeps the segment allocation
 * history per sequenceName. If the prevSegmentId is found in the segment allocation history, this method
 * returns the next segmentId right after the prevSegmentId in the history. Since the sequenceName is unique
 * per {@link SubTaskSpec} (it is the ID of the subtaskSpec), this algorithm guarantees that the same set of segmentIds
 * is created in the same order for the same subtaskSpec. A simplified sketch of this history lookup follows this example.
 *
 * @see org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator#allocatePendingSegmentWithSegmentLineageCheck
 */
public SegmentIdWithShardSpec allocateNewSegment(String dataSource, DateTime timestamp, String sequenceName, @Nullable String prevSegmentId) throws IOException {
    NonnullPair<Interval, String> intervalAndVersion = findIntervalAndVersion(timestamp);
    MutableObject<SegmentIdWithShardSpec> segmentIdHolder = new MutableObject<>();
    sequenceToSegmentIds.compute(sequenceName, (k, v) -> {
        final int prevSegmentIdIndex;
        final List<String> segmentIds;
        if (prevSegmentId == null) {
            prevSegmentIdIndex = -1;
            segmentIds = v == null ? new ArrayList<>() : v;
        } else {
            segmentIds = v;
            if (segmentIds == null) {
                throw new ISE("Can't find previous segmentIds for sequence[%s]", sequenceName);
            }
            prevSegmentIdIndex = segmentIds.indexOf(prevSegmentId);
            if (prevSegmentIdIndex == -1) {
                throw new ISE("Can't find previously allocated segmentId[%s] for sequence[%s]", prevSegmentId, sequenceName);
            }
        }
        final int nextSegmentIdIndex = prevSegmentIdIndex + 1;
        final SegmentIdWithShardSpec newSegmentId;
        if (nextSegmentIdIndex < segmentIds.size()) {
            SegmentId segmentId = SegmentId.tryParse(dataSource, segmentIds.get(nextSegmentIdIndex));
            if (segmentId == null) {
                throw new ISE("Illegal segmentId format [%s]", segmentIds.get(nextSegmentIdIndex));
            }
            newSegmentId = new SegmentIdWithShardSpec(segmentId.getDataSource(), segmentId.getInterval(), segmentId.getVersion(), new BuildingNumberedShardSpec(segmentId.getPartitionNum()));
        } else {
            final int partitionNum = Counters.getAndIncrementInt(partitionNumCountersPerInterval, intervalAndVersion.lhs);
            newSegmentId = new SegmentIdWithShardSpec(dataSource, intervalAndVersion.lhs, intervalAndVersion.rhs, new BuildingNumberedShardSpec(partitionNum));
            segmentIds.add(newSegmentId.toString());
        }
        segmentIdHolder.setValue(newSegmentId);
        return segmentIds;
    });
    return segmentIdHolder.getValue();
}
Also used : SegmentId(org.apache.druid.timeline.SegmentId) ArrayList(java.util.ArrayList) BuildingNumberedShardSpec(org.apache.druid.timeline.partition.BuildingNumberedShardSpec) ISE(org.apache.druid.java.util.common.ISE) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) Interval(org.joda.time.Interval) MutableObject(org.apache.commons.lang3.mutable.MutableObject)
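The Javadoc above describes a per-sequence allocation history: if prevSegmentId is found in the history, the id stored right after it is re-issued; otherwise a fresh id is allocated and appended. Below is a simplified, hypothetical sketch of that lookup using plain collections; the SimpleAllocator class and its String ids are illustrative only and stand in for the Druid-specific types.

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class SimpleAllocator {

    // History of allocated segment ids, keyed by sequenceName.
    private final Map<String, List<String>> history = new HashMap<>();
    private int nextPartitionNum = 0;

    public String allocate(String sequenceName, String prevSegmentId) {
        List<String> ids = history.computeIfAbsent(sequenceName, k -> new ArrayList<>());
        // Re-issue the id right after prevSegmentId if it was already allocated for this sequence.
        int prevIndex = prevSegmentId == null ? -1 : ids.indexOf(prevSegmentId);
        if (prevSegmentId != null && prevIndex == -1) {
            throw new IllegalStateException("Unknown previous segmentId: " + prevSegmentId);
        }
        int nextIndex = prevIndex + 1;
        if (nextIndex < ids.size()) {
            return ids.get(nextIndex);
        }
        // Otherwise allocate a fresh id and append it to the history.
        String newId = "segment_" + nextPartitionNum++;
        ids.add(newId);
        return newId;
    }
}

Calling allocate twice with the same sequenceName and prevSegmentId therefore returns the same id, which is what makes retried subtasks produce an identical sequence of segments.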

Example 35 with SegmentId

use of org.apache.druid.timeline.SegmentId in project druid by druid-io.

the class ScanQueryEngine method process.

public Sequence<ScanResultValue> process(final ScanQuery query, final Segment segment, final ResponseContext responseContext) {
    // "legacy" should be non-null due to toolChest.mergeResults
    final boolean legacy = Preconditions.checkNotNull(query.isLegacy(), "Expected non-null 'legacy' parameter");
    final Long numScannedRows = responseContext.getRowScanCount();
    if (numScannedRows != null && numScannedRows >= query.getScanRowsLimit() && query.getTimeOrder().equals(ScanQuery.Order.NONE)) {
        return Sequences.empty();
    }
    final boolean hasTimeout = QueryContexts.hasTimeout(query);
    final Long timeoutAt = responseContext.getTimeoutTime();
    final long start = System.currentTimeMillis();
    final StorageAdapter adapter = segment.asStorageAdapter();
    if (adapter == null) {
        throw new ISE("Null storage adapter found. Probably trying to issue a query against a segment being memory unmapped.");
    }
    final List<String> allColumns = new ArrayList<>();
    if (query.getColumns() != null && !query.getColumns().isEmpty()) {
        if (legacy && !query.getColumns().contains(LEGACY_TIMESTAMP_KEY)) {
            allColumns.add(LEGACY_TIMESTAMP_KEY);
        }
        // Unless we're in legacy mode, allColumns equals query.getColumns() exactly. This is nice since it makes
        // the compactedList form easier to use.
        allColumns.addAll(query.getColumns());
    } else {
        final Set<String> availableColumns = Sets.newLinkedHashSet(Iterables.concat(Collections.singleton(legacy ? LEGACY_TIMESTAMP_KEY : ColumnHolder.TIME_COLUMN_NAME), Iterables.transform(Arrays.asList(query.getVirtualColumns().getVirtualColumns()), VirtualColumn::getOutputName), adapter.getAvailableDimensions(), adapter.getAvailableMetrics()));
        allColumns.addAll(availableColumns);
        if (legacy) {
            allColumns.remove(ColumnHolder.TIME_COLUMN_NAME);
        }
    }
    final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals();
    Preconditions.checkArgument(intervals.size() == 1, "Can only handle a single interval, got[%s]", intervals);
    final SegmentId segmentId = segment.getId();
    final Filter filter = Filters.convertToCNFFromQueryContext(query, Filters.toFilter(query.getFilter()));
    // If the row count is not set, set it to 0, else do nothing.
    responseContext.addRowScanCount(0);
    final long limit = calculateRemainingScanRowsLimit(query, responseContext);
    return Sequences.concat(adapter.makeCursors(filter, intervals.get(0), query.getVirtualColumns(), Granularities.ALL, query.getTimeOrder().equals(ScanQuery.Order.DESCENDING) || (query.getTimeOrder().equals(ScanQuery.Order.NONE) && query.isDescending()), null).map(cursor -> new BaseSequence<>(new BaseSequence.IteratorMaker<ScanResultValue, Iterator<ScanResultValue>>() {

        @Override
        public Iterator<ScanResultValue> make() {
            final List<BaseObjectColumnValueSelector> columnSelectors = new ArrayList<>(allColumns.size());
            for (String column : allColumns) {
                final BaseObjectColumnValueSelector selector;
                if (legacy && LEGACY_TIMESTAMP_KEY.equals(column)) {
                    selector = cursor.getColumnSelectorFactory().makeColumnValueSelector(ColumnHolder.TIME_COLUMN_NAME);
                } else {
                    selector = cursor.getColumnSelectorFactory().makeColumnValueSelector(column);
                }
                columnSelectors.add(selector);
            }
            final int batchSize = query.getBatchSize();
            return new Iterator<ScanResultValue>() {

                private long offset = 0;

                @Override
                public boolean hasNext() {
                    return !cursor.isDone() && offset < limit;
                }

                @Override
                public ScanResultValue next() {
                    if (!hasNext()) {
                        throw new NoSuchElementException();
                    }
                    if (hasTimeout && System.currentTimeMillis() >= timeoutAt) {
                        throw new QueryTimeoutException(StringUtils.nonStrictFormat("Query [%s] timed out", query.getId()));
                    }
                    final long lastOffset = offset;
                    final Object events;
                    final ScanQuery.ResultFormat resultFormat = query.getResultFormat();
                    if (ScanQuery.ResultFormat.RESULT_FORMAT_COMPACTED_LIST.equals(resultFormat)) {
                        events = rowsToCompactedList();
                    } else if (ScanQuery.ResultFormat.RESULT_FORMAT_LIST.equals(resultFormat)) {
                        events = rowsToList();
                    } else {
                        throw new UOE("resultFormat[%s] is not supported", resultFormat.toString());
                    }
                    responseContext.addRowScanCount(offset - lastOffset);
                    if (hasTimeout) {
                        responseContext.putTimeoutTime(timeoutAt - (System.currentTimeMillis() - start));
                    }
                    return new ScanResultValue(segmentId.toString(), allColumns, events);
                }

                @Override
                public void remove() {
                    throw new UnsupportedOperationException();
                }

                private List<List<Object>> rowsToCompactedList() {
                    final List<List<Object>> events = new ArrayList<>(batchSize);
                    final long iterLimit = Math.min(limit, offset + batchSize);
                    for (; !cursor.isDone() && offset < iterLimit; cursor.advance(), offset++) {
                        final List<Object> theEvent = new ArrayList<>(allColumns.size());
                        for (int j = 0; j < allColumns.size(); j++) {
                            theEvent.add(getColumnValue(j));
                        }
                        events.add(theEvent);
                    }
                    return events;
                }

                private List<Map<String, Object>> rowsToList() {
                    List<Map<String, Object>> events = Lists.newArrayListWithCapacity(batchSize);
                    final long iterLimit = Math.min(limit, offset + batchSize);
                    for (; !cursor.isDone() && offset < iterLimit; cursor.advance(), offset++) {
                        final Map<String, Object> theEvent = new LinkedHashMap<>();
                        for (int j = 0; j < allColumns.size(); j++) {
                            theEvent.put(allColumns.get(j), getColumnValue(j));
                        }
                        events.add(theEvent);
                    }
                    return events;
                }

                private Object getColumnValue(int i) {
                    final BaseObjectColumnValueSelector selector = columnSelectors.get(i);
                    final Object value;
                    if (legacy && allColumns.get(i).equals(LEGACY_TIMESTAMP_KEY)) {
                        value = DateTimes.utc((long) selector.getObject());
                    } else {
                        value = selector == null ? null : selector.getObject();
                    }
                    return value;
                }
            };
        }

        @Override
        public void cleanup(Iterator<ScanResultValue> iterFromMake) {
        }
    })));
}
Also used : Iterables(com.google.common.collect.Iterables) Arrays(java.util.Arrays) StorageAdapter(org.apache.druid.segment.StorageAdapter) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) Map(java.util.Map) UOE(org.apache.druid.java.util.common.UOE) NoSuchElementException(java.util.NoSuchElementException) BaseObjectColumnValueSelector(org.apache.druid.segment.BaseObjectColumnValueSelector) Sequences(org.apache.druid.java.util.common.guava.Sequences) Segment(org.apache.druid.segment.Segment) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) Iterator(java.util.Iterator) ResponseContext(org.apache.druid.query.context.ResponseContext) VirtualColumn(org.apache.druid.segment.VirtualColumn) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) Sets(com.google.common.collect.Sets) QueryContexts(org.apache.druid.query.QueryContexts) Granularities(org.apache.druid.java.util.common.granularity.Granularities) List(java.util.List) QueryTimeoutException(org.apache.druid.query.QueryTimeoutException) Preconditions(com.google.common.base.Preconditions) BaseSequence(org.apache.druid.java.util.common.guava.BaseSequence) SegmentId(org.apache.druid.timeline.SegmentId) Filters(org.apache.druid.segment.filter.Filters) Collections(java.util.Collections) Filter(org.apache.druid.query.filter.Filter)
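The rowsToCompactedList and rowsToList helpers above produce the two supported event shapes: RESULT_FORMAT_COMPACTED_LIST emits a positional list ordered exactly like allColumns, while RESULT_FORMAT_LIST emits a map keyed by column name. A small hypothetical illustration of both shapes for a single row; the column names and values are made up.

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class ScanResultShapes {

    public static void main(String[] args) {
        List<String> allColumns = Arrays.asList("__time", "page", "added");

        // RESULT_FORMAT_COMPACTED_LIST: each event is a positional list, ordered like allColumns.
        List<Object> compactedEvent = Arrays.asList(1577836800000L, "Main_Page", 42);

        // RESULT_FORMAT_LIST: each event is a map keyed by column name.
        Map<String, Object> listEvent = new LinkedHashMap<>();
        listEvent.put("__time", 1577836800000L);
        listEvent.put("page", "Main_Page");
        listEvent.put("added", 42);

        System.out.println(allColumns + " -> " + compactedEvent);
        System.out.println(listEvent);
    }
}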

Aggregations

SegmentId (org.apache.druid.timeline.SegmentId): 63
DataSegment (org.apache.druid.timeline.DataSegment): 32
Test (org.junit.Test): 21
Interval (org.joda.time.Interval): 14
ISE (org.apache.druid.java.util.common.ISE): 13
ArrayList (java.util.ArrayList): 12
Map (java.util.Map): 12
Set (java.util.Set): 12
ImmutableDruidDataSource (org.apache.druid.client.ImmutableDruidDataSource): 12
List (java.util.List): 11
ImmutableMap (com.google.common.collect.ImmutableMap): 10
IOException (java.io.IOException): 9
TreeMap (java.util.TreeMap): 9
CountDownLatch (java.util.concurrent.CountDownLatch): 9
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 8
Collectors (java.util.stream.Collectors): 8
Optional (java.util.Optional): 7
Sets (com.google.common.collect.Sets): 6
Nullable (javax.annotation.Nullable): 6
Response (javax.ws.rs.core.Response): 6