
Example 6 with ScanResultValue

Use of org.apache.druid.query.scan.ScanResultValue in project druid by druid-io.

From the class SingleTaskBackgroundRunnerTest, method testGetQueryRunner.

@Test
public void testGetQueryRunner() throws ExecutionException, InterruptedException {
    // Run a no-op task first (get() blocks until it finishes) so the runner has a task to serve queries for.
    runner.run(new NoopTask(null, null, "foo", 500L, 0, null, null, null)).get().getStatusCode();
    final QueryRunner<ScanResultValue> queryRunner = Druids.newScanQueryBuilder()
        .dataSource("foo")
        .intervals(new MultipleIntervalSegmentSpec(Intervals.ONLY_ETERNITY))
        .build()
        .getRunner(runner);
    // The runner is expected to wrap scan queries in a SetAndVerifyContextQueryRunner.
    Assert.assertThat(queryRunner, CoreMatchers.instanceOf(SetAndVerifyContextQueryRunner.class));
}
Also used: SetAndVerifyContextQueryRunner(org.apache.druid.server.SetAndVerifyContextQueryRunner), ScanResultValue(org.apache.druid.query.scan.ScanResultValue), MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec), NoopTask(org.apache.druid.indexing.common.task.NoopTask), Test(org.junit.Test)
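
For context, a minimal sketch of how a ScanQuery built this way could be executed against the returned runner; the queryRunner variable and the response-context handling are illustrative assumptions, not part of the test above:

ScanQuery query = Druids.newScanQueryBuilder()
    .dataSource("foo")
    .intervals(new MultipleIntervalSegmentSpec(Intervals.ONLY_ETERNITY))
    .build();
// run() returns a lazy Sequence; toList() materializes every ScanResultValue.
Sequence<ScanResultValue> sequence = queryRunner.run(QueryPlus.wrap(query), ResponseContext.createEmpty());
List<ScanResultValue> values = sequence.toList();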

Example 7 with ScanResultValue

Use of org.apache.druid.query.scan.ScanResultValue in project druid by druid-io.

From the class KafkaIndexTaskTest, method testKafkaInputFormat.

@Test(timeout = 60_000L)
public void testKafkaInputFormat() throws Exception {
    // Insert data
    insertData(Iterables.limit(records, 3));
    final KafkaIndexTask task = createTask(
        null,
        new DataSchema(
            "test_ds",
            new TimestampSpec("timestamp", "iso", null),
            new DimensionsSpec(
                Arrays.asList(
                    new StringDimensionSchema("dim1"),
                    new StringDimensionSchema("dim1t"),
                    new StringDimensionSchema("dim2"),
                    new LongDimensionSchema("dimLong"),
                    new FloatDimensionSchema("dimFloat"),
                    new StringDimensionSchema("kafka.testheader.encoding")
                )
            ),
            new AggregatorFactory[] {
                new DoubleSumAggregatorFactory("met1sum", "met1"),
                new CountAggregatorFactory("rows")
            },
            new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null),
            null
        ),
        new KafkaIndexTaskIOConfig(
            0,
            "sequence0",
            new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 0L), ImmutableSet.of()),
            new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 5L)),
            kafkaServer.consumerProperties(),
            KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS,
            true,
            null,
            null,
            KAFKA_INPUT_FORMAT
        )
    );
    Assert.assertTrue(task.supportsQueries());
    final ListenableFuture<TaskStatus> future = runTask(task);
    // Wait until the first three records have been ingested.
    while (countEvents(task) != 3) {
        Thread.sleep(25);
    }
    Assert.assertEquals(Status.READING, task.getRunner().getStatus());
    final QuerySegmentSpec interval = OBJECT_MAPPER.readValue("\"2008/2012\"", QuerySegmentSpec.class);
    List<ScanResultValue> scanResultValues = scanData(task, interval);
    // Verify that the three ingested records are queryable, including the parsed Kafka header dimension
    Assert.assertEquals(3, Iterables.size(scanResultValues));
    for (ScanResultValue result : scanResultValues) {
        final Map<String, Object> event = ((List<Map<String, Object>>) result.getEvents()).get(0);
        Assert.assertEquals("application/json", event.get("kafka.testheader.encoding"));
        Assert.assertEquals("y", event.get("dim2"));
    }
    // Insert remaining data
    insertData(Iterables.skip(records, 3));
    // Wait for task to exit
    Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode());
    // Check metrics
    Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getProcessed());
    Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getUnparseable());
    Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getThrownAway());
}
Also used: DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory), LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema), FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema), AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory), CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory), TaskStatus(org.apache.druid.indexer.TaskStatus), StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema), DataSchema(org.apache.druid.segment.indexing.DataSchema), UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), ScanResultValue(org.apache.druid.query.scan.ScanResultValue), TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec), SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers), DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec), QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec), List(java.util.List), ImmutableList(com.google.common.collect.ImmutableList), SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers), Test(org.junit.Test), IndexTaskTest(org.apache.druid.indexing.common.task.IndexTaskTest)
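
The scanData helper used above is not shown in this excerpt; assuming it simply runs a scan query through the task's own query runner, it might look roughly like this sketch (the data source name and builder details are assumptions):

private List<ScanResultValue> scanData(Task task, QuerySegmentSpec spec) {
    // Build a scan query over the supplied interval spec and run it against the task.
    ScanQuery query = new Druids.ScanQueryBuilder()
        .dataSource("test_ds")
        .intervals(spec)
        .build();
    return task.getQueryRunner(query)
               .run(QueryPlus.wrap(query), ResponseContext.createEmpty())
               .toList();
}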

Example 8 with ScanResultValue

Use of org.apache.druid.query.scan.ScanResultValue in project druid by druid-io.

From the class ScanBenchmark, method querySingleIncrementalIndex.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void querySingleIncrementalIndex(Blackhole blackhole, IncrementalIndexState state) {
    QueryRunner<ScanResultValue> runner = QueryBenchmarkUtil.makeQueryRunner(
        factory,
        SegmentId.dummy("incIndex"),
        new IncrementalIndexSegment(state.incIndex, SegmentId.dummy("incIndex"))
    );
    // Point the query at the incremental-index segment and disable the outermost merge layer.
    Query effectiveQuery = query
        .withDataSource(new TableDataSource("incIndex"))
        .withQuerySegmentSpec(
            new MultipleSpecificSegmentSpec(
                ImmutableList.of(new SegmentDescriptor(Intervals.ETERNITY, "dummy_version", 0))
            )
        )
        .withOverriddenContext(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false));
    List<ScanResultValue> results = ScanBenchmark.runQuery(factory, runner, effectiveQuery);
    blackhole.consume(results);
}
Also used: MultipleSpecificSegmentSpec(org.apache.druid.query.spec.MultipleSpecificSegmentSpec), ScanQuery(org.apache.druid.query.scan.ScanQuery), Query(org.apache.druid.query.Query), TableDataSource(org.apache.druid.query.TableDataSource), IncrementalIndexSegment(org.apache.druid.segment.IncrementalIndexSegment), ScanResultValue(org.apache.druid.query.scan.ScanResultValue), SegmentDescriptor(org.apache.druid.query.SegmentDescriptor), BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode), Benchmark(org.openjdk.jmh.annotations.Benchmark), OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)
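
ScanBenchmark.runQuery itself is not part of this excerpt. In the Druid benchmark suite such helpers typically decorate the per-segment runner with the toolchest's merge and finalize wrappers before materializing results; a sketch under that assumption:

private static <T> List<T> runQuery(QueryRunnerFactory factory, QueryRunner runner, Query<T> query) {
    QueryToolChest toolChest = factory.getToolchest();
    // Merge and finalize results the way a broker would before handing them back.
    QueryRunner<T> theRunner = new FinalizeResultsQueryRunner<>(
        toolChest.mergeResults(toolChest.preMergeQueryDecoration(runner)),
        toolChest
    );
    return theRunner.run(QueryPlus.wrap(query), ResponseContext.createEmpty()).toList();
}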

Example 9 with ScanResultValue

Use of org.apache.druid.query.scan.ScanResultValue in project druid by druid-io.

From the class ScanBenchmark, method querySingleQueryableIndex.

@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
public void querySingleQueryableIndex(Blackhole blackhole, QueryableIndexState state) {
    final QueryRunner<Result<ScanResultValue>> runner = QueryBenchmarkUtil.makeQueryRunner(
        factory,
        SegmentId.dummy("qIndex"),
        new QueryableIndexSegment(state.qIndexes.get(0), SegmentId.dummy("qIndex"))
    );
    Query effectiveQuery = query
        .withDataSource(new TableDataSource("qIndex"))
        .withQuerySegmentSpec(
            new MultipleSpecificSegmentSpec(
                ImmutableList.of(new SegmentDescriptor(Intervals.ETERNITY, "dummy_version", 0))
            )
        )
        .withOverriddenContext(ImmutableMap.of(ScanQuery.CTX_KEY_OUTERMOST, false));
    List<ScanResultValue> results = ScanBenchmark.runQuery(factory, runner, effectiveQuery);
    blackhole.consume(results);
}
Also used: QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment), MultipleSpecificSegmentSpec(org.apache.druid.query.spec.MultipleSpecificSegmentSpec), ScanQuery(org.apache.druid.query.scan.ScanQuery), Query(org.apache.druid.query.Query), TableDataSource(org.apache.druid.query.TableDataSource), SegmentDescriptor(org.apache.druid.query.SegmentDescriptor), ScanResultValue(org.apache.druid.query.scan.ScanResultValue), Result(org.apache.druid.query.Result), BenchmarkMode(org.openjdk.jmh.annotations.BenchmarkMode), Benchmark(org.openjdk.jmh.annotations.Benchmark), OutputTimeUnit(org.openjdk.jmh.annotations.OutputTimeUnit)
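
Neither benchmark method runs on its own; a JMH launcher is needed. A hypothetical main method (fork and iteration counts are illustrative, not taken from ScanBenchmark):

public static void main(String[] args) throws RunnerException {
    // Run every benchmark in ScanBenchmark with a single forked JVM.
    Options opt = new OptionsBuilder()
        .include(ScanBenchmark.class.getSimpleName())
        .forks(1)
        .warmupIterations(5)
        .measurementIterations(10)
        .build();
    new Runner(opt).run();
}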

Example 10 with ScanResultValue

Use of org.apache.druid.query.scan.ScanResultValue in project druid by druid-io.

From the class HashPartitionMultiPhaseParallelIndexingTest, method assertHashedPartition.

private void assertHashedPartition(
    Set<DataSegment> publishedSegments,
    Map<Interval, Integer> expectedIntervalToNumSegments
) throws IOException {
    // Group the published segments by interval.
    final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
    publishedSegments.forEach(
        segment -> intervalToSegments.computeIfAbsent(segment.getInterval(), k -> new ArrayList<>()).add(segment)
    );
    Assert.assertEquals(new HashSet<>(inputIntervals), intervalToSegments.keySet());
    final File tempSegmentDir = temporaryFolder.newFolder();
    for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
        Interval interval = entry.getKey();
        List<DataSegment> segmentsInInterval = entry.getValue();
        Assert.assertEquals(expectedIntervalToNumSegments.get(interval).intValue(), segmentsInInterval.size());
        for (DataSegment segment : segmentsInInterval) {
            Assert.assertSame(HashBasedNumberedShardSpec.class, segment.getShardSpec().getClass());
            final HashBasedNumberedShardSpec shardSpec = (HashBasedNumberedShardSpec) segment.getShardSpec();
            Assert.assertEquals(HashPartitionFunction.MURMUR3_32_ABS, shardSpec.getPartitionFunction());
            List<ScanResultValue> results = querySegment(segment, ImmutableList.of("dim1", "dim2"), tempSegmentDir);
            // Every row in this segment must hash to the same bucket as the first row.
            final int hash = shardSpec.getPartitionFunction().hash(
                HashBasedNumberedShardSpec.serializeGroupKey(getObjectMapper(), (List<Object>) results.get(0).getEvents()),
                shardSpec.getNumBuckets()
            );
            for (ScanResultValue value : results) {
                Assert.assertEquals(
                    hash,
                    shardSpec.getPartitionFunction().hash(
                        HashBasedNumberedShardSpec.serializeGroupKey(getObjectMapper(), (List<Object>) value.getEvents()),
                        shardSpec.getNumBuckets()
                    )
                );
            }
        }
    }
}
Also used: HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec), HashMap(java.util.HashMap), DataSegment(org.apache.druid.timeline.DataSegment), ScanResultValue(org.apache.druid.query.scan.ScanResultValue), ArrayList(java.util.ArrayList), ImmutableList(com.google.common.collect.ImmutableList), List(java.util.List), File(java.io.File), Interval(org.joda.time.Interval)
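
The core of the check above is that serializing a row's partition dimensions and hashing them always lands in the segment's own bucket. A minimal sketch of that step, assuming an objectMapper and a numBuckets value are in scope (the key values are made up):

// Serialize the partition-key values of one row, then hash into one of numBuckets buckets.
byte[] groupKey = HashBasedNumberedShardSpec.serializeGroupKey(objectMapper, ImmutableList.of("dim1Value", "dim2Value"));
int bucket = HashPartitionFunction.MURMUR3_32_ABS.hash(groupKey, numBuckets);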

Aggregations

ScanResultValue (org.apache.druid.query.scan.ScanResultValue): 15 usages
Test (org.junit.Test): 10 usages
List (java.util.List): 5 usages
ScanQuery (org.apache.druid.query.scan.ScanQuery): 5 usages
MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec): 5 usages
ImmutableList (com.google.common.collect.ImmutableList): 4 usages
SegmentDescriptor (org.apache.druid.query.SegmentDescriptor): 4 usages
ServerConfig (org.apache.druid.server.initialization.ServerConfig): 4 usages
TaskStatus (org.apache.druid.indexer.TaskStatus): 3 usages
IndexTaskTest (org.apache.druid.indexing.common.task.IndexTaskTest): 3 usages
SeekableStreamEndSequenceNumbers (org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers): 3 usages
SeekableStreamStartSequenceNumbers (org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers): 3 usages
TableDataSource (org.apache.druid.query.TableDataSource): 3 usages
MultipleSpecificSegmentSpec (org.apache.druid.query.spec.MultipleSpecificSegmentSpec): 3 usages
QuerySegmentSpec (org.apache.druid.query.spec.QuerySegmentSpec): 3 usages
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 2 usages
FloatDimensionSchema (org.apache.druid.data.input.impl.FloatDimensionSchema): 2 usages
LongDimensionSchema (org.apache.druid.data.input.impl.LongDimensionSchema): 2 usages
StringDimensionSchema (org.apache.druid.data.input.impl.StringDimensionSchema): 2 usages
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 2 usages