
Example 31 with QuerySegmentSpec

Use of org.apache.druid.query.spec.QuerySegmentSpec in project druid by druid-io.

The class KafkaIndexTaskTest, method testRunTransactionModeRollback.

@Test(timeout = 60_000L)
public void testRunTransactionModeRollback() throws Exception {
    final KafkaIndexTask task = createTask(
        null,
        new KafkaIndexTaskIOConfig(
            0,
            "sequence0",
            new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 0L), ImmutableSet.of()),
            new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 13L)),
            kafkaServer.consumerProperties(),
            KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS,
            true,
            null,
            null,
            INPUT_FORMAT
        )
    );
    final ListenableFuture<TaskStatus> future = runTask(task);
    // Insert 2 records initially
    try (final KafkaProducer<byte[], byte[]> kafkaProducer = kafkaServer.newProducer()) {
        kafkaProducer.initTransactions();
        kafkaProducer.beginTransaction();
        for (ProducerRecord<byte[], byte[]> record : Iterables.limit(records, 2)) {
            kafkaProducer.send(record).get();
        }
        kafkaProducer.commitTransaction();
    }
    while (countEvents(task) != 2) {
        Thread.sleep(25);
    }
    Assert.assertEquals(2, countEvents(task));
    Assert.assertEquals(Status.READING, task.getRunner().getStatus());
    // verify the 2 indexed records
    final QuerySegmentSpec firstInterval = OBJECT_MAPPER.readValue("\"2008/2010\"", QuerySegmentSpec.class);
    Iterable<ScanResultValue> scanResultValues = scanData(task, firstInterval);
    Assert.assertEquals(2, Iterables.size(scanResultValues));
    // Insert 3 more records and rollback
    try (final KafkaProducer<byte[], byte[]> kafkaProducer = kafkaServer.newProducer()) {
        kafkaProducer.initTransactions();
        kafkaProducer.beginTransaction();
        for (ProducerRecord<byte[], byte[]> record : Iterables.limit(Iterables.skip(records, 2), 3)) {
            kafkaProducer.send(record).get();
        }
        kafkaProducer.flush();
        kafkaProducer.abortTransaction();
    }
    Assert.assertEquals(2, countEvents(task));
    Assert.assertEquals(Status.READING, task.getRunner().getStatus());
    final QuerySegmentSpec rollbackedInterval = OBJECT_MAPPER.readValue("\"2010/2012\"", QuerySegmentSpec.class);
    scanResultValues = scanData(task, rollbackedInterval);
    // verify that there are no records indexed in the rolled-back time period
    Assert.assertEquals(0, Iterables.size(scanResultValues));
    // Insert remaining data
    try (final KafkaProducer<byte[], byte[]> kafkaProducer = kafkaServer.newProducer()) {
        kafkaProducer.initTransactions();
        kafkaProducer.beginTransaction();
        for (ProducerRecord<byte[], byte[]> record : Iterables.skip(records, 5)) {
            kafkaProducer.send(record).get();
        }
        kafkaProducer.commitTransaction();
    }
    final QuerySegmentSpec endInterval = OBJECT_MAPPER.readValue("\"2008/2049\"", QuerySegmentSpec.class);
    Iterable<ScanResultValue> scanResultValues1 = scanData(task, endInterval);
    Assert.assertEquals(2, Iterables.size(scanResultValues1));
    Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode());
    Assert.assertEquals(task.getRunner().getEndOffsets(), task.getRunner().getCurrentOffsets());
    // Check metrics
    Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getProcessed());
    Assert.assertEquals(3, task.getRunner().getRowIngestionMeters().getUnparseable());
    Assert.assertEquals(1, task.getRunner().getRowIngestionMeters().getThrownAway());
    // Check published metadata and segments in deep storage
    assertEqualsExceptVersion(
        ImmutableList.of(
            sdd("2008/P1D", 0, ImmutableList.of("a")),
            sdd("2009/P1D", 0, ImmutableList.of("b")),
            sdd("2013/P1D", 0, ImmutableList.of("f")),
            sdd("2049/P1D", 0, ImmutableList.of("f"))
        ),
        publishedDescriptors()
    );
    Assert.assertEquals(
        new KafkaDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 13L))),
        newDataSchemaMetadata()
    );
}
Also used : ScanResultValue(org.apache.druid.query.scan.ScanResultValue) SeekableStreamStartSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamStartSequenceNumbers) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) TaskStatus(org.apache.druid.indexer.TaskStatus) SeekableStreamEndSequenceNumbers(org.apache.druid.indexing.seekablestream.SeekableStreamEndSequenceNumbers) Test(org.junit.Test) IndexTaskTest(org.apache.druid.indexing.common.task.IndexTaskTest)
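The two interval specs in this test are deserialized from shorthand JSON interval strings such as "2008/2010". As a rough equivalent, here is a minimal sketch (not part of KafkaIndexTaskTest; the class and method names are illustrative) that builds a spec covering the same single interval explicitly with MultipleIntervalSegmentSpec instead of going through JSON deserialization:

import java.util.Collections;

import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.query.spec.MultipleIntervalSegmentSpec;
import org.apache.druid.query.spec.QuerySegmentSpec;

// Illustrative helper, not part of the test above.
class IntervalSpecSketch {
    // Roughly the same interval as OBJECT_MAPPER.readValue("\"2008/2010\"", QuerySegmentSpec.class):
    // a spec covering 2008-01-01/2010-01-01.
    static QuerySegmentSpec firstInterval() {
        return new MultipleIntervalSegmentSpec(
            Collections.singletonList(Intervals.of("2008/2010"))
        );
    }
}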

Example 32 with QuerySegmentSpec

Use of org.apache.druid.query.spec.QuerySegmentSpec in project druid by druid-io.

The class NestedQueryPushDownTest, method testSubqueryWithExtractionFnInOuterQuery.

@Test
public void testSubqueryWithExtractionFnInOuterQuery() {
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.utc(1500000000000L, 1600000000000L)));
    GroupByQuery query = GroupByQuery.builder()
        .setDataSource("blah")
        .setDimensions(new DefaultDimensionSpec("dimA", "dimA"), new DefaultDimensionSpec("dimB", "dimB"))
        .setAggregatorSpecs(new LongSumAggregatorFactory("metASum", "metA"), new LongSumAggregatorFactory("metBSum", "metB"))
        .setGranularity(Granularities.ALL)
        .setQuerySegmentSpec(intervalSpec)
        .build();
    GroupByQuery nestedQuery = GroupByQuery.builder()
        .setDataSource(query)
        .setDimensions(new ExtractionDimensionSpec("dimA", "extractedDimA", new RegexDimExtractionFn("^(p)", true, "replacement")))
        .setAggregatorSpecs(new LongSumAggregatorFactory("finalSum", "metASum"))
        .setContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_PUSH_DOWN_NESTED_QUERY, true))
        .setGranularity(Granularities.ALL)
        .setQuerySegmentSpec(intervalSpec)
        .build();
    ResultRow expectedRow0 = GroupByQueryRunnerTestHelper.createExpectedRow(nestedQuery, "2017-07-14T02:40:00.000Z", "finalSum", 4000L, "extractedDimA", "p");
    ResultRow expectedRow1 = GroupByQueryRunnerTestHelper.createExpectedRow(nestedQuery, "2017-07-14T02:40:00.000Z", "finalSum", 4000L, "extractedDimA", "replacement");
    Sequence<ResultRow> queryResult = runNestedQueryWithForcePushDown(nestedQuery);
    List<ResultRow> results = queryResult.toList();
    Assert.assertEquals(2, results.size());
    Assert.assertEquals(expectedRow0, results.get(0));
    Assert.assertEquals(expectedRow1, results.get(1));
}
Also used : LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) RegexDimExtractionFn(org.apache.druid.query.extraction.RegexDimExtractionFn) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) Test(org.junit.Test)
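The extraction function in the outer query explains the two expected rows: values of dimA that match "^(p)" collapse to "p", and everything else falls back to the literal "replacement" because replaceMissingValue is true. A minimal standalone sketch of that behavior follows; the input strings are made up for illustration and are not values from the test fixture:

import org.apache.druid.query.extraction.RegexDimExtractionFn;

// Standalone illustration of the extraction fn used in the outer query above.
class RegexExtractionSketch {
    public static void main(String[] args) {
        RegexDimExtractionFn fn = new RegexDimExtractionFn("^(p)", true, "replacement");
        // The regex matches, so the first capture group ("p") is returned.
        System.out.println(fn.apply("pomegranate"));
        // No match: replaceMissingValue is true, so "replacement" is returned,
        // which is why the second expected row groups under extractedDimA = "replacement".
        System.out.println(fn.apply("mango"));
    }
}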

Example 33 with QuerySegmentSpec

Use of org.apache.druid.query.spec.QuerySegmentSpec in project druid by druid-io.

The class NestedQueryPushDownTest, method testDimensionFilterOnInnerQuery.

@Test
public void testDimensionFilterOnInnerQuery() {
    QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Collections.singletonList(Intervals.utc(1500000000000L, 1600000000000L)));
    GroupByQuery query = GroupByQuery.builder()
        .setDataSource("blah")
        .setDimensions(new DefaultDimensionSpec("dimA", "dimA"), new DefaultDimensionSpec("dimB", "dimB"))
        .setAggregatorSpecs(new LongSumAggregatorFactory("metASum", "metA"), new LongSumAggregatorFactory("metBSum", "metB"))
        .setGranularity(Granularities.ALL)
        .setQuerySegmentSpec(intervalSpec)
        .setDimFilter(new JavaScriptDimFilter("dimA", "function(dim){ return dim == 'mango' }", null, JavaScriptConfig.getEnabledInstance()))
        .build();
    GroupByQuery nestedQuery = GroupByQuery.builder()
        .setDataSource(query)
        .setDimensions(new DefaultDimensionSpec("dimA", "newDimA"))
        .setAggregatorSpecs(new LongSumAggregatorFactory("finalSum", "metASum"))
        .setContext(ImmutableMap.of(GroupByQueryConfig.CTX_KEY_FORCE_PUSH_DOWN_NESTED_QUERY, true))
        .setGranularity(Granularities.ALL)
        .setQuerySegmentSpec(intervalSpec)
        .build();
    ResultRow expectedRow0 = GroupByQueryRunnerTestHelper.createExpectedRow(nestedQuery, "2017-07-14T02:40:00.000Z", "finalSum", 4000L, "newDimA", "mango");
    Sequence<ResultRow> queryResult = runNestedQueryWithForcePushDown(nestedQuery);
    List<ResultRow> results = queryResult.toList();
    Assert.assertEquals(1, results.size());
    Assert.assertEquals(expectedRow0, results.get(0));
}
Also used : LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) QuerySegmentSpec(org.apache.druid.query.spec.QuerySegmentSpec) MultipleIntervalSegmentSpec(org.apache.druid.query.spec.MultipleIntervalSegmentSpec) JavaScriptDimFilter(org.apache.druid.query.filter.JavaScriptDimFilter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) Test(org.junit.Test)
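The inner query's filter is a JavaScriptDimFilter that only passes rows where dimA equals 'mango', and it needs JavaScriptConfig.getEnabledInstance() because JavaScript execution is disabled by default. As a sketch of a non-JavaScript alternative (an assumption for illustration, not what the test uses), the same predicate can be expressed with a plain SelectorDimFilter:

import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.filter.SelectorDimFilter;

// Hypothetical helper, not part of NestedQueryPushDownTest.
class InnerFilterSketch {
    // Same predicate as "function(dim){ return dim == 'mango' }":
    // keep only rows whose dimA value is exactly "mango".
    static DimFilter dimAIsMango() {
        return new SelectorDimFilter("dimA", "mango", null);
    }
}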

Aggregations

QuerySegmentSpec (org.apache.druid.query.spec.QuerySegmentSpec): 33 usages
MultipleIntervalSegmentSpec (org.apache.druid.query.spec.MultipleIntervalSegmentSpec): 28 usages
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 20 usages
Test (org.junit.Test): 18 usages
DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec): 16 usages
ArrayList (java.util.ArrayList): 10 usages
BoundDimFilter (org.apache.druid.query.filter.BoundDimFilter): 9 usages
List (java.util.List): 8 usages
FinalizeResultsQueryRunner (org.apache.druid.query.FinalizeResultsQueryRunner): 7 usages
DefaultLimitSpec (org.apache.druid.query.groupby.orderby.DefaultLimitSpec): 7 usages
OrderByColumnSpec (org.apache.druid.query.groupby.orderby.OrderByColumnSpec): 7 usages
GeneratorSchemaInfo (org.apache.druid.segment.generator.GeneratorSchemaInfo): 7 usages
QueryPlus (org.apache.druid.query.QueryPlus): 6 usages
QueryRunner (org.apache.druid.query.QueryRunner): 6 usages
DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory): 6 usages
ResponseContext (org.apache.druid.query.context.ResponseContext): 6 usages
SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter): 6 usages
LinkedHashMap (java.util.LinkedHashMap): 5 usages
BySegmentQueryRunner (org.apache.druid.query.BySegmentQueryRunner): 5 usages
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 5 usages