Example 6 with TransformSpec

Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io.

From the class AppenderatorDriverRealtimeIndexTaskTest, method testTransformSpec:

@Test(timeout = 60_000L)
public void testTransformSpec() throws Exception {
    expectPublishedSegments(2);
    final TransformSpec transformSpec = new TransformSpec(new SelectorDimFilter("dim1", "foo", null), ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil())));
    final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, transformSpec, true, 0, true, 0, 1);
    final ListenableFuture<TaskStatus> statusFuture = runTask(task);
    // Wait for the firehose to show up; it starts off null.
    while (task.getFirehose() == null) {
        Thread.sleep(50);
    }
    final TestFirehose firehose = (TestFirehose) task.getFirehose();
    firehose.addRows(ImmutableList.of(ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "1"), ImmutableMap.of("t", now.minus(new Period("P1D")).getMillis(), "dim1", "foo", "met1", 2.0), ImmutableMap.of("t", now.getMillis(), "dim2", "bar", "met1", 2.0)));
    // Stop the firehose; this will drain out existing events.
    firehose.close();
    Collection<DataSegment> publishedSegments = awaitSegments();
    // Check metrics.
    Assert.assertEquals(2, task.getRowIngestionMeters().getProcessed());
    Assert.assertEquals(1, task.getRowIngestionMeters().getThrownAway());
    Assert.assertEquals(0, task.getRowIngestionMeters().getUnparseable());
    // Do some queries.
    Assert.assertEquals(2, sumMetric(task, null, "rows").longValue());
    Assert.assertEquals(2, sumMetric(task, new SelectorDimFilter("dim1t", "foofoo", null), "rows").longValue());
    if (NullHandling.replaceWithDefault()) {
        Assert.assertEquals(0, sumMetric(task, new SelectorDimFilter("dim1t", "barbar", null), "metric1").longValue());
    } else {
        Assert.assertNull(sumMetric(task, new SelectorDimFilter("dim1t", "barbar", null), "metric1"));
    }
    Assert.assertEquals(3, sumMetric(task, null, "met1").longValue());
    awaitHandoffs();
    for (DataSegment publishedSegment : publishedSegments) {
        Pair<Executor, Runnable> executorRunnablePair = handOffCallbacks.get(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()));
        Assert.assertNotNull(publishedSegment + " missing from handoff callbacks: " + handOffCallbacks, executorRunnablePair);
        // Simulate handoff.
        executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
    }
    handOffCallbacks.clear();
    // Wait for the task to finish.
    final TaskStatus taskStatus = statusFuture.get();
    Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
}
Also used: Period(org.joda.time.Period) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Executor(java.util.concurrent.Executor) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
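
To see the transform semantics in isolation, here is a minimal hedged sketch (not code from the project; it assumes TransformSpec#toTransformer() and the MapBasedInputRow constructor as found in recent Druid versions) of the same filter-plus-expression pair applied to a hand-built row:

// Hedged sketch: apply the test's TransformSpec directly to a single row.
TransformSpec spec = new TransformSpec(
    new SelectorDimFilter("dim1", "foo", null),
    ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil())));
Transformer transformer = spec.toTransformer();
InputRow kept = transformer.transform(new MapBasedInputRow(
    DateTimes.nowUtc(),
    ImmutableList.of("dim1"),
    ImmutableMap.<String, Object>of("dim1", "foo")));
// kept.getDimension("dim1t") evaluates to ["foofoo"]; a row with dim1 != "foo"
// transforms to null, which ingestion counts as a thrown-away row.

Assumed imports: Transformer (org.apache.druid.segment.transform.Transformer), MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow), DateTimes (org.apache.druid.java.util.common.DateTimes)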

Example 7 with TransformSpec

Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io.

From the class RealtimeIndexTaskTest, method testTransformSpec:

@Test(timeout = 60_000L)
public void testTransformSpec() throws Exception {
    final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
    final TransformSpec transformSpec = new TransformSpec(new SelectorDimFilter("dim1", "foo", null), ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil())));
    final RealtimeIndexTask task = makeRealtimeTask(null, transformSpec, true, 0);
    final TaskToolbox taskToolbox = makeToolbox(task, mdc, tempFolder.newFolder());
    final ListenableFuture<TaskStatus> statusFuture = runTask(task, taskToolbox);
    final DataSegment publishedSegment;
    // Wait for the firehose to show up; it starts off null.
    while (task.getFirehose() == null) {
        Thread.sleep(50);
    }
    final TestFirehose firehose = (TestFirehose) task.getFirehose();
    firehose.addRows(ImmutableList.of(ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "1"), ImmutableMap.of("t", now.minus(new Period("P1D")).getMillis(), "dim1", "foo", "met1", 2.0), ImmutableMap.of("t", now.getMillis(), "dim2", "bar", "met1", 2.0)));
    // Stop the firehose; this will drain out existing events.
    firehose.close();
    // Wait for publish.
    while (mdc.getPublished().isEmpty()) {
        Thread.sleep(50);
    }
    publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
    // Check metrics.
    Assert.assertEquals(1, task.getMetrics().processed());
    Assert.assertEquals(2, task.getMetrics().thrownAway());
    Assert.assertEquals(0, task.getMetrics().unparseable());
    // Do some queries.
    Assert.assertEquals(1, sumMetric(task, null, "rows").longValue());
    Assert.assertEquals(1, sumMetric(task, new SelectorDimFilter("dim1t", "foofoo", null), "rows").longValue());
    if (NullHandling.replaceWithDefault()) {
        Assert.assertEquals(0, sumMetric(task, new SelectorDimFilter("dim1t", "barbar", null), "rows").longValue());
    } else {
        Assert.assertNull(sumMetric(task, new SelectorDimFilter("dim1t", "barbar", null), "rows"));
    }
    Assert.assertEquals(1, sumMetric(task, null, "met1").longValue());
    // Simulate handoff.
    for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
        final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
        Assert.assertEquals(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()), entry.getKey());
        executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
    }
    handOffCallbacks.clear();
    // Wait for the task to finish.
    final TaskStatus taskStatus = statusFuture.get();
    Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
}
Also used: Period(org.joda.time.Period) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) TransformSpec(org.apache.druid.segment.transform.TransformSpec) TestFirehose(org.apache.druid.indexing.common.TestFirehose) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) Executor(java.util.concurrent.Executor) TestIndexerMetadataStorageCoordinator(org.apache.druid.indexing.test.TestIndexerMetadataStorageCoordinator) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Pair(org.apache.druid.java.util.common.Pair) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
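
A hedged reading note, inferred only from the assertions above (it is not stated in the project sources): the two realtime tests feed in the same three rows but report different meters.

// Example 6 (AppenderatorDriverRealtimeIndexTask): processed = 2, thrownAway = 1
//   -> only the dim2=bar row fails the dim1 == "foo" transform filter.
// Example 7 (RealtimeIndexTask): processed = 1, thrownAway = 2
//   -> the day-old row is also dropped, presumably by the plumber's
//      windowPeriod/rejection policy rather than by the transform filter.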

Example 8 with TransformSpec

Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io.

From the class IngestSegmentFirehoseFactoryTest, method testTransformSpec:

@Test
public void testTransformSpec() throws IOException {
    Assert.assertEquals(MAX_SHARD_NUMBER.longValue(), SEGMENT_SET.size());
    Integer rowcount = 0;
    final TransformSpec transformSpec = new TransformSpec(new SelectorDimFilter(ColumnHolder.TIME_COLUMN_NAME, "1", null), ImmutableList.of(new ExpressionTransform(METRIC_FLOAT_NAME, METRIC_FLOAT_NAME + " * 10", ExprMacroTable.nil())));
    int skipped = 0;
    try (final Firehose firehose = factory.connect(transformSpec.decorate(rowParser), TMP_DIR)) {
        while (firehose.hasMore()) {
            InputRow row = firehose.nextRow();
            if (row == null) {
                skipped++;
                continue;
            }
            Assert.assertArrayEquals(new String[] { DIM_NAME }, row.getDimensions().toArray());
            Assert.assertArrayEquals(new String[] { DIM_VALUE }, row.getDimension(DIM_NAME).toArray());
            Assert.assertEquals(METRIC_LONG_VALUE.longValue(), row.getMetric(METRIC_LONG_NAME).longValue());
            Assert.assertEquals(METRIC_FLOAT_VALUE * 10, row.getMetric(METRIC_FLOAT_NAME).floatValue(), METRIC_FLOAT_VALUE * 0.0001);
            ++rowcount;
        }
    }
    Assert.assertEquals(90, skipped);
    Assert.assertEquals((int) MAX_ROWS, (int) rowcount);
}
Also used: SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) Firehose(org.apache.druid.data.input.Firehose) InputRow(org.apache.druid.data.input.InputRow) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Test(org.junit.Test)
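
The decorate(...) call above is the general way to attach a TransformSpec to any InputRowParser. A minimal hedged sketch of the same idiom outside a firehose (the specs and field names here are illustrative, not from the project):

// Hedged sketch: wrap a plain MapInputRowParser so each parsed row passes
// through the TransformSpec's filter and expression transforms.
TimestampSpec timestampSpec = new TimestampSpec("ts", "auto", null);
DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim")));
InputRowParser<Map<String, Object>> base = new MapInputRowParser(new TimeAndDimsParseSpec(timestampSpec, dimensionsSpec));
InputRowParser<Map<String, Object>> decorated = transformSpec.decorate(base);
InputRow row = decorated.parseBatch(ImmutableMap.<String, Object>of("ts", "2019-04-22", "dim", "x")).get(0);
// Rows that fail the TransformSpec's filter surface as null, which is why the
// firehose loop above checks row == null and counts the skips.

Assumed imports: MapInputRowParser (org.apache.druid.data.input.impl.MapInputRowParser), TimeAndDimsParseSpec (org.apache.druid.data.input.impl.TimeAndDimsParseSpec), InputRowParser (org.apache.druid.data.input.impl.InputRowParser)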

Example 9 with TransformSpec

Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io.

From the class InputSourceSamplerTest, method testMultipleJsonStringInOneBlock:

/**
 * This case tests sampling of multiple json lines in one text block.
 * Currently only RecordSupplierInputSource supports this kind of input; see https://github.com/apache/druid/pull/10383 for more information.
 *
 * This test combines an illegal json block and a legal json block to verify that:
 * 1. no line in the illegal json block is parsed
 * 2. the illegal json block does not affect the processing of the 2nd record
 * 3. all lines in the legal json block are parsed successfully
 * (A worked count of the expected row totals follows this example.)
 */
@Test
public void testMultipleJsonStringInOneBlock() throws IOException {
    if (!ParserType.STR_JSON.equals(parserType) || !useInputFormatApi) {
        return;
    }
    final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("dim1PlusBar")));
    final TransformSpec transformSpec = new TransformSpec(null, ImmutableList.of(new ExpressionTransform("dim1PlusBar", "concat(dim1 + 'bar')", TestExprMacroTable.INSTANCE)));
    final AggregatorFactory[] aggregatorFactories = { new LongSumAggregatorFactory("met1", "met1") };
    final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
    final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, transformSpec);
    List<String> jsonBlockList = ImmutableList.of(
        // include the line which can't be parsed into a JSON object, forming an illegal json block
        String.join("", STR_JSON_ROWS),
        // exclude the last line to form a legal json block
        STR_JSON_ROWS.stream().limit(STR_JSON_ROWS.size() - 1).collect(Collectors.joining()));
    SamplerResponse response = inputSourceSampler.sample(new RecordSupplierInputSource("topicName", new TestRecordSupplier(jsonBlockList), true), createInputFormat(), dataSchema, new SamplerConfig(200, 3000));
    //
    // the 1st json block contains STR_JSON_ROWS.size() lines and the 2nd contains STR_JSON_ROWS.size() - 1 lines,
    // so together there should be STR_JSON_ROWS.size() * 2 - 1 lines
    //
    int illegalRows = STR_JSON_ROWS.size();
    int legalRows = STR_JSON_ROWS.size() - 1;
    Assert.assertEquals(illegalRows + legalRows, response.getNumRowsRead());
    Assert.assertEquals(legalRows, response.getNumRowsIndexed());
    Assert.assertEquals(illegalRows + 2, response.getData().size());
    List<SamplerResponseRow> data = response.getData();
    List<Map<String, Object>> rawColumnList = this.getRawColumns();
    int index = 0;
    // 
    // first n rows are related to the first json block which fails to parse
    // 
    // useInputFormatApi is always true here (the method returns early otherwise),
    // and both branches of the original check assigned the same message.
    final String parseExceptionMessage = "Timestamp[bad_timestamp] is unparseable! Event: {t=bad_timestamp, dim1=foo, met1=6}";
    for (; index < illegalRows; index++) {
        assertEqualsSamplerResponseRow(new SamplerResponseRow(rawColumnList.get(index), null, true, parseExceptionMessage), data.get(index));
    }
    // 
    // following are parsed rows for legal json block
    // 
    assertEqualsSamplerResponseRow(new SamplerResponseRow(rawColumnList.get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1PlusBar", "foobar").put("met1", 11L).build(), null, null), data.get(index++));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(rawColumnList.get(3), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1PlusBar", "foo2bar").put("met1", 4L).build(), null, null), data.get(index));
}
Also used: SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TransformSpec(org.apache.druid.segment.transform.TransformSpec) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
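
As referenced in the Javadoc, here is a worked count of the assertions above (a reading aid, assuming STR_JSON_ROWS has n lines; not code from the project):

// Let n = STR_JSON_ROWS.size().
int illegalRows = n;      // every line of the poisoned first block fails to parse
int legalRows = n - 1;    // the second block omits the bad last line and parses cleanly
// numRowsRead    = illegalRows + legalRows = 2n - 1
// numRowsIndexed = legalRows = n - 1
// data.size()    = illegalRows + 2: one unparseable response row per illegal line,
//                  plus the legal rows rolled up into 2 indexed rows (met1 sums of 11 and 4)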

Example 10 with TransformSpec

Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io.

From the class InputSourceSamplerTest, method testWithTransformsAutoDimensions:

@Test
public void testWithTransformsAutoDimensions() throws IOException {
    final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
    final TransformSpec transformSpec = new TransformSpec(null, ImmutableList.of(new ExpressionTransform("dim1PlusBar", "concat(dim1, 'bar')", TestExprMacroTable.INSTANCE)));
    final AggregatorFactory[] aggregatorFactories = { new LongSumAggregatorFactory("met1", "met1") };
    final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
    final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, transformSpec);
    final InputSource inputSource = createInputSource(getTestRows(), dataSchema);
    final InputFormat inputFormat = createInputFormat();
    SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);
    Assert.assertEquals(6, response.getNumRowsRead());
    Assert.assertEquals(5, response.getNumRowsIndexed());
    Assert.assertEquals(4, response.getData().size());
    List<SamplerResponseRow> data = response.getData();
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 6L).build(), null, null), data.get(0));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(3), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo2").put("dim2", null).put("met1", 4L).build(), null, null), data.get(1));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(4), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", "bar").put("met1", 5L).build(), null, null), data.get(2));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()), data.get(3));
}
Also used: RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TransformSpec(org.apache.druid.segment.transform.TransformSpec) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
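
For context, a hedged sketch of what a helper like createDataSchema(...) above plausibly wires together; the 6-argument DataSchema constructor exists in Druid of this era, but "sampler-test" is an illustrative datasource name and the helper's real body may differ:

// Hedged sketch: the TransformSpec rides along inside the DataSchema.
DataSchema dataSchema = new DataSchema(
    "sampler-test",        // datasource name (illustrative)
    timestampSpec,         // where the row's timestamp comes from
    dimensionsSpec,        // a null dimension list lets the sampler discover dimensions, as in this test
    aggregatorFactories,   // rollup metrics, e.g. the longSum over met1
    granularitySpec,       // segment/query granularity and the rollup flag
    transformSpec          // the filter and expression transforms
);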

Aggregations

TransformSpec (org.apache.druid.segment.transform.TransformSpec): 23
Test (org.junit.Test): 19
ExpressionTransform (org.apache.druid.segment.transform.ExpressionTransform): 16
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 13
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 13
SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter): 12
GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec): 10
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 9
DataSchema (org.apache.druid.segment.indexing.DataSchema): 9
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 9
Map (java.util.Map): 8
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 8
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 8
TaskStatus (org.apache.druid.indexer.TaskStatus): 7
DataSegment (org.apache.druid.timeline.DataSegment): 7
ImmutableMap (com.google.common.collect.ImmutableMap): 6
InputFormat (org.apache.druid.data.input.InputFormat): 6
ArrayList (java.util.ArrayList): 5
SamplerResponse (org.apache.druid.client.indexing.SamplerResponse): 5
SamplerResponseRow (org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow): 5