Search in sources :

Example 1 with ExpressionTransform

use of org.apache.druid.segment.transform.ExpressionTransform in project druid by druid-io.

the class DataSchemaTest method testTransformSpec.

@Test
public void testTransformSpec() {
    Map<String, Object> parserMap = jsonMapper.convertValue(new StringInputRowParser(new JSONParseSpec(new TimestampSpec("time", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("time", "dimA", "dimB", "col2"))), null, null, null), null), JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT);
    DataSchema schema = new DataSchema(IdUtilsTest.VALID_ID_CHARS, parserMap, new AggregatorFactory[] { new DoubleSumAggregatorFactory("metric1", "col1"), new DoubleSumAggregatorFactory("metric2", "col2") }, new ArbitraryGranularitySpec(Granularities.DAY, ImmutableList.of(Intervals.of("2014/2015"))), new TransformSpec(new SelectorDimFilter("dimA", "foo", null), ImmutableList.of(new ExpressionTransform("expr", "concat(dimA,dimA)", TestExprMacroTable.INSTANCE))), jsonMapper);
    // Test hack that produces a StringInputRowParser.
    final StringInputRowParser parser = (StringInputRowParser) schema.getParser();
    final InputRow row1bb = parser.parseBatch(ByteBuffer.wrap("{\"time\":\"2000-01-01\",\"dimA\":\"foo\"}".getBytes(StandardCharsets.UTF_8))).get(0);
    Assert.assertEquals(DateTimes.of("2000-01-01"), row1bb.getTimestamp());
    Assert.assertEquals("foo", row1bb.getRaw("dimA"));
    Assert.assertEquals("foofoo", row1bb.getRaw("expr"));
    final InputRow row1string = parser.parse("{\"time\":\"2000-01-01\",\"dimA\":\"foo\"}");
    Assert.assertEquals(DateTimes.of("2000-01-01"), row1string.getTimestamp());
    Assert.assertEquals("foo", row1string.getRaw("dimA"));
    Assert.assertEquals("foofoo", row1string.getRaw("expr"));
    final InputRow row2 = parser.parseBatch(ByteBuffer.wrap("{\"time\":\"2000-01-01\",\"dimA\":\"x\"}".getBytes(StandardCharsets.UTF_8))).get(0);
    Assert.assertNull(row2);
}
Also used : DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) ArbitraryGranularitySpec(org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec) TransformSpec(org.apache.druid.segment.transform.TransformSpec) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRow(org.apache.druid.data.input.InputRow) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) IdUtilsTest(org.apache.druid.common.utils.IdUtilsTest)

Example 2 with ExpressionTransform

use of org.apache.druid.segment.transform.ExpressionTransform in project druid by druid-io.

the class IndexTaskTest method testTransformSpec.

@Test
public void testTransformSpec() throws Exception {
    File tmpDir = temporaryFolder.newFolder();
    File tmpFile = File.createTempFile("druid", "index", tmpDir);
    try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
        writer.write("2014-01-01T00:00:10Z,a,an|array,1|2|3,1\n");
        writer.write("2014-01-01T01:00:20Z,b,another|array,3|4,1\n");
        writer.write("2014-01-01T02:00:30Z,c,and|another,0|1,1\n");
    }
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("ts", "dim", "dim_array", "dim_num_array", "dimt", "dimtarray1", "dimtarray2", "dimtnum_array")));
    final List<String> columns = Arrays.asList("ts", "dim", "dim_array", "dim_num_array", "val");
    final String listDelimiter = "|";
    final TransformSpec transformSpec = new TransformSpec(new SelectorDimFilter("dim", "b", null), ImmutableList.of(new ExpressionTransform("dimt", "concat(dim,dim)", ExprMacroTable.nil()), new ExpressionTransform("dimtarray1", "array(dim, dim)", ExprMacroTable.nil()), new ExpressionTransform("dimtarray2", "map(d -> concat(d, 'foo'), dim_array)", ExprMacroTable.nil()), new ExpressionTransform("dimtnum_array", "map(d -> d + 3, dim_num_array)", ExprMacroTable.nil())));
    final IndexTuningConfig tuningConfig = createTuningConfigWithMaxRowsPerSegment(2, false);
    final IndexIngestionSpec indexIngestionSpec;
    if (useInputFormatApi) {
        indexIngestionSpec = createIngestionSpec(jsonMapper, tmpDir, DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, new CsvInputFormat(columns, listDelimiter, null, false, 0), transformSpec, null, tuningConfig, false, false);
    } else {
        indexIngestionSpec = createIngestionSpec(jsonMapper, tmpDir, new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, listDelimiter, columns, false, 0), transformSpec, null, tuningConfig, false, false);
    }
    IndexTask indexTask = new IndexTask(null, null, indexIngestionSpec, null);
    Assert.assertEquals(indexTask.getId(), indexTask.getGroupId());
    final List<DataSegment> segments = runTask(indexTask).rhs;
    Assert.assertEquals(1, segments.size());
    DataSegment segment = segments.get(0);
    final File segmentFile = segmentCacheManager.getSegmentFiles(segment);
    final WindowedStorageAdapter adapter = new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile)), segment.getInterval());
    final Sequence<Cursor> cursorSequence = adapter.getAdapter().makeCursors(null, segment.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null);
    final List<Map<String, Object>> transforms = cursorSequence.map(cursor -> {
        final DimensionSelector selector1 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimt", "dimt"));
        final DimensionSelector selector2 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtarray1", "dimtarray1"));
        final DimensionSelector selector3 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtarray2", "dimtarray2"));
        final DimensionSelector selector4 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtnum_array", "dimtnum_array"));
        Map<String, Object> row = new HashMap<>();
        row.put("dimt", selector1.defaultGetObject());
        row.put("dimtarray1", selector2.defaultGetObject());
        row.put("dimtarray2", selector3.defaultGetObject());
        row.put("dimtnum_array", selector4.defaultGetObject());
        cursor.advance();
        return row;
    }).toList();
    Assert.assertEquals(1, transforms.size());
    Assert.assertEquals("bb", transforms.get(0).get("dimt"));
    Assert.assertEquals(ImmutableList.of("b", "b"), transforms.get(0).get("dimtarray1"));
    Assert.assertEquals(ImmutableList.of("anotherfoo", "arrayfoo"), transforms.get(0).get("dimtarray2"));
    Assert.assertEquals(ImmutableList.of("6.0", "7.0"), transforms.get(0).get("dimtnum_array"));
    Assert.assertEquals(DATASOURCE, segments.get(0).getDataSource());
    Assert.assertEquals(Intervals.of("2014/P1D"), segments.get(0).getInterval());
    Assert.assertEquals(NumberedShardSpec.class, segments.get(0).getShardSpec().getClass());
    Assert.assertEquals(0, segments.get(0).getShardSpec().getPartitionNum());
}
Also used : TaskReport(org.apache.druid.indexing.common.TaskReport) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) Arrays(java.util.Arrays) IndexSpec(org.apache.druid.segment.IndexSpec) Pair(org.apache.druid.java.util.common.Pair) Map(java.util.Map) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) AppenderatorsManager(org.apache.druid.segment.realtime.appenderator.AppenderatorsManager) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) IAE(org.apache.druid.java.util.common.IAE) InputFormat(org.apache.druid.data.input.InputFormat) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) Set(java.util.Set) NoopSegmentHandoffNotifierFactory(org.apache.druid.segment.realtime.plumber.NoopSegmentHandoffNotifierFactory) EqualsVerifier(nl.jqno.equalsverifier.EqualsVerifier) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) StandardCharsets(java.nio.charset.StandardCharsets) TaskState(org.apache.druid.indexer.TaskState) CountDownLatch(java.util.concurrent.CountDownLatch) PartitionIds(org.apache.druid.timeline.partition.PartitionIds) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) RowIngestionMetersFactory(org.apache.druid.segment.incremental.RowIngestionMetersFactory) SegmentLocalCacheManager(org.apache.druid.segment.loading.SegmentLocalCacheManager) SegmentId(org.apache.druid.timeline.SegmentId) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Granularity(org.apache.druid.java.util.common.granularity.Granularity) SegmentLoaderConfig(org.apache.druid.segment.loading.SegmentLoaderConfig) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) RunWith(org.junit.runner.RunWith) TaskStatus(org.apache.druid.indexer.TaskStatus) ArrayList(java.util.ArrayList) LinkedHashMap(java.util.LinkedHashMap) Interval(org.joda.time.Interval) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) HashPartitionFunction(org.apache.druid.timeline.partition.HashPartitionFunction) Before(org.junit.Before) BufferedWriter(java.io.BufferedWriter) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) EasyMock(org.easymock.EasyMock) File(java.io.File) Preconditions(com.google.common.base.Preconditions) Assert(org.junit.Assert) DataSchema(org.apache.druid.segment.indexing.DataSchema) CoreMatchers(org.hamcrest.CoreMatchers) ArbitraryGranularitySpec(org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec) IndexIOConfig(org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) Event(org.apache.druid.java.util.emitter.core.Event) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) TypeReference(com.fasterxml.jackson.core.type.TypeReference) Parameterized(org.junit.runners.Parameterized) ParseSpec(org.apache.druid.data.input.impl.ParseSpec) Sequence(org.apache.druid.java.util.common.guava.Sequence) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) LocalFirehoseFactory(org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory) ImmutableMap(com.google.common.collect.ImmutableMap) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) StringUtils(org.apache.druid.java.util.common.StringUtils) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) LockGranularity(org.apache.druid.indexing.common.LockGranularity) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) List(java.util.List) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) DataSegment(org.apache.druid.timeline.DataSegment) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) SegmentAllocateAction(org.apache.druid.indexing.common.actions.SegmentAllocateAction) Intervals(org.apache.druid.java.util.common.Intervals) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) HashSet(java.util.HashSet) ImmutableList(com.google.common.collect.ImmutableList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) Files(com.google.common.io.Files) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) DimensionSelector(org.apache.druid.segment.DimensionSelector) ExpectedException(org.junit.rules.ExpectedException) SegmentHandoffNotifier(org.apache.druid.segment.handoff.SegmentHandoffNotifier) NoopServiceEmitter(org.apache.druid.server.metrics.NoopServiceEmitter) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) VirtualColumns(org.apache.druid.segment.VirtualColumns) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) StorageLocationConfig(org.apache.druid.segment.loading.StorageLocationConfig) Granularities(org.apache.druid.java.util.common.granularity.Granularities) TimeUnit(java.util.concurrent.TimeUnit) Rule(org.junit.Rule) SingleDimensionPartitionsSpec(org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec) Cursor(org.apache.druid.segment.Cursor) SegmentCacheManager(org.apache.druid.segment.loading.SegmentCacheManager) IndexIO(org.apache.druid.segment.IndexIO) Collections(java.util.Collections) TemporaryFolder(org.junit.rules.TemporaryFolder) Cursor(org.apache.druid.segment.Cursor) DataSegment(org.apache.druid.timeline.DataSegment) BufferedWriter(java.io.BufferedWriter) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) DimensionSelector(org.apache.druid.segment.DimensionSelector) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) TransformSpec(org.apache.druid.segment.transform.TransformSpec) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) File(java.io.File) Map(java.util.Map) LinkedHashMap(java.util.LinkedHashMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) Test(org.junit.Test)

Example 3 with ExpressionTransform

use of org.apache.druid.segment.transform.ExpressionTransform in project druid by druid-io.

the class AppenderatorDriverRealtimeIndexTaskTest method testTransformSpec.

@Test(timeout = 60_000L)
public void testTransformSpec() throws Exception {
    expectPublishedSegments(2);
    final TransformSpec transformSpec = new TransformSpec(new SelectorDimFilter("dim1", "foo", null), ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil())));
    final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, transformSpec, true, 0, true, 0, 1);
    final ListenableFuture<TaskStatus> statusFuture = runTask(task);
    // Wait for firehose to show up, it starts off null.
    while (task.getFirehose() == null) {
        Thread.sleep(50);
    }
    final TestFirehose firehose = (TestFirehose) task.getFirehose();
    firehose.addRows(ImmutableList.of(ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "1"), ImmutableMap.of("t", now.minus(new Period("P1D")).getMillis(), "dim1", "foo", "met1", 2.0), ImmutableMap.of("t", now.getMillis(), "dim2", "bar", "met1", 2.0)));
    // Stop the firehose, this will drain out existing events.
    firehose.close();
    Collection<DataSegment> publishedSegments = awaitSegments();
    // Check metrics.
    Assert.assertEquals(2, task.getRowIngestionMeters().getProcessed());
    Assert.assertEquals(1, task.getRowIngestionMeters().getThrownAway());
    Assert.assertEquals(0, task.getRowIngestionMeters().getUnparseable());
    // Do some queries.
    Assert.assertEquals(2, sumMetric(task, null, "rows").longValue());
    Assert.assertEquals(2, sumMetric(task, new SelectorDimFilter("dim1t", "foofoo", null), "rows").longValue());
    if (NullHandling.replaceWithDefault()) {
        Assert.assertEquals(0, sumMetric(task, new SelectorDimFilter("dim1t", "barbar", null), "metric1").longValue());
    } else {
        Assert.assertNull(sumMetric(task, new SelectorDimFilter("dim1t", "barbar", null), "metric1"));
    }
    Assert.assertEquals(3, sumMetric(task, null, "met1").longValue());
    awaitHandoffs();
    for (DataSegment publishedSegment : publishedSegments) {
        Pair<Executor, Runnable> executorRunnablePair = handOffCallbacks.get(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()));
        Assert.assertNotNull(publishedSegment + " missing from handoff callbacks: " + handOffCallbacks, executorRunnablePair);
        // Simulate handoff.
        executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
    }
    handOffCallbacks.clear();
    // Wait for the task to finish.
    final TaskStatus taskStatus = statusFuture.get();
    Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
}
Also used : Period(org.joda.time.Period) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Executor(java.util.concurrent.Executor) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 4 with ExpressionTransform

use of org.apache.druid.segment.transform.ExpressionTransform in project druid by druid-io.

the class RealtimeIndexTaskTest method testTransformSpec.

@Test(timeout = 60_000L)
public void testTransformSpec() throws Exception {
    final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
    final TransformSpec transformSpec = new TransformSpec(new SelectorDimFilter("dim1", "foo", null), ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil())));
    final RealtimeIndexTask task = makeRealtimeTask(null, transformSpec, true, 0);
    final TaskToolbox taskToolbox = makeToolbox(task, mdc, tempFolder.newFolder());
    final ListenableFuture<TaskStatus> statusFuture = runTask(task, taskToolbox);
    final DataSegment publishedSegment;
    // Wait for firehose to show up, it starts off null.
    while (task.getFirehose() == null) {
        Thread.sleep(50);
    }
    final TestFirehose firehose = (TestFirehose) task.getFirehose();
    firehose.addRows(ImmutableList.of(ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "1"), ImmutableMap.of("t", now.minus(new Period("P1D")).getMillis(), "dim1", "foo", "met1", 2.0), ImmutableMap.of("t", now.getMillis(), "dim2", "bar", "met1", 2.0)));
    // Stop the firehose, this will drain out existing events.
    firehose.close();
    // Wait for publish.
    while (mdc.getPublished().isEmpty()) {
        Thread.sleep(50);
    }
    publishedSegment = Iterables.getOnlyElement(mdc.getPublished());
    // Check metrics.
    Assert.assertEquals(1, task.getMetrics().processed());
    Assert.assertEquals(2, task.getMetrics().thrownAway());
    Assert.assertEquals(0, task.getMetrics().unparseable());
    // Do some queries.
    Assert.assertEquals(1, sumMetric(task, null, "rows").longValue());
    Assert.assertEquals(1, sumMetric(task, new SelectorDimFilter("dim1t", "foofoo", null), "rows").longValue());
    if (NullHandling.replaceWithDefault()) {
        Assert.assertEquals(0, sumMetric(task, new SelectorDimFilter("dim1t", "barbar", null), "rows").longValue());
    } else {
        Assert.assertNull(sumMetric(task, new SelectorDimFilter("dim1t", "barbar", null), "rows"));
    }
    Assert.assertEquals(1, sumMetric(task, null, "met1").longValue());
    // Simulate handoff.
    for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
        final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
        Assert.assertEquals(new SegmentDescriptor(publishedSegment.getInterval(), publishedSegment.getVersion(), publishedSegment.getShardSpec().getPartitionNum()), entry.getKey());
        executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
    }
    handOffCallbacks.clear();
    // Wait for the task to finish.
    final TaskStatus taskStatus = statusFuture.get();
    Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
}
Also used : Period(org.joda.time.Period) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) TransformSpec(org.apache.druid.segment.transform.TransformSpec) TestFirehose(org.apache.druid.indexing.common.TestFirehose) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) Executor(java.util.concurrent.Executor) TestIndexerMetadataStorageCoordinator(org.apache.druid.indexing.test.TestIndexerMetadataStorageCoordinator) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) Pair(org.apache.druid.java.util.common.Pair) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 5 with ExpressionTransform

use of org.apache.druid.segment.transform.ExpressionTransform in project druid by druid-io.

the class IngestSegmentFirehoseFactoryTest method testTransformSpec.

@Test
public void testTransformSpec() throws IOException {
    Assert.assertEquals(MAX_SHARD_NUMBER.longValue(), SEGMENT_SET.size());
    Integer rowcount = 0;
    final TransformSpec transformSpec = new TransformSpec(new SelectorDimFilter(ColumnHolder.TIME_COLUMN_NAME, "1", null), ImmutableList.of(new ExpressionTransform(METRIC_FLOAT_NAME, METRIC_FLOAT_NAME + " * 10", ExprMacroTable.nil())));
    int skipped = 0;
    try (final Firehose firehose = factory.connect(transformSpec.decorate(rowParser), TMP_DIR)) {
        while (firehose.hasMore()) {
            InputRow row = firehose.nextRow();
            if (row == null) {
                skipped++;
                continue;
            }
            Assert.assertArrayEquals(new String[] { DIM_NAME }, row.getDimensions().toArray());
            Assert.assertArrayEquals(new String[] { DIM_VALUE }, row.getDimension(DIM_NAME).toArray());
            Assert.assertEquals(METRIC_LONG_VALUE.longValue(), row.getMetric(METRIC_LONG_NAME).longValue());
            Assert.assertEquals(METRIC_FLOAT_VALUE * 10, row.getMetric(METRIC_FLOAT_NAME).floatValue(), METRIC_FLOAT_VALUE * 0.0001);
            ++rowcount;
        }
    }
    Assert.assertEquals(90, skipped);
    Assert.assertEquals((int) MAX_ROWS, (int) rowcount);
}
Also used : SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) Firehose(org.apache.druid.data.input.Firehose) InputRow(org.apache.druid.data.input.InputRow) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Test(org.junit.Test)

Aggregations

ExpressionTransform (org.apache.druid.segment.transform.ExpressionTransform)17 Test (org.junit.Test)17 TransformSpec (org.apache.druid.segment.transform.TransformSpec)16 SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter)10 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)9 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)8 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)7 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)7 ImmutableMap (com.google.common.collect.ImmutableMap)5 Map (java.util.Map)5 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)5 DataSchema (org.apache.druid.segment.indexing.DataSchema)5 GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec)5 UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)5 SamplerResponse (org.apache.druid.client.indexing.SamplerResponse)4 SamplerResponseRow (org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow)4 InputFormat (org.apache.druid.data.input.InputFormat)4 CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat)4 JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat)4 TaskStatus (org.apache.druid.indexer.TaskStatus)4