Use of org.apache.druid.segment.transform.ExpressionTransform in project druid by druid-io.
From the class DataSchemaTest, the method testTransformSpec:
@Test
public void testTransformSpec()
{
  Map<String, Object> parserMap = jsonMapper.convertValue(
      new StringInputRowParser(
          new JSONParseSpec(
              new TimestampSpec("time", "auto", null),
              new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("time", "dimA", "dimB", "col2"))),
              null,
              null,
              null
          ),
          null
      ),
      JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT
  );

  DataSchema schema = new DataSchema(
      IdUtilsTest.VALID_ID_CHARS,
      parserMap,
      new AggregatorFactory[]{
          new DoubleSumAggregatorFactory("metric1", "col1"),
          new DoubleSumAggregatorFactory("metric2", "col2")
      },
      new ArbitraryGranularitySpec(Granularities.DAY, ImmutableList.of(Intervals.of("2014/2015"))),
      new TransformSpec(
          new SelectorDimFilter("dimA", "foo", null),
          ImmutableList.of(new ExpressionTransform("expr", "concat(dimA,dimA)", TestExprMacroTable.INSTANCE))
      ),
      jsonMapper
  );

  // Test hack that produces a StringInputRowParser.
  final StringInputRowParser parser = (StringInputRowParser) schema.getParser();

  final InputRow row1bb = parser.parseBatch(
      ByteBuffer.wrap("{\"time\":\"2000-01-01\",\"dimA\":\"foo\"}".getBytes(StandardCharsets.UTF_8))
  ).get(0);
  Assert.assertEquals(DateTimes.of("2000-01-01"), row1bb.getTimestamp());
  Assert.assertEquals("foo", row1bb.getRaw("dimA"));
  Assert.assertEquals("foofoo", row1bb.getRaw("expr"));

  final InputRow row1string = parser.parse("{\"time\":\"2000-01-01\",\"dimA\":\"foo\"}");
  Assert.assertEquals(DateTimes.of("2000-01-01"), row1string.getTimestamp());
  Assert.assertEquals("foo", row1string.getRaw("dimA"));
  Assert.assertEquals("foofoo", row1string.getRaw("expr"));

  // A row that fails the transform filter (dimA != "foo") parses to null.
  final InputRow row2 = parser.parseBatch(
      ByteBuffer.wrap("{\"time\":\"2000-01-01\",\"dimA\":\"x\"}".getBytes(StandardCharsets.UTF_8))
  ).get(0);
  Assert.assertNull(row2);
}
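As a standalone illustration of what the transform above computes, here is a minimal sketch (not part of the test; it assumes the Transform#getRowFunction() and RowFunction#eval(Row) interfaces in org.apache.druid.segment.transform behave as they do in this Druid version) that evaluates the same concat expression against one hand-built row:

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.data.input.MapBasedInputRow;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.segment.transform.ExpressionTransform;

public class ExpressionTransformSketch
{
  public static void main(String[] args)
  {
    // Same transform as in the test: derives "expr" from dimA.
    ExpressionTransform transform =
        new ExpressionTransform("expr", "concat(dimA,dimA)", TestExprMacroTable.INSTANCE);

    MapBasedInputRow row = new MapBasedInputRow(
        DateTimes.of("2000-01-01").getMillis(),
        ImmutableList.of("dimA"),
        ImmutableMap.of("dimA", "foo")
    );

    // Prints "foofoo", matching the row1bb assertion above.
    System.out.println(transform.getRowFunction().eval(row));
  }
}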
Use of org.apache.druid.segment.transform.ExpressionTransform in project druid by druid-io.
From the class IndexTaskTest, the method testTransformSpec:
@Test
public void testTransformSpec() throws Exception
{
  File tmpDir = temporaryFolder.newFolder();
  File tmpFile = File.createTempFile("druid", "index", tmpDir);

  try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
    writer.write("2014-01-01T00:00:10Z,a,an|array,1|2|3,1\n");
    writer.write("2014-01-01T01:00:20Z,b,another|array,3|4,1\n");
    writer.write("2014-01-01T02:00:30Z,c,and|another,0|1,1\n");
  }

  final DimensionsSpec dimensionsSpec = new DimensionsSpec(
      DimensionsSpec.getDefaultSchemas(
          Arrays.asList("ts", "dim", "dim_array", "dim_num_array", "dimt", "dimtarray1", "dimtarray2", "dimtnum_array")
      )
  );
  final List<String> columns = Arrays.asList("ts", "dim", "dim_array", "dim_num_array", "val");
  final String listDelimiter = "|";

  final TransformSpec transformSpec = new TransformSpec(
      new SelectorDimFilter("dim", "b", null),
      ImmutableList.of(
          new ExpressionTransform("dimt", "concat(dim,dim)", ExprMacroTable.nil()),
          new ExpressionTransform("dimtarray1", "array(dim, dim)", ExprMacroTable.nil()),
          new ExpressionTransform("dimtarray2", "map(d -> concat(d, 'foo'), dim_array)", ExprMacroTable.nil()),
          new ExpressionTransform("dimtnum_array", "map(d -> d + 3, dim_num_array)", ExprMacroTable.nil())
      )
  );

  final IndexTuningConfig tuningConfig = createTuningConfigWithMaxRowsPerSegment(2, false);

  final IndexIngestionSpec indexIngestionSpec;
  if (useInputFormatApi) {
    indexIngestionSpec = createIngestionSpec(
        jsonMapper,
        tmpDir,
        DEFAULT_TIMESTAMP_SPEC,
        dimensionsSpec,
        new CsvInputFormat(columns, listDelimiter, null, false, 0),
        transformSpec,
        null,
        tuningConfig,
        false,
        false
    );
  } else {
    indexIngestionSpec = createIngestionSpec(
        jsonMapper,
        tmpDir,
        new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, listDelimiter, columns, false, 0),
        transformSpec,
        null,
        tuningConfig,
        false,
        false
    );
  }

  IndexTask indexTask = new IndexTask(null, null, indexIngestionSpec, null);
  Assert.assertEquals(indexTask.getId(), indexTask.getGroupId());

  final List<DataSegment> segments = runTask(indexTask).rhs;
  Assert.assertEquals(1, segments.size());

  DataSegment segment = segments.get(0);
  final File segmentFile = segmentCacheManager.getSegmentFiles(segment);
  final WindowedStorageAdapter adapter = new WindowedStorageAdapter(
      new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile)),
      segment.getInterval()
  );
  final Sequence<Cursor> cursorSequence = adapter.getAdapter().makeCursors(
      null,
      segment.getInterval(),
      VirtualColumns.EMPTY,
      Granularities.ALL,
      false,
      null
  );
  final List<Map<String, Object>> transforms = cursorSequence
      .map(cursor -> {
        final DimensionSelector selector1 =
            cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimt", "dimt"));
        final DimensionSelector selector2 =
            cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtarray1", "dimtarray1"));
        final DimensionSelector selector3 =
            cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtarray2", "dimtarray2"));
        final DimensionSelector selector4 =
            cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtnum_array", "dimtnum_array"));

        Map<String, Object> row = new HashMap<>();
        row.put("dimt", selector1.defaultGetObject());
        row.put("dimtarray1", selector2.defaultGetObject());
        row.put("dimtarray2", selector3.defaultGetObject());
        row.put("dimtnum_array", selector4.defaultGetObject());
        cursor.advance();
        return row;
      })
      .toList();

  // Only the "b" row passes the selector filter, so a single transformed row remains.
  Assert.assertEquals(1, transforms.size());
  Assert.assertEquals("bb", transforms.get(0).get("dimt"));
  Assert.assertEquals(ImmutableList.of("b", "b"), transforms.get(0).get("dimtarray1"));
  Assert.assertEquals(ImmutableList.of("anotherfoo", "arrayfoo"), transforms.get(0).get("dimtarray2"));
  Assert.assertEquals(ImmutableList.of("6.0", "7.0"), transforms.get(0).get("dimtnum_array"));

  Assert.assertEquals(DATASOURCE, segments.get(0).getDataSource());
  Assert.assertEquals(Intervals.of("2014/P1D"), segments.get(0).getInterval());
  Assert.assertEquals(NumberedShardSpec.class, segments.get(0).getShardSpec().getClass());
  Assert.assertEquals(0, segments.get(0).getShardSpec().getPartitionNum());
}
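The filter-then-derive behavior asserted on the segment can also be observed directly on rows. A minimal sketch, reusing the transformSpec built in the test and assuming TransformSpec#toTransformer() and Transformer#transform(InputRow) are available as in this Druid version (row values are hand-built rather than read from the CSV file):

Transformer transformer = transformSpec.toTransformer();

// The "a" row fails the selector filter (dim must be "b") and maps to null.
InputRow rejected = transformer.transform(new MapBasedInputRow(
    DateTimes.of("2014-01-01T00:00:10Z").getMillis(),
    ImmutableList.of("dim", "dim_array", "dim_num_array"),
    ImmutableMap.of("dim", "a", "dim_array", ImmutableList.of("an", "array"), "dim_num_array", ImmutableList.of(1L, 2L, 3L))
));
// rejected == null

// The "b" row survives and carries the derived columns.
InputRow accepted = transformer.transform(new MapBasedInputRow(
    DateTimes.of("2014-01-01T01:00:20Z").getMillis(),
    ImmutableList.of("dim", "dim_array", "dim_num_array"),
    ImmutableMap.of("dim", "b", "dim_array", ImmutableList.of("another", "array"), "dim_num_array", ImmutableList.of(3L, 4L))
));
// accepted.getRaw("dimt")       -> "bb"
// accepted.getRaw("dimtarray2") -> ["anotherfoo", "arrayfoo"]
// accepted.getRaw("dimtnum_array") evaluates to [6, 7]; the segment stores the
// values as the strings "6.0"/"7.0", per the assertion above.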
Use of org.apache.druid.segment.transform.ExpressionTransform in project druid by druid-io.
From the class AppenderatorDriverRealtimeIndexTaskTest, the method testTransformSpec:
@Test(timeout = 60_000L)
public void testTransformSpec() throws Exception
{
  expectPublishedSegments(2);

  final TransformSpec transformSpec = new TransformSpec(
      new SelectorDimFilter("dim1", "foo", null),
      ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil()))
  );
  final AppenderatorDriverRealtimeIndexTask task = makeRealtimeTask(null, transformSpec, true, 0, true, 0, 1);
  final ListenableFuture<TaskStatus> statusFuture = runTask(task);

  // Wait for firehose to show up, it starts off null.
  while (task.getFirehose() == null) {
    Thread.sleep(50);
  }

  final TestFirehose firehose = (TestFirehose) task.getFirehose();
  firehose.addRows(
      ImmutableList.of(
          ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "1"),
          ImmutableMap.of("t", now.minus(new Period("P1D")).getMillis(), "dim1", "foo", "met1", 2.0),
          ImmutableMap.of("t", now.getMillis(), "dim2", "bar", "met1", 2.0)
      )
  );

  // Stop the firehose, this will drain out existing events.
  firehose.close();

  Collection<DataSegment> publishedSegments = awaitSegments();

  // Check metrics.
  Assert.assertEquals(2, task.getRowIngestionMeters().getProcessed());
  Assert.assertEquals(1, task.getRowIngestionMeters().getThrownAway());
  Assert.assertEquals(0, task.getRowIngestionMeters().getUnparseable());

  // Do some queries.
  Assert.assertEquals(2, sumMetric(task, null, "rows").longValue());
  Assert.assertEquals(2, sumMetric(task, new SelectorDimFilter("dim1t", "foofoo", null), "rows").longValue());
  if (NullHandling.replaceWithDefault()) {
    Assert.assertEquals(0, sumMetric(task, new SelectorDimFilter("dim1t", "barbar", null), "metric1").longValue());
  } else {
    Assert.assertNull(sumMetric(task, new SelectorDimFilter("dim1t", "barbar", null), "metric1"));
  }
  Assert.assertEquals(3, sumMetric(task, null, "met1").longValue());

  awaitHandoffs();

  for (DataSegment publishedSegment : publishedSegments) {
    Pair<Executor, Runnable> executorRunnablePair = handOffCallbacks.get(
        new SegmentDescriptor(
            publishedSegment.getInterval(),
            publishedSegment.getVersion(),
            publishedSegment.getShardSpec().getPartitionNum()
        )
    );
    Assert.assertNotNull(
        publishedSegment + " missing from handoff callbacks: " + handOffCallbacks,
        executorRunnablePair
    );

    // Simulate handoff.
    executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
  }
  handOffCallbacks.clear();

  // Wait for the task to finish.
  final TaskStatus taskStatus = statusFuture.get();
  Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
}
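The meter assertions above can be reproduced in miniature: three rows go in, and only the row lacking dim1 == "foo" is rejected by the transform's filter. A sketch of that accounting, again treating Transformer as an assumed API and reusing transformSpec and now from the test:

Transformer transformer = transformSpec.toTransformer();
int processed = 0;
int thrownAway = 0;
for (Map<String, Object> event : ImmutableList.<Map<String, Object>>of(
    ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "1"),
    ImmutableMap.of("t", now.minus(new Period("P1D")).getMillis(), "dim1", "foo", "met1", 2.0),
    ImmutableMap.of("t", now.getMillis(), "dim2", "bar", "met1", 2.0))) {
  InputRow transformed = transformer.transform(
      new MapBasedInputRow((Long) event.get("t"), ImmutableList.of("dim1", "dim2"), event)
  );
  if (transformed == null) {
    thrownAway++;  // the dim2-only row: the selector filter rejects it
  } else {
    processed++;   // both "foo" rows; each gets dim1t = "foofoo"
  }
}
// processed == 2, thrownAway == 1, matching the RowIngestionMeters assertions.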
Use of org.apache.druid.segment.transform.ExpressionTransform in project druid by druid-io.
From the class RealtimeIndexTaskTest, the method testTransformSpec:
@Test(timeout = 60_000L)
public void testTransformSpec() throws Exception
{
  final TestIndexerMetadataStorageCoordinator mdc = new TestIndexerMetadataStorageCoordinator();
  final TransformSpec transformSpec = new TransformSpec(
      new SelectorDimFilter("dim1", "foo", null),
      ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil()))
  );
  final RealtimeIndexTask task = makeRealtimeTask(null, transformSpec, true, 0);
  final TaskToolbox taskToolbox = makeToolbox(task, mdc, tempFolder.newFolder());
  final ListenableFuture<TaskStatus> statusFuture = runTask(task, taskToolbox);
  final DataSegment publishedSegment;

  // Wait for firehose to show up, it starts off null.
  while (task.getFirehose() == null) {
    Thread.sleep(50);
  }

  final TestFirehose firehose = (TestFirehose) task.getFirehose();
  firehose.addRows(
      ImmutableList.of(
          ImmutableMap.of("t", now.getMillis(), "dim1", "foo", "met1", "1"),
          ImmutableMap.of("t", now.minus(new Period("P1D")).getMillis(), "dim1", "foo", "met1", 2.0),
          ImmutableMap.of("t", now.getMillis(), "dim2", "bar", "met1", 2.0)
      )
  );

  // Stop the firehose, this will drain out existing events.
  firehose.close();

  // Wait for publish.
  while (mdc.getPublished().isEmpty()) {
    Thread.sleep(50);
  }
  publishedSegment = Iterables.getOnlyElement(mdc.getPublished());

  // Check metrics.
  Assert.assertEquals(1, task.getMetrics().processed());
  Assert.assertEquals(2, task.getMetrics().thrownAway());
  Assert.assertEquals(0, task.getMetrics().unparseable());

  // Do some queries.
  Assert.assertEquals(1, sumMetric(task, null, "rows").longValue());
  Assert.assertEquals(1, sumMetric(task, new SelectorDimFilter("dim1t", "foofoo", null), "rows").longValue());
  if (NullHandling.replaceWithDefault()) {
    Assert.assertEquals(0, sumMetric(task, new SelectorDimFilter("dim1t", "barbar", null), "rows").longValue());
  } else {
    Assert.assertNull(sumMetric(task, new SelectorDimFilter("dim1t", "barbar", null), "rows"));
  }
  Assert.assertEquals(1, sumMetric(task, null, "met1").longValue());

  // Simulate handoff.
  for (Map.Entry<SegmentDescriptor, Pair<Executor, Runnable>> entry : handOffCallbacks.entrySet()) {
    final Pair<Executor, Runnable> executorRunnablePair = entry.getValue();
    Assert.assertEquals(
        new SegmentDescriptor(
            publishedSegment.getInterval(),
            publishedSegment.getVersion(),
            publishedSegment.getShardSpec().getPartitionNum()
        ),
        entry.getKey()
    );
    executorRunnablePair.lhs.execute(executorRunnablePair.rhs);
  }
  handOffCallbacks.clear();

  // Wait for the task to finish.
  final TaskStatus taskStatus = statusFuture.get();
  Assert.assertEquals(TaskState.SUCCESS, taskStatus.getStatusCode());
}
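Compared with the Appenderator-based test above, this task reports thrownAway == 2: besides the row without dim1 == "foo", the day-old row is also rejected here (presumably because it falls outside the realtime task's accepted window), leaving one processed row. The null-handling branch can also be written compactly; a hedged equivalent, assuming the test's sumMetric helper returns a boxed, nullable Long:

// Aggregating zero matching rows yields 0 in default-value mode, null in
// SQL-compatible mode.
Long barbarRows = sumMetric(task, new SelectorDimFilter("dim1t", "barbar", null), "rows");
Long expected = NullHandling.replaceWithDefault() ? (Long) 0L : null;
Assert.assertEquals(expected, barbarRows);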
Use of org.apache.druid.segment.transform.ExpressionTransform in project druid by druid-io.
From the class IngestSegmentFirehoseFactoryTest, the method testTransformSpec:
@Test
public void testTransformSpec() throws IOException
{
  Assert.assertEquals(MAX_SHARD_NUMBER.longValue(), SEGMENT_SET.size());

  Integer rowcount = 0;
  final TransformSpec transformSpec = new TransformSpec(
      new SelectorDimFilter(ColumnHolder.TIME_COLUMN_NAME, "1", null),
      ImmutableList.of(
          new ExpressionTransform(METRIC_FLOAT_NAME, METRIC_FLOAT_NAME + " * 10", ExprMacroTable.nil())
      )
  );
  int skipped = 0;

  try (final Firehose firehose = factory.connect(transformSpec.decorate(rowParser), TMP_DIR)) {
    while (firehose.hasMore()) {
      InputRow row = firehose.nextRow();
      if (row == null) {
        skipped++;
        continue;
      }
      Assert.assertArrayEquals(new String[]{DIM_NAME}, row.getDimensions().toArray());
      Assert.assertArrayEquals(new String[]{DIM_VALUE}, row.getDimension(DIM_NAME).toArray());
      Assert.assertEquals(METRIC_LONG_VALUE.longValue(), row.getMetric(METRIC_LONG_NAME).longValue());
      Assert.assertEquals(
          METRIC_FLOAT_VALUE * 10,
          row.getMetric(METRIC_FLOAT_NAME).floatValue(),
          METRIC_FLOAT_VALUE * 0.0001
      );
      ++rowcount;
    }
  }
  Assert.assertEquals(90, skipped);
  Assert.assertEquals((int) MAX_ROWS, (int) rowcount);
}
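TransformSpec#decorate, used in the connect call above, is the general hook for applying transforms during ingestion: it wraps any InputRowParser so that every parsed row is filtered and augmented before the caller sees it. A minimal sketch (makeSomeParser is a hypothetical placeholder, not one of the test's fixtures):

// Hypothetical: any InputRowParser<Map<String, Object>> will do here.
InputRowParser<Map<String, Object>> baseParser = makeSomeParser();

TransformSpec spec = new TransformSpec(
    new SelectorDimFilter(ColumnHolder.TIME_COLUMN_NAME, "1", null),
    ImmutableList.of(
        // Overwrites the metric column in place, exactly as the test does.
        new ExpressionTransform(METRIC_FLOAT_NAME, METRIC_FLOAT_NAME + " * 10", ExprMacroTable.nil())
    )
);

// The decorated parser is a drop-in replacement: rows failing the __time filter
// come back as null (counted as "skipped" above), and surviving rows carry the
// scaled metric.
InputRowParser<Map<String, Object>> decorated = spec.decorate(baseParser);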