Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io: class KafkaIndexTaskTest, method testRunWithTransformSpec.
@Test(timeout = 60_000L)
public void testRunWithTransformSpec() throws Exception {
  final KafkaIndexTask task = createTask(
      null,
      NEW_DATA_SCHEMA.withTransformSpec(
          new TransformSpec(
              new SelectorDimFilter("dim1", "b", null),
              ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil()))
          )
      ),
      new KafkaIndexTaskIOConfig(
          0,
          "sequence0",
          new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(0, 0L), ImmutableSet.of()),
          new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 5L)),
          kafkaServer.consumerProperties(),
          KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS,
          true,
          null,
          null,
          INPUT_FORMAT
      )
  );
  final ListenableFuture<TaskStatus> future = runTask(task);
  // Wait for the task to start reading
  while (task.getRunner().getStatus() != Status.READING) {
    Thread.sleep(10);
  }
  // Insert data
  insertData();
  // Wait for task to exit
  Assert.assertEquals(TaskState.SUCCESS, future.get().getStatusCode());
  // Check metrics
  Assert.assertEquals(1, task.getRunner().getRowIngestionMeters().getProcessed());
  Assert.assertEquals(0, task.getRunner().getRowIngestionMeters().getUnparseable());
  Assert.assertEquals(4, task.getRunner().getRowIngestionMeters().getThrownAway());
  // Check published metadata
  final List<SegmentDescriptor> publishedDescriptors = publishedDescriptors();
  assertEqualsExceptVersion(ImmutableList.of(sdd("2009/P1D", 0)), publishedDescriptors);
  Assert.assertEquals(
      new KafkaDataSourceMetadata(new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of(0, 5L))),
      newDataSchemaMetadata()
  );
  // Check segments in deep storage
  Assert.assertEquals(ImmutableList.of("b"), readSegmentColumn("dim1", publishedDescriptors.get(0)));
  Assert.assertEquals(ImmutableList.of("bb"), readSegmentColumn("dim1t", publishedDescriptors.get(0)));
}
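In this test, the SelectorDimFilter drops every record whose dim1 is not "b" (hence the four thrown-away rows), and the ExpressionTransform adds a derived dim1t column, which is why the published segment contains "b" and "bb". Below is a minimal standalone sketch of the same TransformSpec applied to a single in-memory row; the row contents and timestamp are illustrative, not taken from the test data.

TransformSpec spec = new TransformSpec(
    new SelectorDimFilter("dim1", "b", null),
    ImmutableList.of(new ExpressionTransform("dim1t", "concat(dim1,dim1)", ExprMacroTable.nil()))
);
Transformer transformer = spec.toTransformer();
// Illustrative input row; only dim1 matters for this sketch.
InputRow row = new MapBasedInputRow(
    DateTimes.of("2009-01-01"),
    ImmutableList.of("dim1"),
    ImmutableMap.of("dim1", "b")
);
// Rows that fail the filter come back as null; matching rows gain the derived dimension.
InputRow transformed = transformer.transform(row);
// transformed.getDimension("dim1t") evaluates to ["bb"]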
Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io: class KafkaIndexTaskTest, method testSerde.
@Test
public void testSerde() throws Exception {
  // This is both a serde test and a regression test for https://github.com/apache/druid/issues/7724.
  final KafkaIndexTask task = createTask(
      "taskid",
      NEW_DATA_SCHEMA.withTransformSpec(
          new TransformSpec(
              null,
              ImmutableList.of(new ExpressionTransform("beep", "nofunc()", ExprMacroTable.nil()))
          )
      ),
      new KafkaIndexTaskIOConfig(
          0,
          "sequence",
          new SeekableStreamStartSequenceNumbers<>(topic, ImmutableMap.of(), ImmutableSet.of()),
          new SeekableStreamEndSequenceNumbers<>(topic, ImmutableMap.of()),
          ImmutableMap.of(),
          KafkaSupervisorIOConfig.DEFAULT_POLL_TIMEOUT_MILLIS,
          true,
          null,
          null,
          INPUT_FORMAT
      )
  );
  final Task task1 = OBJECT_MAPPER.readValue(OBJECT_MAPPER.writeValueAsBytes(task), Task.class);
  Assert.assertEquals(task, task1);
}
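The round-trip through OBJECT_MAPPER checks that a task whose data schema carries a TransformSpec survives Jackson serialization intact. A smaller hedged sketch of the same idea, round-tripping just the TransformSpec; the mapper setup here is illustrative, and note that ExpressionTransform expects an ExprMacroTable to be available for injection during deserialization.

ObjectMapper mapper = new DefaultObjectMapper();
// Make ExprMacroTable injectable so ExpressionTransform can be deserialized.
mapper.setInjectableValues(new InjectableValues.Std().addValue(ExprMacroTable.class, ExprMacroTable.nil()));
TransformSpec spec = new TransformSpec(
    null,
    ImmutableList.of(new ExpressionTransform("beep", "nofunc()", ExprMacroTable.nil()))
);
TransformSpec roundTripped = mapper.readValue(mapper.writeValueAsBytes(spec), TransformSpec.class);
Assert.assertEquals(spec, roundTripped);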
Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io: class InputRowSchemasTest, method test_createColumnsFilter_normal.
@Test
public void test_createColumnsFilter_normal() {
  final ColumnsFilter columnsFilter = InputRowSchemas.createColumnsFilter(
      new TimestampSpec("ts", "auto", null),
      new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("foo"))),
      new TransformSpec(
          new SelectorDimFilter("bar", "x", null),
          ImmutableList.of(new ExpressionTransform("baz", "qux + 3", ExprMacroTable.nil()))
      ),
      new AggregatorFactory[]{new LongSumAggregatorFactory("billy", "bob")}
  );
  Assert.assertEquals(
      ColumnsFilter.inclusionBased(ImmutableSet.of("ts", "foo", "bar", "qux", "bob")),
      columnsFilter
  );
}
Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io: class InputRowSchemasTest, method test_createColumnsFilter_schemaless.
@Test
public void test_createColumnsFilter_schemaless() {
  final ColumnsFilter columnsFilter = InputRowSchemas.createColumnsFilter(
      new TimestampSpec("ts", "auto", null),
      DimensionsSpec.builder()
                    .setDimensionExclusions(ImmutableList.of("ts", "foo", "bar", "qux", "bob"))
                    .build(),
      new TransformSpec(
          new SelectorDimFilter("bar", "x", null),
          ImmutableList.of(new ExpressionTransform("baz", "qux + 3", ExprMacroTable.nil()))
      ),
      new AggregatorFactory[]{new LongSumAggregatorFactory("billy", "bob")}
  );
  Assert.assertEquals(ColumnsFilter.exclusionBased(ImmutableSet.of("foo")), columnsFilter);
}
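The contrast between these two tests: with a declared dimension list, createColumnsFilter returns an inclusion-based filter covering the timestamp column, the declared dimensions, and every input column referenced by the filter, transforms, and aggregators; in the schemaless case it returns an exclusion-based filter that keeps only the exclusions not otherwise required (here just "foo"). A hedged sketch of how such a filter answers per-column questions, assuming ColumnsFilter.apply(column) as the membership check:

ColumnsFilter inclusion = ColumnsFilter.inclusionBased(ImmutableSet.of("ts", "foo", "bar", "qux", "bob"));
Assert.assertTrue(inclusion.apply("qux"));    // read: referenced by the transform expression "qux + 3"
Assert.assertFalse(inclusion.apply("baz"));   // not read: "baz" is the transform's output, not an input

ColumnsFilter exclusion = ColumnsFilter.exclusionBased(ImmutableSet.of("foo"));
Assert.assertFalse(exclusion.apply("foo"));   // still excluded: nothing in the spec requires it
Assert.assertTrue(exclusion.apply("anything")); // schemaless: any other column is read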
Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io: class IngestSegmentFirehoseFactory, method connect.
@Override
public Firehose connect(InputRowParser inputRowParser, File temporaryDirectory) throws ParseException {
  log.debug("Connecting firehose: dataSource[%s], interval[%s], segmentIds[%s]", dataSource, interval, segmentIds);
  final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = getTimeline();
  // Download all segments locally.
  // Note: this requires enough local storage space to fit all of the segments, even though
  // IngestSegmentFirehose iterates over the segments in series. We may want to change this
  // to download files lazily, perhaps sharing code with PrefetchableTextFilesFirehoseFactory.
  final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
  Map<DataSegment, File> segmentFileMap = Maps.newLinkedHashMap();
  for (TimelineObjectHolder<String, DataSegment> holder : timeLineSegments) {
    for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
      final DataSegment segment = chunk.getObject();
      segmentFileMap.computeIfAbsent(segment, k -> {
        try {
          return segmentCacheManager.getSegmentFiles(segment);
        } catch (SegmentLoadingException e) {
          throw new RuntimeException(e);
        }
      });
    }
  }
  final List<String> dims = ReingestionTimelineUtils.getDimensionsToReingest(
      dimensions,
      inputRowParser.getParseSpec().getDimensionsSpec(),
      timeLineSegments
  );
  final List<String> metricsList = metrics == null
      ? ReingestionTimelineUtils.getUniqueMetrics(timeLineSegments)
      : metrics;
  final List<WindowedStorageAdapter> adapters = Lists.newArrayList(
      Iterables.concat(
          Iterables.transform(
              timeLineSegments,
              new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>() {
                @Override
                public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder) {
                  return Iterables.transform(
                      holder.getObject(),
                      new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>() {
                        @Override
                        public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input) {
                          final DataSegment segment = input.getObject();
                          try {
                            return new WindowedStorageAdapter(
                                new QueryableIndexStorageAdapter(
                                    indexIO.loadIndex(
                                        Preconditions.checkNotNull(
                                            segmentFileMap.get(segment),
                                            "File for segment %s",
                                            segment.getId()
                                        )
                                    )
                                ),
                                holder.getInterval()
                            );
                          } catch (IOException e) {
                            throw new RuntimeException(e);
                          }
                        }
                      }
                  );
                }
              }
          )
      )
  );
  final TransformSpec transformSpec = TransformSpec.fromInputRowParser(inputRowParser);
  return new IngestSegmentFirehose(adapters, transformSpec, dims, metricsList, dimFilter);
}
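Here connect() eagerly downloads every referenced segment, wraps each one in a WindowedStorageAdapter, and hands the list to IngestSegmentFirehose together with the TransformSpec recovered from the parser, so the transform and filter are re-applied as rows are re-read from the segments. A hedged sketch of the caller side, assuming the standard Firehose contract of hasMore()/nextRow()/close(); the factory, parser, temporary directory, and process() handler below are hypothetical placeholders.

void drain(IngestSegmentFirehoseFactory factory, InputRowParser parser, File tmpDir) throws IOException {
  try (Firehose firehose = factory.connect(parser, tmpDir)) {
    while (firehose.hasMore()) {
      // Rows have already been filtered/augmented per the TransformSpec.
      InputRow row = firehose.nextRow();
      if (row != null) {
        process(row); // hypothetical downstream handler
      }
    }
  }
}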