Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io.
From the class DataSchemaTest, method testWithDimensionSpec.
@Test
public void testWithDimensionSpec() {
  TimestampSpec tsSpec = Mockito.mock(TimestampSpec.class);
  GranularitySpec gSpec = Mockito.mock(GranularitySpec.class);
  DimensionsSpec oldDimSpec = Mockito.mock(DimensionsSpec.class);
  DimensionsSpec newDimSpec = Mockito.mock(DimensionsSpec.class);
  AggregatorFactory aggFactory = Mockito.mock(AggregatorFactory.class);
  Mockito.when(aggFactory.getName()).thenReturn("myAgg");
  TransformSpec transSpec = Mockito.mock(TransformSpec.class);
  Map<String, Object> parserMap = Mockito.mock(Map.class);
  Mockito.when(newDimSpec.withDimensionExclusions(ArgumentMatchers.any(Set.class)))
         .thenReturn(newDimSpec);
  DataSchema oldSchema = new DataSchema(
      "dataSource",
      tsSpec,
      oldDimSpec,
      new AggregatorFactory[]{aggFactory},
      gSpec,
      transSpec,
      parserMap,
      jsonMapper
  );
  DataSchema newSchema = oldSchema.withDimensionsSpec(newDimSpec);
  Assert.assertSame(oldSchema.getDataSource(), newSchema.getDataSource());
  Assert.assertSame(oldSchema.getTimestampSpec(), newSchema.getTimestampSpec());
  Assert.assertSame(newDimSpec, newSchema.getDimensionsSpec());
  Assert.assertSame(oldSchema.getAggregators(), newSchema.getAggregators());
  Assert.assertSame(oldSchema.getGranularitySpec(), newSchema.getGranularitySpec());
  Assert.assertSame(oldSchema.getTransformSpec(), newSchema.getTransformSpec());
  Assert.assertSame(oldSchema.getParserMap(), newSchema.getParserMap());
}
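The assertSame checks pin down a copy-with-one-field-replaced ("wither") pattern: withDimensionsSpec swaps in the new dimensions spec and shares every other field by reference, the TransformSpec included. A minimal sketch of that pattern, assuming an invented two-field Schema class (not Druid's actual DataSchema internals):

import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.segment.transform.TransformSpec;

// Hypothetical, simplified stand-in for the pattern the test verifies;
// the class name and fields are invented for illustration.
final class Schema {
  private final DimensionsSpec dimensionsSpec;
  private final TransformSpec transformSpec;

  Schema(DimensionsSpec dimensionsSpec, TransformSpec transformSpec) {
    this.dimensionsSpec = dimensionsSpec;
    this.transformSpec = transformSpec;
  }

  // Only the dimensions spec is replaced; the TransformSpec reference is
  // shared, which is exactly what the assertSame checks above rely on.
  Schema withDimensionsSpec(DimensionsSpec newSpec) {
    return new Schema(newSpec, this.transformSpec);
  }
}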
Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io.
From the class DataSchemaTest, method testTransformSpec.
@Test
public void testTransformSpec() {
  Map<String, Object> parserMap = jsonMapper.convertValue(
      new StringInputRowParser(
          new JSONParseSpec(
              new TimestampSpec("time", "auto", null),
              new DimensionsSpec(DimensionsSpec.getDefaultSchemas(
                  ImmutableList.of("time", "dimA", "dimB", "col2"))),
              null,
              null,
              null
          ),
          null
      ),
      JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT
  );
  DataSchema schema = new DataSchema(
      IdUtilsTest.VALID_ID_CHARS,
      parserMap,
      new AggregatorFactory[]{
          new DoubleSumAggregatorFactory("metric1", "col1"),
          new DoubleSumAggregatorFactory("metric2", "col2")
      },
      new ArbitraryGranularitySpec(Granularities.DAY, ImmutableList.of(Intervals.of("2014/2015"))),
      new TransformSpec(
          new SelectorDimFilter("dimA", "foo", null),
          ImmutableList.of(new ExpressionTransform("expr", "concat(dimA,dimA)", TestExprMacroTable.INSTANCE))
      ),
      jsonMapper
  );
  // Test hack that produces a StringInputRowParser.
  final StringInputRowParser parser = (StringInputRowParser) schema.getParser();
  final InputRow row1bb = parser
      .parseBatch(ByteBuffer.wrap("{\"time\":\"2000-01-01\",\"dimA\":\"foo\"}".getBytes(StandardCharsets.UTF_8)))
      .get(0);
  Assert.assertEquals(DateTimes.of("2000-01-01"), row1bb.getTimestamp());
  Assert.assertEquals("foo", row1bb.getRaw("dimA"));
  Assert.assertEquals("foofoo", row1bb.getRaw("expr"));
  final InputRow row1string = parser.parse("{\"time\":\"2000-01-01\",\"dimA\":\"foo\"}");
  Assert.assertEquals(DateTimes.of("2000-01-01"), row1string.getTimestamp());
  Assert.assertEquals("foo", row1string.getRaw("dimA"));
  Assert.assertEquals("foofoo", row1string.getRaw("expr"));
  final InputRow row2 = parser
      .parseBatch(ByteBuffer.wrap("{\"time\":\"2000-01-01\",\"dimA\":\"x\"}".getBytes(StandardCharsets.UTF_8)))
      .get(0);
  Assert.assertNull(row2);
}
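The TransformSpec here does two jobs at parse time: the SelectorDimFilter drops every row whose dimA is not "foo" (which is why row2 comes back null), and the ExpressionTransform adds the derived column expr. A minimal sketch of building that spec on its own, using only constructors that already appear in the test; the wrapper class name is invented, and TestExprMacroTable is a test-scoped helper assumed to be on the classpath:

import com.google.common.collect.ImmutableList;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.segment.transform.ExpressionTransform;
import org.apache.druid.segment.transform.TransformSpec;

class TransformSpecSketch {
  // Rows where dimA != "foo" are filtered out; surviving rows gain a
  // derived column "expr" equal to dimA concatenated with itself.
  static TransformSpec filterAndDerive() {
    return new TransformSpec(
        new SelectorDimFilter("dimA", "foo", null),
        ImmutableList.of(
            new ExpressionTransform("expr", "concat(dimA,dimA)", TestExprMacroTable.INSTANCE)
        )
    );
  }
}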
Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io.
From the class NewestSegmentFirstPolicyTest, method testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentFilter.
@Test
public void testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentFilter() throws Exception {
  NullHandling.initializeForTests();
  // Same indexSpec as what is set in the auto compaction config
  Map<String, Object> indexSpec = mapper.convertValue(new IndexSpec(), new TypeReference<Map<String, Object>>() {});
  // Same partitionsSpec as what is set in the auto compaction config
  PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(
      ClientCompactionTaskQueryTuningConfig.from(null, null));
  // Create segments that were compacted (CompactionState != null) and have
  // filter=SelectorDimFilter("dim1", "foo", null) for interval 2017-10-01T00:00:00/2017-10-02T00:00:00,
  // filter=SelectorDimFilter("dim1", "bar", null) for interval 2017-10-02T00:00:00/2017-10-03T00:00:00,
  // filter=null for interval 2017-10-03T00:00:00/2017-10-04T00:00:00 (filter was not set during the last compaction),
  // and transformSpec=null for interval 2017-10-04T00:00:00/2017-10-05T00:00:00 (transformSpec was not set during the last compaction).
  final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
      new SegmentGenerateSpec(
          Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"),
          new Period("P1D"),
          null,
          new CompactionState(
              partitionsSpec,
              null,
              null,
              mapper.readValue(
                  mapper.writeValueAsString(new TransformSpec(new SelectorDimFilter("dim1", "foo", null), null)),
                  new TypeReference<Map<String, Object>>() {}
              ),
              indexSpec,
              null
          )
      ),
      new SegmentGenerateSpec(
          Intervals.of("2017-10-02T00:00:00/2017-10-03T00:00:00"),
          new Period("P1D"),
          null,
          new CompactionState(
              partitionsSpec,
              null,
              null,
              mapper.readValue(
                  mapper.writeValueAsString(new TransformSpec(new SelectorDimFilter("dim1", "bar", null), null)),
                  new TypeReference<Map<String, Object>>() {}
              ),
              indexSpec,
              null
          )
      ),
      new SegmentGenerateSpec(
          Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"),
          new Period("P1D"),
          null,
          new CompactionState(
              partitionsSpec,
              null,
              null,
              mapper.readValue(
                  mapper.writeValueAsString(new TransformSpec(null, null)),
                  new TypeReference<Map<String, Object>>() {}
              ),
              indexSpec,
              null
          )
      ),
      new SegmentGenerateSpec(
          Intervals.of("2017-10-04T00:00:00/2017-10-05T00:00:00"),
          new Period("P1D"),
          null,
          new CompactionState(partitionsSpec, null, null, null, indexSpec, null)
      )
  );
  // Auto compaction config sets filter=SelectorDimFilter("dim1", "bar", null)
  CompactionSegmentIterator iterator = policy.reset(
      ImmutableMap.of(
          DATA_SOURCE,
          createCompactionConfig(
              130000,
              new Period("P0D"),
              null,
              null,
              new UserCompactionTaskTransformConfig(new SelectorDimFilter("dim1", "bar", null)),
              null
          )
      ),
      ImmutableMap.of(DATA_SOURCE, timeline),
      Collections.emptyMap()
  );
  // We should get interval 2017-10-04T00:00:00/2017-10-05T00:00:00, then interval
  // 2017-10-03T00:00:00/2017-10-04T00:00:00, then interval 2017-10-01T00:00:00/2017-10-02T00:00:00
  // (newest first); 2017-10-02T00:00:00/2017-10-03T00:00:00 already matches the configured filter.
  Assert.assertTrue(iterator.hasNext());
  List<DataSegment> expectedSegmentsToCompact = new ArrayList<>(
      timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-04T00:00:00/2017-10-05T00:00:00"), Partitions.ONLY_COMPLETE));
  Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
  Assert.assertTrue(iterator.hasNext());
  expectedSegmentsToCompact = new ArrayList<>(
      timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"), Partitions.ONLY_COMPLETE));
  Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
  Assert.assertTrue(iterator.hasNext());
  expectedSegmentsToCompact = new ArrayList<>(
      timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), Partitions.ONLY_COMPLETE));
  Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
  // No more
  Assert.assertFalse(iterator.hasNext());
  // Auto compaction config sets filter=null
  iterator = policy.reset(
      ImmutableMap.of(
          DATA_SOURCE,
          createCompactionConfig(130000, new Period("P0D"), null, null, new UserCompactionTaskTransformConfig(null), null)
      ),
      ImmutableMap.of(DATA_SOURCE, timeline),
      Collections.emptyMap()
  );
  // No more
  Assert.assertFalse(iterator.hasNext());
}
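A segment's persisted CompactionState acts as a fingerprint of its last compaction. The iterator offers a segment for recompaction when the configured transform differs from the recorded one, newest interval first; the 2017-10-02/2017-10-03 segment already carries the configured "bar" filter and is skipped, and a transform config with a null filter imposes no requirement at all, so the second reset yields nothing. A sketch of that comparison, with the method name and shape invented for illustration (this is not Druid's actual code):

import javax.annotation.Nullable;
import org.apache.druid.query.filter.DimFilter;

class RecompactionCheckSketch {
  // A segment needs recompaction only when a filter is configured and it
  // differs from the filter recorded in the segment's last CompactionState.
  static boolean needsRecompaction(@Nullable DimFilter configured, @Nullable DimFilter recorded) {
    if (configured == null) {
      return false; // null in the config means "no requirement", matching the second reset above
    }
    return !configured.equals(recorded); // DimFilter implementations define value equality
  }
}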
Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io.
From the class CompactionTask, method createDataSchema.
private static DataSchema createDataSchema(
    String dataSource,
    List<NonnullPair<QueryableIndex, DataSegment>> queryableIndexAndSegments,
    @Nullable DimensionsSpec dimensionsSpec,
    @Nullable ClientCompactionTaskTransformSpec transformSpec,
    @Nullable AggregatorFactory[] metricsSpec,
    @Nonnull ClientCompactionTaskGranularitySpec granularitySpec
) {
  // Check index metadata and decide which values to propagate (i.e. carry over)
  // for rollup and queryGranularity.
  final SettableSupplier<Boolean> rollup = new SettableSupplier<>();
  final SettableSupplier<Granularity> queryGranularity = new SettableSupplier<>();
  decideRollupAndQueryGranularityCarryOver(rollup, queryGranularity, queryableIndexAndSegments);
  final Interval totalInterval = JodaUtils.umbrellaInterval(
      queryableIndexAndSegments.stream().map(p -> p.rhs.getInterval()).collect(Collectors.toList()));
  final Granularity queryGranularityToUse;
  if (granularitySpec.getQueryGranularity() == null) {
    queryGranularityToUse = queryGranularity.get();
    log.info("Generate compaction task spec with the segments' original query granularity [%s]", queryGranularityToUse);
  } else {
    queryGranularityToUse = granularitySpec.getQueryGranularity();
    log.info("Generate compaction task spec with new query granularity overridden from input [%s]", queryGranularityToUse);
  }
  final GranularitySpec uniformGranularitySpec = new UniformGranularitySpec(
      Preconditions.checkNotNull(granularitySpec.getSegmentGranularity()),
      queryGranularityToUse,
      granularitySpec.isRollup() == null ? rollup.get() : granularitySpec.isRollup(),
      Collections.singletonList(totalInterval)
  );
  // Find unique dimensions.
  final DimensionsSpec finalDimensionsSpec = dimensionsSpec == null
      ? createDimensionsSpec(queryableIndexAndSegments)
      : dimensionsSpec;
  final AggregatorFactory[] finalMetricsSpec = metricsSpec == null
      ? createMetricsSpec(queryableIndexAndSegments)
      : metricsSpec;
  return new DataSchema(
      dataSource,
      new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, "millis", null),
      finalDimensionsSpec,
      finalMetricsSpec,
      uniformGranularitySpec,
      transformSpec == null ? null : new TransformSpec(transformSpec.getFilter(), null)
  );
}
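Note how the TransformSpec is rebuilt on the last line: only the filter from the client-side ClientCompactionTaskTransformSpec survives, and the transforms list is passed as null, presumably because any expression-derived columns already exist in the segments being recompacted. Isolated as a tiny helper for clarity (the method and class names are invented; only the ternary mirrors the source):

import javax.annotation.Nullable;
import org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec;
import org.apache.druid.segment.transform.TransformSpec;

class CompactionTransformSketch {
  // Mirrors the conversion at the end of createDataSchema: keep the filter, drop transforms.
  @Nullable
  static TransformSpec toTransformSpec(@Nullable ClientCompactionTaskTransformSpec clientSpec) {
    return clientSpec == null ? null : new TransformSpec(clientSpec.getFilter(), null);
  }
}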
Use of org.apache.druid.segment.transform.TransformSpec in project druid by druid-io.
From the class IndexTaskTest, method testTransformSpec.
@Test
public void testTransformSpec() throws Exception {
  File tmpDir = temporaryFolder.newFolder();
  File tmpFile = File.createTempFile("druid", "index", tmpDir);
  try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
    writer.write("2014-01-01T00:00:10Z,a,an|array,1|2|3,1\n");
    writer.write("2014-01-01T01:00:20Z,b,another|array,3|4,1\n");
    writer.write("2014-01-01T02:00:30Z,c,and|another,0|1,1\n");
  }
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(
      DimensionsSpec.getDefaultSchemas(
          Arrays.asList("ts", "dim", "dim_array", "dim_num_array", "dimt", "dimtarray1", "dimtarray2", "dimtnum_array")));
  final List<String> columns = Arrays.asList("ts", "dim", "dim_array", "dim_num_array", "val");
  final String listDelimiter = "|";
  final TransformSpec transformSpec = new TransformSpec(
      new SelectorDimFilter("dim", "b", null),
      ImmutableList.of(
          new ExpressionTransform("dimt", "concat(dim,dim)", ExprMacroTable.nil()),
          new ExpressionTransform("dimtarray1", "array(dim, dim)", ExprMacroTable.nil()),
          new ExpressionTransform("dimtarray2", "map(d -> concat(d, 'foo'), dim_array)", ExprMacroTable.nil()),
          new ExpressionTransform("dimtnum_array", "map(d -> d + 3, dim_num_array)", ExprMacroTable.nil())
      )
  );
  final IndexTuningConfig tuningConfig = createTuningConfigWithMaxRowsPerSegment(2, false);
  final IndexIngestionSpec indexIngestionSpec;
  if (useInputFormatApi) {
    indexIngestionSpec = createIngestionSpec(
        jsonMapper,
        tmpDir,
        DEFAULT_TIMESTAMP_SPEC,
        dimensionsSpec,
        new CsvInputFormat(columns, listDelimiter, null, false, 0),
        transformSpec,
        null,
        tuningConfig,
        false,
        false
    );
  } else {
    indexIngestionSpec = createIngestionSpec(
        jsonMapper,
        tmpDir,
        new CSVParseSpec(DEFAULT_TIMESTAMP_SPEC, dimensionsSpec, listDelimiter, columns, false, 0),
        transformSpec,
        null,
        tuningConfig,
        false,
        false
    );
  }
  IndexTask indexTask = new IndexTask(null, null, indexIngestionSpec, null);
  Assert.assertEquals(indexTask.getId(), indexTask.getGroupId());
  final List<DataSegment> segments = runTask(indexTask).rhs;
  Assert.assertEquals(1, segments.size());
  DataSegment segment = segments.get(0);
  final File segmentFile = segmentCacheManager.getSegmentFiles(segment);
  final WindowedStorageAdapter adapter = new WindowedStorageAdapter(
      new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile)),
      segment.getInterval()
  );
  final Sequence<Cursor> cursorSequence = adapter.getAdapter().makeCursors(
      null, segment.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null);
  final List<Map<String, Object>> transforms = cursorSequence.map(cursor -> {
    final DimensionSelector selector1 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimt", "dimt"));
    final DimensionSelector selector2 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtarray1", "dimtarray1"));
    final DimensionSelector selector3 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtarray2", "dimtarray2"));
    final DimensionSelector selector4 = cursor.getColumnSelectorFactory().makeDimensionSelector(new DefaultDimensionSpec("dimtnum_array", "dimtnum_array"));
    Map<String, Object> row = new HashMap<>();
    row.put("dimt", selector1.defaultGetObject());
    row.put("dimtarray1", selector2.defaultGetObject());
    row.put("dimtarray2", selector3.defaultGetObject());
    row.put("dimtnum_array", selector4.defaultGetObject());
    cursor.advance();
    return row;
  }).toList();
  Assert.assertEquals(1, transforms.size());
  Assert.assertEquals("bb", transforms.get(0).get("dimt"));
  Assert.assertEquals(ImmutableList.of("b", "b"), transforms.get(0).get("dimtarray1"));
  Assert.assertEquals(ImmutableList.of("anotherfoo", "arrayfoo"), transforms.get(0).get("dimtarray2"));
  Assert.assertEquals(ImmutableList.of("6.0", "7.0"), transforms.get(0).get("dimtnum_array"));
  Assert.assertEquals(DATASOURCE, segments.get(0).getDataSource());
  Assert.assertEquals(Intervals.of("2014/P1D"), segments.get(0).getInterval());
  Assert.assertEquals(NumberedShardSpec.class, segments.get(0).getShardSpec().getClass());
  Assert.assertEquals(0, segments.get(0).getShardSpec().getPartitionNum());
}
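The four assertions map one-to-one onto the expression transforms, applied to the single row that survives SelectorDimFilter("dim", "b", null), namely the second CSV line. Spelled out against that input row, per the test's own data and assertions:

// Surviving input row: 2014-01-01T01:00:20Z, b, another|array, 3|4, 1
//   dimt          = concat(dim,dim)                        -> "bb"
//   dimtarray1    = array(dim, dim)                        -> ["b", "b"]
//   dimtarray2    = map(d -> concat(d, 'foo'), dim_array)  -> ["anotherfoo", "arrayfoo"]
//   dimtnum_array = map(d -> d + 3, dim_num_array)         -> ["6.0", "7.0"] (read back as strings)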