Use of org.apache.druid.data.input.InputSource in project druid by druid-io.
From the class CompactionTaskTest, method assertIngestionSchema:
private void assertIngestionSchema(
    List<ParallelIndexIngestionSpec> ingestionSchemas,
    List<DimensionsSpec> expectedDimensionsSpecs,
    List<AggregatorFactory> expectedMetricsSpec,
    List<Interval> expectedSegmentIntervals,
    CompactionTask.CompactionTuningConfig expectedTuningConfig,
    Granularity expectedSegmentGranularity,
    Granularity expectedQueryGranularity,
    boolean expectedDropExisting
)
{
  Preconditions.checkArgument(
      ingestionSchemas.size() == expectedDimensionsSpecs.size(),
      "ingestionSchemas.size()[%s] should be the same as expectedDimensionsSpecs.size()[%s]",
      ingestionSchemas.size(),
      expectedDimensionsSpecs.size()
  );

  for (int i = 0; i < ingestionSchemas.size(); i++) {
    final ParallelIndexIngestionSpec ingestionSchema = ingestionSchemas.get(i);
    final DimensionsSpec expectedDimensionsSpec = expectedDimensionsSpecs.get(i);

    // assert dataSchema
    final DataSchema dataSchema = ingestionSchema.getDataSchema();
    Assert.assertEquals(DATA_SOURCE, dataSchema.getDataSource());
    Assert.assertEquals(new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, "millis", null), dataSchema.getTimestampSpec());
    Assert.assertEquals(
        new HashSet<>(expectedDimensionsSpec.getDimensions()),
        new HashSet<>(dataSchema.getDimensionsSpec().getDimensions())
    );

    // metrics
    Assert.assertEquals(expectedMetricsSpec, Arrays.asList(dataSchema.getAggregators()));
    Assert.assertEquals(
        new UniformGranularitySpec(expectedSegmentGranularity, expectedQueryGranularity, false, Collections.singletonList(expectedSegmentIntervals.get(i))),
        dataSchema.getGranularitySpec()
    );

    // assert ioConfig
    final ParallelIndexIOConfig ioConfig = ingestionSchema.getIOConfig();
    Assert.assertFalse(ioConfig.isAppendToExisting());
    Assert.assertEquals(expectedDropExisting, ioConfig.isDropExisting());
    final InputSource inputSource = ioConfig.getInputSource();
    Assert.assertTrue(inputSource instanceof DruidInputSource);
    final DruidInputSource druidInputSource = (DruidInputSource) inputSource;
    Assert.assertEquals(DATA_SOURCE, druidInputSource.getDataSource());
    Assert.assertEquals(expectedSegmentIntervals.get(i), druidInputSource.getInterval());
    Assert.assertNull(druidInputSource.getDimFilter());

    // assert tuningConfig
    Assert.assertEquals(expectedTuningConfig, ingestionSchema.getTuningConfig());
  }
}
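The helper above follows a common test pattern: validate that two parallel lists have equal sizes, then walk them in lockstep and assert pair by pair. A minimal, self-contained sketch of that pattern, where the class ZipAssertSketch, the method zipAssert, and the sample lists are all illustrative names and not part of the Druid test:

import java.util.List;
import java.util.function.BiConsumer;

public class ZipAssertSketch
{
  // Applies an assertion to each pair drawn from two parallel lists,
  // failing fast if the lists are not the same size.
  static <A, B> void zipAssert(List<A> actuals, List<B> expecteds, BiConsumer<A, B> assertion)
  {
    if (actuals.size() != expecteds.size()) {
      throw new IllegalArgumentException(
          "actuals.size()[" + actuals.size() + "] should be the same as expecteds.size()[" + expecteds.size() + "]"
      );
    }
    for (int i = 0; i < actuals.size(); i++) {
      assertion.accept(actuals.get(i), expecteds.get(i));
    }
  }

  public static void main(String[] args)
  {
    zipAssert(
        List.of("a", "b"),
        List.of("a", "b"),
        (actual, expected) -> {
          if (!actual.equals(expected)) {
            throw new AssertionError("expected " + expected + " but got " + actual);
          }
        }
    );
    System.out.println("all pairs matched");
  }
}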
Use of org.apache.druid.data.input.InputSource in project druid by druid-io.
From the class SeekableStreamSamplerSpec, method sample:
@Override
public SamplerResponse sample()
{
  final InputSource inputSource;
  final InputFormat inputFormat;
  if (dataSchema.getParser() != null) {
    // Legacy parser-based spec: adapt the firehose factory to the InputSource API.
    // The parser carries its own format, so inputFormat stays null.
    inputSource = new FirehoseFactoryToInputSourceAdaptor(new SeekableStreamSamplerFirehoseFactory(), dataSchema.getParser());
    inputFormat = null;
  } else {
    RecordSupplier<PartitionIdType, SequenceOffsetType, RecordType> recordSupplier;
    try {
      recordSupplier = createRecordSupplier();
    }
    catch (Exception e) {
      throw new SamplerException(e, "Unable to create RecordSupplier: %s", Throwables.getRootCause(e).getMessage());
    }
    inputSource = new RecordSupplierInputSource<>(ioConfig.getStream(), recordSupplier, ioConfig.isUseEarliestSequenceNumber());
    inputFormat = Preconditions.checkNotNull(ioConfig.getInputFormat(), "[spec.ioConfig.inputFormat] is required");
  }
  return inputSourceSampler.sample(inputSource, inputFormat, dataSchema, samplerConfig);
}
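The method chooses between two input paths: a legacy parser-based spec is adapted to the InputSource API, while the modern path builds a stream-backed source and requires an explicit inputFormat. A schematic, self-contained model of that selection logic; every type and name here is a stand-in for illustration, not a Druid class:

import java.util.Objects;

public class SourceSelectionSketch
{
  interface InputSource {}
  record LegacySource(String parser) implements InputSource {}
  record StreamSource(String stream, boolean useEarliestOffset) implements InputSource {}

  record Choice(InputSource source, String inputFormat) {}

  // Mirrors the branch in sample(): a non-null legacy parser wins and carries its
  // own format (inputFormat stays null); otherwise the stream path requires one.
  static Choice chooseSource(String legacyParser, String stream, String inputFormat)
  {
    if (legacyParser != null) {
      return new Choice(new LegacySource(legacyParser), null);
    }
    return new Choice(
        new StreamSource(stream, true),
        Objects.requireNonNull(inputFormat, "[spec.ioConfig.inputFormat] is required")
    );
  }

  public static void main(String[] args)
  {
    System.out.println(chooseSource(null, "my-topic", "json"));
    System.out.println(chooseSource("legacy-string-parser", null, null));
  }
}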
Use of org.apache.druid.data.input.InputSource in project druid by druid-io.
From the class InputSourceSamplerTest, method testWithRollup:
@Test
public void testWithRollup() throws IOException
{
  final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
  final AggregatorFactory[] aggregatorFactories = {new LongSumAggregatorFactory("met1", "met1")};
  final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
  final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, null);
  final InputSource inputSource = createInputSource(getTestRows(), dataSchema);
  final InputFormat inputFormat = createInputFormat();

  SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);

  // 6 rows read, 5 parsed; rollup merges the 5 parsed rows into 3, and the
  // unparseable row is reported as a fourth entry in the response data.
  Assert.assertEquals(6, response.getNumRowsRead());
  Assert.assertEquals(5, response.getNumRowsIndexed());
  Assert.assertEquals(4, response.getData().size());

  List<SamplerResponseRow> data = response.getData();
  assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 6L).build(), null, null), data.get(0));
  assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(3), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo2").put("dim2", null).put("met1", 4L).build(), null, null), data.get(1));
  assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(4), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", "bar").put("met1", 5L).build(), null, null), data.get(2));
  assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()), data.get(3));
}
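With rollup enabled, rows that share a query-granularity-truncated timestamp and identical dimension values are merged and met1 is summed: the first three parsed rows collapse into one row with met1 = 1 + 2 + 3 = 6. A standalone sketch of that grouping, using illustrative row data shaped like the fixture rather than the fixture itself:

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class RollupSketch
{
  record Row(long time, String dim1, long met1) {}

  public static void main(String[] args)
  {
    long hour = 3_600_000L;
    List<Row> rows = List.of(
        new Row(1555934400000L, "foo", 1),
        new Row(1555934400000L, "foo", 2),
        new Row(1555934460000L, "foo", 3),   // one minute later, same hour after truncation
        new Row(1555934400000L, "foo2", 4)
    );

    // Group by (truncated time, dimensions) and sum the metric, as rollup does.
    Map<String, Long> rolledUp = new LinkedHashMap<>();
    for (Row row : rows) {
      long bucket = (row.time() / hour) * hour;  // truncate to HOUR query granularity
      rolledUp.merge(bucket + "|" + row.dim1(), row.met1(), Long::sum);
    }
    rolledUp.forEach((key, sum) -> System.out.println(key + " -> met1=" + sum));
    // 1555934400000|foo  -> met1=6
    // 1555934400000|foo2 -> met1=4
  }
}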
Use of org.apache.druid.data.input.InputSource in project druid by druid-io.
From the class InputSourceSamplerTest, method testWithDimensionSpec:
@Test
public void testWithDimensionSpec() throws IOException
{
  final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(
      ImmutableList.of(StringDimensionSchema.create("dim1"), StringDimensionSchema.create("met1"))
  );
  final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, null, null, null);
  final InputSource inputSource = createInputSource(getTestRows(), dataSchema);
  final InputFormat inputFormat = createInputFormat();

  SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);

  Assert.assertEquals(6, response.getNumRowsRead());
  Assert.assertEquals(5, response.getNumRowsIndexed());
  Assert.assertEquals(6, response.getData().size());

  List<SamplerResponseRow> data = response.getData();
  assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("met1", "1").build(), null, null), data.get(0));
  assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(1), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("met1", "2").build(), null, null), data.get(1));
  assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(2), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934460000L).put("dim1", "foo").put("met1", "3").build(), null, null), data.get(2));
  assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(3), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo2").put("met1", "4").build(), null, null), data.get(3));
  assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(4), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("met1", "5").build(), null, null), data.get(4));
  assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()), data.get(5));
}
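In contrast to testWithRollup, this schema declares explicit string dimensions and no aggregators, so nothing is merged: each of the five parsed rows appears individually, and met1 is carried through as a string rather than summed. A small illustrative sketch of that projection behavior; the class name, field names, and sample rows are hypothetical:

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class ProjectionSketch
{
  public static void main(String[] args)
  {
    // Explicitly declared dimensions act as a projection: only these keys survive.
    List<String> declaredDims = List.of("dim1", "met1");
    List<Map<String, String>> rawRows = List.of(
        Map.of("dim1", "foo", "dim2", "x", "met1", "1"),
        Map.of("dim1", "foo", "dim2", "y", "met1", "2")
    );

    List<Map<String, String>> projected = rawRows.stream()
        .map(row -> declaredDims.stream()
            .filter(row::containsKey)
            .collect(Collectors.toMap(dim -> dim, row::get)))
        .collect(Collectors.toList());

    projected.forEach(System.out::println);  // met1 stays a string; no rows merge
  }
}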
Use of org.apache.druid.data.input.InputSource in project druid by druid-io.
From the class InputSourceSamplerTest, method testWithFilter:
@Test
public void testWithFilter() throws IOException
{
  final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
  final TransformSpec transformSpec = new TransformSpec(new SelectorDimFilter("dim1", "foo", null), null);
  final AggregatorFactory[] aggregatorFactories = {new LongSumAggregatorFactory("met1", "met1")};
  final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
  final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, transformSpec);
  final InputSource inputSource = createInputSource(getTestRows(), dataSchema);
  final InputFormat inputFormat = createInputFormat();

  SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);

  Assert.assertEquals(5, response.getNumRowsRead());
  Assert.assertEquals(4, response.getNumRowsIndexed());
  Assert.assertEquals(3, response.getData().size());

  List<SamplerResponseRow> data = response.getData();
  assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 6L).build(), null, null), data.get(0));
  assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(4), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", "bar").put("met1", 5L).build(), null, null), data.get(1));
  assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()), data.get(2));
}
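The transform spec's SelectorDimFilter admits only rows whose dim1 equals "foo", so the foo2 row never reaches the indexed output. A minimal stand-in for that selector semantics using plain predicates rather than Druid's filter classes; the class name and sample rows are illustrative:

import java.util.List;
import java.util.Map;
import java.util.function.Predicate;

public class SelectorFilterSketch
{
  public static void main(String[] args)
  {
    // A selector filter keeps rows whose dimension value equals a target, like
    // new SelectorDimFilter("dim1", "foo", null) in the test above.
    Predicate<Map<String, String>> dim1IsFoo = row -> "foo".equals(row.get("dim1"));

    List<Map<String, String>> rows = List.of(
        Map.of("dim1", "foo", "met1", "5"),
        Map.of("dim1", "foo2", "met1", "4")
    );

    rows.stream().filter(dim1IsFoo).forEach(System.out::println);  // only the "foo" row survives
  }
}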