Example usage of org.apache.druid.data.input.InputSource in the druid project (druid-io).
From class InputSourceSplitParallelIndexTaskRunner, method newTaskSpec.
final SubTaskSpec<T> newTaskSpec(InputSplit split)
{
  // Route the split down one of two mutually exclusive paths: the legacy firehose
  // path (when the base input source merely wraps a FirehoseFactory) or the native
  // InputSource path. Exactly one of the two locals below ends up non-null.
  final FirehoseFactory splitFirehose;
  final InputSource splitSource;
  if (baseInputSource instanceof FirehoseFactoryToInputSourceAdaptor) {
    // Legacy path: unwrap the adaptor and hand the split to the firehose factory.
    final FirehoseFactoryToInputSourceAdaptor adaptor = (FirehoseFactoryToInputSourceAdaptor) baseInputSource;
    splitFirehose = adaptor.getFirehoseFactory().withSplit(split);
    splitSource = null;
  } else {
    splitFirehose = null;
    splitSource = baseInputSource.withSplit(split);
  }
  // Per-subtask IO config: split-specific input, but format/append/drop flags are
  // inherited from the parent ingestion schema.
  final ParallelIndexIOConfig subTaskIoConfig = new ParallelIndexIOConfig(
      splitFirehose,
      splitSource,
      ingestionSchema.getIOConfig().getInputFormat(),
      ingestionSchema.getIOConfig().isAppendToExisting(),
      ingestionSchema.getIOConfig().isDropExisting()
  );
  // The subtask shares the parent's data schema and tuning config unchanged.
  final ParallelIndexIngestionSpec subTaskIngestionSpec = new ParallelIndexIngestionSpec(
      ingestionSchema.getDataSchema(),
      subTaskIoConfig,
      ingestionSchema.getTuningConfig()
  );
  // Each spec name gets a unique, monotonically increasing numeric suffix.
  return createSubTaskSpec(
      getBaseSubtaskSpecName() + "_" + getAndIncrementNextSpecId(),
      getGroupId(),
      getTaskId(),
      getContext(),
      split,
      subTaskIngestionSpec
  );
}
Example usage of org.apache.druid.data.input.InputSource in the druid project (druid-io).
From class DruidInputSourceTest, method testSerdeUsingIntervalsAndLegacyDimensionsMetrics.
@Test
public void testSerdeUsingIntervalsAndLegacyDimensionsMetrics() throws Exception
{
  // JSON using the legacy single-"interval" field plus top-level dimensions/metrics lists.
  final String json = "{"
                      + "\"type\":\"druid\","
                      + "\"dataSource\":\"foo\","
                      + "\"interval\":\"2000-01-01T00:00:00.000Z/2001-01-01T00:00:00.000Z\","
                      + "\"dimensions\":[\"a\"],"
                      + "\"metrics\":[\"b\"]"
                      + "}";
  final InputSource deserialized = mapper.readValue(json, InputSource.class);
  Assert.assertThat(deserialized, CoreMatchers.instanceOf(DruidInputSource.class));
  // The deserialized source must equal one constructed directly with the same fields.
  final DruidInputSource expected = new DruidInputSource(
      "foo",
      Intervals.of("2000/2001"),
      null,
      null,
      ImmutableList.of("a"),
      ImmutableList.of("b"),
      indexIO,
      coordinatorClient,
      segmentCacheManagerFactory,
      retryPolicyFactory,
      taskConfig
  );
  Assert.assertEquals(expected, deserialized);
  // Round trip: re-serializing must reproduce the original JSON exactly.
  Assert.assertEquals(json, mapper.writeValueAsString(deserialized));
}
Example usage of org.apache.druid.data.input.InputSource in the druid project (druid-io).
From class CsvInputSourceSamplerTest, method testCSVColumnAllNull.
@Test
public void testCSVColumnAllNull()
{
  // Sampler input where the "Number" column is empty in every CSV row; each sampled
  // row should carry an explicit null for that column in both its raw and parsed maps.
  final TimestampSpec timestampSpec = new TimestampSpec(null, null, DateTimes.of("1970"));
  final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
  final DataSchema dataSchema = new DataSchema("sampler", timestampSpec, dimensionsSpec, null, null, null);
  final List<String> csvLines = ImmutableList.of(
      "FirstName,LastName,Number,Gender",
      "J,G,,Male",
      "Kobe,Bryant,,Male",
      "Lisa, Krystal,,Female",
      "Michael,Jackson,,Male"
  );
  final InputSource source = new InlineInputSource(String.join("\n", csvLines));
  final InputFormat format = new CsvInputFormat(null, null, null, true, 0);
  final SamplerResponse response = new InputSourceSampler().sample(source, format, dataSchema, null);
  // All four data rows are read and indexed; none are dropped.
  Assert.assertEquals(4, response.getNumRowsRead());
  Assert.assertEquals(4, response.getNumRowsIndexed());
  final List<SamplerResponseRow> rows = response.getData();
  Assert.assertEquals(4, rows.size());
  // Expected (FirstName, LastName, Gender) per row; "Number" is always null.
  // Note the leading space in " Krystal" — CSV fields are not trimmed.
  final String[][] expected = {
      {"J", "G", "Male"},
      {"Kobe", "Bryant", "Male"},
      {"Lisa", " Krystal", "Female"},
      {"Michael", "Jackson", "Male"}
  };
  for (int i = 0; i < expected.length; i++) {
    Assert.assertEquals(
        new SamplerResponseRow(
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("FirstName", expected[i][0])
                .put("LastName", expected[i][1])
                .put("Number", null)
                .put("Gender", expected[i][2])
                .build(),
            new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
                .put("__time", 0L)
                .put("FirstName", expected[i][0])
                .put("LastName", expected[i][1])
                .put("Number", null)
                .put("Gender", expected[i][2])
                .build(),
            null,
            null
        ),
        rows.get(i)
    );
  }
}
Example usage of org.apache.druid.data.input.InputSource in the druid project (druid-io).
From class InputSourceSamplerTest, method testNoDataSchemaNumRows.
@Test
public void testNoDataSchemaNumRows()
{
  // Sampling with a null data schema: rows are read but none can be indexed, and
  // each is flagged unparseable with a per-row timestamp error message.
  final InputSource source = createInputSource(getTestRows(), null);
  final SamplerResponse response =
      inputSourceSampler.sample(source, createInputFormat(), null, new SamplerConfig(3, null));
  Assert.assertEquals(3, response.getNumRowsRead());
  Assert.assertEquals(0, response.getNumRowsIndexed());
  final List<SamplerResponseRow> rows = response.getData();
  Assert.assertEquals(3, rows.size());
  for (int i = 0; i < 3; i++) {
    // The expected error message embeds the 1-based row number.
    assertEqualsSamplerResponseRow(
        new SamplerResponseRow(
            getRawColumns().get(i),
            null,
            true,
            unparseableTimestampErrorString(rows.get(i).getInput(), i + 1)
        ),
        rows.get(i)
    );
  }
}
Example usage of org.apache.druid.data.input.InputSource in the druid project (druid-io).
From class InputSourceSamplerTest, method testWithMoreRollup.
@Test
public void testWithMoreRollup() throws IOException
{
  // Rollup on a single dimension: six input rows collapse into three response rows —
  // two aggregated groups ("foo" and "foo2") plus one unparseable-timestamp row.
  final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
  final DimensionsSpec dimensionsSpec =
      new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("dim1")));
  final AggregatorFactory[] aggregators = {new LongSumAggregatorFactory("met1", "met1")};
  final GranularitySpec granularitySpec =
      new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
  final DataSchema dataSchema =
      createDataSchema(timestampSpec, dimensionsSpec, aggregators, granularitySpec, null);
  final InputSource source = createInputSource(getTestRows(), dataSchema);
  final SamplerResponse response =
      inputSourceSampler.sample(source, createInputFormat(), dataSchema, null);
  Assert.assertEquals(6, response.getNumRowsRead());
  Assert.assertEquals(5, response.getNumRowsIndexed());
  final List<SamplerResponseRow> rows = response.getData();
  Assert.assertEquals(3, rows.size());
  // Group "foo" sums met1 to 11; group "foo2" sums met1 to 4.
  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(
          getRawColumns().get(0),
          new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
              .put("__time", 1555934400000L)
              .put("dim1", "foo")
              .put("met1", 11L)
              .build(),
          null,
          null
      ),
      rows.get(0)
  );
  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(
          getRawColumns().get(3),
          new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>()
              .put("__time", 1555934400000L)
              .put("dim1", "foo2")
              .put("met1", 4L)
              .build(),
          null,
          null
      ),
      rows.get(1)
  );
  // The remaining row fails timestamp parsing and is reported as unparseable.
  assertEqualsSamplerResponseRow(
      new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()),
      rows.get(2)
  );
}
Aggregations