Search in sources :

Example 6 with LocalInputSource

use of org.apache.druid.data.input.impl.LocalInputSource in project druid by druid-io.

the class IndexTaskSamplerSpecTest method testSerde.

/**
 * Deserializes an "index" sampler spec from JSON, runs {@code sample()} against the mocked
 * sampler, and checks that the arguments captured from that call (input source, input format,
 * data schema, sampler config) carry the values written in the JSON document.
 */
@Test
public void testSerde() throws IOException {
    // The document is assembled line by line so its structure stays readable; the
    // concatenated value is exactly the JSON handed to the mapper.
    final String specJson = "{\n"
        + "  \"type\": \"index\",\n"
        + "  \"samplerConfig\": {\n"
        + "    \"numRows\": 123,\n"
        + "    \"timeoutMs\": 2345\n"
        + "  },\n"
        + "  \"spec\": {\n"
        + "    \"dataSchema\": {\n"
        + "      \"dataSource\": \"sampler\",\n"
        + "      \"dimensionsSpec\": {},\n"
        + "      \"timestampSpec\": {\n"
        + "        \"missingValue\": \"1970\"\n"
        + "      }\n"
        + "    },\n"
        + "    \"ioConfig\": {\n"
        + "      \"type\": \"index\",\n"
        + "      \"inputSource\": {\n"
        + "        \"type\": \"local\",\n"
        + "        \"baseDir\": \"/tmp\",\n"
        + "        \"filter\": \"wikiticker-2015-09-12-sampled.json\"\n"
        + "      },\n"
        + "      \"inputFormat\": {\n"
        + "        \"type\": \"json\"\n"
        + "      }\n"
        + "    }\n"
        + "  }\n"
        + "}";

    // One capture per argument of inputSourceSampler.sample(...).
    final Capture<InputSource> sourceCapture = EasyMock.newCapture();
    final Capture<InputFormat> formatCapture = EasyMock.newCapture();
    final Capture<DataSchema> schemaCapture = EasyMock.newCapture();
    final Capture<SamplerConfig> configCapture = EasyMock.newCapture();

    final IndexTaskSamplerSpec samplerSpec = MAPPER.readValue(specJson, IndexTaskSamplerSpec.class);

    // Record the single expected sampler invocation and stub its response.
    EasyMock.expect(
        inputSourceSampler.sample(
            EasyMock.capture(sourceCapture),
            EasyMock.capture(formatCapture),
            EasyMock.capture(schemaCapture),
            EasyMock.capture(configCapture)
        )
    ).andReturn(new SamplerResponse(0, 0, null));
    replayAll();

    samplerSpec.sample();
    verifyAll();

    // Input source: must be the local source described under "inputSource".
    final LocalInputSource localSource = (LocalInputSource) sourceCapture.getValue();
    Assert.assertEquals(new File("/tmp"), localSource.getBaseDir());
    Assert.assertEquals("wikiticker-2015-09-12-sampled.json", localSource.getFilter());

    // Data schema and input format.
    Assert.assertEquals("sampler", schemaCapture.getValue().getDataSource());
    Assert.assertEquals(JsonInputFormat.class, formatCapture.getValue().getClass());

    // Sampler config values from "samplerConfig".
    final SamplerConfig capturedConfig = configCapture.getValue();
    Assert.assertEquals(123, capturedConfig.getNumRows());
    Assert.assertEquals(2345, capturedConfig.getTimeoutMs());
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource) InputSource(org.apache.druid.data.input.InputSource) InputFormat(org.apache.druid.data.input.InputFormat) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) File(java.io.File) Test(org.junit.Test)

Example 7 with LocalInputSource

use of org.apache.druid.data.input.impl.LocalInputSource in project druid by druid-io.

the class HashPartitionTaskKillTest method createTestTask.

/**
 * Assembles the supervisor task used by this test from local input files.
 *
 * <p>Two mutually exclusive ingestion styles are supported, selected by
 * {@code useInputFormatApi}:
 * <ul>
 *   <li>{@code true}: a {@link LocalInputSource} plus {@code inputFormat}; {@code parseSpec}
 *       must be {@code null}.</li>
 *   <li>{@code false}: a {@link LocalFirehoseFactory} plus a {@link StringInputRowParser} built
 *       from {@code parseSpec}; {@code inputFormat} must be {@code null}.</li>
 * </ul>
 * The argument that does not belong to the selected style is verified with
 * {@code Preconditions.checkArgument}.
 *
 * @param interval             optional interval; when {@code null} the granularity spec is given
 *                             {@code null} intervals, otherwise a singleton list
 * @param inputDir             base directory handed to the local input source / firehose
 * @param filter               file filter handed to the local input source / firehose
 * @param appendToExisting     forwarded to the IO config; its negation is passed to
 *                             {@code newTuningConfig}
 * @param succeedsBeforeFailing forwarded unchanged to the task constructor
 * @return the task wrapping the assembled ingestion spec
 */
private ParallelIndexSupervisorTask createTestTask(@Nullable TimestampSpec timestampSpec, @Nullable DimensionsSpec dimensionsSpec, @Nullable InputFormat inputFormat, @Nullable ParseSpec parseSpec, Interval interval, File inputDir, String filter, PartitionsSpec partitionsSpec, int maxNumConcurrentSubTasks, boolean appendToExisting, boolean useInputFormatApi, int succeedsBeforeFailing) {
    final GranularitySpec granularitySpec = new UniformGranularitySpec(
        SEGMENT_GRANULARITY,
        Granularities.MINUTE,
        interval != null ? Collections.singletonList(interval) : null
    );
    final ParallelIndexTuningConfig tuningConfig = newTuningConfig(partitionsSpec, maxNumConcurrentSubTasks, !appendToExisting);

    // Both ingestion styles use the same single long-sum metric over "val".
    final AggregatorFactory[] metrics = { new LongSumAggregatorFactory("val", "val") };

    final ParallelIndexIOConfig ioConfig;
    final DataSchema dataSchema;
    if (!useInputFormatApi) {
        // Firehose + parser style: the row parser is converted to a Map for the schema.
        Preconditions.checkArgument(inputFormat == null);
        ioConfig = new ParallelIndexIOConfig(new LocalFirehoseFactory(inputDir, filter, null), appendToExisting);
        // NOTE(review): this branch names the datasource with a literal while the other
        // branch uses DATASOURCE — confirm the two are meant to be the same value.
        // noinspection unchecked
        dataSchema = new DataSchema(
            "dataSource",
            getObjectMapper().convertValue(new StringInputRowParser(parseSpec, null), Map.class),
            metrics,
            granularitySpec,
            null,
            getObjectMapper()
        );
    } else {
        // InputSource + InputFormat style.
        Preconditions.checkArgument(parseSpec == null);
        ioConfig = new ParallelIndexIOConfig(null, new LocalInputSource(inputDir, filter), inputFormat, appendToExisting, null);
        dataSchema = new DataSchema(DATASOURCE, timestampSpec, dimensionsSpec, metrics, granularitySpec, null);
    }

    final ParallelIndexIngestionSpec ingestionSpec = new ParallelIndexIngestionSpec(dataSchema, ioConfig, tuningConfig);
    return new ParallelIndexSupervisorTaskTest(null, null, null, ingestionSpec, null, Collections.emptyMap(), succeedsBeforeFailing);
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) LocalFirehoseFactory(org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory) Map(java.util.Map) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource)

Example 8 with LocalInputSource

use of org.apache.druid.data.input.impl.LocalInputSource in project druid by druid-io.

the class AbstractMultiPhaseParallelIndexingTest method newTask.

/**
 * Creates a {@link ParallelIndexSupervisorTask} that ingests local files.
 *
 * <p>The ingestion style is chosen by {@code useInputFormatApi}, which is not a parameter of
 * this method and is read from the enclosing scope:
 * <ul>
 *   <li>when set, a {@link LocalInputSource} is combined with {@code inputFormat} and
 *       {@code parseSpec} must be {@code null};</li>
 *   <li>otherwise a {@link LocalFirehoseFactory} is combined with a
 *       {@link StringInputRowParser} built from {@code parseSpec}, and {@code inputFormat}
 *       must be {@code null}.</li>
 * </ul>
 * The argument that does not belong to the active style is verified with
 * {@code Preconditions.checkArgument}.
 *
 * @param interval         optional interval; when {@code null} the granularity spec is given
 *                         {@code null} intervals, otherwise a singleton list
 * @param inputDir         base directory handed to the local input source / firehose
 * @param filter           file filter handed to the local input source / firehose
 * @param appendToExisting forwarded to the IO config; its negation is passed to
 *                         {@code newTuningConfig}
 * @return the supervisor task wrapping the assembled ingestion spec
 */
protected ParallelIndexSupervisorTask newTask(@Nullable TimestampSpec timestampSpec, @Nullable DimensionsSpec dimensionsSpec, @Nullable InputFormat inputFormat, @Nullable ParseSpec parseSpec, Interval interval, File inputDir, String filter, PartitionsSpec partitionsSpec, int maxNumConcurrentSubTasks, boolean appendToExisting) {
    final GranularitySpec granularitySpec = new UniformGranularitySpec(
        SEGMENT_GRANULARITY,
        Granularities.MINUTE,
        interval != null ? Collections.singletonList(interval) : null
    );
    final ParallelIndexTuningConfig tuningConfig = newTuningConfig(partitionsSpec, maxNumConcurrentSubTasks, !appendToExisting);

    // The one metric shared by both ingestion styles: a long sum over "val".
    final AggregatorFactory[] metrics = { new LongSumAggregatorFactory("val", "val") };

    final ParallelIndexIOConfig ioConfig;
    final DataSchema dataSchema;
    if (!useInputFormatApi) {
        // Firehose + parser style: the row parser is converted to a Map for the schema.
        Preconditions.checkArgument(inputFormat == null);
        ioConfig = new ParallelIndexIOConfig(new LocalFirehoseFactory(inputDir, filter, null), appendToExisting);
        // noinspection unchecked
        dataSchema = new DataSchema(
            "dataSource",
            getObjectMapper().convertValue(new StringInputRowParser(parseSpec, null), Map.class),
            metrics,
            granularitySpec,
            null,
            getObjectMapper()
        );
    } else {
        // InputSource + InputFormat style.
        Preconditions.checkArgument(parseSpec == null);
        ioConfig = new ParallelIndexIOConfig(null, new LocalInputSource(inputDir, filter), inputFormat, appendToExisting, null);
        dataSchema = new DataSchema(DATASOURCE, timestampSpec, dimensionsSpec, metrics, granularitySpec, null);
    }

    final ParallelIndexIngestionSpec ingestionSpec = new ParallelIndexIngestionSpec(dataSchema, ioConfig, tuningConfig);
    return new ParallelIndexSupervisorTask(null, null, null, ingestionSpec, Collections.emptyMap());
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) StringInputRowParser(org.apache.druid.data.input.impl.StringInputRowParser) LocalFirehoseFactory(org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory) Map(java.util.Map) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource)

Aggregations

LocalInputSource (org.apache.druid.data.input.impl.LocalInputSource) 8, DataSchema (org.apache.druid.segment.indexing.DataSchema) 7, UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) 6, File (java.io.File) 4, LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory) 4, Test (org.junit.Test) 4, TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec) 3, ParallelIndexTuningConfig (org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig) 3, GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec) 3, Map (java.util.Map) 2, JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat) 2, NoopInputFormat (org.apache.druid.data.input.impl.NoopInputFormat) 2, StringInputRowParser (org.apache.druid.data.input.impl.StringInputRowParser) 2, DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec) 2, IndexIOConfig (org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig) 2, IndexIngestionSpec (org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) 2, IndexTuningConfig (org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) 2, DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) 2, LocalFirehoseFactory (org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory) 2, Module (com.fasterxml.jackson.databind.Module) 1