Search in sources :

Example 1 with ParallelIndexIOConfig

Use of org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIOConfig in project druid by druid-io.

From the class CompactionTaskParallelRunTest, method runIndexTask.

/**
 * Builds a {@link ParallelIndexSupervisorTask} over the local input files and runs it.
 *
 * <p>The task reads from {@code inputDir} using the {@code "druid*"} pattern with a
 * {@link CsvInputFormat} declaring the columns {@code ts}, {@code dim} and {@code val},
 * and indexes into {@code DATA_SOURCE} over {@code INTERVAL_TO_INDEX} with a single
 * long-sum aggregator on {@code val}.
 *
 * @param partitionsSpec partitioning to hand to {@code newTuningConfig}; may be {@code null}
 * @param appendToExisting forwarded to the IO config; its negation is passed as the last
 *     argument of {@code newTuningConfig} (NOTE(review): the meaning of that flag is not
 *     visible here — confirm against {@code newTuningConfig})
 */
private void runIndexTask(@Nullable PartitionsSpec partitionsSpec, boolean appendToExisting) {
    // IO config: where the rows come from and how they are parsed.
    final LocalInputSource source = new LocalInputSource(inputDir, "druid*");
    final CsvInputFormat format = new CsvInputFormat(Arrays.asList("ts", "dim", "val"), "|", null, false, 0);
    final ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(null, source, format, appendToExisting, null);

    final ParallelIndexTuningConfig tuningConfig = newTuningConfig(partitionsSpec, 2, !appendToExisting);

    // Data schema: timestamp, dimensions, metrics and granularity.
    final TimestampSpec timestampSpec = new TimestampSpec("ts", "auto", null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(
        DimensionsSpec.getDefaultSchemas(Arrays.asList("ts", "dim"))
    );
    final AggregatorFactory[] metrics = new AggregatorFactory[] { new LongSumAggregatorFactory("val", "val") };
    final UniformGranularitySpec granularitySpec = new UniformGranularitySpec(
        Granularities.HOUR,
        Granularities.MINUTE,
        ImmutableList.of(INTERVAL_TO_INDEX)
    );
    final DataSchema dataSchema = new DataSchema(
        DATA_SOURCE,
        timestampSpec,
        dimensionsSpec,
        metrics,
        granularitySpec,
        null
    );

    final ParallelIndexIngestionSpec ingestionSpec = new ParallelIndexIngestionSpec(dataSchema, ioConfig, tuningConfig);
    runTask(new ParallelIndexSupervisorTask(null, null, null, ingestionSpec, null));
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) ParallelIndexIOConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIOConfig) ParallelIndexSupervisorTask(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) ParallelIndexTuningConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource)

Example 2 with ParallelIndexIOConfig

Use of org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIOConfig in project druid by druid-io.

From the class CompactionTaskTest, method assertIngestionSchema.

/**
 * Asserts that each generated ingestion spec matches the expected schema, IO config and
 * tuning config.
 *
 * <p>Specs are compared positionally: the i-th entry of {@code ingestionSchemas} is checked
 * against the i-th entries of {@code expectedDimensionsSpecs} and
 * {@code expectedSegmentIntervals}. The remaining expectations are shared by every spec.
 *
 * @param ingestionSchemas the specs under test
 * @param expectedDimensionsSpecs expected dimensions per spec; compared as a set, so order is ignored
 * @param expectedMetricsSpec expected aggregators, compared in order against every spec
 * @param expectedSegmentIntervals expected single interval per spec, used for both the
 *     granularity spec and the input source interval
 * @param expectedTuningConfig expected tuning config of every spec
 * @param expectedSegmentGranularity expected segment granularity of every spec
 * @param expectedQueryGranularity expected query granularity of every spec
 * @param expectedDropExisting expected value of {@code isDropExisting()} on every IO config
 * @throws IllegalArgumentException if {@code ingestionSchemas} and
 *     {@code expectedDimensionsSpecs} differ in size
 */
private void assertIngestionSchema(List<ParallelIndexIngestionSpec> ingestionSchemas, List<DimensionsSpec> expectedDimensionsSpecs, List<AggregatorFactory> expectedMetricsSpec, List<Interval> expectedSegmentIntervals, CompactionTask.CompactionTuningConfig expectedTuningConfig, Granularity expectedSegmentGranularity, Granularity expectedQueryGranularity, boolean expectedDropExisting) {
    Preconditions.checkArgument(
        ingestionSchemas.size() == expectedDimensionsSpecs.size(),
        "ingestionSchemas.size()[%s] should be same with expectedDimensionsSpecs.size()[%s]",
        ingestionSchemas.size(),
        expectedDimensionsSpecs.size()
    );
    for (int i = 0; i < ingestionSchemas.size(); i++) {
        final ParallelIndexIngestionSpec ingestionSchema = ingestionSchemas.get(i);
        final DimensionsSpec expectedDimensionsSpec = expectedDimensionsSpecs.get(i);
        final Interval expectedInterval = expectedSegmentIntervals.get(i);

        // assert dataSchema
        final DataSchema dataSchema = ingestionSchema.getDataSchema();
        Assert.assertEquals(DATA_SOURCE, dataSchema.getDataSource());
        Assert.assertEquals(
            new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, "millis", null),
            dataSchema.getTimestampSpec()
        );
        // Dimension order is not significant here, so compare as sets.
        Assert.assertEquals(
            new HashSet<>(expectedDimensionsSpec.getDimensions()),
            new HashSet<>(dataSchema.getDimensionsSpec().getDimensions())
        );

        // metrics
        Assert.assertEquals(expectedMetricsSpec, Arrays.asList(dataSchema.getAggregators()));
        Assert.assertEquals(
            new UniformGranularitySpec(
                expectedSegmentGranularity,
                expectedQueryGranularity,
                false,
                Collections.singletonList(expectedInterval)
            ),
            dataSchema.getGranularitySpec()
        );

        // assert ioConfig
        final ParallelIndexIOConfig ioConfig = ingestionSchema.getIOConfig();
        Assert.assertFalse(ioConfig.isAppendToExisting());
        Assert.assertEquals(expectedDropExisting, ioConfig.isDropExisting());
        final InputSource inputSource = ioConfig.getInputSource();
        Assert.assertTrue(
            "inputSource should be a DruidInputSource but was: " + inputSource,
            inputSource instanceof DruidInputSource
        );
        final DruidInputSource druidInputSource = (DruidInputSource) inputSource;
        Assert.assertEquals(DATA_SOURCE, druidInputSource.getDataSource());
        Assert.assertEquals(expectedInterval, druidInputSource.getInterval());
        Assert.assertNull(druidInputSource.getDimFilter());

        // assert tuningConfig
        Assert.assertEquals(expectedTuningConfig, ingestionSchema.getTuningConfig());
    }
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DruidInputSource(org.apache.druid.indexing.input.DruidInputSource) InputSource(org.apache.druid.data.input.InputSource) ParallelIndexIOConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIOConfig) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DruidInputSource(org.apache.druid.indexing.input.DruidInputSource) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec)

Aggregations

DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)2 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)2 ParallelIndexIOConfig (org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIOConfig)2 ParallelIndexIngestionSpec (org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec)2 DataSchema (org.apache.druid.segment.indexing.DataSchema)2 UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)2 InputSource (org.apache.druid.data.input.InputSource)1 CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat)1 LocalInputSource (org.apache.druid.data.input.impl.LocalInputSource)1 ParallelIndexSupervisorTask (org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask)1 ParallelIndexTuningConfig (org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig)1 DruidInputSource (org.apache.druid.indexing.input.DruidInputSource)1 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)1