Search in sources :

Example 1 with LocalInputSource

use of org.apache.druid.data.input.impl.LocalInputSource in project druid by druid-io.

the class CompactionTaskParallelRunTest method runIndexTask.

private void runIndexTask(@Nullable PartitionsSpec partitionsSpec, boolean appendToExisting) {
    ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(null, new LocalInputSource(inputDir, "druid*"), new CsvInputFormat(Arrays.asList("ts", "dim", "val"), "|", null, false, 0), appendToExisting, null);
    ParallelIndexTuningConfig tuningConfig = newTuningConfig(partitionsSpec, 2, !appendToExisting);
    ParallelIndexSupervisorTask indexTask = new ParallelIndexSupervisorTask(null, null, null, new ParallelIndexIngestionSpec(new DataSchema(DATA_SOURCE, new TimestampSpec("ts", "auto", null), new DimensionsSpec(DimensionsSpec.getDefaultSchemas(Arrays.asList("ts", "dim"))), new AggregatorFactory[] { new LongSumAggregatorFactory("val", "val") }, new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, ImmutableList.of(INTERVAL_TO_INDEX)), null), ioConfig, tuningConfig), null);
    runTask(indexTask);
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) ParallelIndexIOConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIOConfig) ParallelIndexSupervisorTask(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) ParallelIndexTuningConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource)

Example 2 with LocalInputSource

use of org.apache.druid.data.input.impl.LocalInputSource in project druid by druid-io.

the class TaskSerdeTest method testIndexTaskSerde.

@Test
public void testIndexTaskSerde() throws Exception {
    final IndexTask task = new IndexTask(null, null, new IndexIngestionSpec(new DataSchema("foo", new TimestampSpec(null, null, null), DimensionsSpec.EMPTY, new AggregatorFactory[] { new DoubleSumAggregatorFactory("met", "met") }, new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(Intervals.of("2010-01-01/P2D"))), null), new IndexIOConfig(null, new LocalInputSource(new File("lol"), "rofl"), new NoopInputFormat(), true, false), new IndexTuningConfig(null, null, null, 10, null, null, null, 9999, null, null, new DynamicPartitionsSpec(10000, null), indexSpec, null, 3, false, null, null, null, null, null, null, null, null, 1L)), null);
    final String json = jsonMapper.writeValueAsString(task);
    // Just want to run the clock a bit to make sure the task id doesn't change
    Thread.sleep(100);
    final IndexTask task2 = (IndexTask) jsonMapper.readValue(json, Task.class);
    Assert.assertEquals("foo", task.getDataSource());
    Assert.assertEquals(task.getId(), task2.getId());
    Assert.assertEquals(task.getGroupId(), task2.getGroupId());
    Assert.assertEquals(task.getDataSource(), task2.getDataSource());
    IndexTask.IndexIOConfig taskIoConfig = task.getIngestionSchema().getIOConfig();
    IndexTask.IndexIOConfig task2IoConfig = task2.getIngestionSchema().getIOConfig();
    Assert.assertTrue(taskIoConfig.getInputSource() instanceof LocalInputSource);
    Assert.assertTrue(task2IoConfig.getInputSource() instanceof LocalInputSource);
    Assert.assertEquals(taskIoConfig.isAppendToExisting(), task2IoConfig.isAppendToExisting());
    IndexTask.IndexTuningConfig taskTuningConfig = task.getIngestionSchema().getTuningConfig();
    IndexTask.IndexTuningConfig task2TuningConfig = task2.getIngestionSchema().getTuningConfig();
    Assert.assertEquals(taskTuningConfig.getBasePersistDirectory(), task2TuningConfig.getBasePersistDirectory());
    Assert.assertEquals(taskTuningConfig.getIndexSpec(), task2TuningConfig.getIndexSpec());
    Assert.assertEquals(taskTuningConfig.getIntermediatePersistPeriod(), task2TuningConfig.getIntermediatePersistPeriod());
    Assert.assertEquals(taskTuningConfig.getMaxPendingPersists(), task2TuningConfig.getMaxPendingPersists());
    Assert.assertEquals(taskTuningConfig.getMaxRowsInMemory(), task2TuningConfig.getMaxRowsInMemory());
    Assert.assertEquals(taskTuningConfig.getNumShards(), task2TuningConfig.getNumShards());
    Assert.assertEquals(taskTuningConfig.getMaxRowsPerSegment(), task2TuningConfig.getMaxRowsPerSegment());
    Assert.assertEquals(taskTuningConfig.isReportParseExceptions(), task2TuningConfig.isReportParseExceptions());
    Assert.assertEquals(taskTuningConfig.getAwaitSegmentAvailabilityTimeoutMillis(), task2TuningConfig.getAwaitSegmentAvailabilityTimeoutMillis());
}
Also used : IndexIOConfig(org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource) DataSchema(org.apache.druid.segment.indexing.DataSchema) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) NoopInputFormat(org.apache.druid.data.input.impl.NoopInputFormat) IndexIOConfig(org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig) File(java.io.File) ParallelIndexTuningConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) Test(org.junit.Test)

Example 3 with LocalInputSource

use of org.apache.druid.data.input.impl.LocalInputSource in project druid by druid-io.

the class TaskSerdeTest method testIndexTaskwithResourceSerde.

@Test
public void testIndexTaskwithResourceSerde() throws Exception {
    final IndexTask task = new IndexTask(null, new TaskResource("rofl", 2), new IndexIngestionSpec(new DataSchema("foo", new TimestampSpec(null, null, null), DimensionsSpec.EMPTY, new AggregatorFactory[] { new DoubleSumAggregatorFactory("met", "met") }, new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(Intervals.of("2010-01-01/P2D"))), null), new IndexIOConfig(null, new LocalInputSource(new File("lol"), "rofl"), new NoopInputFormat(), true, false), new IndexTuningConfig(null, null, null, 10, null, null, null, null, null, null, new DynamicPartitionsSpec(10000, null), indexSpec, null, 3, false, null, null, null, null, null, null, null, null, null)), null);
    for (final Module jacksonModule : new FirehoseModule().getJacksonModules()) {
        jsonMapper.registerModule(jacksonModule);
    }
    final String json = jsonMapper.writeValueAsString(task);
    // Just want to run the clock a bit to make sure the task id doesn't change
    Thread.sleep(100);
    final IndexTask task2 = (IndexTask) jsonMapper.readValue(json, Task.class);
    Assert.assertEquals("foo", task.getDataSource());
    Assert.assertEquals(task.getId(), task2.getId());
    Assert.assertEquals(2, task.getTaskResource().getRequiredCapacity());
    Assert.assertEquals("rofl", task.getTaskResource().getAvailabilityGroup());
    Assert.assertEquals(task.getTaskResource().getRequiredCapacity(), task2.getTaskResource().getRequiredCapacity());
    Assert.assertEquals(task.getTaskResource().getAvailabilityGroup(), task2.getTaskResource().getAvailabilityGroup());
    Assert.assertEquals(task.getGroupId(), task2.getGroupId());
    Assert.assertEquals(task.getDataSource(), task2.getDataSource());
    Assert.assertTrue(task.getIngestionSchema().getIOConfig().getInputSource() instanceof LocalInputSource);
    Assert.assertTrue(task2.getIngestionSchema().getIOConfig().getInputSource() instanceof LocalInputSource);
}
Also used : IndexIOConfig(org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource) DataSchema(org.apache.druid.segment.indexing.DataSchema) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) FirehoseModule(org.apache.druid.guice.FirehoseModule) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) NoopInputFormat(org.apache.druid.data.input.impl.NoopInputFormat) Module(com.fasterxml.jackson.databind.Module) FirehoseModule(org.apache.druid.guice.FirehoseModule) File(java.io.File) ParallelIndexTuningConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) Test(org.junit.Test)

Example 4 with LocalInputSource

use of org.apache.druid.data.input.impl.LocalInputSource in project druid by druid-io.

the class PartialHashSegmentGenerateTaskTest method requiresGranularitySpecInputIntervals.

@Test
public void requiresGranularitySpecInputIntervals() {
    expectedException.expect(IllegalArgumentException.class);
    expectedException.expectMessage("Missing intervals in granularitySpec");
    new PartialHashSegmentGenerateTask(ParallelIndexTestingFactory.AUTOMATIC_ID, ParallelIndexTestingFactory.GROUP_ID, ParallelIndexTestingFactory.TASK_RESOURCE, ParallelIndexTestingFactory.SUPERVISOR_TASK_ID, ParallelIndexTestingFactory.SUBTASK_SPEC_ID, ParallelIndexTestingFactory.NUM_ATTEMPTS, ParallelIndexTestingFactory.createIngestionSpec(new LocalInputSource(new File("baseDir"), "filer"), new JsonInputFormat(null, null, null), new ParallelIndexTestingFactory.TuningConfigBuilder().build(), ParallelIndexTestingFactory.createDataSchema(null)), ParallelIndexTestingFactory.CONTEXT, null);
}
Also used : JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) File(java.io.File) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource) Test(org.junit.Test)

Example 5 with LocalInputSource

use of org.apache.druid.data.input.impl.LocalInputSource in project druid by druid-io.

the class RangePartitionTaskKillTest method newTask.

protected ParallelIndexSupervisorTask newTask(@Nullable TimestampSpec timestampSpec, @Nullable DimensionsSpec dimensionsSpec, @Nullable InputFormat inputFormat, @Nullable ParseSpec parseSpec, Interval interval, File inputDir, String filter, PartitionsSpec partitionsSpec, int maxNumConcurrentSubTasks, boolean appendToExisting, int succeedsBeforeFailing) {
    GranularitySpec granularitySpec = new UniformGranularitySpec(SEGMENT_GRANULARITY, Granularities.MINUTE, interval == null ? null : Collections.singletonList(interval));
    ParallelIndexTuningConfig tuningConfig = newTuningConfig(partitionsSpec, maxNumConcurrentSubTasks, !appendToExisting);
    final ParallelIndexIngestionSpec ingestionSpec;
    Preconditions.checkArgument(parseSpec == null);
    ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(null, new LocalInputSource(inputDir, filter), inputFormat, appendToExisting, null);
    ingestionSpec = new ParallelIndexIngestionSpec(new DataSchema(DATASOURCE, timestampSpec, dimensionsSpec, new AggregatorFactory[] { new LongSumAggregatorFactory("val", "val") }, granularitySpec, null), ioConfig, tuningConfig);
    return new ParallelIndexSupervisorTaskTest(null, null, null, ingestionSpec, null, Collections.emptyMap(), succeedsBeforeFailing);
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource)

Aggregations

LocalInputSource (org.apache.druid.data.input.impl.LocalInputSource)8 DataSchema (org.apache.druid.segment.indexing.DataSchema)7 UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)6 File (java.io.File)4 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)4 Test (org.junit.Test)4 TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec)3 ParallelIndexTuningConfig (org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig)3 GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec)3 Map (java.util.Map)2 JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat)2 NoopInputFormat (org.apache.druid.data.input.impl.NoopInputFormat)2 StringInputRowParser (org.apache.druid.data.input.impl.StringInputRowParser)2 DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec)2 IndexIOConfig (org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig)2 IndexIngestionSpec (org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec)2 IndexTuningConfig (org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig)2 DoubleSumAggregatorFactory (org.apache.druid.query.aggregation.DoubleSumAggregatorFactory)2 LocalFirehoseFactory (org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory)2 Module (com.fasterxml.jackson.databind.Module)1