Example 51 with DataSchema

Use of org.apache.druid.segment.indexing.DataSchema in project druid by druid-io.

From the class RealtimeIndexTaskTest, method makeRealtimeTask.

private RealtimeIndexTask makeRealtimeTask(final String taskId, final TransformSpec transformSpec, final boolean reportParseExceptions, final long handoffTimeout) {
    ObjectMapper objectMapper = new DefaultObjectMapper();
    // This legacy DataSchema constructor takes the parser as a Map<String, Object>,
    // so the typed MapInputRowParser is converted through Jackson first.
    DataSchema dataSchema = new DataSchema(
        "test_ds",
        TestHelper.makeJsonMapper().convertValue(
            new MapInputRowParser(
                new TimeAndDimsParseSpec(
                    new TimestampSpec("t", "auto", null),
                    new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim1t")))
                )
            ),
            JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT
        ),
        new AggregatorFactory[] { new CountAggregatorFactory("rows"), new LongSumAggregatorFactory("met1", "met1") },
        new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null),
        transformSpec,
        objectMapper
    );
    RealtimeIOConfig realtimeIOConfig = new RealtimeIOConfig(new TestFirehose.TestFirehoseFactory(), null);
    // Most arguments are left null to take their defaults.
    RealtimeTuningConfig realtimeTuningConfig = new RealtimeTuningConfig(
        null, 1000, null, null, new Period("P1Y"), new Period("PT10M"), null, null,
        new ServerTimeRejectionPolicyFactory(), null, null, null, null, 0, 0,
        reportParseExceptions, handoffTimeout, null, null, null
    );
    return new RealtimeIndexTask(taskId, null, new FireDepartment(dataSchema, realtimeIOConfig, realtimeTuningConfig), null) {

        @Override
        protected boolean isFirehoseDrainableByClosing(FirehoseFactory firehoseFactory) {
            return true;
        }
    };
}
Also used: RealtimeIOConfig (org.apache.druid.segment.indexing.RealtimeIOConfig), MapInputRowParser (org.apache.druid.data.input.impl.MapInputRowParser), FirehoseFactory (org.apache.druid.data.input.FirehoseFactory), LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory), Period (org.joda.time.Period), RealtimeTuningConfig (org.apache.druid.segment.indexing.RealtimeTuningConfig), TestFirehose (org.apache.druid.indexing.common.TestFirehose), DataSchema (org.apache.druid.segment.indexing.DataSchema), TimeAndDimsParseSpec (org.apache.druid.data.input.impl.TimeAndDimsParseSpec), UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), FireDepartment (org.apache.druid.segment.realtime.FireDepartment), ServerTimeRejectionPolicyFactory (org.apache.druid.segment.realtime.plumber.ServerTimeRejectionPolicyFactory), CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper), ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)
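
The convertValue call above exists because this legacy DataSchema constructor accepts the parser as an untyped Map<String, Object> rather than a typed InputRowParser. A minimal sketch of that conversion in isolation, assuming a Druid-configured mapper such as the one TestHelper.makeJsonMapper() returns; the round trip back to a parser is illustrative and not part of the test above:

ObjectMapper mapper = TestHelper.makeJsonMapper();
// The typed parser, identical to the one built in makeRealtimeTask above.
InputRowParser<?> parser = new MapInputRowParser(
    new TimeAndDimsParseSpec(
        new TimestampSpec("t", "auto", null),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("dim1", "dim2", "dim1t")))
    )
);
// Forward: typed parser to untyped map, as the legacy DataSchema constructor expects.
Map<String, Object> parserMap = mapper.convertValue(parser, JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT);
// Backward: the map carries Jackson's polymorphic type info, so it can be read back.
InputRowParser<?> restored = mapper.convertValue(parserMap, InputRowParser.class);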

Example 52 with DataSchema

Use of org.apache.druid.segment.indexing.DataSchema in project druid by druid-io.

From the class TaskSerdeTest, method testHadoopIndexTaskSerde.

@Test
public void testHadoopIndexTaskSerde() throws Exception {
    final HadoopIndexTask task = new HadoopIndexTask(
        null,
        new HadoopIngestionSpec(
            new DataSchema(
                "foo",
                null,
                new AggregatorFactory[0],
                new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(Intervals.of("2010-01-01/P1D"))),
                null,
                jsonMapper
            ),
            new HadoopIOConfig(ImmutableMap.of("paths", "bar"), null, null),
            null
        ),
        null, null, "blah", jsonMapper, null, AuthTestUtils.TEST_AUTHORIZER_MAPPER, null
    );
    // Round-trip the task through JSON and verify the copy matches the original.
    final String json = jsonMapper.writeValueAsString(task);
    final HadoopIndexTask task2 = (HadoopIndexTask) jsonMapper.readValue(json, Task.class);
    Assert.assertEquals("foo", task.getDataSource());
    Assert.assertEquals(task.getId(), task2.getId());
    Assert.assertEquals(task.getGroupId(), task2.getGroupId());
    Assert.assertEquals(task.getDataSource(), task2.getDataSource());
    Assert.assertEquals(task.getSpec().getTuningConfig().getJobProperties(), task2.getSpec().getTuningConfig().getJobProperties());
    Assert.assertEquals("blah", task.getClasspathPrefix());
    Assert.assertEquals("blah", task2.getClasspathPrefix());
}
Also used: HadoopIngestionSpec (org.apache.druid.indexer.HadoopIngestionSpec), DataSchema (org.apache.druid.segment.indexing.DataSchema), UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), HadoopIOConfig (org.apache.druid.indexer.HadoopIOConfig), Test (org.junit.Test)
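
The pattern here (serialize, deserialize, compare) recurs throughout TaskSerdeTest. A minimal generic sketch of that round trip, assuming jsonMapper is the Druid-configured ObjectMapper the test class provides; the helper name roundTrip is hypothetical:

// Hypothetical helper: write any value out as JSON, then read it back as `clazz`.
static <T> T roundTrip(ObjectMapper jsonMapper, Object value, Class<T> clazz) throws Exception {
    final String json = jsonMapper.writeValueAsString(value);
    return jsonMapper.readValue(json, clazz);
}

Applied to the task above, it would read: HadoopIndexTask task2 = (HadoopIndexTask) roundTrip(jsonMapper, task, Task.class);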

Example 53 with DataSchema

Use of org.apache.druid.segment.indexing.DataSchema in project druid by druid-io.

From the class ParallelIndexSupervisorTaskKillTest, method newTask.

private ParallelIndexSupervisorTask newTask(Interval interval, ParallelIndexIOConfig ioConfig) {
    final TestInputSource inputSource = (TestInputSource) ioConfig.getInputSource();
    final int numTotalSubTasks = inputSource.estimateNumSplits(new NoopInputFormat(), null);
    // Set up the ingestion spec; every tuning option except the subtask count is left
    // null to take its default.
    final ParallelIndexIngestionSpec ingestionSpec = new ParallelIndexIngestionSpec(
        new DataSchema(
            "dataSource",
            DEFAULT_TIMESTAMP_SPEC,
            DEFAULT_DIMENSIONS_SPEC,
            new AggregatorFactory[] { new LongSumAggregatorFactory("val", "val") },
            new UniformGranularitySpec(Granularities.DAY, Granularities.MINUTE, interval == null ? null : Collections.singletonList(interval)),
            null
        ),
        ioConfig,
        new ParallelIndexTuningConfig(
            null, null, null, null, null, null, null, null, null,
            null, null, null, null, null, null, null, null, null,
            numTotalSubTasks,
            null, null, null, null, null, null,
            null, null, null, null, null, null
        )
    );
    // Set up test tools.
    return new TestSupervisorTask(ingestionSpec, Collections.singletonMap(AbstractParallelIndexSupervisorTaskTest.DISABLE_TASK_INJECT_CONTEXT_KEY, true));
}
Also used: DataSchema (org.apache.druid.segment.indexing.DataSchema), UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory), NoopInputFormat (org.apache.druid.data.input.impl.NoopInputFormat), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)
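
One detail worth noting: when interval is null, the UniformGranularitySpec receives no input intervals, and Druid derives segment intervals from the timestamps of the ingested rows instead of declaring them up front. A minimal sketch of just that spec, using the DAY/MINUTE granularities from the code above:

// DAY segment granularity, MINUTE query granularity; a null interval list means
// segment intervals are inferred from the data rather than fixed in advance.
GranularitySpec granularitySpec = new UniformGranularitySpec(
    Granularities.DAY,
    Granularities.MINUTE,
    interval == null ? null : Collections.singletonList(interval)
);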

Example 54 with DataSchema

Use of org.apache.druid.segment.indexing.DataSchema in project druid by druid-io.

From the class ParallelIndexTestingFactory, method createDataSchema.

static DataSchema createDataSchema(List<Interval> granularitySpecInputIntervals) {
    GranularitySpec granularitySpec = new ArbitraryGranularitySpec(Granularities.DAY, granularitySpecInputIntervals);
    TimestampSpec timestampSpec = new TimestampSpec(SCHEMA_TIME, "auto", null);
    DimensionsSpec dimensionsSpec = new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of(SCHEMA_DIMENSION)));
    return new DataSchema(
        DATASOURCE,
        timestampSpec,
        dimensionsSpec,
        new AggregatorFactory[] {},
        granularitySpec,
        TransformSpec.NONE,
        null,
        NESTED_OBJECT_MAPPER
    );
}
Also used: DataSchema (org.apache.druid.segment.indexing.DataSchema), ArbitraryGranularitySpec (org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec), GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec), TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)
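
ArbitraryGranularitySpec differs from the UniformGranularitySpec used in the other examples: there is no segment granularity at all; the supplied intervals become the segment boundaries directly, and the first constructor argument is the query granularity. A minimal sketch with illustrative interval values:

// Segments follow the listed, possibly irregular, intervals exactly;
// Granularities.DAY here is the query granularity, not a segment granularity.
GranularitySpec granularitySpec = new ArbitraryGranularitySpec(
    Granularities.DAY,
    ImmutableList.of(
        Intervals.of("2017-12-01/2017-12-04"),
        Intervals.of("2017-12-04/2017-12-10")
    )
);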

Example 55 with DataSchema

Use of org.apache.druid.segment.indexing.DataSchema in project druid by druid-io.

From the class AbstractMultiPhaseParallelIndexingTest, method newTask.

protected ParallelIndexSupervisorTask newTask(
    @Nullable TimestampSpec timestampSpec,
    @Nullable DimensionsSpec dimensionsSpec,
    @Nullable InputFormat inputFormat,
    @Nullable ParseSpec parseSpec,
    Interval interval,
    File inputDir,
    String filter,
    PartitionsSpec partitionsSpec,
    int maxNumConcurrentSubTasks,
    boolean appendToExisting
) {
    GranularitySpec granularitySpec = new UniformGranularitySpec(SEGMENT_GRANULARITY, Granularities.MINUTE, interval == null ? null : Collections.singletonList(interval));
    ParallelIndexTuningConfig tuningConfig = newTuningConfig(partitionsSpec, maxNumConcurrentSubTasks, !appendToExisting);
    final ParallelIndexIngestionSpec ingestionSpec;
    if (useInputFormatApi) {
        // New-style path: a typed InputSource/InputFormat pair, no parser needed.
        Preconditions.checkArgument(parseSpec == null);
        ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(null, new LocalInputSource(inputDir, filter), inputFormat, appendToExisting, null);
        ingestionSpec = new ParallelIndexIngestionSpec(
            new DataSchema(DATASOURCE, timestampSpec, dimensionsSpec, new AggregatorFactory[] { new LongSumAggregatorFactory("val", "val") }, granularitySpec, null),
            ioConfig,
            tuningConfig
        );
    } else {
        // Legacy path: a firehose plus a parser serialized to a Map for the old
        // DataSchema constructor.
        Preconditions.checkArgument(inputFormat == null);
        ParallelIndexIOConfig ioConfig = new ParallelIndexIOConfig(new LocalFirehoseFactory(inputDir, filter, null), appendToExisting);
        // noinspection unchecked
        ingestionSpec = new ParallelIndexIngestionSpec(
            new DataSchema("dataSource", getObjectMapper().convertValue(new StringInputRowParser(parseSpec, null), Map.class), new AggregatorFactory[] { new LongSumAggregatorFactory("val", "val") }, granularitySpec, null, getObjectMapper()),
            ioConfig,
            tuningConfig
        );
    }
    // Set up test tools.
    return new ParallelIndexSupervisorTask(null, null, null, ingestionSpec, Collections.emptyMap());
}
Also used: DataSchema (org.apache.druid.segment.indexing.DataSchema), UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec), LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory), StringInputRowParser (org.apache.druid.data.input.impl.StringInputRowParser), LocalFirehoseFactory (org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory), Map (java.util.Map), LocalInputSource (org.apache.druid.data.input.impl.LocalInputSource)
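
The if/else above captures the API migration these tests straddle: the InputSource/InputFormat path hands typed objects straight to DataSchema and ParallelIndexIOConfig, while the legacy firehose path still serializes a StringInputRowParser to a Map for the old DataSchema constructor. A sketch of the new-style wiring in isolation, assuming the same five-argument ParallelIndexIOConfig constructor used above; the helper name and the filter value are illustrative:

// New-style ioConfig: typed input source and format, no firehose, no parser map.
static ParallelIndexIOConfig newStyleIoConfig(File inputDir, InputFormat inputFormat, boolean appendToExisting) {
    return new ParallelIndexIOConfig(
        null,                                 // firehoseFactory: unused on this path
        new LocalInputSource(inputDir, "*"),  // "*" filter is illustrative
        inputFormat,
        appendToExisting,
        null
    );
}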

Aggregations

DataSchema (org.apache.druid.segment.indexing.DataSchema): 80 usages
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 49 usages
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 45 usages
Test (org.junit.Test): 44 usages
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 32 usages
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 25 usages
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 22 usages
GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec): 19 usages
InputSource (org.apache.druid.data.input.InputSource): 17 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 17 usages
File (java.io.File): 16 usages
Map (java.util.Map): 15 usages
InputFormat (org.apache.druid.data.input.InputFormat): 15 usages
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 15 usages
SamplerResponse (org.apache.druid.client.indexing.SamplerResponse): 14 usages
SamplerResponseRow (org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow): 13 usages
CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat): 13 usages
Interval (org.joda.time.Interval): 13 usages
ArrayList (java.util.ArrayList): 12 usages
JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat): 12 usages