
Example 61 with DataSchema

Use of org.apache.druid.segment.indexing.DataSchema in project druid by druid-io.

From class TaskLifecycleTest, method testIndexTaskFailure.

@Test
public void testIndexTaskFailure() throws Exception {
    final Task indexTask = new IndexTask(
        null,
        null,
        new IndexIngestionSpec(
            new DataSchema(
                "foo",
                null,
                new AggregatorFactory[]{new DoubleSumAggregatorFactory("met", "met")},
                new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(Intervals.of("2010-01-01/P1D"))),
                null,
                mapper
            ),
            new IndexIOConfig(null, new MockExceptionInputSource(), new NoopInputFormat(), false, false),
            new IndexTuningConfig(null, 10000, null, 10, null, null, null, null, null, null, null, indexSpec, null, 3, false, null, null, null, null, null, null, null, null, null)
        ),
        null
    );
    final TaskStatus status = runTask(indexTask);
    Assert.assertEquals("statusCode", TaskState.FAILED, status.getStatusCode());
    Assert.assertEquals(taskLocation, status.getLocation());
    Assert.assertEquals("num segments published", 0, mdc.getPublished().size());
    Assert.assertEquals("num segments nuked", 0, mdc.getNuked().size());
}
Also used: DataSchema(org.apache.druid.segment.indexing.DataSchema) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) IndexIOConfig(org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig) IndexTask(org.apache.druid.indexing.common.task.IndexTask) KillUnusedSegmentsTask(org.apache.druid.indexing.common.task.KillUnusedSegmentsTask) Task(org.apache.druid.indexing.common.task.Task) AbstractFixedIntervalTask(org.apache.druid.indexing.common.task.AbstractFixedIntervalTask) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) NoopInputFormat(org.apache.druid.data.input.impl.NoopInputFormat) TaskStatus(org.apache.druid.indexer.TaskStatus) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) FireDepartmentTest(org.apache.druid.segment.realtime.FireDepartmentTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
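
The FAILED outcome in this test is forced by MockExceptionInputSource, presumably a test-local input source that throws while being read. A minimal, hypothetical stand-in (a plain Java Iterable rather than Druid's actual InputSource interface) that captures the same idea:

import java.io.IOException;
import java.util.Iterator;

public class ExceptionThrowingSource implements Iterable<String> {
    @Override
    public Iterator<String> iterator() {
        return new Iterator<String>() {
            @Override
            public boolean hasNext() {
                return true; // claims data is available...
            }

            @Override
            public String next() {
                // ...but fails on every read, so the ingestion loop aborts and
                // the task ends FAILED with no segments published or nuked.
                throw new RuntimeException(new IOException("simulated read failure"));
            }
        };
    }
}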

Example 62 with DataSchema

Use of org.apache.druid.segment.indexing.DataSchema in project druid by druid-io.

From class TaskLifecycleTest, method testResumeTasks.

@Test
public void testResumeTasks() throws Exception {
    final Task indexTask = new IndexTask(
        null,
        null,
        new IndexIngestionSpec(
            new DataSchema(
                "foo",
                new TimestampSpec(null, null, null),
                DimensionsSpec.EMPTY,
                new AggregatorFactory[]{new DoubleSumAggregatorFactory("met", "met")},
                new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(Intervals.of("2010-01-01/P2D"))),
                null
            ),
            new IndexIOConfig(null, new MockInputSource(), new NoopInputFormat(), false, false),
            new IndexTuningConfig(null, 10000, null, 10, null, null, null, null, null, null, null, indexSpec, null, null, null, null, null, null, null, null, null, null, null, null)
        ),
        null
    );
    final long startTime = System.currentTimeMillis();
    // insert the task directly into TaskStorage, then wait for the TaskQueue to resume it from storage
    taskQueue.start();
    taskStorage.insert(indexTask, TaskStatus.running(indexTask.getId()));
    while (tsqa.getStatus(indexTask.getId()).get().isRunnable()) {
        if (System.currentTimeMillis() > startTime + 10 * 1000) {
            throw new ISE("Where did the task go?!: %s", indexTask.getId());
        }
        Thread.sleep(100);
    }
    final TaskStatus status = taskStorage.getStatus(indexTask.getId()).get();
    final List<DataSegment> publishedSegments = BY_INTERVAL_ORDERING.sortedCopy(mdc.getPublished());
    final List<DataSegment> loggedSegments = BY_INTERVAL_ORDERING.sortedCopy(tsqa.getInsertedSegments(indexTask.getId()));
    Assert.assertEquals("statusCode", TaskState.SUCCESS, status.getStatusCode());
    Assert.assertEquals(taskLocation, status.getLocation());
    Assert.assertEquals("segments logged vs published", loggedSegments, publishedSegments);
    Assert.assertEquals("num segments published", 2, mdc.getPublished().size());
    Assert.assertEquals("num segments nuked", 0, mdc.getNuked().size());
    Assert.assertEquals("segment1 datasource", "foo", publishedSegments.get(0).getDataSource());
    Assert.assertEquals("segment1 interval", Intervals.of("2010-01-01/P1D"), publishedSegments.get(0).getInterval());
    Assert.assertEquals("segment1 dimensions", ImmutableList.of("dim1", "dim2"), publishedSegments.get(0).getDimensions());
    Assert.assertEquals("segment1 metrics", ImmutableList.of("met"), publishedSegments.get(0).getMetrics());
    Assert.assertEquals("segment2 datasource", "foo", publishedSegments.get(1).getDataSource());
    Assert.assertEquals("segment2 interval", Intervals.of("2010-01-02/P1D"), publishedSegments.get(1).getInterval());
    Assert.assertEquals("segment2 dimensions", ImmutableList.of("dim1", "dim2"), publishedSegments.get(1).getDimensions());
    Assert.assertEquals("segment2 metrics", ImmutableList.of("met"), publishedSegments.get(1).getMetrics());
}
Also used: IndexIOConfig(org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig) IndexTask(org.apache.druid.indexing.common.task.IndexTask) KillUnusedSegmentsTask(org.apache.druid.indexing.common.task.KillUnusedSegmentsTask) Task(org.apache.druid.indexing.common.task.Task) AbstractFixedIntervalTask(org.apache.druid.indexing.common.task.AbstractFixedIntervalTask) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) DataSchema(org.apache.druid.segment.indexing.DataSchema) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) ISE(org.apache.druid.java.util.common.ISE) NoopInputFormat(org.apache.druid.data.input.impl.NoopInputFormat) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) FireDepartmentTest(org.apache.druid.segment.realtime.FireDepartmentTest) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
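
The while loop in this test is a standard poll-with-timeout. A self-contained sketch of the same pattern, where the BooleanSupplier is a generic stand-in for tsqa.getStatus(indexTask.getId()).get().isRunnable(), not a Druid API:

import java.util.function.BooleanSupplier;

public class PollUntilDone {
    // Poll a condition every 100 ms; fail loudly if it never clears in time.
    static void waitUntilDone(BooleanSupplier stillRunning, long timeoutMillis) throws InterruptedException {
        final long deadline = System.currentTimeMillis() + timeoutMillis;
        while (stillRunning.getAsBoolean()) {
            if (System.currentTimeMillis() > deadline) {
                throw new IllegalStateException("task still running after " + timeoutMillis + " ms");
            }
            Thread.sleep(100);
        }
    }

    public static void main(String[] args) throws InterruptedException {
        final long start = System.currentTimeMillis();
        // Toy condition: "running" for roughly half a second, then done.
        waitUntilDone(() -> System.currentTimeMillis() - start < 500, 10_000);
        System.out.println("done");
    }
}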

Example 63 with DataSchema

Use of org.apache.druid.segment.indexing.DataSchema in project druid by druid-io.

From class InputSourceSamplerTest, method testWithRollup.

@Test
public void testWithRollup() throws IOException {
    final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
    final AggregatorFactory[] aggregatorFactories = { new LongSumAggregatorFactory("met1", "met1") };
    final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
    final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, null);
    final InputSource inputSource = createInputSource(getTestRows(), dataSchema);
    final InputFormat inputFormat = createInputFormat();
    SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);
    Assert.assertEquals(6, response.getNumRowsRead());
    Assert.assertEquals(5, response.getNumRowsIndexed());
    Assert.assertEquals(4, response.getData().size());
    List<SamplerResponseRow> data = response.getData();
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 6L).build(), null, null), data.get(0));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(3), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo2").put("dim2", null).put("met1", 4L).build(), null, null), data.get(1));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(4), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", "bar").put("met1", 5L).build(), null, null), data.get(2));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()), data.get(3));
}
Also used : RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
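
Why six rows read become four response rows: with rollup enabled, rows sharing the same truncated timestamp and dimension values collapse into one row whose met1 values are summed, and the single unparseable row is reported separately. A toy sketch of that grouping, with row values reconstructed from the assertions in Examples 63 and 64:

import java.util.LinkedHashMap;
import java.util.Map;

public class RollupSketch {
    public static void main(String[] args) {
        // The five parseable test rows: {truncated __time, dim1, dim2, met1}.
        String[][] rows = {
            {"1555934400000", "foo", null, "1"},
            {"1555934400000", "foo", null, "2"},
            {"1555934400000", "foo", null, "3"},
            {"1555934400000", "foo2", null, "4"},
            {"1555934400000", "foo", "bar", "5"},
        };
        // Rollup: group by (truncated time, dimension values), summing the metric.
        Map<String, Long> rolledUp = new LinkedHashMap<>();
        for (String[] r : rows) {
            rolledUp.merge(r[0] + "|" + r[1] + "|" + r[2], Long.parseLong(r[3]), Long::sum);
        }
        // Prints three rows (met1 = 6, 4, 5); together with the separately
        // reported unparseable row, that matches the four SamplerResponseRows above.
        rolledUp.forEach((k, v) -> System.out.println(k + " -> met1=" + v));
    }
}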

Example 64 with DataSchema

Use of org.apache.druid.segment.indexing.DataSchema in project druid by druid-io.

From class InputSourceSamplerTest, method testWithDimensionSpec.

@Test
public void testWithDimensionSpec() throws IOException {
    final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(ImmutableList.of(StringDimensionSchema.create("dim1"), StringDimensionSchema.create("met1")));
    final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, null, null, null);
    final InputSource inputSource = createInputSource(getTestRows(), dataSchema);
    final InputFormat inputFormat = createInputFormat();
    SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);
    Assert.assertEquals(6, response.getNumRowsRead());
    Assert.assertEquals(5, response.getNumRowsIndexed());
    Assert.assertEquals(6, response.getData().size());
    List<SamplerResponseRow> data = response.getData();
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("met1", "1").build(), null, null), data.get(0));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(1), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("met1", "2").build(), null, null), data.get(1));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(2), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934460000L).put("dim1", "foo").put("met1", "3").build(), null, null), data.get(2));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(3), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo2").put("met1", "4").build(), null, null), data.get(3));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(4), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("met1", "5").build(), null, null), data.get(4));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()), data.get(5));
}
Also used: DataSchema(org.apache.druid.segment.indexing.DataSchema) RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
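
The key difference from the rollup examples: this schema declares explicit string dimensions and no aggregators, so "met1" is ingested verbatim as a string and becomes part of each row's grouping key. No rows collapse, which is why every parseable input row maps to exactly one response row above. A minimal sketch reusing the constructors from the test (the getDimensionNames accessor is assumed):

import com.google.common.collect.ImmutableList;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.StringDimensionSchema;

public class ExplicitDimensionsSketch {
    public static void main(String[] args) {
        // Listing "met1" as a string dimension stores it as-is; with no
        // AggregatorFactory configured there is nothing to roll up.
        DimensionsSpec explicitDims = new DimensionsSpec(
            ImmutableList.of(
                StringDimensionSchema.create("dim1"),
                StringDimensionSchema.create("met1")
            )
        );
        System.out.println(explicitDims.getDimensionNames()); // assumed accessor: [dim1, met1]
    }
}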

Example 65 with DataSchema

Use of org.apache.druid.segment.indexing.DataSchema in project druid by druid-io.

From class InputSourceSamplerTest, method testWithFilter.

@Test
public void testWithFilter() throws IOException {
    final TimestampSpec timestampSpec = new TimestampSpec("t", null, null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(null);
    final TransformSpec transformSpec = new TransformSpec(new SelectorDimFilter("dim1", "foo", null), null);
    final AggregatorFactory[] aggregatorFactories = { new LongSumAggregatorFactory("met1", "met1") };
    final GranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.HOUR, true, null);
    final DataSchema dataSchema = createDataSchema(timestampSpec, dimensionsSpec, aggregatorFactories, granularitySpec, transformSpec);
    final InputSource inputSource = createInputSource(getTestRows(), dataSchema);
    final InputFormat inputFormat = createInputFormat();
    SamplerResponse response = inputSourceSampler.sample(inputSource, inputFormat, dataSchema, null);
    Assert.assertEquals(5, response.getNumRowsRead());
    Assert.assertEquals(4, response.getNumRowsIndexed());
    Assert.assertEquals(3, response.getData().size());
    List<SamplerResponseRow> data = response.getData();
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(0), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", null).put("met1", 6L).build(), null, null), data.get(0));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(4), new SamplerTestUtils.MapAllowingNullValuesBuilder<String, Object>().put("__time", 1555934400000L).put("dim1", "foo").put("dim2", "bar").put("met1", 5L).build(), null, null), data.get(1));
    assertEqualsSamplerResponseRow(new SamplerResponseRow(getRawColumns().get(5), null, true, getUnparseableTimestampString()), data.get(2));
}
Also used: RecordSupplierInputSource(org.apache.druid.indexing.seekablestream.RecordSupplierInputSource) InlineInputSource(org.apache.druid.data.input.impl.InlineInputSource) InputSource(org.apache.druid.data.input.InputSource) SamplerResponse(org.apache.druid.client.indexing.SamplerResponse) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TransformSpec(org.apache.druid.segment.transform.TransformSpec) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) JsonInputFormat(org.apache.druid.data.input.impl.JsonInputFormat) InputFormat(org.apache.druid.data.input.InputFormat) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SamplerResponseRow(org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)
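
The TransformSpec's SelectorDimFilter keeps only rows whose dim1 equals "foo" before they are indexed, which is why the foo2 row from the earlier examples is absent here. A generic sketch of that filtering step in plain Java, with illustrative data only:

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class SelectorFilterSketch {
    public static void main(String[] args) {
        // Same shape as the test rows: the filter passes only dim1 == "foo".
        List<Map<String, String>> rows = List.of(
            Map.of("dim1", "foo", "met1", "1"),
            Map.of("dim1", "foo2", "met1", "4"),
            Map.of("dim1", "foo", "met1", "5")
        );
        List<Map<String, String>> kept = rows.stream()
            .filter(r -> "foo".equals(r.get("dim1")))
            .collect(Collectors.toList());
        System.out.println(kept); // the foo2 row is dropped before indexing
    }
}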

Aggregations

DataSchema (org.apache.druid.segment.indexing.DataSchema): 80 usages
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 49 usages
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 45 usages
Test (org.junit.Test): 44 usages
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 32 usages
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 25 usages
LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 22 usages
GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec): 19 usages
InputSource (org.apache.druid.data.input.InputSource): 17 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 17 usages
File (java.io.File): 16 usages
Map (java.util.Map): 15 usages
InputFormat (org.apache.druid.data.input.InputFormat): 15 usages
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 15 usages
SamplerResponse (org.apache.druid.client.indexing.SamplerResponse): 14 usages
SamplerResponseRow (org.apache.druid.client.indexing.SamplerResponse.SamplerResponseRow): 13 usages
CsvInputFormat (org.apache.druid.data.input.impl.CsvInputFormat): 13 usages
Interval (org.joda.time.Interval): 13 usages
ArrayList (java.util.ArrayList): 12 usages
JsonInputFormat (org.apache.druid.data.input.impl.JsonInputFormat): 12 usages