Examples with UniformGranularitySpec - org.apache.druid.segment.indexing.granularity.UniformGranularitySpec

Example 71 with UniformGranularitySpec

use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

the class KinesisSupervisorTest method getDataSchema.

/**
 * Builds a {@link DataSchema} for the supplied datasource name.
 *
 * <p>The schema is assembled from: string dimensions "dim1" and "dim2"; a {@link TimestampSpec}
 * created with ("timestamp", "iso", null); one {@link CountAggregatorFactory} named "rows"; and a
 * {@link UniformGranularitySpec} created with {@code Granularities.HOUR}, {@code Granularities.NONE}
 * and an empty list. The final constructor argument of {@link DataSchema} is passed as {@code null}.
 *
 * @param dataSource datasource name handed to the {@link DataSchema} constructor
 * @return the newly constructed schema
 */
private static DataSchema getDataSchema(String dataSource) {
    final List<DimensionSchema> dimensionSchemas = new ArrayList<>();
    for (String dimensionName : new String[] { "dim1", "dim2" }) {
        dimensionSchemas.add(StringDimensionSchema.create(dimensionName));
    }

    // Name each constructor argument so the long positional call below stays readable.
    final TimestampSpec timestampSpec = new TimestampSpec("timestamp", "iso", null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(dimensionSchemas);
    final AggregatorFactory[] aggregators = { new CountAggregatorFactory("rows") };
    final UniformGranularitySpec granularitySpec =
        new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, ImmutableList.of());

    return new DataSchema(dataSource, timestampSpec, dimensionsSpec, aggregators, granularitySpec, null);
}

Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) ArrayList(java.util.ArrayList) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema)

Example 72 with UniformGranularitySpec

use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

the class GranularityPathSpecTest method testAddInputPath.

/**
 * Checks the input paths that {@code granularityPathSpec.addInputPaths} records on a Hadoop job.
 *
 * <p>The test creates three hour directories ("H=00", "H=02", "H=05") under
 * {@code test/y=2015/m=11/d=06} in the temporary folder, places four files in them, and then asserts
 * that the job configuration value for "mapreduce.input.multipleinputs.dir.formats" is the
 * comma-joined list of those four files, each formatted with the TextInputFormat suffix.
 */
@Test
public void testAddInputPath() throws Exception {
    UserGroupInformation.setLoginUser(UserGroupInformation.createUserForTesting("test", new String[] { "testGroup" }));

    // Assemble the ingestion spec piece by piece instead of one nested expression.
    final UniformGranularitySpec uniformSpec = new UniformGranularitySpec(
        Granularities.DAY,
        Granularities.MINUTE,
        ImmutableList.of(Intervals.of("2015-11-06T00:00Z/2015-11-07T00:00Z")));
    final DataSchema schema = new DataSchema("foo", null, new AggregatorFactory[0], uniformSpec, null, jsonMapper);
    final HadoopIOConfig ioConfig = new HadoopIOConfig(null, null, null);
    final HadoopIngestionSpec spec = new HadoopIngestionSpec(schema, ioConfig, DEFAULT_TUNING_CONFIG);

    granularityPathSpec.setDataGranularity(Granularities.HOUR);
    granularityPathSpec.setFilePattern(".*");
    granularityPathSpec.setInputFormat(TextInputFormat.class);

    final Job job = Job.getInstance();
    final String formatStr = "file:%s/%s;org.apache.hadoop.mapreduce.lib.input.TextInputFormat";

    // Directories first, then the files that live inside them.
    for (String hourDir : new String[] { "H=00", "H=02", "H=05" }) {
        testFolder.newFolder("test", "y=2015", "m=11", "d=06", hourDir);
    }
    final String[] relativeFiles = {
        "test/y=2015/m=11/d=06/H=00/file1",
        "test/y=2015/m=11/d=06/H=02/file2",
        "test/y=2015/m=11/d=06/H=05/file3",
        "test/y=2015/m=11/d=06/H=05/file4"
    };
    for (String relativeFile : relativeFiles) {
        testFolder.newFile(relativeFile);
    }

    granularityPathSpec.setInputPath(testFolder.getRoot().getPath() + "/test");
    granularityPathSpec.addInputPaths(HadoopDruidIndexerConfig.fromSpec(spec), job);

    final String actual = job.getConfiguration().get("mapreduce.input.multipleinputs.dir.formats");

    // One expected entry per created file, in creation order.
    final String[] expectedEntries = new String[relativeFiles.length];
    for (int i = 0; i < relativeFiles.length; i++) {
        expectedEntries[i] = StringUtils.format(formatStr, testFolder.getRoot(), relativeFiles[i]);
    }
    final String expected = Joiner.on(",").join(Lists.newArrayList(expectedEntries));

    Assert.assertEquals("Did not find expected input paths", expected, actual);
}

Also used : HadoopIngestionSpec(org.apache.druid.indexer.HadoopIngestionSpec) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Job(org.apache.hadoop.mapreduce.Job) HadoopIOConfig(org.apache.druid.indexer.HadoopIOConfig) Test(org.junit.Test)

Example 73 with UniformGranularitySpec

use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

the class CompactionTaskParallelRunTest method testRunCompactionWithNewMetricsShouldStoreInState.

/**
 * Runs a compaction task configured with an explicit metrics spec (a count named "cnt" and a long sum
 * named "val" over field "val") and verifies the outcome.
 *
 * <p>Three compacted segments are expected. For each one the test asserts the shard spec class
 * (chosen by {@code lockGranularity}) and that {@code getLastCompactionState()} equals a
 * {@link CompactionState} built from the expected partitions spec, dimensions, metric maps,
 * transform spec, index spec and granularity spec.
 */
@Test
public void testRunCompactionWithNewMetricsShouldStoreInState() throws Exception {
    runIndexTask(null, true);

    final Builder taskBuilder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionIntervalSpec inputSpec = new CompactionIntervalSpec(INTERVAL_TO_INDEX, null);
    final AggregatorFactory[] newMetrics = {
        new CountAggregatorFactory("cnt"),
        new LongSumAggregatorFactory("val", "val")
    };
    final CompactionTask compactionTask = taskBuilder
        .inputSpec(inputSpec)
        .tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING)
        .metricsSpec(newMetrics)
        .build();

    final Set<DataSegment> compactedSegments = runTask(compactionTask);
    Assert.assertEquals(3, compactedSegments.size());

    // The expected metric maps do not depend on the segment, so build them once up front.
    final Map<String, String> countMetricAsMap = new HashMap<>();
    countMetricAsMap.put("type", "count");
    countMetricAsMap.put("name", "cnt");

    final Map<String, String> longSumMetricAsMap = new HashMap<>();
    longSumMetricAsMap.put("type", "longSum");
    longSumMetricAsMap.put("name", "val");
    longSumMetricAsMap.put("fieldName", "val");
    longSumMetricAsMap.put("expression", null);

    final Class<?> expectedShardSpecClass;
    if (lockGranularity == LockGranularity.TIME_CHUNK) {
        expectedShardSpecClass = NumberedShardSpec.class;
    } else {
        expectedShardSpecClass = NumberedOverwriteShardSpec.class;
    }

    for (DataSegment compacted : compactedSegments) {
        Assert.assertSame(expectedShardSpecClass, compacted.getShardSpec().getClass());

        // Round-trip the specs through JSON so they can be compared as plain maps.
        final String transformSpecJson = getObjectMapper().writeValueAsString(compactionTask.getTransformSpec());
        final UniformGranularitySpec segmentGranularitySpec = new UniformGranularitySpec(
            Granularities.HOUR,
            Granularities.MINUTE,
            true,
            ImmutableList.of(compacted.getInterval()));
        final String granularitySpecJson = getObjectMapper().writeValueAsString(segmentGranularitySpec);

        final CompactionState expectedState = new CompactionState(
            new DynamicPartitionsSpec(null, Long.MAX_VALUE),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
            ImmutableList.of(countMetricAsMap, longSumMetricAsMap),
            getObjectMapper().readValue(transformSpecJson, Map.class),
            compactionTask.getTuningConfig().getIndexSpec().asMap(getObjectMapper()),
            getObjectMapper().readValue(granularitySpecJson, Map.class));

        Assert.assertEquals(expectedState, compacted.getLastCompactionState());
    }
}

Also used : HashMap(java.util.HashMap) Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DataSegment(org.apache.druid.timeline.DataSegment) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) CompactionState(org.apache.druid.timeline.CompactionState) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) AbstractParallelIndexSupervisorTaskTest(org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest) Test(org.junit.Test)

Example 74 with UniformGranularitySpec

use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

the class CompactionTaskParallelRunTest method testRunCompactionWithFilterShouldStoreInState.

/**
 * Runs a compaction task whose transform spec carries a {@link SelectorDimFilter} on ("dim", "a")
 * and verifies the outcome.
 *
 * <p>Three compacted segments are expected. For each one the test asserts the shard spec class
 * (chosen by {@code lockGranularity}) and that {@code getLastCompactionState()} equals a
 * {@link CompactionState} whose metrics list holds only the long-sum metric map and whose transform
 * spec is the JSON round-trip of the task's transform spec.
 */
@Test
public void testRunCompactionWithFilterShouldStoreInState() throws Exception {
    runIndexTask(null, true);

    final Builder taskBuilder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionIntervalSpec inputSpec = new CompactionIntervalSpec(INTERVAL_TO_INDEX, null);
    final ClientCompactionTaskTransformSpec filterTransform =
        new ClientCompactionTaskTransformSpec(new SelectorDimFilter("dim", "a", null));
    final CompactionTask compactionTask = taskBuilder
        .inputSpec(inputSpec)
        .tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING)
        .transformSpec(filterTransform)
        .build();

    final Set<DataSegment> compactedSegments = runTask(compactionTask);
    Assert.assertEquals(3, compactedSegments.size());

    // The expected metric map is the same for every segment, so build it once.
    final Map<String, String> longSumMetricAsMap = new HashMap<>();
    longSumMetricAsMap.put("type", "longSum");
    longSumMetricAsMap.put("name", "val");
    longSumMetricAsMap.put("fieldName", "val");
    longSumMetricAsMap.put("expression", null);

    final Class<?> expectedShardSpecClass;
    if (lockGranularity == LockGranularity.TIME_CHUNK) {
        expectedShardSpecClass = NumberedShardSpec.class;
    } else {
        expectedShardSpecClass = NumberedOverwriteShardSpec.class;
    }

    for (DataSegment compacted : compactedSegments) {
        Assert.assertSame(expectedShardSpecClass, compacted.getShardSpec().getClass());

        // Round-trip the specs through JSON so they can be compared as plain maps.
        final String transformSpecJson = getObjectMapper().writeValueAsString(compactionTask.getTransformSpec());
        final UniformGranularitySpec segmentGranularitySpec = new UniformGranularitySpec(
            Granularities.HOUR,
            Granularities.MINUTE,
            true,
            ImmutableList.of(compacted.getInterval()));
        final String granularitySpecJson = getObjectMapper().writeValueAsString(segmentGranularitySpec);

        final CompactionState expectedState = new CompactionState(
            new DynamicPartitionsSpec(null, Long.MAX_VALUE),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
            ImmutableList.of(longSumMetricAsMap),
            getObjectMapper().readValue(transformSpecJson, Map.class),
            compactionTask.getTuningConfig().getIndexSpec().asMap(getObjectMapper()),
            getObjectMapper().readValue(granularitySpecJson, Map.class));

        Assert.assertEquals(expectedState, compacted.getLastCompactionState());
    }
}

Also used : HashMap(java.util.HashMap) Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) DataSegment(org.apache.druid.timeline.DataSegment) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) CompactionState(org.apache.druid.timeline.CompactionState) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) AbstractParallelIndexSupervisorTaskTest(org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest) Test(org.junit.Test)

Example 75 with UniformGranularitySpec

use of org.apache.druid.segment.indexing.granularity.UniformGranularitySpec in project druid by druid-io.

the class CompactionTaskParallelRunTest method testRunParallelWithMultiDimensionRangePartitioning.

/**
 * Runs a compaction task tuned with a {@link DimensionRangePartitionsSpec} over "dim1" and "dim2"
 * and verifies every compacted segment.
 *
 * <p>The test is skipped when {@code lockGranularity} is {@code LockGranularity.SEGMENT}. For each
 * resulting segment it asserts that the shard spec class is {@link DimensionRangeShardSpec} and that
 * {@code getLastCompactionState()} equals a {@link CompactionState} built with an equivalent
 * partitions spec, a {@code null} transform spec, and the expected dimensions, metrics, index spec
 * and granularity spec.
 */
@Test
public void testRunParallelWithMultiDimensionRangePartitioning() throws Exception {
    // Segment locking cannot be combined with range partitioning yet, so skip that configuration.
    Assume.assumeFalse(lockGranularity == LockGranularity.SEGMENT);
    runIndexTask(null, true);

    final Builder taskBuilder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionIntervalSpec inputSpec = new CompactionIntervalSpec(INTERVAL_TO_INDEX, null);
    final CompactionTask compactionTask = taskBuilder
        .inputSpec(inputSpec)
        .tuningConfig(newTuningConfig(new DimensionRangePartitionsSpec(7, null, Arrays.asList("dim1", "dim2"), false), 2, true))
        .build();

    final Set<DataSegment> compactedSegments = runTask(compactionTask);

    // The compaction state is expected to be present because storing it is the default.
    // The metric map is identical for every segment, so it is built once here.
    final Map<String, String> longSumMetricAsMap = new HashMap<>();
    longSumMetricAsMap.put("type", "longSum");
    longSumMetricAsMap.put("name", "val");
    longSumMetricAsMap.put("fieldName", "val");
    longSumMetricAsMap.put("expression", null);

    for (DataSegment compacted : compactedSegments) {
        Assert.assertSame(DimensionRangeShardSpec.class, compacted.getShardSpec().getClass());

        final DimensionRangePartitionsSpec expectedPartitionsSpec =
            new DimensionRangePartitionsSpec(7, null, Arrays.asList("dim1", "dim2"), false);

        // Round-trip the granularity spec through JSON so it can be compared as a plain map.
        final UniformGranularitySpec segmentGranularitySpec = new UniformGranularitySpec(
            Granularities.HOUR,
            Granularities.MINUTE,
            true,
            ImmutableList.of(compacted.getInterval()));
        final String granularitySpecJson = getObjectMapper().writeValueAsString(segmentGranularitySpec);

        final CompactionState expectedState = new CompactionState(
            expectedPartitionsSpec,
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
            ImmutableList.of(longSumMetricAsMap),
            null,
            compactionTask.getTuningConfig().getIndexSpec().asMap(getObjectMapper()),
            getObjectMapper().readValue(granularitySpecJson, Map.class));

        Assert.assertEquals(expectedState, compacted.getLastCompactionState());
    }
}

Also used : HashMap(java.util.HashMap) Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) DimensionRangePartitionsSpec(org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec) DataSegment(org.apache.druid.timeline.DataSegment) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) CompactionState(org.apache.druid.timeline.CompactionState) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) AbstractParallelIndexSupervisorTaskTest(org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest) Test(org.junit.Test)

Aggregations

- UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 91
- Test (org.junit.Test): 60
- DataSchema (org.apache.druid.segment.indexing.DataSchema): 49
- DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 36
- TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec): 35
- DataSegment (org.apache.druid.timeline.DataSegment): 33
- File (java.io.File): 25
- LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory): 24
- AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 22
- Map (java.util.Map): 20
- Interval (org.joda.time.Interval): 18
- CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 17
- ArrayList (java.util.ArrayList): 15
- HashMap (java.util.HashMap): 14
- InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 14
- ImmutableMap (com.google.common.collect.ImmutableMap): 12
- Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder): 12
- GranularitySpec (org.apache.druid.segment.indexing.granularity.GranularitySpec): 12
- CompactionState (org.apache.druid.timeline.CompactionState): 12
- HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec): 12