Use of org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec in project druid by druid-io.
The class CompactSegmentsTest, method testRunWithLockedIntervalsNoSkip.
@Test
public void testRunWithLockedIntervalsNoSkip()
{
  Mockito.when(COORDINATOR_CONFIG.getCompactionSkipLockedIntervals()).thenReturn(false);

  final TestDruidLeaderClient leaderClient = new TestDruidLeaderClient(JSON_MAPPER);
  leaderClient.start();
  HttpIndexingServiceClient indexingServiceClient = new HttpIndexingServiceClient(JSON_MAPPER, leaderClient);

  // Lock all intervals for all the dataSources
  final String datasource0 = DATA_SOURCE_PREFIX + 0;
  leaderClient.lockedIntervals
              .computeIfAbsent(datasource0, k -> new ArrayList<>())
              .add(Intervals.of("2017/2018"));

  final String datasource1 = DATA_SOURCE_PREFIX + 1;
  leaderClient.lockedIntervals
              .computeIfAbsent(datasource1, k -> new ArrayList<>())
              .add(Intervals.of("2017/2018"));

  final String datasource2 = DATA_SOURCE_PREFIX + 2;
  leaderClient.lockedIntervals
              .computeIfAbsent(datasource2, k -> new ArrayList<>())
              .add(Intervals.of("2017/2018"));

  // Verify that no locked intervals are skipped
  CompactSegments compactSegments = new CompactSegments(COORDINATOR_CONFIG, JSON_MAPPER, indexingServiceClient);

  int maxTaskSlots = partitionsSpec instanceof SingleDimensionPartitionsSpec ? 5 : 3;
  final CoordinatorStats stats = doCompactSegments(compactSegments, createCompactionConfigs(1), maxTaskSlots);
  Assert.assertEquals(3, stats.getGlobalStat(CompactSegments.COMPACTION_TASK_COUNT));
  Assert.assertEquals(3, leaderClient.submittedCompactionTasks.size());
  leaderClient.submittedCompactionTasks.forEach(task -> {
    System.out.println(task.getDataSource() + " : " + task.getIoConfig().getInputSpec().getInterval());
  });

  // Verify that tasks are submitted for the latest interval of each dataSource
  final Map<String, Interval> datasourceToInterval = new HashMap<>();
  leaderClient.submittedCompactionTasks.forEach(
      task -> datasourceToInterval.put(task.getDataSource(), task.getIoConfig().getInputSpec().getInterval())
  );
  Assert.assertEquals(Intervals.of("2017-01-09T00:00:00Z/2017-01-09T12:00:00Z"), datasourceToInterval.get(datasource0));
  Assert.assertEquals(Intervals.of("2017-01-09T00:00:00Z/2017-01-09T12:00:00Z"), datasourceToInterval.get(datasource1));
  Assert.assertEquals(Intervals.of("2017-01-09T00:00:00Z/2017-01-09T12:00:00Z"), datasourceToInterval.get(datasource2));
}
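The partitionsSpec referenced in the maxTaskSlots line above is supplied by the test class's parameterization and is not constructed in this method. As a minimal sketch (not taken from the Druid test itself), a range spec of that kind can be built with the same four-argument constructor that appears in the other snippets on this page; the argument labels in the comments are my reading of those calls, not documentation from the Druid source.

// Minimal sketch, assuming the four-argument constructor used elsewhere on this page.
// Argument labels are illustrative, not copied from the Druid source.
SingleDimensionPartitionsSpec rangeSpec = new SingleDimensionPartitionsSpec(
    5000,   // target rows per segment
    null,   // max rows per segment (null lets the spec derive a default)
    "dim",  // dimension to range-partition on
    false   // assumeGrouped: input is not assumed to be pre-grouped
);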
Use of org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec in project druid by druid-io.
The class HadoopDruidDetermineConfigurationJobTest, method testRunWithSingleDimensionPartitionsSpecCreateHashBasedNumberedShardSpecWithoutHashPartitionFunction.
@Test
public void testRunWithSingleDimensionPartitionsSpecCreateHashBasedNumberedShardSpecWithoutHashPartitionFunction()
{
  final Set<Interval> intervals = ImmutableSet.of(
      Intervals.of("2020-01-01/P1D"),
      Intervals.of("2020-01-02/P1D"),
      Intervals.of("2020-01-03/P1D")
  );
  final SingleDimensionPartitionsSpec partitionsSpec = new SingleDimensionPartitionsSpec(1000, null, "dim", false);
  final HadoopDruidIndexerConfig config = Mockito.mock(HadoopDruidIndexerConfig.class);
  Mockito.when(config.isDeterminingPartitions()).thenReturn(false);
  Mockito.when(config.getPartitionsSpec()).thenReturn(partitionsSpec);
  Mockito.when(config.getSegmentGranularIntervals()).thenReturn(intervals);
  final ArgumentCaptor<Map<Long, List<HadoopyShardSpec>>> resultCaptor = ArgumentCaptor.forClass(Map.class);
  Mockito.doNothing().when(config).setShardSpecs(resultCaptor.capture());

  final HadoopDruidDetermineConfigurationJob job = new HadoopDruidDetermineConfigurationJob(config);
  Assert.assertTrue(job.run());

  final Map<Long, List<HadoopyShardSpec>> shardSpecs = resultCaptor.getValue();
  Assert.assertEquals(3, shardSpecs.size());
  for (Interval interval : intervals) {
    final List<HadoopyShardSpec> shardSpecsPerInterval = shardSpecs.get(interval.getStartMillis());
    Assert.assertEquals(1, shardSpecsPerInterval.size());
    Assert.assertEquals(
        new HashBasedNumberedShardSpec(
            0,
            shardSpecsPerInterval.size(),
            0,
            shardSpecsPerInterval.size(),
            ImmutableList.of("dim"),
            null,
            new ObjectMapper()
        ),
        shardSpecsPerInterval.get(0).getActualSpec()
    );
  }
}
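For readability, the expected value in the assertion above can be written with each constructor argument labeled. The labels are my interpretation of that call, not documentation from the Druid source; the point of the test is that the hash partition function argument stays null.

// Sketch only: the same expected spec as in the assertion above, with arguments labeled (my reading of the call).
HashBasedNumberedShardSpec expected = new HashBasedNumberedShardSpec(
    0,                            // partition id within the interval
    shardSpecsPerInterval.size(), // total partitions (1 in this test)
    0,                            // bucket id
    shardSpecsPerInterval.size(), // number of buckets (1 in this test)
    ImmutableList.of("dim"),      // partition dimensions carried over from the SingleDimensionPartitionsSpec
    null,                         // hash partition function left unset, as the test name indicates
    new ObjectMapper()
);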
Use of org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec in project druid by druid-io.
The class DeterminePartitionsJob, method run.
@Override
public boolean run()
{
  try {
    if (!(config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)) {
      throw new ISE(
          "DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]",
          config.getPartitionsSpec()
      );
    }
    final SingleDimensionPartitionsSpec partitionsSpec = (SingleDimensionPartitionsSpec) config.getPartitionsSpec();

    if (!partitionsSpec.isAssumeGrouped()) {
      groupByJob = Job.getInstance(
          new Configuration(),
          StringUtils.format("%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals())
      );

      JobHelper.injectSystemProperties(groupByJob.getConfiguration(), config);
      config.addJobProperties(groupByJob);

      groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
      groupByJob.setMapOutputKeyClass(BytesWritable.class);
      groupByJob.setMapOutputValueClass(NullWritable.class);
      groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
      groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
      groupByJob.setOutputKeyClass(BytesWritable.class);
      groupByJob.setOutputValueClass(NullWritable.class);
      groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
      JobHelper.setupClasspath(
          JobHelper.distributedClassPath(config.getWorkingPath()),
          JobHelper.distributedClassPath(config.makeIntermediatePath()),
          groupByJob
      );

      config.addInputPaths(groupByJob);
      config.intoConfiguration(groupByJob);
      FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

      groupByJob.submit();
      log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(), groupByJob.getTrackingURL());

      // Store the jobId in the file
      if (groupByJob.getJobID() != null) {
        JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), groupByJob.getJobID().toString());
      }

      try {
        if (!groupByJob.waitForCompletion(true)) {
          log.error("Job failed: %s", groupByJob.getJobID());
          failureCause = Utils.getFailureMessage(groupByJob, HadoopDruidIndexerConfig.JSON_MAPPER);
          return false;
        }
      }
      catch (IOException ioe) {
        if (!Utils.checkAppSuccessForJobIOException(ioe, groupByJob, config.isUseYarnRMJobStatusFallback())) {
          throw ioe;
        }
      }
    } else {
      log.info("Skipping group-by job.");
    }

    /*
     * Read grouped data and determine appropriate partitions.
     */
    final Job dimSelectionJob = Job.getInstance(
        new Configuration(),
        StringUtils.format("%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals())
    );

    dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

    JobHelper.injectSystemProperties(dimSelectionJob.getConfiguration(), config);
    config.addJobProperties(dimSelectionJob);

    if (!partitionsSpec.isAssumeGrouped()) {
      // Read grouped data from the groupByJob.
      dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
      dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
      FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
    } else {
      // Directly read the source data, since we assume it's already grouped.
      dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
      config.addInputPaths(dimSelectionJob);
    }

    SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob, DeterminePartitionsDimSelectionPartitioner.class);
    dimSelectionJob.setMapOutputValueClass(Text.class);
    dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
    dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
    dimSelectionJob.setOutputKeyClass(BytesWritable.class);
    dimSelectionJob.setOutputValueClass(Text.class);
    dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
    dimSelectionJob.setNumReduceTasks(Iterators.size(config.getGranularitySpec().sortedBucketIntervals().iterator()));
    JobHelper.setupClasspath(
        JobHelper.distributedClassPath(config.getWorkingPath()),
        JobHelper.distributedClassPath(config.makeIntermediatePath()),
        dimSelectionJob
    );

    config.intoConfiguration(dimSelectionJob);
    FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

    dimSelectionJob.submit();
    log.info(
        "Job %s submitted, status available at: %s",
        dimSelectionJob.getJobName(),
        dimSelectionJob.getTrackingURL()
    );

    // Store the jobId in the file
    if (dimSelectionJob.getJobID() != null) {
      JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), dimSelectionJob.getJobID().toString());
    }

    try {
      if (!dimSelectionJob.waitForCompletion(true)) {
        log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
        failureCause = Utils.getFailureMessage(dimSelectionJob, HadoopDruidIndexerConfig.JSON_MAPPER);
        return false;
      }
    }
    catch (IOException ioe) {
      if (!Utils.checkAppSuccessForJobIOException(ioe, dimSelectionJob, config.isUseYarnRMJobStatusFallback())) {
        throw ioe;
      }
    }

    /*
     * Load partitions determined by the previous job.
     */
    log.info("Job completed, loading up partitions for intervals[%s].", config.getSegmentGranularIntervals());
    FileSystem fileSystem = null;
    Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>();
    int shardCount = 0;
    for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
      final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
      if (fileSystem == null) {
        fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
      }
      if (Utils.exists(dimSelectionJob, fileSystem, partitionInfoPath)) {
        List<ShardSpec> specs = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(
            Utils.openInputStream(dimSelectionJob, partitionInfoPath),
            new TypeReference<List<ShardSpec>>() {}
        );

        List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
        for (int i = 0; i < specs.size(); ++i) {
          actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
          log.info("DateTime[%s], partition[%d], spec[%s]", segmentGranularity, i, actualSpecs.get(i));
        }

        shardSpecs.put(segmentGranularity.getStartMillis(), actualSpecs);
      } else {
        log.info("Path[%s] didn't exist!?", partitionInfoPath);
      }
    }
    config.setShardSpecs(shardSpecs);

    return true;
  }
  catch (Exception e) {
    throw new RuntimeException(e);
  }
}
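The branching in run() hinges on isAssumeGrouped(): when it is false, an extra group-by MapReduce job deduplicates the rows before the dim-selection job; when it is true, the dim-selection job reads the source data directly. A rough sketch of how that flag is carried by the spec (the constructions here are illustrative, not lifted from DeterminePartitionsJob):

// Illustrative sketch: assumeGrouped is the last constructor argument.
SingleDimensionPartitionsSpec notGrouped = new SingleDimensionPartitionsSpec(1000, null, "dim", false);
SingleDimensionPartitionsSpec grouped = new SingleDimensionPartitionsSpec(1000, null, "dim", true);

// In run() above, the first spec would trigger the extra group-by job,
// while the second would skip straight to the dim-selection job.
assert !notGrouped.isAssumeGrouped();
assert grouped.isAssumeGrouped();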
Use of org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec in project druid by druid-io.
The class HadoopIngestionSpecTest, method testPartitionsSpecMaxPartitionSize.
@Test
public void testPartitionsSpecMaxPartitionSize()
{
  final HadoopIngestionSpec schema;
  try {
    schema = jsonReadWriteRead(
        "{\n"
        + "  \"tuningConfig\": {\n"
        + "    \"type\": \"hadoop\",\n"
        + "    \"partitionsSpec\": {\n"
        + "      \"type\": \"dimension\",\n"
        + "      \"targetPartitionSize\": 100,\n"
        + "      \"maxPartitionSize\" : null,\n"
        + "      \"partitionDimension\" : \"foo\"\n"
        + "    }\n"
        + "  }\n"
        + "}",
        HadoopIngestionSpec.class
    );
  }
  catch (Exception e) {
    throw new RuntimeException(e);
  }
  PartitionsSpec partitionsSpec = schema.getTuningConfig().getPartitionsSpec();
  Assert.assertTrue("partitionsSpec", partitionsSpec instanceof SingleDimensionPartitionsSpec);
  SingleDimensionPartitionsSpec singleDimensionPartitionsSpec = (SingleDimensionPartitionsSpec) partitionsSpec;
  Assert.assertTrue("isDeterminingPartitions", singleDimensionPartitionsSpec.needsDeterminePartitions(true));
  Assert.assertEquals("getTargetPartitionSize", 100, singleDimensionPartitionsSpec.getTargetRowsPerSegment().intValue());
  Assert.assertEquals("getMaxPartitionSize", 150, singleDimensionPartitionsSpec.getMaxRowsPerSegment().intValue());
  Assert.assertEquals("getPartitionDimension", "foo", singleDimensionPartitionsSpec.getPartitionDimension());
}
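The notable assertion here is getMaxPartitionSize: the JSON sets maxPartitionSize to null, yet the resolved getMaxRowsPerSegment() is 150, i.e. 150% of the target of 100. A small hedged sketch of the same behavior built directly in Java; the 1.5x default is inferred from this test's assertion rather than from the spec's documentation.

// Sketch: with no explicit max, the resolved max appears to be 150% of the target,
// matching the 100 -> 150 assertion in the test above.
SingleDimensionPartitionsSpec spec = new SingleDimensionPartitionsSpec(100, null, "foo", false);
System.out.println(spec.getTargetRowsPerSegment()); // 100
System.out.println(spec.getMaxRowsPerSegment());    // expected 150, per the assertion above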
Use of org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec in project druid by druid-io.
The class CompactionTaskParallelRunTest, method testCompactRangeAndDynamicPartitionedSegments.
@Test
public void testCompactRangeAndDynamicPartitionedSegments()
{
  runIndexTask(new SingleDimensionPartitionsSpec(2, null, "dim", false), false);
  runIndexTask(null, true);

  final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
  final CompactionTask compactionTask = builder
      .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null))
      .tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING)
      .build();

  final Map<Interval, List<DataSegment>> intervalToSegments = SegmentUtils.groupSegmentsByInterval(runTask(compactionTask));
  Assert.assertEquals(3, intervalToSegments.size());
  Assert.assertEquals(
      ImmutableSet.of(
          Intervals.of("2014-01-01T00/PT1H"),
          Intervals.of("2014-01-01T01/PT1H"),
          Intervals.of("2014-01-01T02/PT1H")
      ),
      intervalToSegments.keySet()
  );

  for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
    final List<DataSegment> segmentsInInterval = entry.getValue();
    Assert.assertEquals(1, segmentsInInterval.size());
    final ShardSpec shardSpec = segmentsInInterval.get(0).getShardSpec();
    if (lockGranularity == LockGranularity.TIME_CHUNK) {
      Assert.assertSame(NumberedShardSpec.class, shardSpec.getClass());
      final NumberedShardSpec numberedShardSpec = (NumberedShardSpec) shardSpec;
      Assert.assertEquals(0, numberedShardSpec.getPartitionNum());
      Assert.assertEquals(1, numberedShardSpec.getNumCorePartitions());
    } else {
      Assert.assertSame(NumberedOverwriteShardSpec.class, shardSpec.getClass());
      final NumberedOverwriteShardSpec numberedShardSpec = (NumberedOverwriteShardSpec) shardSpec;
      Assert.assertEquals(PartitionIds.NON_ROOT_GEN_START_PARTITION_ID, numberedShardSpec.getPartitionNum());
      Assert.assertEquals(1, numberedShardSpec.getAtomicUpdateGroupSize());
    }
  }
}
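The else branch above is what segment locking produces: overwrite shard specs whose partition ids start at PartitionIds.NON_ROOT_GEN_START_PARTITION_ID. A small helper sketch (my own, not part of the Druid test) that mirrors that branching can make the intent explicit:

// Sketch only: classifies a shard spec the way the assertions above do.
// Time-chunk locking yields plain NumberedShardSpecs; segment locking yields
// NumberedOverwriteShardSpecs in the non-root-generation partition id range.
private static boolean isOverwriteShardSpec(ShardSpec shardSpec)
{
  return shardSpec instanceof NumberedOverwriteShardSpec
         && ((NumberedOverwriteShardSpec) shardSpec).getPartitionNum() >= PartitionIds.NON_ROOT_GEN_START_PARTITION_ID;
}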