Example 71 with IndexSpec

Use of org.apache.druid.segment.IndexSpec in project druid by druid-io.

From class KafkaSupervisorTuningConfigTest, method testSerdeWithDefaults:

@Test
public void testSerdeWithDefaults() throws Exception {
    String jsonStr = "{\"type\": \"kafka\"}";
    KafkaSupervisorTuningConfig config = (KafkaSupervisorTuningConfig) mapper.readValue(mapper.writeValueAsString(mapper.readValue(jsonStr, TuningConfig.class)), TuningConfig.class);
    Assert.assertNotNull(config.getBasePersistDirectory());
    Assert.assertEquals(new OnheapIncrementalIndex.Spec(), config.getAppendableIndexSpec());
    Assert.assertEquals(1000000, config.getMaxRowsInMemory());
    Assert.assertEquals(5_000_000, config.getMaxRowsPerSegment().intValue());
    Assert.assertEquals(new Period("PT10M"), config.getIntermediatePersistPeriod());
    Assert.assertEquals(0, config.getMaxPendingPersists());
    Assert.assertEquals(new IndexSpec(), config.getIndexSpec());
    Assert.assertEquals(new IndexSpec(), config.getIndexSpecForIntermediatePersists());
    Assert.assertEquals(false, config.isReportParseExceptions());
    Assert.assertEquals(0, config.getHandoffConditionTimeout());
    Assert.assertNull(config.getWorkerThreads());
    Assert.assertNull(config.getChatThreads());
    Assert.assertEquals(8L, (long) config.getChatRetries());
    Assert.assertEquals(Duration.standardSeconds(10), config.getHttpTimeout());
    Assert.assertEquals(Duration.standardSeconds(80), config.getShutdownTimeout());
    Assert.assertEquals(Duration.standardSeconds(30), config.getOffsetFetchPeriod());
}
Also used : TuningConfig(org.apache.druid.segment.indexing.TuningConfig) IndexSpec(org.apache.druid.segment.IndexSpec) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) Period(org.joda.time.Period) Test(org.junit.Test)
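These default-value tests rely on a Jackson round trip: deserialize minimal JSON, serialize the result back to a string, and deserialize again, so both the reader and the writer are exercised in one pass. A minimal sketch of the idiom, assuming only jackson-databind on the classpath; TinyConfig is a hypothetical stand-in for KafkaSupervisorTuningConfig:

import com.fasterxml.jackson.databind.ObjectMapper;

public class RoundTripSketch {

    // Hypothetical config with a single defaulted field.
    static class TinyConfig {
        public int maxRowsInMemory = 1_000_000;
    }

    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();
        // Mirrors mapper.readValue(mapper.writeValueAsString(mapper.readValue(json, ...)), ...) above.
        TinyConfig first = mapper.readValue("{}", TinyConfig.class);
        TinyConfig roundTripped = mapper.readValue(mapper.writeValueAsString(first), TinyConfig.class);
        // The default must survive both serialization directions.
        if (roundTripped.maxRowsInMemory != 1_000_000) {
            throw new AssertionError("default lost in round trip");
        }
    }
}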

Example 72 with IndexSpec

Use of org.apache.druid.segment.IndexSpec in project druid by druid-io.

From class KafkaSupervisorTuningConfigTest, method testSerdeWithNonDefaults:

@Test
public void testSerdeWithNonDefaults() throws Exception {
    String jsonStr = "{\n" + "  \"type\": \"kafka\",\n" + "  \"basePersistDirectory\": \"/tmp/xxx\",\n" + "  \"maxRowsInMemory\": 100,\n" + "  \"maxRowsPerSegment\": 100,\n" + "  \"intermediatePersistPeriod\": \"PT1H\",\n" + "  \"maxPendingPersists\": 100,\n" + "  \"reportParseExceptions\": true,\n" + "  \"handoffConditionTimeout\": 100,\n" + "  \"workerThreads\": 12,\n" + "  \"chatThreads\": 13,\n" + "  \"chatRetries\": 14,\n" + "  \"httpTimeout\": \"PT15S\",\n" + "  \"shutdownTimeout\": \"PT95S\",\n" + "  \"offsetFetchPeriod\": \"PT20S\",\n" + "  \"indexSpec\": { \"metricCompression\" : \"NONE\" },\n" + "  \"indexSpecForIntermediatePersists\": { \"dimensionCompression\" : \"uncompressed\" },\n" + "  \"appendableIndexSpec\": { \"type\" : \"onheap\" }\n" + "}";
    KafkaSupervisorTuningConfig config = (KafkaSupervisorTuningConfig) mapper.readValue(mapper.writeValueAsString(mapper.readValue(jsonStr, TuningConfig.class)), TuningConfig.class);
    Assert.assertEquals(new File("/tmp/xxx"), config.getBasePersistDirectory());
    Assert.assertEquals(new OnheapIncrementalIndex.Spec(), config.getAppendableIndexSpec());
    Assert.assertEquals(100, config.getMaxRowsInMemory());
    Assert.assertEquals(100, config.getMaxRowsPerSegment().intValue());
    Assert.assertEquals(new Period("PT1H"), config.getIntermediatePersistPeriod());
    Assert.assertEquals(100, config.getMaxPendingPersists());
    Assert.assertEquals(true, config.isReportParseExceptions());
    Assert.assertEquals(100, config.getHandoffConditionTimeout());
    Assert.assertEquals(12, (int) config.getWorkerThreads());
    Assert.assertEquals(13, (int) config.getChatThreads());
    Assert.assertEquals(14L, (long) config.getChatRetries());
    Assert.assertEquals(Duration.standardSeconds(15), config.getHttpTimeout());
    Assert.assertEquals(Duration.standardSeconds(95), config.getShutdownTimeout());
    Assert.assertEquals(Duration.standardSeconds(20), config.getOffsetFetchPeriod());
    Assert.assertEquals(new IndexSpec(null, null, CompressionStrategy.NONE, null), config.getIndexSpec());
    Assert.assertEquals(new IndexSpec(null, CompressionStrategy.UNCOMPRESSED, null, null), config.getIndexSpecForIntermediatePersists());
}
Also used : TuningConfig(org.apache.druid.segment.indexing.TuningConfig) IndexSpec(org.apache.druid.segment.IndexSpec) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) Period(org.joda.time.Period) File(java.io.File) Test(org.junit.Test)
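The two IndexSpec assertions show how a partial indexSpec object in JSON maps onto constructor slots: each omitted field arrives as null and falls back to its default. A short sketch of that mapping, assuming the same four-argument IndexSpec constructor (bitmap serde, dimension compression, metric compression, long encoding) used in the assertions above:

import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.data.CompressionStrategy;

class IndexSpecMappingSketch {
    // {"indexSpec": {"metricCompression": "NONE"}} fills only the third slot.
    static final IndexSpec METRIC_NONE =
        new IndexSpec(null, null, CompressionStrategy.NONE, null);

    // {"indexSpecForIntermediatePersists": {"dimensionCompression": "uncompressed"}}
    // fills only the second slot.
    static final IndexSpec DIM_UNCOMPRESSED =
        new IndexSpec(null, CompressionStrategy.UNCOMPRESSED, null, null);
}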

Example 73 with IndexSpec

Use of org.apache.druid.segment.IndexSpec in project druid by druid-io.

From class CompactionTask, method runTask:

@Override
public TaskStatus runTask(TaskToolbox toolbox) throws Exception {
    final List<ParallelIndexIngestionSpec> ingestionSpecs = createIngestionSchema(toolbox, getTaskLockHelper().getLockGranularityToUse(), segmentProvider, partitionConfigurationManager, dimensionsSpec, transformSpec, metricsSpec, granularitySpec, toolbox.getCoordinatorClient(), segmentCacheManagerFactory, retryPolicyFactory, ioConfig.isDropExisting());
    final List<ParallelIndexSupervisorTask> indexTaskSpecs = IntStream.range(0, ingestionSpecs.size()).mapToObj(i -> {
        // The ID of SubtaskSpecs is used as the base sequenceName in segment allocation protocol.
        // The indexing tasks generated by the compaction task should use different sequenceNames
        // so that they can allocate valid segment IDs with no duplication.
        ParallelIndexIngestionSpec ingestionSpec = ingestionSpecs.get(i);
        final String baseSequenceName = createIndexTaskSpecId(i);
        return newTask(baseSequenceName, ingestionSpec);
    }).collect(Collectors.toList());
    if (indexTaskSpecs.isEmpty()) {
        String msg = StringUtils.format("Can't find segments from inputSpec[%s], nothing to do.", ioConfig.getInputSpec());
        log.warn(msg);
        return TaskStatus.failure(getId(), msg);
    } else {
        registerResourceCloserOnAbnormalExit(currentSubTaskHolder);
        final int totalNumSpecs = indexTaskSpecs.size();
        log.info("Generated [%d] compaction task specs", totalNumSpecs);
        int failCnt = 0;
        for (ParallelIndexSupervisorTask eachSpec : indexTaskSpecs) {
            final String json = toolbox.getJsonMapper().writerWithDefaultPrettyPrinter().writeValueAsString(eachSpec);
            if (!currentSubTaskHolder.setTask(eachSpec)) {
                String errMsg = "Task was asked to stop. Finish as failed.";
                log.info(errMsg);
                return TaskStatus.failure(getId(), errMsg);
            }
            try {
                if (eachSpec.isReady(toolbox.getTaskActionClient())) {
                    log.info("Running indexSpec: " + json);
                    final TaskStatus eachResult = eachSpec.run(toolbox);
                    if (!eachResult.isSuccess()) {
                        failCnt++;
                        log.warn("Failed to run indexSpec: [%s].\nTrying the next indexSpec.", json);
                    }
                } else {
                    failCnt++;
                    log.warn("indexSpec is not ready: [%s].\nTrying the next indexSpec.", json);
                }
            } catch (Exception e) {
                failCnt++;
                log.warn(e, "Failed to run indexSpec: [%s].\nTrying the next indexSpec.", json);
            }
        }
        String msg = StringUtils.format("Ran [%d] specs, [%d] succeeded, [%d] failed", totalNumSpecs, totalNumSpecs - failCnt, failCnt);
        log.info(msg);
        return failCnt == 0 ? TaskStatus.success(getId()) : TaskStatus.failure(getId(), msg);
    }
}
Also used : Verify(org.apache.curator.shaded.com.google.common.base.Verify) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) Comparators(org.apache.druid.java.util.common.guava.Comparators) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) IndexSpec(org.apache.druid.segment.IndexSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) JodaUtils(org.apache.druid.java.util.common.JodaUtils) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) Map(java.util.Map) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) AppenderatorsManager(org.apache.druid.segment.realtime.appenderator.AppenderatorsManager) IAE(org.apache.druid.java.util.common.IAE) MultiValueHandling(org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling) BiMap(com.google.common.collect.BiMap) JacksonInject(com.fasterxml.jackson.annotation.JacksonInject) Property(org.apache.druid.indexer.Property) RetryPolicyFactory(org.apache.druid.indexing.common.RetryPolicyFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) SplitHintSpec(org.apache.druid.data.input.SplitHintSpec) Segments(org.apache.druid.indexing.overlord.Segments) QueryableIndex(org.apache.druid.segment.QueryableIndex) StringUtils(org.apache.druid.java.util.common.StringUtils) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) LockGranularity(org.apache.druid.indexing.common.LockGranularity) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) List(java.util.List) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Logger(org.apache.druid.java.util.common.logger.Logger) IntStream(java.util.stream.IntStream) Granularity(org.apache.druid.java.util.common.granularity.Granularity) DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) Intervals(org.apache.druid.java.util.common.Intervals) Duration(org.joda.time.Duration) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) SegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory) HashMap(java.util.HashMap) ParallelIndexSupervisorTask(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexSupervisorTask) TaskStatus(org.apache.druid.indexer.TaskStatus) TuningConfig(org.apache.druid.segment.indexing.TuningConfig) ArrayList(java.util.ArrayList) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableList(com.google.common.collect.ImmutableList) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) 
CoordinatorClient(org.apache.druid.client.coordinator.CoordinatorClient) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore) CompactSegments(org.apache.druid.server.coordinator.duty.CompactSegments) DruidInputSource(org.apache.druid.indexing.input.DruidInputSource) Nonnull(javax.annotation.Nonnull) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) RE(org.apache.druid.java.util.common.RE) NonnullPair(org.apache.druid.java.util.common.NonnullPair) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) Include(com.fasterxml.jackson.annotation.JsonInclude.Include) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) ParallelIndexTuningConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig) IOException(java.io.IOException) ClientCompactionTaskQuery(org.apache.druid.client.indexing.ClientCompactionTaskQuery) File(java.io.File) HashBiMap(com.google.common.collect.HashBiMap) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) GranularityType(org.apache.druid.java.util.common.granularity.GranularityType) DimensionHandler(org.apache.druid.segment.DimensionHandler) TreeMap(java.util.TreeMap) Checks(org.apache.druid.indexer.Checks) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) JsonInclude(com.fasterxml.jackson.annotation.JsonInclude) AppendableIndexSpec(org.apache.druid.segment.incremental.AppendableIndexSpec) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) RetrieveUsedSegmentsAction(org.apache.druid.indexing.common.actions.RetrieveUsedSegmentsAction) ParallelIndexIOConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIOConfig) IndexIO(org.apache.druid.segment.IndexIO) DataSchema(org.apache.druid.segment.indexing.DataSchema) Collections(java.util.Collections)
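The loop in runTask deliberately keeps going after a failed or not-ready spec: failures are counted rather than rethrown, and the task reports overall success only when every sub-spec succeeded. A distilled sketch of that control flow, with runOne standing in (hypothetically) for the isReady/run pair:

import java.util.List;
import java.util.function.Predicate;

class SequentialRunSketch {
    // Returns true only if every spec ran successfully; failures are
    // counted and the loop moves on, mirroring failCnt in runTask above.
    static <T> boolean runAll(List<T> specs, Predicate<T> runOne) {
        int failCnt = 0;
        for (T spec : specs) {
            try {
                if (!runOne.test(spec)) {
                    failCnt++;
                }
            } catch (RuntimeException e) {
                failCnt++; // log and try the next spec
            }
        }
        return failCnt == 0;
    }
}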

Example 74 with IndexSpec

Use of org.apache.druid.segment.IndexSpec in project druid by druid-io.

From class CompactionTaskTest, method testCreateIngestionSchemaWithNumShards:

@Test
public void testCreateIngestionSchemaWithNumShards() throws IOException, SegmentLoadingException {
    final CompactionTask.CompactionTuningConfig tuningConfig = new CompactionTask.CompactionTuningConfig(null, null, null, 500000, 1000000L, null, null, null, null, new HashedPartitionsSpec(null, 3, null), new IndexSpec(new RoaringBitmapSerdeFactory(true), CompressionStrategy.LZ4, CompressionStrategy.LZF, LongEncodingStrategy.LONGS), null, null, true, false, 5000L, null, null, 10, null, null, null, null, null, null, null, null, null, null, null);
    final List<ParallelIndexIngestionSpec> ingestionSpecs = CompactionTask.createIngestionSchema(toolbox, LockGranularity.TIME_CHUNK, new SegmentProvider(DATA_SOURCE, new CompactionIntervalSpec(COMPACTION_INTERVAL, null)), new PartitionConfigurationManager(tuningConfig), null, null, null, null, COORDINATOR_CLIENT, segmentCacheManagerFactory, RETRY_POLICY_FACTORY, IOConfig.DEFAULT_DROP_EXISTING);
    final List<DimensionsSpec> expectedDimensionsSpec = getExpectedDimensionsSpecForAutoGeneration();
    ingestionSpecs.sort((s1, s2) -> Comparators.intervalsByStartThenEnd().compare(s1.getDataSchema().getGranularitySpec().inputIntervals().get(0), s2.getDataSchema().getGranularitySpec().inputIntervals().get(0)));
    Assert.assertEquals(6, ingestionSpecs.size());
    assertIngestionSchema(ingestionSpecs, expectedDimensionsSpec, AGGREGATORS.stream().map(AggregatorFactory::getCombiningFactory).collect(Collectors.toList()), SEGMENT_INTERVALS, tuningConfig, Granularities.MONTH, Granularities.NONE, IOConfig.DEFAULT_DROP_EXISTING);
}
Also used : HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) IndexSpec(org.apache.druid.segment.IndexSpec) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) SegmentProvider(org.apache.druid.indexing.common.task.CompactionTask.SegmentProvider) DoubleLastAggregatorFactory(org.apache.druid.query.aggregation.last.DoubleLastAggregatorFactory) FloatMinAggregatorFactory(org.apache.druid.query.aggregation.FloatMinAggregatorFactory) FloatFirstAggregatorFactory(org.apache.druid.query.aggregation.first.FloatFirstAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DoubleMaxAggregatorFactory(org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory) LongMaxAggregatorFactory(org.apache.druid.query.aggregation.LongMaxAggregatorFactory) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) PartitionConfigurationManager(org.apache.druid.indexing.common.task.CompactionTask.PartitionConfigurationManager) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test)
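For contrast with the default IndexSpec in the earlier examples, the tuning config here spells out every slot. A sketch naming each positional argument, assuming the same four-argument constructor and the nested LongEncodingStrategy in org.apache.druid.segment.data.CompressionFactory:

import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.data.CompressionFactory.LongEncodingStrategy;
import org.apache.druid.segment.data.CompressionStrategy;
import org.apache.druid.segment.data.RoaringBitmapSerdeFactory;

class FullIndexSpecSketch {
    static final IndexSpec FULLY_SPECIFIED = new IndexSpec(
        // bitmap serde; true enables run compression on serialization
        new RoaringBitmapSerdeFactory(true),
        // dimension column compression
        CompressionStrategy.LZ4,
        // metric column compression
        CompressionStrategy.LZF,
        // long column encoding
        LongEncodingStrategy.LONGS
    );
}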

Example 75 with IndexSpec

Use of org.apache.druid.segment.IndexSpec in project druid by druid-io.

From class TaskLifecycleTest, method setUp:

@Before
public void setUp() throws Exception {
    // mock things
    queryRunnerFactoryConglomerate = EasyMock.createStrictMock(QueryRunnerFactoryConglomerate.class);
    monitorScheduler = EasyMock.createStrictMock(MonitorScheduler.class);
    // initialize variables
    announcedSinks = 0;
    pushedSegments = 0;
    indexSpec = new IndexSpec();
    emitter = newMockEmitter();
    EmittingLogger.registerEmitter(emitter);
    mapper = TEST_UTILS.getTestObjectMapper();
    handOffCallbacks = new ConcurrentHashMap<>();
    // Set things up; the order matters: if it is wrong, setUp should fail
    // because of the Precondition checks in the respective setUp methods.
    // For creating a customized TaskQueue, see the testRealtimeIndexTaskFailure test.
    taskStorage = setUpTaskStorage();
    handoffNotifierFactory = setUpSegmentHandOffNotifierFactory();
    dataSegmentPusher = setUpDataSegmentPusher();
    mdc = setUpMetadataStorageCoordinator();
    tb = setUpTaskToolboxFactory(dataSegmentPusher, handoffNotifierFactory, mdc);
    taskRunner = setUpThreadPoolTaskRunner(tb);
    taskQueue = setUpTaskQueue(taskStorage, taskRunner);
}
Also used : QueryRunnerFactoryConglomerate(org.apache.druid.query.QueryRunnerFactoryConglomerate) IndexSpec(org.apache.druid.segment.IndexSpec) MonitorScheduler(org.apache.druid.java.util.metrics.MonitorScheduler) Before(org.junit.Before)
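The test uses createStrictMock rather than createMock: a strict mock verifies the order of expected calls as well as the calls themselves. A minimal sketch of the idiom, assuming EasyMock on the classpath; Service is a hypothetical interface:

import org.easymock.EasyMock;

class StrictMockSketch {
    // Hypothetical collaborator.
    interface Service {
        void start();
        void stop();
    }

    public static void main(String[] args) {
        Service service = EasyMock.createStrictMock(Service.class);
        // Record the expected calls; a strict mock also records their order.
        service.start();
        service.stop();
        EasyMock.replay(service);
        // Calling stop() before start() here would fail verification.
        service.start();
        service.stop();
        EasyMock.verify(service);
    }
}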

Aggregations

IndexSpec (org.apache.druid.segment.IndexSpec): 89 uses
Test (org.junit.Test): 59 uses
File (java.io.File): 29 uses
Period (org.joda.time.Period): 28 uses
RoaringBitmapSerdeFactory (org.apache.druid.segment.data.RoaringBitmapSerdeFactory): 25 uses
OnheapIncrementalIndex (org.apache.druid.segment.incremental.OnheapIncrementalIndex): 24 uses
ArrayList (java.util.ArrayList): 21 uses
Map (java.util.Map): 20 uses
DataSegment (org.apache.druid.timeline.DataSegment): 20 uses
DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec): 19 uses
IncrementalIndex (org.apache.druid.segment.incremental.IncrementalIndex): 19 uses
CompactionState (org.apache.druid.timeline.CompactionState): 16 uses
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 15 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 14 uses
PartitionsSpec (org.apache.druid.indexer.partitions.PartitionsSpec): 14 uses
QueryableIndex (org.apache.druid.segment.QueryableIndex): 14 uses
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 13 uses
InputRow (org.apache.druid.data.input.InputRow): 13 uses
Duration (org.joda.time.Duration): 13 uses
DefaultObjectMapper (org.apache.druid.jackson.DefaultObjectMapper): 10 uses