Example 31 with Granularity

use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

the class ITAutoCompactionTest method testAutoCompactionDutyWithSegmentGranularityAndSmallerSegmentGranularityCoveringMultipleSegmentsInTimelineAndDropExistingTrue.

@Test
public void testAutoCompactionDutyWithSegmentGranularityAndSmallerSegmentGranularityCoveringMultipleSegmentsInTimelineAndDropExistingTrue() throws Exception {
    loadData(INDEX_TASK);
    try (final Closeable ignored = unloader(fullDatasourceName)) {
        final List<String> intervalsBeforeCompaction = coordinator.getSegmentIntervals(fullDatasourceName);
        intervalsBeforeCompaction.sort(null);
        // 4 DAY segments across 2 days (2 segments per day)
        verifySegmentsCount(4);
        verifyQuery(INDEX_QUERIES_RESOURCE);
        Granularity newGranularity = Granularities.YEAR;
        // Set dropExisting to true
        submitCompactionConfig(MAX_ROWS_PER_SEGMENT_COMPACTED, NO_SKIP_OFFSET, new UserCompactionTaskGranularityConfig(newGranularity, null, null), true);
        List<String> expectedIntervalAfterCompaction = new ArrayList<>();
        // We will have one segment with interval of 2013-01-01/2014-01-01 (compacted with YEAR)
        for (String interval : intervalsBeforeCompaction) {
            for (Interval newInterval : newGranularity.getIterable(new Interval(interval, ISOChronology.getInstanceUTC()))) {
                expectedIntervalAfterCompaction.add(newInterval.toString());
            }
        }
        forceTriggerAutoCompaction(1);
        verifyQuery(INDEX_QUERIES_RESOURCE);
        verifySegmentsCompacted(1, MAX_ROWS_PER_SEGMENT_COMPACTED);
        checkCompactionIntervals(expectedIntervalAfterCompaction);
        loadData(INDEX_TASK);
        verifySegmentsCount(5);
        verifyQuery(INDEX_QUERIES_RESOURCE);
        // 5 segments. 1 compacted YEAR segment and 4 newly ingested DAY segments across 2 days
        // We will have one segment with interval of 2013-01-01/2014-01-01 (compacted with YEAR) from the compaction earlier
        // two segments with interval of 2013-08-31/2013-09-01 (newly ingested with DAY)
        // and two segments with interval of 2013-09-01/2013-09-02 (newly ingested with DAY)
        expectedIntervalAfterCompaction.addAll(intervalsBeforeCompaction);
        checkCompactionIntervals(expectedIntervalAfterCompaction);
        newGranularity = Granularities.MONTH;
        // Set dropExisting to true
        submitCompactionConfig(MAX_ROWS_PER_SEGMENT_COMPACTED, NO_SKIP_OFFSET, new UserCompactionTaskGranularityConfig(newGranularity, null, null), true);
        // Since dropExisting is set to true...
        // This will submit a single compaction task for the interval 2013-01-01/2014-01-01 with MONTH granularity,
        expectedIntervalAfterCompaction = new ArrayList<>();
        // yielding one segment with interval 2013-08-01/2013-09-01 and one with 2013-09-01/2013-10-01 (compacted with MONTH)
        for (String interval : intervalsBeforeCompaction) {
            for (Interval newInterval : Granularities.MONTH.getIterable(new Interval(interval, ISOChronology.getInstanceUTC()))) {
                expectedIntervalAfterCompaction.add(newInterval.toString());
            }
        }
        forceTriggerAutoCompaction(2);
        verifyQuery(INDEX_QUERIES_RESOURCE);
        verifySegmentsCompacted(2, MAX_ROWS_PER_SEGMENT_COMPACTED);
        checkCompactionIntervals(expectedIntervalAfterCompaction);
    }
}
Also used: Closeable(java.io.Closeable) ArrayList(java.util.ArrayList) UserCompactionTaskGranularityConfig(org.apache.druid.server.coordinator.UserCompactionTaskGranularityConfig) Granularity(org.apache.druid.java.util.common.granularity.Granularity) Interval(org.joda.time.Interval) Test(org.testng.annotations.Test) AbstractIndexerTest(org.apache.druid.tests.indexer.AbstractIndexerTest) AbstractITBatchIndexTest(org.apache.druid.tests.indexer.AbstractITBatchIndexTest)
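
The expected-interval bookkeeping in this test leans on Granularity.getIterable(...), which expands an arbitrary interval into that granularity's bucket intervals. A minimal standalone sketch of the expansion (the class name and intervals are illustrative; it assumes the Druid java-util and joda-time artifacts are on the classpath):

import org.apache.druid.java.util.common.granularity.Granularities;
import org.joda.time.Interval;
import org.joda.time.chrono.ISOChronology;

public class GranularityBucketDemo {
    public static void main(String[] args) {
        // A DAY-sized segment interval, like those produced by the initial ingestion above.
        Interval daySegment = new Interval("2013-08-31/2013-09-01", ISOChronology.getInstanceUTC());
        // YEAR widens the bucket: the single day maps to one year-long interval.
        for (Interval bucket : Granularities.YEAR.getIterable(daySegment)) {
            System.out.println(bucket); // 2013-01-01T00:00:00.000Z/2014-01-01T00:00:00.000Z
        }
        // MONTH maps the same day to a single month-long interval.
        for (Interval bucket : Granularities.MONTH.getIterable(daySegment)) {
            System.out.println(bucket); // 2013-08-01T00:00:00.000Z/2013-09-01T00:00:00.000Z
        }
    }
}

This is why compacting the four DAY segments with YEAR granularity collapses them into the single expected interval 2013-01-01/2014-01-01.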

Example 32 with Granularity

use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

the class GroupByQuery method computeUniversalTimestamp.

/**
 * Computes the timestamp that will be returned by {@link #getUniversalTimestamp()}.
 */
@Nullable
private DateTime computeUniversalTimestamp() {
    final String timestampStringFromContext = getContextValue(CTX_KEY_FUDGE_TIMESTAMP, "");
    final Granularity granularity = getGranularity();
    if (!timestampStringFromContext.isEmpty()) {
        return DateTimes.utc(Long.parseLong(timestampStringFromContext));
    } else if (Granularities.ALL.equals(granularity)) {
        final List<Interval> intervals = getIntervals();
        if (intervals.isEmpty()) {
            // null, the "universal timestamp" of nothing
            return null;
        }
        final DateTime timeStart = intervals.get(0).getStart();
        return granularity.getIterable(new Interval(timeStart, timeStart.plus(1))).iterator().next().getStart();
    } else {
        return null;
    }
}
Also used: List(java.util.List) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) ComparableList(org.apache.druid.segment.data.ComparableList) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) IntList(it.unimi.dsi.fastutil.ints.IntList) Granularity(org.apache.druid.java.util.common.granularity.Granularity) DateTime(org.joda.time.DateTime) Interval(org.joda.time.Interval) Nullable(javax.annotation.Nullable)
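
For Granularities.ALL, getIterable() yields the input interval back as a single bucket, so the "universal timestamp" reduces to the start of the first query interval. A minimal sketch of that branch (the class name and timestamp are illustrative):

import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.joda.time.DateTime;
import org.joda.time.Interval;

public class UniversalTimestampDemo {
    public static void main(String[] args) {
        // With ALL granularity, getIterable() returns the input interval as one bucket,
        // so the computed timestamp is simply the start of the first query interval.
        DateTime timeStart = DateTimes.of("2013-08-31T04:00:00Z");
        Interval oneMillis = new Interval(timeStart, timeStart.plus(1));
        DateTime universal = Granularities.ALL.getIterable(oneMillis).iterator().next().getStart();
        System.out.println(universal); // 2013-08-31T04:00:00.000Z
    }
}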

Example 33 with Granularity

use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

the class GranularityTest method testIsFinerComparator.

@Test
public void testIsFinerComparator() {
    Assert.assertTrue(Granularity.IS_FINER_THAN.compare(NONE, SECOND) < 0);
    Assert.assertTrue(Granularity.IS_FINER_THAN.compare(SECOND, NONE) > 0);
    Assert.assertTrue(Granularity.IS_FINER_THAN.compare(NONE, MINUTE) < 0);
    Assert.assertTrue(Granularity.IS_FINER_THAN.compare(MINUTE, NONE) > 0);
    Assert.assertTrue(Granularity.IS_FINER_THAN.compare(DAY, MONTH) < 0);
    Assert.assertTrue(Granularity.IS_FINER_THAN.compare(Granularities.YEAR, ALL) < 0);
    Assert.assertTrue(Granularity.IS_FINER_THAN.compare(Granularities.ALL, YEAR) > 0);
    // Distinct references are needed to avoid IntelliJ complaining about compare() being called on itself,
    // hence the local variables
    Granularity day = DAY;
    Granularity none = NONE;
    Granularity all = ALL;
    Assert.assertTrue(Granularity.IS_FINER_THAN.compare(DAY, day) == 0);
    Assert.assertTrue(Granularity.IS_FINER_THAN.compare(NONE, none) == 0);
    Assert.assertTrue(Granularity.IS_FINER_THAN.compare(ALL, all) == 0);
}
Also used: Granularity(org.apache.druid.java.util.common.granularity.Granularity) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) Test(org.junit.Test)
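
Since IS_FINER_THAN orders finer granularities before coarser ones, it also works directly as a sort comparator. A minimal sketch (the class name and the particular granularities are illustrative):

import java.util.Arrays;
import java.util.List;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;

public class FinestFirstDemo {
    public static void main(String[] args) {
        List<Granularity> granularities = Arrays.asList(
            Granularities.MONTH, Granularities.SECOND, Granularities.ALL, Granularities.DAY
        );
        // IS_FINER_THAN compares finer granularities as "less than" coarser ones,
        // so sorting yields finest to coarsest: SECOND, DAY, MONTH, ALL.
        granularities.sort(Granularity.IS_FINER_THAN);
        granularities.forEach(System.out::println);
    }
}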

Example 34 with Granularity

use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

the class IndexTask method collectIntervalsAndShardSpecs.

private Map<Interval, Optional<HyperLogLogCollector>> collectIntervalsAndShardSpecs(ObjectMapper jsonMapper, IndexIngestionSpec ingestionSchema, InputSource inputSource, File tmpDir, GranularitySpec granularitySpec, @Nonnull PartitionsSpec partitionsSpec, boolean determineIntervals) throws IOException {
    final Map<Interval, Optional<HyperLogLogCollector>> hllCollectors = new TreeMap<>(Comparators.intervalsByStartThenEnd());
    final Granularity queryGranularity = granularitySpec.getQueryGranularity();
    final Predicate<InputRow> rowFilter = inputRow -> {
        if (inputRow == null) {
            return false;
        }
        if (determineIntervals) {
            return true;
        }
        final Optional<Interval> optInterval = granularitySpec.bucketInterval(inputRow.getTimestamp());
        return optInterval.isPresent();
    };
    try (final CloseableIterator<InputRow> inputRowIterator = AbstractBatchIndexTask.inputSourceReader(tmpDir, ingestionSchema.getDataSchema(), inputSource, inputSource.needsFormat() ? getInputFormat(ingestionSchema) : null, rowFilter, determinePartitionsMeters, determinePartitionsParseExceptionHandler)) {
        while (inputRowIterator.hasNext()) {
            final InputRow inputRow = inputRowIterator.next();
            final Interval interval;
            if (determineIntervals) {
                interval = granularitySpec.getSegmentGranularity().bucket(inputRow.getTimestamp());
            } else {
                final Optional<Interval> optInterval = granularitySpec.bucketInterval(inputRow.getTimestamp());
                // this interval must exist since it passed the rowFilter
                assert optInterval.isPresent();
                interval = optInterval.get();
            }
            if (partitionsSpec.needsDeterminePartitions(false)) {
                hllCollectors.computeIfAbsent(interval, intv -> Optional.of(HyperLogLogCollector.makeLatestCollector()));
                List<Object> groupKey = Rows.toGroupKey(queryGranularity.bucketStart(inputRow.getTimestampFromEpoch()), inputRow);
                hllCollectors.get(interval).get().add(HASH_FUNCTION.hashBytes(jsonMapper.writeValueAsBytes(groupKey)).asBytes());
            } else {
                // we don't need to determine partitions but we still need to determine intervals, so add an Optional.absent()
                // for the interval and don't instantiate a HLL collector
                hllCollectors.putIfAbsent(interval, Optional.absent());
            }
            determinePartitionsMeters.incrementProcessed();
        }
    }
    // These metrics are reported in generateAndPublishSegments()
    if (determinePartitionsMeters.getThrownAway() > 0) {
        log.warn("Unable to find a matching interval for [%,d] events", determinePartitionsMeters.getThrownAway());
    }
    if (determinePartitionsMeters.getUnparseable() > 0) {
        log.warn("Unable to parse [%,d] events", determinePartitionsMeters.getUnparseable());
    }
    return hllCollectors;
}
Also used: TaskReport(org.apache.druid.indexing.common.TaskReport) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) BatchAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.BatchAppenderatorDriver) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) Comparators(org.apache.druid.java.util.common.guava.Comparators) Produces(javax.ws.rs.Produces) IndexSpec(org.apache.druid.segment.IndexSpec) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) IngestionState(org.apache.druid.indexer.IngestionState) CompletePartitionAnalysis(org.apache.druid.indexing.common.task.batch.partition.CompletePartitionAnalysis) MediaType(javax.ws.rs.core.MediaType) JodaUtils(org.apache.druid.java.util.common.JodaUtils) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) Optional(com.google.common.base.Optional) SegmentTransactionalInsertAction(org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction) FiniteFirehoseFactory(org.apache.druid.data.input.FiniteFirehoseFactory) Map(java.util.Map) IAE(org.apache.druid.java.util.common.IAE) LinearPartitionAnalysis(org.apache.druid.indexing.common.task.batch.partition.LinearPartitionAnalysis) Property(org.apache.druid.indexer.Property) InputSourceSampler(org.apache.druid.indexing.overlord.sampler.InputSourceSampler) InputFormat(org.apache.druid.data.input.InputFormat) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) TaskRealtimeMetricsMonitorBuilder(org.apache.druid.indexing.common.TaskRealtimeMetricsMonitorBuilder) InputRow(org.apache.druid.data.input.InputRow) BaseAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.BaseAppenderatorDriver) FirehoseFactoryToInputSourceAdaptor(org.apache.druid.data.input.FirehoseFactoryToInputSourceAdaptor) Granularity(org.apache.druid.java.util.common.granularity.Granularity) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) AppenderatorConfig(org.apache.druid.segment.realtime.appenderator.AppenderatorConfig) GET(javax.ws.rs.GET) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) Rows(org.apache.druid.data.input.Rows) SegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory) TaskStatus(org.apache.druid.indexer.TaskStatus) ArrayList(java.util.ArrayList) Interval(org.joda.time.Interval) HttpServletRequest(javax.servlet.http.HttpServletRequest) UOE(org.apache.druid.java.util.common.UOE) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) IndexMerger(org.apache.druid.segment.IndexMerger) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) Throwables(com.google.common.base.Throwables) Include(com.fasterxml.jackson.annotation.JsonInclude.Include) PartialHashSegmentGenerateTask(org.apache.druid.indexing.common.task.batch.parallel.PartialHashSegmentGenerateTask) IOException(java.io.IOException) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) TreeMap(java.util.TreeMap) AppendableIndexSpec(org.apache.druid.segment.incremental.AppendableIndexSpec) Preconditions(com.google.common.base.Preconditions) DataSchema(org.apache.druid.segment.indexing.DataSchema) ArbitraryGranularitySpec(org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec) AuthorizerMapper(org.apache.druid.server.security.AuthorizerMapper) Path(javax.ws.rs.Path) HashPartitionAnalysis(org.apache.druid.indexing.common.task.batch.partition.HashPartitionAnalysis) PartitionAnalysis(org.apache.druid.indexing.common.task.batch.partition.PartitionAnalysis) TimeoutException(java.util.concurrent.TimeoutException) MonotonicNonNull(org.checkerframework.checker.nullness.qual.MonotonicNonNull) ChatHandler(org.apache.druid.segment.realtime.firehose.ChatHandler) QueryParam(javax.ws.rs.QueryParam) DefaultIndexTaskInputRowIteratorBuilder(org.apache.druid.indexing.common.task.batch.parallel.iterator.DefaultIndexTaskInputRowIteratorBuilder) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) CloseableIterator(org.apache.druid.java.util.common.parsers.CloseableIterator) Context(javax.ws.rs.core.Context) Predicate(java.util.function.Predicate) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) StringUtils(org.apache.druid.java.util.common.StringUtils) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) Action(org.apache.druid.server.security.Action) IngestionSpec(org.apache.druid.segment.indexing.IngestionSpec) Objects(java.util.Objects) List(java.util.List) Response(javax.ws.rs.core.Response) DataSegment(org.apache.druid.timeline.DataSegment) HashFunction(com.google.common.hash.HashFunction) Logger(org.apache.druid.java.util.common.logger.Logger) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Hashing(com.google.common.hash.Hashing) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) Function(java.util.function.Function) TuningConfig(org.apache.druid.segment.indexing.TuningConfig) TaskRealtimeMetricsMonitor(org.apache.druid.indexing.common.stats.TaskRealtimeMetricsMonitor) JsonTypeName(com.fasterxml.jackson.annotation.JsonTypeName) InputSource(org.apache.druid.data.input.InputSource) ImmutableList(com.google.common.collect.ImmutableList) SegmentsAndCommitMetadata(org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata) Appenderator(org.apache.druid.segment.realtime.appenderator.Appenderator) Nonnull(javax.annotation.Nonnull) ParseExceptionReport(org.apache.druid.segment.incremental.ParseExceptionReport) BatchIOConfig(org.apache.druid.segment.indexing.BatchIOConfig) SecondaryPartitionType(org.apache.druid.indexer.partitions.SecondaryPartitionType) Period(org.joda.time.Period) HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector) TransactionalSegmentPublisher(org.apache.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) CircularBuffer(org.apache.druid.utils.CircularBuffer) TimeUnit(java.util.concurrent.TimeUnit) Checks(org.apache.druid.indexer.Checks) IngestionStatsAndErrorsTaskReport(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) JsonInclude(com.fasterxml.jackson.annotation.JsonInclude) Collections(java.util.Collections)
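
Stripped of the ingestion plumbing, the cardinality-estimation pattern above is: bucket each row's timestamp into a segment interval, then feed a 128-bit hash of its group key to that interval's HyperLogLogCollector. A minimal sketch under those assumptions (the class name, timestamps, and group key are illustrative; it assumes the druid-hll, Guava, and Jackson artifacts are on the classpath):

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.hash.HashFunction;
import com.google.common.hash.Hashing;
import java.util.HashMap;
import java.util.Map;
import org.apache.druid.hll.HyperLogLogCollector;
import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.joda.time.DateTime;
import org.joda.time.Interval;

public class IntervalCardinalityDemo {
    private static final HashFunction HASH_FUNCTION = Hashing.murmur3_128();
    private static final ObjectMapper JSON_MAPPER = new ObjectMapper();

    public static void main(String[] args) throws Exception {
        Map<Interval, HyperLogLogCollector> collectors = new HashMap<>();
        // Stand-ins for timestamps read from input rows.
        DateTime[] timestamps = {
            DateTimes.of("2013-08-31T01:00:00Z"),
            DateTimes.of("2013-08-31T02:00:00Z"),
            DateTimes.of("2013-09-01T01:00:00Z")
        };
        for (DateTime timestamp : timestamps) {
            // Bucket each row into its DAY segment interval, mirroring
            // granularitySpec.getSegmentGranularity().bucket(...) above.
            Interval interval = Granularities.DAY.bucket(timestamp);
            HyperLogLogCollector collector =
                collectors.computeIfAbsent(interval, intv -> HyperLogLogCollector.makeLatestCollector());
            // Hash a (stand-in) group key and feed the 128-bit hash to the collector.
            byte[] groupKey = JSON_MAPPER.writeValueAsBytes(timestamp.getMillis());
            collector.add(HASH_FUNCTION.hashBytes(groupKey).asBytes());
        }
        collectors.forEach((interval, collector) ->
            System.out.printf("%s -> ~%.0f distinct rows%n", interval, collector.estimateCardinality()));
    }
}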

Example 35 with Granularity

use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.

the class PartialDimensionCardinalityTask method determineCardinalities.

private Map<Interval, byte[]> determineCardinalities(CloseableIterator<InputRow> inputRowIterator, GranularitySpec granularitySpec) {
    Map<Interval, HllSketch> intervalToCardinalities = new HashMap<>();
    while (inputRowIterator.hasNext()) {
        InputRow inputRow = inputRowIterator.next();
        // null rows are filtered out by FilteringCloseableInputRowIterator
        DateTime timestamp = inputRow.getTimestamp();
        final Interval interval;
        if (granularitySpec.inputIntervals().isEmpty()) {
            interval = granularitySpec.getSegmentGranularity().bucket(timestamp);
        } else {
            final Optional<Interval> optInterval = granularitySpec.bucketInterval(timestamp);
            // this interval must exist since it passed the rowFilter
            assert optInterval.isPresent();
            interval = optInterval.get();
        }
        Granularity queryGranularity = granularitySpec.getQueryGranularity();
        HllSketch hllSketch = intervalToCardinalities.computeIfAbsent(interval, (intervalKey) -> DimensionCardinalityReport.createHllSketchForReport());
        // For cardinality estimation, we want to consider unique rows instead of unique hash buckets and therefore
        // we do not use partition dimensions in computing the group key
        List<Object> groupKey = HashPartitioner.extractKeys(Collections.emptyList(), queryGranularity.bucketStart(timestamp).getMillis(), inputRow);
        try {
            hllSketch.update(jsonMapper.writeValueAsBytes(groupKey));
        } catch (JsonProcessingException jpe) {
            throw new RuntimeException(jpe);
        }
    }
    // Serialize the collectors for sending to the supervisor task
    Map<Interval, byte[]> newMap = new HashMap<>();
    for (Map.Entry<Interval, HllSketch> entry : intervalToCardinalities.entrySet()) {
        newMap.put(entry.getKey(), entry.getValue().toCompactByteArray());
    }
    return newMap;
}
Also used: HllSketch(org.apache.datasketches.hll.HllSketch) HashMap(java.util.HashMap) Granularity(org.apache.druid.java.util.common.granularity.Granularity) DateTime(org.joda.time.DateTime) InputRow(org.apache.druid.data.input.InputRow) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) Map(java.util.Map) Interval(org.joda.time.Interval)
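
The serialization step at the end mirrors a plain DataSketches round trip: update a sketch, compact it to bytes for the report, and rebuild it on the receiving side. A minimal sketch of that round trip (the class name and inputs are illustrative; the default lgK is used here, whereas DimensionCardinalityReport.createHllSketchForReport() may configure its own):

import java.nio.charset.StandardCharsets;
import org.apache.datasketches.hll.HllSketch;

public class HllSketchRoundTripDemo {
    public static void main(String[] args) {
        HllSketch sketch = new HllSketch();
        for (String groupKey : new String[]{"a", "b", "a", "c"}) {
            sketch.update(groupKey.getBytes(StandardCharsets.UTF_8));
        }
        // Compact serialization, as done before sending to the supervisor task.
        byte[] serialized = sketch.toCompactByteArray();
        // The receiving side rebuilds the sketch from the bytes and reads the estimate.
        HllSketch restored = HllSketch.heapify(serialized);
        System.out.printf("~%.0f distinct group keys%n", restored.getEstimate()); // ~3
    }
}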

Aggregations

Granularity (org.apache.druid.java.util.common.granularity.Granularity): 58
Interval (org.joda.time.Interval): 27
ArrayList (java.util.ArrayList): 22
DateTime (org.joda.time.DateTime): 19
Test (org.junit.Test): 16
List (java.util.List): 14
Map (java.util.Map): 14
HashMap (java.util.HashMap): 13
Nullable (javax.annotation.Nullable): 12
PeriodGranularity (org.apache.druid.java.util.common.granularity.PeriodGranularity): 12
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 12
Period (org.joda.time.Period): 11
ISE (org.apache.druid.java.util.common.ISE): 8
Result (org.apache.druid.query.Result): 8
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 7
ImmutableList (com.google.common.collect.ImmutableList): 7
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 6
ClientCompactionTaskGranularitySpec (org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec): 6
LockGranularity (org.apache.druid.indexing.common.LockGranularity): 6
Sequence (org.apache.druid.java.util.common.guava.Sequence): 6