use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.
the class ITAutoCompactionTest method testAutoCompactionDutyWithSegmentGranularityAndSmallerSegmentGranularityCoveringMultipleSegmentsInTimelineAndDropExistingTrue.
@Test
public void testAutoCompactionDutyWithSegmentGranularityAndSmallerSegmentGranularityCoveringMultipleSegmentsInTimelineAndDropExistingTrue() throws Exception {
  loadData(INDEX_TASK);
  try (final Closeable ignored = unloader(fullDatasourceName)) {
    final List<String> intervalsBeforeCompaction = coordinator.getSegmentIntervals(fullDatasourceName);
    intervalsBeforeCompaction.sort(null);
    // 4 segments across 2 days (4 total)...
    verifySegmentsCount(4);
    verifyQuery(INDEX_QUERIES_RESOURCE);
    Granularity newGranularity = Granularities.YEAR;
    // Set dropExisting to true
    submitCompactionConfig(MAX_ROWS_PER_SEGMENT_COMPACTED, NO_SKIP_OFFSET, new UserCompactionTaskGranularityConfig(newGranularity, null, null), true);
    List<String> expectedIntervalAfterCompaction = new ArrayList<>();
    // We will have one segment with interval of 2013-01-01/2014-01-01 (compacted with YEAR)
    for (String interval : intervalsBeforeCompaction) {
      for (Interval newinterval : newGranularity.getIterable(new Interval(interval, ISOChronology.getInstanceUTC()))) {
        expectedIntervalAfterCompaction.add(newinterval.toString());
      }
    }
    forceTriggerAutoCompaction(1);
    verifyQuery(INDEX_QUERIES_RESOURCE);
    verifySegmentsCompacted(1, MAX_ROWS_PER_SEGMENT_COMPACTED);
    checkCompactionIntervals(expectedIntervalAfterCompaction);
    loadData(INDEX_TASK);
    verifySegmentsCount(5);
    verifyQuery(INDEX_QUERIES_RESOURCE);
    // 5 segments: 1 compacted YEAR segment and 4 newly ingested DAY segments across 2 days.
    // We will have one segment with interval of 2013-01-01/2014-01-01 (compacted with YEAR) from the compaction earlier,
    // two segments with interval of 2013-08-31/2013-09-01 (newly ingested with DAY),
    // and two segments with interval of 2013-09-01/2013-09-02 (newly ingested with DAY)
    expectedIntervalAfterCompaction.addAll(intervalsBeforeCompaction);
    checkCompactionIntervals(expectedIntervalAfterCompaction);
    newGranularity = Granularities.MONTH;
    // Set dropExisting to true
    submitCompactionConfig(MAX_ROWS_PER_SEGMENT_COMPACTED, NO_SKIP_OFFSET, new UserCompactionTaskGranularityConfig(newGranularity, null, null), true);
    // Since dropExisting is set to true...
    // This will submit a single compaction task for interval of 2013-01-01/2014-01-01 with MONTH granularity.
    expectedIntervalAfterCompaction = new ArrayList<>();
    // We will have one segment with interval of 2013-08-01/2013-09-01 and one with interval of 2013-09-01/2013-10-01 (both compacted with MONTH)
    for (String interval : intervalsBeforeCompaction) {
      for (Interval newinterval : Granularities.MONTH.getIterable(new Interval(interval, ISOChronology.getInstanceUTC()))) {
        expectedIntervalAfterCompaction.add(newinterval.toString());
      }
    }
    forceTriggerAutoCompaction(2);
    verifyQuery(INDEX_QUERIES_RESOURCE);
    verifySegmentsCompacted(2, MAX_ROWS_PER_SEGMENT_COMPACTED);
    checkCompactionIntervals(expectedIntervalAfterCompaction);
  }
}
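For reference, here is a minimal, self-contained sketch of the pattern the test uses to build its expected intervals (the class name and the sample day are illustrative assumptions, not part of the test): Granularity.getIterable expands an ISO-8601 interval string into the coarser buckets that cover it, so a single ingested day maps to one YEAR bucket.

import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;
import org.joda.time.Interval;
import org.joda.time.chrono.ISOChronology;

public class ExpectedIntervalSketch {
  public static void main(String[] args) {
    // Hypothetical sample: one ingested day, as in the test data above.
    Interval day = new Interval("2013-08-31/2013-09-01", ISOChronology.getInstanceUTC());
    Granularity year = Granularities.YEAR;
    // YEAR covers the day with a single bucket, 2013-01-01/2014-01-01,
    // which is exactly the interval the compacted segment is expected to have.
    for (Interval bucket : year.getIterable(day)) {
      System.out.println(bucket);
    }
  }
}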
use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.
the class GroupByQuery method computeUniversalTimestamp.
/**
* Computes the timestamp that will be returned by {@link #getUniversalTimestamp()}.
*/
@Nullable
private DateTime computeUniversalTimestamp() {
  final String timestampStringFromContext = getContextValue(CTX_KEY_FUDGE_TIMESTAMP, "");
  final Granularity granularity = getGranularity();
  if (!timestampStringFromContext.isEmpty()) {
    return DateTimes.utc(Long.parseLong(timestampStringFromContext));
  } else if (Granularities.ALL.equals(granularity)) {
    final List<Interval> intervals = getIntervals();
    if (intervals.isEmpty()) {
      // null, the "universal timestamp" of nothing
      return null;
    }
    final DateTime timeStart = intervals.get(0).getStart();
    return granularity.getIterable(new Interval(timeStart, timeStart.plus(1))).iterator().next().getStart();
  } else {
    return null;
  }
}
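A small sketch of why this returns the start of the first query interval when the granularity is ALL (the class name and timestamp are illustrative assumptions): the ALL granularity buckets the one-millisecond probe interval into itself, so the first bucket's start is timeStart.

import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.joda.time.DateTime;
import org.joda.time.Interval;

public class UniversalTimestampSketch {
  public static void main(String[] args) {
    DateTime timeStart = DateTimes.of("2013-08-31T00:00:00Z");
    // Same expression as computeUniversalTimestamp() uses for the Granularities.ALL branch.
    DateTime universal = Granularities.ALL
        .getIterable(new Interval(timeStart, timeStart.plus(1)))
        .iterator()
        .next()
        .getStart();
    System.out.println(universal); // expected to print the same instant as timeStart
  }
}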
use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.
the class GranularityTest method testIsFinerComparator.
@Test
public void testIsFinerComparator() {
  Assert.assertTrue(Granularity.IS_FINER_THAN.compare(NONE, SECOND) < 0);
  Assert.assertTrue(Granularity.IS_FINER_THAN.compare(SECOND, NONE) > 0);
  Assert.assertTrue(Granularity.IS_FINER_THAN.compare(NONE, MINUTE) < 0);
  Assert.assertTrue(Granularity.IS_FINER_THAN.compare(MINUTE, NONE) > 0);
  Assert.assertTrue(Granularity.IS_FINER_THAN.compare(DAY, MONTH) < 0);
  Assert.assertTrue(Granularity.IS_FINER_THAN.compare(Granularities.YEAR, ALL) < 0);
  Assert.assertTrue(Granularity.IS_FINER_THAN.compare(Granularities.ALL, YEAR) > 0);
  // Distinct references are needed to avoid IntelliJ complaining about compare being called
  // with the same argument twice, hence the local variables.
  Granularity day = DAY;
  Granularity none = NONE;
  Granularity all = ALL;
  Assert.assertTrue(Granularity.IS_FINER_THAN.compare(DAY, day) == 0);
  Assert.assertTrue(Granularity.IS_FINER_THAN.compare(NONE, none) == 0);
  Assert.assertTrue(Granularity.IS_FINER_THAN.compare(ALL, all) == 0);
}
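The comparator is also usable for sorting. A short sketch (the class name and chosen granularities are illustrative) showing that Granularity.IS_FINER_THAN orders finer granularities before coarser ones, consistent with the pairwise assertions above:

import java.util.Arrays;
import java.util.List;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.apache.druid.java.util.common.granularity.Granularity;

public class FinenessOrderingSketch {
  public static void main(String[] args) {
    List<Granularity> granularities = Arrays.asList(
        Granularities.ALL, Granularities.DAY, Granularities.NONE, Granularities.MONTH);
    // Sort finest-first using the same comparator the test exercises.
    granularities.sort(Granularity.IS_FINER_THAN);
    System.out.println(granularities); // expected order: NONE, DAY, MONTH, ALL
  }
}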
use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.
the class IndexTask method collectIntervalsAndShardSpecs.
private Map<Interval, Optional<HyperLogLogCollector>> collectIntervalsAndShardSpecs(
    ObjectMapper jsonMapper,
    IndexIngestionSpec ingestionSchema,
    InputSource inputSource,
    File tmpDir,
    GranularitySpec granularitySpec,
    @Nonnull PartitionsSpec partitionsSpec,
    boolean determineIntervals
) throws IOException {
  final Map<Interval, Optional<HyperLogLogCollector>> hllCollectors = new TreeMap<>(Comparators.intervalsByStartThenEnd());
  final Granularity queryGranularity = granularitySpec.getQueryGranularity();
  final Predicate<InputRow> rowFilter = inputRow -> {
    if (inputRow == null) {
      return false;
    }
    if (determineIntervals) {
      return true;
    }
    final Optional<Interval> optInterval = granularitySpec.bucketInterval(inputRow.getTimestamp());
    return optInterval.isPresent();
  };
  try (
      final CloseableIterator<InputRow> inputRowIterator = AbstractBatchIndexTask.inputSourceReader(
          tmpDir,
          ingestionSchema.getDataSchema(),
          inputSource,
          inputSource.needsFormat() ? getInputFormat(ingestionSchema) : null,
          rowFilter,
          determinePartitionsMeters,
          determinePartitionsParseExceptionHandler
      )
  ) {
    while (inputRowIterator.hasNext()) {
      final InputRow inputRow = inputRowIterator.next();
      final Interval interval;
      if (determineIntervals) {
        interval = granularitySpec.getSegmentGranularity().bucket(inputRow.getTimestamp());
      } else {
        final Optional<Interval> optInterval = granularitySpec.bucketInterval(inputRow.getTimestamp());
        // this interval must exist since it passed the rowFilter
        assert optInterval.isPresent();
        interval = optInterval.get();
      }
      if (partitionsSpec.needsDeterminePartitions(false)) {
        hllCollectors.computeIfAbsent(interval, intv -> Optional.of(HyperLogLogCollector.makeLatestCollector()));
        List<Object> groupKey = Rows.toGroupKey(queryGranularity.bucketStart(inputRow.getTimestampFromEpoch()), inputRow);
        hllCollectors.get(interval).get().add(HASH_FUNCTION.hashBytes(jsonMapper.writeValueAsBytes(groupKey)).asBytes());
      } else {
        // we don't need to determine partitions but we still need to determine intervals, so add an Optional.absent()
        // for the interval and don't instantiate a HLL collector
        hllCollectors.putIfAbsent(interval, Optional.absent());
      }
      determinePartitionsMeters.incrementProcessed();
    }
  }
  // These metrics are reported in generateAndPublishSegments()
  if (determinePartitionsMeters.getThrownAway() > 0) {
    log.warn("Unable to find a matching interval for [%,d] events", determinePartitionsMeters.getThrownAway());
  }
  if (determinePartitionsMeters.getUnparseable() > 0) {
    log.warn("Unable to parse [%,d] events", determinePartitionsMeters.getUnparseable());
  }
  return hllCollectors;
}
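A compact sketch of the two Granularity calls this method leans on (the class name, sample timestamp, and HOUR query granularity are illustrative assumptions): bucket() maps a row timestamp to its segment-granularity interval, which keys the HLL collector map, while bucketStart() truncates the timestamp to its query-granularity bucket for the group key.

import org.apache.druid.java.util.common.DateTimes;
import org.apache.druid.java.util.common.granularity.Granularities;
import org.joda.time.DateTime;
import org.joda.time.Interval;

public class BucketingSketch {
  public static void main(String[] args) {
    DateTime timestamp = DateTimes.of("2013-08-31T12:34:56Z");
    // Segment granularity DAY: the row falls into the 2013-08-31/2013-09-01 interval.
    Interval segmentBucket = Granularities.DAY.bucket(timestamp);
    // Query granularity HOUR: the group-key timestamp is truncated to 2013-08-31T12:00:00Z.
    DateTime queryBucketStart = Granularities.HOUR.bucketStart(timestamp);
    System.out.println(segmentBucket + " -> " + queryBucketStart);
  }
}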
use of org.apache.druid.java.util.common.granularity.Granularity in project druid by druid-io.
the class PartialDimensionCardinalityTask method determineCardinalities.
private Map<Interval, byte[]> determineCardinalities(CloseableIterator<InputRow> inputRowIterator, GranularitySpec granularitySpec) {
  Map<Interval, HllSketch> intervalToCardinalities = new HashMap<>();
  while (inputRowIterator.hasNext()) {
    InputRow inputRow = inputRowIterator.next();
    // null rows are filtered out by FilteringCloseableInputRowIterator
    DateTime timestamp = inputRow.getTimestamp();
    final Interval interval;
    if (granularitySpec.inputIntervals().isEmpty()) {
      interval = granularitySpec.getSegmentGranularity().bucket(timestamp);
    } else {
      final Optional<Interval> optInterval = granularitySpec.bucketInterval(timestamp);
      // this interval must exist since it passed the rowFilter
      assert optInterval.isPresent();
      interval = optInterval.get();
    }
    Granularity queryGranularity = granularitySpec.getQueryGranularity();
    HllSketch hllSketch = intervalToCardinalities.computeIfAbsent(interval, (intervalKey) -> DimensionCardinalityReport.createHllSketchForReport());
    // For cardinality estimation, we want to consider unique rows instead of unique hash buckets and therefore
    // we do not use partition dimensions in computing the group key
    List<Object> groupKey = HashPartitioner.extractKeys(Collections.emptyList(), queryGranularity.bucketStart(timestamp).getMillis(), inputRow);
    try {
      hllSketch.update(jsonMapper.writeValueAsBytes(groupKey));
    } catch (JsonProcessingException jpe) {
      throw new RuntimeException(jpe);
    }
  }
  // Serialize the collectors for sending to the supervisor task
  Map<Interval, byte[]> newMap = new HashMap<>();
  for (Map.Entry<Interval, HllSketch> entry : intervalToCardinalities.entrySet()) {
    newMap.put(entry.getKey(), entry.getValue().toCompactByteArray());
  }
  return newMap;
}
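For illustration, a standalone sketch of the cardinality-estimation pattern used above (the class name and sample group keys are made up, and it uses a default-sized HllSketch rather than DimensionCardinalityReport.createHllSketchForReport()): serialize each row's group key to bytes, feed it to the sketch, and read back an approximate distinct count.

import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.Arrays;
import java.util.List;
import org.apache.datasketches.hll.HllSketch;

public class CardinalityEstimateSketch {
  public static void main(String[] args) throws Exception {
    ObjectMapper jsonMapper = new ObjectMapper();
    HllSketch hllSketch = new HllSketch(); // default lgK; the task sizes its sketches via createHllSketchForReport()
    // Hypothetical group keys: (query-granularity bucket start in millis, dimension value)
    List<List<Object>> groupKeys = Arrays.asList(
        Arrays.<Object>asList(1377907200000L, "page-a"),
        Arrays.<Object>asList(1377907200000L, "page-b"),
        Arrays.<Object>asList(1377907200000L, "page-a")); // duplicate of the first key
    for (List<Object> groupKey : groupKeys) {
      hllSketch.update(jsonMapper.writeValueAsBytes(groupKey));
    }
    System.out.println(hllSketch.getEstimate()); // approximately 2 distinct keys
  }
}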