Use of org.apache.druid.timeline.partition.PartitionChunk in project druid by druid-io.
From the class CompactSegmentsTest, method assertLastSegmentNotCompacted.
private void assertLastSegmentNotCompacted(CompactSegments compactSegments) {
  // Segments of the latest interval should not be compacted
  for (int i = 0; i < 3; i++) {
    final String dataSource = DATA_SOURCE_PREFIX + i;
    final Interval interval = Intervals.of(StringUtils.format("2017-01-09T12:00:00/2017-01-10"));
    List<TimelineObjectHolder<String, DataSegment>> holders = dataSources.get(dataSource).lookup(interval);
    Assert.assertEquals(1, holders.size());
    for (TimelineObjectHolder<String, DataSegment> holder : holders) {
      List<PartitionChunk<DataSegment>> chunks = Lists.newArrayList(holder.getObject());
      Assert.assertEquals(4, chunks.size());
      for (PartitionChunk<DataSegment> chunk : chunks) {
        DataSegment segment = chunk.getObject();
        Assert.assertEquals(interval, segment.getInterval());
        Assert.assertEquals("version", segment.getVersion());
      }
    }
  }

  // Emulating realtime dataSource
  final String dataSource = DATA_SOURCE_PREFIX + 0;
  addMoreData(dataSource, 9);
  CoordinatorStats stats = doCompactSegments(compactSegments);
  Assert.assertEquals(1, stats.getGlobalStat(CompactSegments.COMPACTION_TASK_COUNT));
  addMoreData(dataSource, 10);
  stats = doCompactSegments(compactSegments);
  Assert.assertEquals(1, stats.getGlobalStat(CompactSegments.COMPACTION_TASK_COUNT));
}
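A hedged aside on the lookup pattern used in the assertions above: a VersionedIntervalTimeline can be built directly from a collection of segments, and PartitionChunks are then reached by iterating each holder's PartitionHolder. A minimal sketch (the segments collection here is a placeholder, not part of the test):

// Sketch only: build a timeline and walk its PartitionChunks for one interval.
// 'segments' is an assumed Collection<DataSegment>.
VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(segments);
for (TimelineObjectHolder<String, DataSegment> holder : timeline.lookup(Intervals.of("2017-01-09T12:00:00/2017-01-10"))) {
  for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
    DataSegment segment = chunk.getObject();
    // inspect segment.getInterval(), segment.getVersion(), segment.getShardSpec(), ...
  }
}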
Use of org.apache.druid.timeline.partition.PartitionChunk in project druid by druid-io.
From the class VersionedIntervalTimeline, method isOvershadowed.
public boolean isOvershadowed(Interval interval, VersionType version, ObjectType object) {
  lock.readLock().lock();
  try {
    TimelineEntry entry = completePartitionsTimeline.get(interval);
    if (entry != null) {
      final int majorVersionCompare = versionComparator.compare(version, entry.getVersion());
      if (majorVersionCompare == 0) {
        for (PartitionChunk<ObjectType> chunk : entry.partitionHolder) {
          if (chunk.getObject().overshadows(object)) {
            return true;
          }
        }
        return false;
      } else {
        return majorVersionCompare < 0;
      }
    }

    Interval lower = completePartitionsTimeline.floorKey(new Interval(interval.getStart(), DateTimes.MAX));
    if (lower == null || !lower.overlaps(interval)) {
      return false;
    }

    Interval prev = null;
    Interval curr = lower;
    do {
      if (curr == null || // no further keys
          (prev != null && curr.getStartMillis() > prev.getEndMillis()) // a discontinuity
      ) {
        return false;
      }

      final TimelineEntry timelineEntry = completePartitionsTimeline.get(curr);
      final int versionCompare = versionComparator.compare(version, timelineEntry.getVersion());

      // lower or same version
      if (versionCompare > 0) {
        return false;
      } else if (versionCompare == 0) {
        // Intentionally use the Iterators API instead of the stream API for performance.
        // noinspection ConstantConditions
        final boolean nonOvershadowedObject = Iterators.all(
            timelineEntry.partitionHolder.iterator(),
            chunk -> !chunk.getObject().overshadows(object)
        );
        if (nonOvershadowedObject) {
          return false;
        }
      }

      prev = curr;
      curr = completePartitionsTimeline.higherKey(curr);
    } while (interval.getEndMillis() > prev.getEndMillis());

    return true;
  } finally {
    lock.readLock().unlock();
  }
}
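One way caller code uses this method is to ask, for each concrete object, whether a timeline containing it already fully shadows it. A minimal, hedged sketch for DataSegment (the segments collection is assumed, not taken from this class):

// Sketch only: flag segments that are overshadowed within a timeline built
// from the same collection. 'segments' is an assumed Collection<DataSegment>.
VersionedIntervalTimeline<String, DataSegment> timeline = VersionedIntervalTimeline.forSegments(segments);
for (DataSegment segment : segments) {
  if (timeline.isOvershadowed(segment.getInterval(), segment.getVersion(), segment)) {
    // this segment's data is fully covered by higher-versioned or overshadowing chunks
  }
}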
Use of org.apache.druid.timeline.partition.PartitionChunk in project druid by druid-io.
From the class VersionedIntervalTimelineTestBase, method assertValues.
static void assertValues(
    List<Pair<Interval, Pair<String, PartitionHolder<OvershadowableInteger>>>> expected,
    List<TimelineObjectHolder<String, OvershadowableInteger>> actual
) {
  Assert.assertEquals("Sizes did not match.", expected.size(), actual.size());

  Iterator<Pair<Interval, Pair<String, PartitionHolder<OvershadowableInteger>>>> expectedIter = expected.iterator();
  Iterator<TimelineObjectHolder<String, OvershadowableInteger>> actualIter = actual.iterator();

  while (expectedIter.hasNext()) {
    Pair<Interval, Pair<String, PartitionHolder<OvershadowableInteger>>> pair = expectedIter.next();
    TimelineObjectHolder<String, OvershadowableInteger> holder = actualIter.next();

    Assert.assertEquals(pair.lhs, holder.getInterval());
    Assert.assertEquals(pair.rhs.lhs, holder.getVersion());

    final List<PartitionChunk<OvershadowableInteger>> expectedChunks = Lists.newArrayList(pair.rhs.rhs);
    final List<PartitionChunk<OvershadowableInteger>> actualChunks = Lists.newArrayList(holder.getObject());
    Assert.assertEquals(expectedChunks.size(), actualChunks.size());

    for (int i = 0; i < expectedChunks.size(); i++) {
      // Check partitionNumber first
      Assert.assertEquals(expectedChunks.get(i), actualChunks.get(i));
      final OvershadowableInteger expectedInteger = expectedChunks.get(i).getObject();
      final OvershadowableInteger actualInteger = actualChunks.get(i).getObject();
      Assert.assertEquals(expectedInteger, actualInteger);
    }
  }
}
Use of org.apache.druid.timeline.partition.PartitionChunk in project druid by druid-io.
From the class IngestSegmentFirehoseFactoryTest, method testGetUniqueDimensionsAndMetrics.
@Test
public void testGetUniqueDimensionsAndMetrics() {
  final int numSegmentsPerPartitionChunk = 5;
  final int numPartitionChunksPerTimelineObject = 10;
  final int numSegments = numSegmentsPerPartitionChunk * numPartitionChunksPerTimelineObject;
  final Interval interval = Intervals.of("2017-01-01/2017-01-02");
  final String version = "1";

  final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = new ArrayList<>();
  for (int i = 0; i < numPartitionChunksPerTimelineObject; i++) {
    final List<PartitionChunk<DataSegment>> chunks = new ArrayList<>();
    for (int j = 0; j < numSegmentsPerPartitionChunk; j++) {
      final List<String> dims = IntStream.range(i, i + numSegmentsPerPartitionChunk).mapToObj(suffix -> "dim" + suffix).collect(Collectors.toList());
      final List<String> metrics = IntStream.range(i, i + numSegmentsPerPartitionChunk).mapToObj(suffix -> "met" + suffix).collect(Collectors.toList());
      final DataSegment segment = new DataSegment("ds", interval, version, ImmutableMap.of(), dims, metrics, new NumberedShardSpec(numPartitionChunksPerTimelineObject, i), 1, 1);
      final PartitionChunk<DataSegment> partitionChunk = new NumberedPartitionChunk<>(i, numPartitionChunksPerTimelineObject, segment);
      chunks.add(partitionChunk);
    }
    final TimelineObjectHolder<String, DataSegment> timelineHolder = new TimelineObjectHolder<>(interval, version, new PartitionHolder<>(chunks));
    timelineSegments.add(timelineHolder);
  }

  final String[] expectedDims = new String[]{"dim9", "dim10", "dim11", "dim12", "dim13", "dim8", "dim7", "dim6", "dim5", "dim4", "dim3", "dim2", "dim1", "dim0"};
  final String[] expectedMetrics = new String[]{"met9", "met10", "met11", "met12", "met13", "met8", "met7", "met6", "met5", "met4", "met3", "met2", "met1", "met0"};

  Assert.assertEquals(Arrays.asList(expectedDims), ReingestionTimelineUtils.getUniqueDimensions(timelineSegments, null));
  Assert.assertEquals(Arrays.asList(expectedMetrics), ReingestionTimelineUtils.getUniqueMetrics(timelineSegments));
}
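The expected ordering follows from collecting unique names newest-segments-first: the last chunk group (i = 9) contributes dim9 through dim13, and each earlier group only adds the one name not yet seen (dim8, dim7, ..., dim0). A minimal sketch of that idea for dimensions (not the actual ReingestionTimelineUtils implementation) could use an insertion-ordered set:

// Sketch only: deduplicate dimension names while walking holders from newest
// to oldest, preserving first-seen order as asserted above.
Set<String> uniqueDims = new LinkedHashSet<>();
for (int i = timelineSegments.size() - 1; i >= 0; i--) {
  for (PartitionChunk<DataSegment> chunk : timelineSegments.get(i).getObject()) {
    uniqueDims.addAll(chunk.getObject().getDimensions());
  }
}
List<String> orderedDims = new ArrayList<>(uniqueDims); // dim9..dim13, dim8, ..., dim0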
Use of org.apache.druid.timeline.partition.PartitionChunk in project druid by druid-io.
From the class IngestSegmentFirehoseFactory, method connect.
@Override
public Firehose connect(InputRowParser inputRowParser, File temporaryDirectory) throws ParseException {
  log.debug("Connecting firehose: dataSource[%s], interval[%s], segmentIds[%s]", dataSource, interval, segmentIds);

  final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = getTimeline();

  // Download all segments locally.
  // Note: this requires enough local storage space to fit all of the segments, even though
  // IngestSegmentFirehose iterates over the segments in series. We may want to change this
  // to download files lazily, perhaps sharing code with PrefetchableTextFilesFirehoseFactory.
  final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
  Map<DataSegment, File> segmentFileMap = Maps.newLinkedHashMap();
  for (TimelineObjectHolder<String, DataSegment> holder : timeLineSegments) {
    for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
      final DataSegment segment = chunk.getObject();
      segmentFileMap.computeIfAbsent(segment, k -> {
        try {
          return segmentCacheManager.getSegmentFiles(segment);
        } catch (SegmentLoadingException e) {
          throw new RuntimeException(e);
        }
      });
    }
  }

  final List<String> dims = ReingestionTimelineUtils.getDimensionsToReingest(dimensions, inputRowParser.getParseSpec().getDimensionsSpec(), timeLineSegments);
  final List<String> metricsList = metrics == null ? ReingestionTimelineUtils.getUniqueMetrics(timeLineSegments) : metrics;

  final List<WindowedStorageAdapter> adapters = Lists.newArrayList(
      Iterables.concat(
          Iterables.transform(
              timeLineSegments,
              new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>() {
                @Override
                public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder) {
                  return Iterables.transform(
                      holder.getObject(),
                      new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>() {
                        @Override
                        public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input) {
                          final DataSegment segment = input.getObject();
                          try {
                            return new WindowedStorageAdapter(
                                new QueryableIndexStorageAdapter(indexIO.loadIndex(Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getId()))),
                                holder.getInterval()
                            );
                          } catch (IOException e) {
                            throw new RuntimeException(e);
                          }
                        }
                      }
                  );
                }
              }
          )
      )
  );

  final TransformSpec transformSpec = TransformSpec.fromInputRowParser(inputRowParser);
  return new IngestSegmentFirehose(adapters, transformSpec, dims, metricsList, dimFilter);
}
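The nested anonymous Function classes above could also be expressed with the Stream API; the following is a hedged sketch intended to be behaviorally equivalent, assuming the same fields and local variables as in the method above (it is not the code actually used in the class):

// Sketch only: flatten timeline holders into one WindowedStorageAdapter per chunk.
final List<WindowedStorageAdapter> adapters = timeLineSegments
    .stream()
    .flatMap(holder -> StreamSupport.stream(holder.getObject().spliterator(), false)
        .map(chunk -> {
          final DataSegment segment = chunk.getObject();
          try {
            return new WindowedStorageAdapter(
                new QueryableIndexStorageAdapter(indexIO.loadIndex(Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getId()))),
                holder.getInterval()
            );
          } catch (IOException e) {
            throw new RuntimeException(e);
          }
        }))
    .collect(Collectors.toList());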