Use of org.apache.druid.timeline.partition.PartitionHolder in project druid by druid-io.
From the class DruidInputSource, method getTimelineForSegmentIds:
public static List<TimelineObjectHolder<String, DataSegment>> getTimelineForSegmentIds(
    CoordinatorClient coordinatorClient,
    String dataSource,
    List<WindowedSegmentId> segmentIds
)
{
  final SortedMap<Interval, TimelineObjectHolder<String, DataSegment>> timeline =
      new TreeMap<>(Comparators.intervalsByStartThenEnd());
  for (WindowedSegmentId windowedSegmentId : Preconditions.checkNotNull(segmentIds, "segmentIds")) {
    final DataSegment segment =
        coordinatorClient.fetchUsedSegment(dataSource, windowedSegmentId.getSegmentId());
    for (Interval interval : windowedSegmentId.getIntervals()) {
      final TimelineObjectHolder<String, DataSegment> existingHolder = timeline.get(interval);
      if (existingHolder != null) {
        if (!existingHolder.getVersion().equals(segment.getVersion())) {
          throw new ISE(
              "Timeline segments with the same interval should have the same version: "
              + "existing version[%s] vs new segment[%s]",
              existingHolder.getVersion(),
              segment
          );
        }
        existingHolder.getObject().add(segment.getShardSpec().createChunk(segment));
      } else {
        timeline.put(
            interval,
            new TimelineObjectHolder<>(
                interval,
                segment.getInterval(),
                segment.getVersion(),
                new PartitionHolder<>(segment.getShardSpec().createChunk(segment))
            )
        );
      }
    }
  }

  // Validate that none of the given windows overlaps (except for when multiple segments share
  // exactly the same interval).
  Interval lastInterval = null;
  for (Interval interval : timeline.keySet()) {
    if (lastInterval != null && interval.overlaps(lastInterval)) {
      throw new IAE(
          "Distinct intervals in input segments may not overlap: [%s] vs [%s]",
          lastInterval,
          interval
      );
    }
    lastInterval = interval;
  }

  return new ArrayList<>(timeline.values());
}
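The method builds at most one TimelineObjectHolder per distinct window interval: the first segment seen for an interval seeds the holder's PartitionHolder, and later segments for the same interval add their chunks to it. A minimal sketch of that accumulation step, using only the calls visible above (first and rest are hypothetical DataSegment instances sharing one interval and version):

// Seed the holder with the first segment's chunk, then fold in the rest.
PartitionHolder<DataSegment> holder =
    new PartitionHolder<>(first.getShardSpec().createChunk(first));
for (DataSegment segment : rest) {
  holder.add(segment.getShardSpec().createChunk(segment));
}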
Use of org.apache.druid.timeline.partition.PartitionHolder in project druid by druid-io.
From the class VersionedIntervalTimelineTestBase, method assertValues:
static void assertValues(
    List<Pair<Interval, Pair<String, PartitionHolder<OvershadowableInteger>>>> expected,
    List<TimelineObjectHolder<String, OvershadowableInteger>> actual
)
{
  Assert.assertEquals("Sizes did not match.", expected.size(), actual.size());

  Iterator<Pair<Interval, Pair<String, PartitionHolder<OvershadowableInteger>>>> expectedIter =
      expected.iterator();
  Iterator<TimelineObjectHolder<String, OvershadowableInteger>> actualIter = actual.iterator();

  while (expectedIter.hasNext()) {
    Pair<Interval, Pair<String, PartitionHolder<OvershadowableInteger>>> pair = expectedIter.next();
    TimelineObjectHolder<String, OvershadowableInteger> holder = actualIter.next();

    Assert.assertEquals(pair.lhs, holder.getInterval());
    Assert.assertEquals(pair.rhs.lhs, holder.getVersion());

    final List<PartitionChunk<OvershadowableInteger>> expectedChunks = Lists.newArrayList(pair.rhs.rhs);
    final List<PartitionChunk<OvershadowableInteger>> actualChunks = Lists.newArrayList(holder.getObject());
    Assert.assertEquals(expectedChunks.size(), actualChunks.size());

    for (int i = 0; i < expectedChunks.size(); i++) {
      // Check partitionNumber first
      Assert.assertEquals(expectedChunks.get(i), actualChunks.get(i));
      final OvershadowableInteger expectedInteger = expectedChunks.get(i).getObject();
      final OvershadowableInteger actualInteger = actualChunks.get(i).getObject();
      Assert.assertEquals(expectedInteger, actualInteger);
    }
  }
}
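The chunk comparison works because PartitionHolder is iterable over its PartitionChunks, so a holder can be materialized into a plain list with Lists.newArrayList. The same idea in isolation (expectedHolder and actualHolder are hypothetical holders):

// Materialize both holders and compare them chunk-by-chunk.
final List<PartitionChunk<OvershadowableInteger>> expectedChunks = Lists.newArrayList(expectedHolder);
final List<PartitionChunk<OvershadowableInteger>> actualChunks = Lists.newArrayList(actualHolder);
Assert.assertEquals(expectedChunks, actualChunks);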
Use of org.apache.druid.timeline.partition.PartitionHolder in project druid by druid-io.
From the class VersionedIntervalTimeline, method addAll:
public void addAll(final Iterator<PartitionChunkEntry<VersionType, ObjectType>> objects)
{
  lock.writeLock().lock();

  try {
    final IdentityHashMap<TimelineEntry, Interval> allEntries = new IdentityHashMap<>();

    while (objects.hasNext()) {
      PartitionChunkEntry<VersionType, ObjectType> chunkEntry = objects.next();
      PartitionChunk<ObjectType> object = chunkEntry.getChunk();
      Interval interval = chunkEntry.getInterval();
      VersionType version = chunkEntry.getVersion();
      Map<VersionType, TimelineEntry> exists = allTimelineEntries.get(interval);
      TimelineEntry entry;

      if (exists == null) {
        entry = new TimelineEntry(interval, version, new PartitionHolder<>(object));
        TreeMap<VersionType, TimelineEntry> versionEntry = new TreeMap<>(versionComparator);
        versionEntry.put(version, entry);
        allTimelineEntries.put(interval, versionEntry);
        numObjects.incrementAndGet();
      } else {
        entry = exists.get(version);

        if (entry == null) {
          entry = new TimelineEntry(interval, version, new PartitionHolder<>(object));
          exists.put(version, entry);
          numObjects.incrementAndGet();
        } else {
          PartitionHolder<ObjectType> partitionHolder = entry.getPartitionHolder();
          if (partitionHolder.add(object)) {
            numObjects.incrementAndGet();
          }
        }
      }

      allEntries.put(entry, interval);
    }

    // "isComplete" is O(objects in holder) so defer it to the end of addAll.
    for (Entry<TimelineEntry, Interval> entry : allEntries.entrySet()) {
      Interval interval = entry.getValue();

      if (entry.getKey().getPartitionHolder().isComplete()) {
        add(completePartitionsTimeline, interval, entry.getKey());
      }

      add(incompletePartitionsTimeline, interval, entry.getKey());
    }
  } finally {
    lock.writeLock().unlock();
  }
}
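The point of addAll over repeated single adds is the deferred completeness check: every chunk first lands in its entry's PartitionHolder, and isComplete runs once per distinct TimelineEntry at the end. A hedged sketch of a bulk load, assuming PartitionChunkEntry exposes an (interval, version, chunk) constructor matching the getters used above and that the timeline takes a version comparator (segments is an illustrative List<DataSegment>):

// Bulk-load segments so completeness is evaluated once per entry, not per chunk.
final VersionedIntervalTimeline<String, DataSegment> timeline =
    new VersionedIntervalTimeline<>(Comparator.naturalOrder());
timeline.addAll(
    segments.stream()
            .map(s -> new PartitionChunkEntry<>(s.getInterval(), s.getVersion(),
                                                s.getShardSpec().createChunk(s)))
            .iterator()
);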
Use of org.apache.druid.timeline.partition.PartitionHolder in project druid by druid-io.
From the class IngestSegmentFirehoseFactoryTest, method testGetUniqueDimensionsAndMetrics:
@Test
public void testGetUniqueDimensionsAndMetrics()
{
  final int numSegmentsPerPartitionChunk = 5;
  final int numPartitionChunksPerTimelineObject = 10;
  final int numSegments = numSegmentsPerPartitionChunk * numPartitionChunksPerTimelineObject;
  final Interval interval = Intervals.of("2017-01-01/2017-01-02");
  final String version = "1";

  final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = new ArrayList<>();
  for (int i = 0; i < numPartitionChunksPerTimelineObject; i++) {
    final List<PartitionChunk<DataSegment>> chunks = new ArrayList<>();
    for (int j = 0; j < numSegmentsPerPartitionChunk; j++) {
      final List<String> dims = IntStream.range(i, i + numSegmentsPerPartitionChunk)
                                         .mapToObj(suffix -> "dim" + suffix)
                                         .collect(Collectors.toList());
      final List<String> metrics = IntStream.range(i, i + numSegmentsPerPartitionChunk)
                                            .mapToObj(suffix -> "met" + suffix)
                                            .collect(Collectors.toList());
      final DataSegment segment = new DataSegment(
          "ds", interval, version, ImmutableMap.of(), dims, metrics,
          new NumberedShardSpec(numPartitionChunksPerTimelineObject, i), 1, 1
      );
      chunks.add(new NumberedPartitionChunk<>(i, numPartitionChunksPerTimelineObject, segment));
    }
    timelineSegments.add(new TimelineObjectHolder<>(interval, version, new PartitionHolder<>(chunks)));
  }

  final String[] expectedDims = new String[]{
      "dim9", "dim10", "dim11", "dim12", "dim13",
      "dim8", "dim7", "dim6", "dim5", "dim4", "dim3", "dim2", "dim1", "dim0"
  };
  final String[] expectedMetrics = new String[]{
      "met9", "met10", "met11", "met12", "met13",
      "met8", "met7", "met6", "met5", "met4", "met3", "met2", "met1", "met0"
  };
  Assert.assertEquals(Arrays.asList(expectedDims),
                      ReingestionTimelineUtils.getUniqueDimensions(timelineSegments, null));
  Assert.assertEquals(Arrays.asList(expectedMetrics),
                      ReingestionTimelineUtils.getUniqueMetrics(timelineSegments));
}
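This test exercises the other PartitionHolder constructor, which takes an explicit chunk list rather than a single seed chunk. A smaller version of the same fixture, using only the constructors shown above (the segment names and dimension/metric lists are illustrative):

// One interval, two numbered chunks, one PartitionHolder.
final DataSegment seg0 = new DataSegment(
    "ds", interval, version, ImmutableMap.of(),
    ImmutableList.of("dimA"), ImmutableList.of("metA"), new NumberedShardSpec(2, 0), 1, 1);
final DataSegment seg1 = new DataSegment(
    "ds", interval, version, ImmutableMap.of(),
    ImmutableList.of("dimB"), ImmutableList.of("metB"), new NumberedShardSpec(2, 1), 1, 1);
final TimelineObjectHolder<String, DataSegment> holder = new TimelineObjectHolder<>(
    interval, version,
    new PartitionHolder<>(ImmutableList.of(
        new NumberedPartitionChunk<>(0, 2, seg0),
        new NumberedPartitionChunk<>(1, 2, seg1))));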
Use of org.apache.druid.timeline.partition.PartitionHolder in project druid by druid-io.
From the class DruidInputSource, method fixedFormatReader:
@Override
protected InputSourceReader fixedFormatReader(InputRowSchema inputRowSchema, @Nullable File temporaryDirectory)
{
  final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
  final List<TimelineObjectHolder<String, DataSegment>> timeline = createTimeline();

  final Iterator<DruidSegmentInputEntity> entityIterator = FluentIterable
      .from(timeline)
      .transformAndConcat(holder -> {
        //noinspection ConstantConditions
        final PartitionHolder<DataSegment> partitionHolder = holder.getObject();
        //noinspection ConstantConditions
        return FluentIterable
            .from(partitionHolder)
            .transform(chunk -> new DruidSegmentInputEntity(segmentCacheManager, chunk.getObject(), holder.getInterval()));
      }).iterator();

  final DruidSegmentInputFormat inputFormat = new DruidSegmentInputFormat(indexIO, dimFilter);

  final InputRowSchema inputRowSchemaToUse;
  if (taskConfig.isIgnoreTimestampSpecForDruidInputSource()) {
    // Legacy compatibility mode; see https://github.com/apache/druid/pull/10267.
    LOG.warn(
        "Ignoring the provided timestampSpec and reading the __time column instead. To use timestampSpecs with "
        + "the 'druid' input source, set druid.indexer.task.ignoreTimestampSpecForDruidInputSource to false."
    );
    inputRowSchemaToUse = new InputRowSchema(
        new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, STANDARD_TIME_COLUMN_FORMATS.iterator().next(), null),
        inputRowSchema.getDimensionsSpec(),
        inputRowSchema.getColumnsFilter().plus(ColumnHolder.TIME_COLUMN_NAME)
    );
  } else {
    inputRowSchemaToUse = inputRowSchema;
  }

  if (ColumnHolder.TIME_COLUMN_NAME.equals(inputRowSchemaToUse.getTimestampSpec().getTimestampColumn())
      && !STANDARD_TIME_COLUMN_FORMATS.contains(inputRowSchemaToUse.getTimestampSpec().getTimestampFormat())) {
    // Slight chance the user did this intentionally, but not likely. Log a warning.
    LOG.warn(
        "The provided timestampSpec refers to the %s column without using format %s. If you wanted to read the "
        + "column as-is, switch formats.",
        inputRowSchemaToUse.getTimestampSpec().getTimestampColumn(),
        STANDARD_TIME_COLUMN_FORMATS
    );
  }

  return new InputEntityIteratingReader(inputRowSchemaToUse, inputFormat, entityIterator, temporaryDirectory);
}
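The transform chain above is the standard way to walk from timeline holders down to segments: each TimelineObjectHolder's getObject() is a PartitionHolder, iterating that holder yields PartitionChunks, and each chunk's getObject() is the DataSegment. A compact sketch of just the flattening, without the input-entity wrapping (timeline is a list like the one built above):

// Flatten holders -> chunks -> segments.
final List<DataSegment> segments = FluentIterable
    .from(timeline)
    .transformAndConcat(TimelineObjectHolder::getObject)
    .transform(PartitionChunk::getObject)
    .toList();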