use of io.druid.timeline.TimelineObjectHolder in project druid by druid-io.
the class AppendTask method merge.
@Override
public File merge(final TaskToolbox toolbox, final Map<DataSegment, File> segments, final File outDir) throws Exception {
VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<String, DataSegment>(Ordering.<String>natural().nullsFirst());
for (DataSegment segment : segments.keySet()) {
timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
}
final Iterable<SegmentToMergeHolder> segmentsToMerge = Iterables.concat(Iterables.transform(timeline.lookup(new Interval("1000-01-01/3000-01-01")), new Function<TimelineObjectHolder<String, DataSegment>, Iterable<SegmentToMergeHolder>>() {
@Override
public Iterable<SegmentToMergeHolder> apply(final TimelineObjectHolder<String, DataSegment> input) {
return Iterables.transform(input.getObject(), new Function<PartitionChunk<DataSegment>, SegmentToMergeHolder>() {
@Nullable
@Override
public SegmentToMergeHolder apply(PartitionChunk<DataSegment> chunkInput) {
DataSegment segment = chunkInput.getObject();
return new SegmentToMergeHolder(segment, input.getInterval(), Preconditions.checkNotNull(segments.get(segment), "File for segment %s", segment.getIdentifier()));
}
});
}
}));
List<IndexableAdapter> adapters = Lists.newArrayList();
for (final SegmentToMergeHolder holder : segmentsToMerge) {
adapters.add(new RowboatFilteringIndexAdapter(new QueryableIndexIndexableAdapter(toolbox.getIndexIO().loadIndex(holder.getFile())), new Predicate<Rowboat>() {
@Override
public boolean apply(Rowboat input) {
return holder.getInterval().contains(input.getTimestamp());
}
}));
}
IndexMerger indexMerger = buildV9Directly ? toolbox.getIndexMergerV9() : toolbox.getIndexMerger();
return indexMerger.append(adapters, aggregators == null ? null : aggregators.toArray(new AggregatorFactory[aggregators.size()]), outDir, indexSpec);
}
use of io.druid.timeline.TimelineObjectHolder in project druid by druid-io.
the class IndexerSQLMetadataStorageCoordinator method allocatePendingSegment.
@Override
public SegmentIdentifier allocatePendingSegment(final String dataSource, final String sequenceName, final String previousSegmentId, final Interval interval, final String maxVersion) throws IOException {
Preconditions.checkNotNull(dataSource, "dataSource");
Preconditions.checkNotNull(sequenceName, "sequenceName");
Preconditions.checkNotNull(interval, "interval");
Preconditions.checkNotNull(maxVersion, "maxVersion");
final String previousSegmentIdNotNull = previousSegmentId == null ? "" : previousSegmentId;
return connector.retryTransaction(new TransactionCallback<SegmentIdentifier>() {
@Override
public SegmentIdentifier inTransaction(Handle handle, TransactionStatus transactionStatus) throws Exception {
final List<byte[]> existingBytes = handle.createQuery(String.format("SELECT payload FROM %s WHERE " + "dataSource = :dataSource AND " + "sequence_name = :sequence_name AND " + "sequence_prev_id = :sequence_prev_id", dbTables.getPendingSegmentsTable())).bind("dataSource", dataSource).bind("sequence_name", sequenceName).bind("sequence_prev_id", previousSegmentIdNotNull).map(ByteArrayMapper.FIRST).list();
if (!existingBytes.isEmpty()) {
final SegmentIdentifier existingIdentifier = jsonMapper.readValue(Iterables.getOnlyElement(existingBytes), SegmentIdentifier.class);
if (existingIdentifier.getInterval().getStartMillis() == interval.getStartMillis() && existingIdentifier.getInterval().getEndMillis() == interval.getEndMillis()) {
log.info("Found existing pending segment [%s] for sequence[%s] (previous = [%s]) in DB", existingIdentifier.getIdentifierAsString(), sequenceName, previousSegmentIdNotNull);
return existingIdentifier;
} else {
log.warn("Cannot use existing pending segment [%s] for sequence[%s] (previous = [%s]) in DB, " + "does not match requested interval[%s]", existingIdentifier.getIdentifierAsString(), sequenceName, previousSegmentIdNotNull, interval);
return null;
}
}
// Make up a pending segment based on existing segments and pending segments in the DB. This works
// assuming that all tasks inserting segments at a particular point in time are going through the
// allocatePendingSegment flow. This should be assured through some other mechanism (like task locks).
final SegmentIdentifier newIdentifier;
final List<TimelineObjectHolder<String, DataSegment>> existingChunks = getTimelineForIntervalsWithHandle(handle, dataSource, ImmutableList.of(interval)).lookup(interval);
if (existingChunks.size() > 1) {
// Not possible to expand more than one chunk with a single segment.
log.warn("Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: already have [%,d] chunks.", dataSource, interval, maxVersion, existingChunks.size());
return null;
} else {
SegmentIdentifier max = null;
if (!existingChunks.isEmpty()) {
TimelineObjectHolder<String, DataSegment> existingHolder = Iterables.getOnlyElement(existingChunks);
for (PartitionChunk<DataSegment> existing : existingHolder.getObject()) {
if (max == null || max.getShardSpec().getPartitionNum() < existing.getObject().getShardSpec().getPartitionNum()) {
max = SegmentIdentifier.fromDataSegment(existing.getObject());
}
}
}
final List<SegmentIdentifier> pendings = getPendingSegmentsForIntervalWithHandle(handle, dataSource, interval);
for (SegmentIdentifier pending : pendings) {
if (max == null || pending.getVersion().compareTo(max.getVersion()) > 0 || (pending.getVersion().equals(max.getVersion()) && pending.getShardSpec().getPartitionNum() > max.getShardSpec().getPartitionNum())) {
max = pending;
}
}
if (max == null) {
newIdentifier = new SegmentIdentifier(dataSource, interval, maxVersion, new NumberedShardSpec(0, 0));
} else if (!max.getInterval().equals(interval) || max.getVersion().compareTo(maxVersion) > 0) {
log.warn("Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: conflicting segment[%s].", dataSource, interval, maxVersion, max.getIdentifierAsString());
return null;
} else if (max.getShardSpec() instanceof LinearShardSpec) {
newIdentifier = new SegmentIdentifier(dataSource, max.getInterval(), max.getVersion(), new LinearShardSpec(max.getShardSpec().getPartitionNum() + 1));
} else if (max.getShardSpec() instanceof NumberedShardSpec) {
newIdentifier = new SegmentIdentifier(dataSource, max.getInterval(), max.getVersion(), new NumberedShardSpec(max.getShardSpec().getPartitionNum() + 1, ((NumberedShardSpec) max.getShardSpec()).getPartitions()));
} else {
log.warn("Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: ShardSpec class[%s] used by [%s].", dataSource, interval, maxVersion, max.getShardSpec().getClass(), max.getIdentifierAsString());
return null;
}
}
// SELECT -> INSERT can fail due to races; callers must be prepared to retry.
// Avoiding ON DUPLICATE KEY since it's not portable.
// Avoiding try/catch since it may cause inadvertent transaction-splitting.
// UNIQUE key for the row, ensuring sequences do not fork in two directions.
// Using a single column instead of (sequence_name, sequence_prev_id) as some MySQL storage engines
// have difficulty with large unique keys (see https://github.com/druid-io/druid/issues/2319)
final String sequenceNamePrevIdSha1 = BaseEncoding.base16().encode(Hashing.sha1().newHasher().putBytes(StringUtils.toUtf8(sequenceName)).putByte((byte) 0xff).putBytes(StringUtils.toUtf8(previousSegmentIdNotNull)).hash().asBytes());
handle.createStatement(String.format("INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, sequence_name, sequence_prev_id, sequence_name_prev_id_sha1, payload) " + "VALUES (:id, :dataSource, :created_date, :start, :end, :sequence_name, :sequence_prev_id, :sequence_name_prev_id_sha1, :payload)", dbTables.getPendingSegmentsTable(), connector.getQuoteString())).bind("id", newIdentifier.getIdentifierAsString()).bind("dataSource", dataSource).bind("created_date", new DateTime().toString()).bind("start", interval.getStart().toString()).bind("end", interval.getEnd().toString()).bind("sequence_name", sequenceName).bind("sequence_prev_id", previousSegmentIdNotNull).bind("sequence_name_prev_id_sha1", sequenceNamePrevIdSha1).bind("payload", jsonMapper.writeValueAsBytes(newIdentifier)).execute();
log.info("Allocated pending segment [%s] for sequence[%s] (previous = [%s]) in DB", newIdentifier.getIdentifierAsString(), sequenceName, previousSegmentIdNotNull);
return newIdentifier;
}
}, ALLOCATE_SEGMENT_QUIET_TRIES, SQLMetadataConnector.DEFAULT_MAX_TRIES);
}
Aggregations