Use of io.druid.segment.realtime.appenderator.SegmentIdentifier in project hive by apache.
The class DruidRecordWriter, method pushSegments.
private void pushSegments(List<SegmentIdentifier> segmentsToPush) {
  try {
    SegmentsAndMetadata segmentsAndMetadata =
        appenderator.push(segmentsToPush, committerSupplier.get()).get();
    final HashSet<String> pushedSegmentIdentifierHashSet = new HashSet<>();
    for (DataSegment pushedSegment : segmentsAndMetadata.getSegments()) {
      pushedSegmentIdentifierHashSet
          .add(SegmentIdentifier.fromDataSegment(pushedSegment).getIdentifierAsString());
      final Path segmentDescriptorOutputPath =
          DruidStorageHandlerUtils.makeSegmentDescriptorOutputPath(pushedSegment, segmentsDescriptorDir);
      DruidStorageHandlerUtils.writeSegmentDescriptor(fileSystem, pushedSegment, segmentDescriptorOutputPath);
      LOG.info(String.format("Pushed the segment [%s] and persisted the descriptor located at [%s]",
          pushedSegment, segmentDescriptorOutputPath));
    }
    final HashSet<String> toPushSegmentsHashSet = new HashSet<>(
        FluentIterable.from(segmentsToPush).transform(new Function<SegmentIdentifier, String>() {

          @Nullable
          @Override
          public String apply(@Nullable SegmentIdentifier input) {
            return input.getIdentifierAsString();
          }
        }).toList());
    if (!pushedSegmentIdentifierHashSet.equals(toPushSegmentsHashSet)) {
      throw new IllegalStateException(String.format(
          "was asked to publish [%s] but was able to publish only [%s]",
          Joiner.on(", ").join(toPushSegmentsHashSet),
          Joiner.on(", ").join(pushedSegmentIdentifierHashSet)));
    }
    for (SegmentIdentifier dataSegmentId : segmentsToPush) {
      LOG.info("Dropping segment {}", dataSegmentId.toString());
      appenderator.drop(dataSegmentId).get();
    }
    LOG.info(String.format("Published [%,d] segments.", segmentsToPush.size()));
  } catch (InterruptedException e) {
    LOG.error(String.format("got interrupted, failed to push [%,d] segments.", segmentsToPush.size()), e);
    Thread.currentThread().interrupt();
  } catch (IOException | ExecutionException e) {
    LOG.error(String.format("Failed to push [%,d] segments.", segmentsToPush.size()), e);
    Throwables.propagate(e);
  }
}
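For orientation, a minimal sketch (not part of the Hive source; the variable name segmentsStillHeld and the call placement are assumptions) of how pushSegments might be driven when the writer shuts down, pushing whatever the same Appenderator instance still holds:

// Hypothetical shutdown path; assumes `appenderator` is the same instance used inside pushSegments.
List<SegmentIdentifier> segmentsStillHeld = appenderator.getSegments();
if (!segmentsStillHeld.isEmpty()) {
  pushSegments(segmentsStillHeld);
}
appenderator.close();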
Use of io.druid.segment.realtime.appenderator.SegmentIdentifier in project druid by druid-io.
The class SegmentAllocateAction, method perform.
@Override
public SegmentIdentifier perform(final Task task, final TaskActionToolbox toolbox) throws IOException {
  int attempt = 0;
  while (true) {
    attempt++;
    if (!task.getDataSource().equals(dataSource)) {
      throw new IAE("Task dataSource must match action dataSource, [%s] != [%s].", task.getDataSource(), dataSource);
    }
    final IndexerMetadataStorageCoordinator msc = toolbox.getIndexerMetadataStorageCoordinator();
    // 1) if something overlaps our timestamp, use that
    // 2) otherwise try preferredSegmentGranularity & going progressively smaller
    final List<Interval> tryIntervals = Lists.newArrayList();
    final Interval rowInterval = queryGranularity.bucket(timestamp);
    final Set<DataSegment> usedSegmentsForRow =
        ImmutableSet.copyOf(msc.getUsedSegmentsForInterval(dataSource, rowInterval));
    if (usedSegmentsForRow.isEmpty()) {
      // No existing segments for this row, but nearby ones might conflict with our preferred
      // segment granularity. Try that first, and then progressively smaller ones if it fails.
      for (Granularity gran : Granularity.granularitiesFinerThan(preferredSegmentGranularity)) {
        tryIntervals.add(gran.bucket(timestamp));
      }
    } else {
      // Existing segment(s) exist for this row; use the interval of the first one.
      tryIntervals.add(usedSegmentsForRow.iterator().next().getInterval());
    }
    for (final Interval tryInterval : tryIntervals) {
      if (tryInterval.contains(rowInterval)) {
        log.debug("Trying to allocate pending segment for rowInterval[%s], segmentInterval[%s].", rowInterval, tryInterval);
        final TaskLock tryLock = toolbox.getTaskLockbox().tryLock(task, tryInterval).orNull();
        if (tryLock != null) {
          final SegmentIdentifier identifier =
              msc.allocatePendingSegment(dataSource, sequenceName, previousSegmentId, tryInterval, tryLock.getVersion());
          if (identifier != null) {
            return identifier;
          } else {
            log.debug("Could not allocate pending segment for rowInterval[%s], segmentInterval[%s].", rowInterval, tryInterval);
          }
        } else {
          log.debug("Could not acquire lock for rowInterval[%s], segmentInterval[%s].", rowInterval, tryInterval);
        }
      }
    }
    if (!ImmutableSet.copyOf(msc.getUsedSegmentsForInterval(dataSource, rowInterval)).equals(usedSegmentsForRow)) {
      if (attempt < MAX_ATTEMPTS) {
        final long shortRandomSleep = 50 + (long) (Math.random() * 450);
        log.debug("Used segment set changed for rowInterval[%s]. Retrying segment allocation in %,dms (attempt = %,d).", rowInterval, shortRandomSleep, attempt);
        try {
          Thread.sleep(shortRandomSleep);
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          throw Throwables.propagate(e);
        }
      } else {
        log.error("Used segment set changed for rowInterval[%s]. Not trying again (attempt = %,d).", rowInterval, attempt);
        return null;
      }
    } else {
      return null;
    }
  }
}
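As a rough illustration (not from the Druid source; the constructor argument order and the surrounding names such as row and toolbox are assumptions), an indexing task might submit this action through its TaskActionClient and treat a null result as a failed allocation:

// Hypothetical call site inside an indexing task; `toolbox` here is the task's TaskToolbox.
final SegmentAllocateAction action = new SegmentAllocateAction(
    dataSource, row.getTimestamp(), queryGranularity, preferredSegmentGranularity, sequenceName, previousSegmentId);
final SegmentIdentifier allocated = toolbox.getTaskActionClient().submit(action);
if (allocated == null) {
  // perform() returns null when no segment can be allocated for the row; the caller decides how to react.
  throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp());
}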
Use of io.druid.segment.realtime.appenderator.SegmentIdentifier in project druid by druid-io.
The class IndexerSQLMetadataStorageCoordinator, method allocatePendingSegment.
@Override
public SegmentIdentifier allocatePendingSegment(
    final String dataSource,
    final String sequenceName,
    final String previousSegmentId,
    final Interval interval,
    final String maxVersion
) throws IOException {
  Preconditions.checkNotNull(dataSource, "dataSource");
  Preconditions.checkNotNull(sequenceName, "sequenceName");
  Preconditions.checkNotNull(interval, "interval");
  Preconditions.checkNotNull(maxVersion, "maxVersion");
  final String previousSegmentIdNotNull = previousSegmentId == null ? "" : previousSegmentId;
  return connector.retryTransaction(new TransactionCallback<SegmentIdentifier>() {

    @Override
    public SegmentIdentifier inTransaction(Handle handle, TransactionStatus transactionStatus) throws Exception {
      final List<byte[]> existingBytes = handle
          .createQuery(
              String.format(
                  "SELECT payload FROM %s WHERE "
                      + "dataSource = :dataSource AND "
                      + "sequence_name = :sequence_name AND "
                      + "sequence_prev_id = :sequence_prev_id",
                  dbTables.getPendingSegmentsTable()
              )
          )
          .bind("dataSource", dataSource)
          .bind("sequence_name", sequenceName)
          .bind("sequence_prev_id", previousSegmentIdNotNull)
          .map(ByteArrayMapper.FIRST)
          .list();
      if (!existingBytes.isEmpty()) {
        final SegmentIdentifier existingIdentifier =
            jsonMapper.readValue(Iterables.getOnlyElement(existingBytes), SegmentIdentifier.class);
        if (existingIdentifier.getInterval().getStartMillis() == interval.getStartMillis()
            && existingIdentifier.getInterval().getEndMillis() == interval.getEndMillis()) {
          log.info(
              "Found existing pending segment [%s] for sequence[%s] (previous = [%s]) in DB",
              existingIdentifier.getIdentifierAsString(),
              sequenceName,
              previousSegmentIdNotNull
          );
          return existingIdentifier;
        } else {
          log.warn(
              "Cannot use existing pending segment [%s] for sequence[%s] (previous = [%s]) in DB, "
                  + "does not match requested interval[%s]",
              existingIdentifier.getIdentifierAsString(),
              sequenceName,
              previousSegmentIdNotNull,
              interval
          );
          return null;
        }
      }
      // Make up a pending segment based on existing segments and pending segments in the DB. This works
      // assuming that all tasks inserting segments at a particular point in time are going through the
      // allocatePendingSegment flow. This should be assured through some other mechanism (like task locks).
      final SegmentIdentifier newIdentifier;
      final List<TimelineObjectHolder<String, DataSegment>> existingChunks =
          getTimelineForIntervalsWithHandle(handle, dataSource, ImmutableList.of(interval)).lookup(interval);
      if (existingChunks.size() > 1) {
        // Not possible to expand more than one chunk with a single segment.
        log.warn(
            "Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: already have [%,d] chunks.",
            dataSource,
            interval,
            maxVersion,
            existingChunks.size()
        );
        return null;
      } else {
        SegmentIdentifier max = null;
        if (!existingChunks.isEmpty()) {
          TimelineObjectHolder<String, DataSegment> existingHolder = Iterables.getOnlyElement(existingChunks);
          for (PartitionChunk<DataSegment> existing : existingHolder.getObject()) {
            if (max == null || max.getShardSpec().getPartitionNum() < existing.getObject().getShardSpec().getPartitionNum()) {
              max = SegmentIdentifier.fromDataSegment(existing.getObject());
            }
          }
        }
        final List<SegmentIdentifier> pendings = getPendingSegmentsForIntervalWithHandle(handle, dataSource, interval);
        for (SegmentIdentifier pending : pendings) {
          if (max == null
              || pending.getVersion().compareTo(max.getVersion()) > 0
              || (pending.getVersion().equals(max.getVersion())
                  && pending.getShardSpec().getPartitionNum() > max.getShardSpec().getPartitionNum())) {
            max = pending;
          }
        }
        if (max == null) {
          newIdentifier = new SegmentIdentifier(dataSource, interval, maxVersion, new NumberedShardSpec(0, 0));
        } else if (!max.getInterval().equals(interval) || max.getVersion().compareTo(maxVersion) > 0) {
          log.warn(
              "Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: conflicting segment[%s].",
              dataSource,
              interval,
              maxVersion,
              max.getIdentifierAsString()
          );
          return null;
        } else if (max.getShardSpec() instanceof LinearShardSpec) {
          newIdentifier = new SegmentIdentifier(
              dataSource,
              max.getInterval(),
              max.getVersion(),
              new LinearShardSpec(max.getShardSpec().getPartitionNum() + 1)
          );
        } else if (max.getShardSpec() instanceof NumberedShardSpec) {
          newIdentifier = new SegmentIdentifier(
              dataSource,
              max.getInterval(),
              max.getVersion(),
              new NumberedShardSpec(max.getShardSpec().getPartitionNum() + 1, ((NumberedShardSpec) max.getShardSpec()).getPartitions())
          );
        } else {
          log.warn(
              "Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: ShardSpec class[%s] used by [%s].",
              dataSource,
              interval,
              maxVersion,
              max.getShardSpec().getClass(),
              max.getIdentifierAsString()
          );
          return null;
        }
      }
      // SELECT -> INSERT can fail due to races; callers must be prepared to retry.
      // Avoiding ON DUPLICATE KEY since it's not portable.
      // Avoiding try/catch since it may cause inadvertent transaction-splitting.
      // UNIQUE key for the row, ensuring sequences do not fork in two directions.
      // Using a single column instead of (sequence_name, sequence_prev_id) as some MySQL storage engines
      // have difficulty with large unique keys (see https://github.com/druid-io/druid/issues/2319)
      final String sequenceNamePrevIdSha1 = BaseEncoding.base16().encode(
          Hashing.sha1()
              .newHasher()
              .putBytes(StringUtils.toUtf8(sequenceName))
              .putByte((byte) 0xff)
              .putBytes(StringUtils.toUtf8(previousSegmentIdNotNull))
              .hash()
              .asBytes()
      );
      handle.createStatement(
          String.format(
              "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, sequence_name, sequence_prev_id, sequence_name_prev_id_sha1, payload) "
                  + "VALUES (:id, :dataSource, :created_date, :start, :end, :sequence_name, :sequence_prev_id, :sequence_name_prev_id_sha1, :payload)",
              dbTables.getPendingSegmentsTable(),
              connector.getQuoteString()
          )
      )
          .bind("id", newIdentifier.getIdentifierAsString())
          .bind("dataSource", dataSource)
          .bind("created_date", new DateTime().toString())
          .bind("start", interval.getStart().toString())
          .bind("end", interval.getEnd().toString())
          .bind("sequence_name", sequenceName)
          .bind("sequence_prev_id", previousSegmentIdNotNull)
          .bind("sequence_name_prev_id_sha1", sequenceNamePrevIdSha1)
          .bind("payload", jsonMapper.writeValueAsBytes(newIdentifier))
          .execute();
      log.info(
          "Allocated pending segment [%s] for sequence[%s] (previous = [%s]) in DB",
          newIdentifier.getIdentifierAsString(),
          sequenceName,
          previousSegmentIdNotNull
      );
      return newIdentifier;
    }
  }, ALLOCATE_SEGMENT_QUIET_TRIES, SQLMetadataConnector.DEFAULT_MAX_TRIES);
}
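The early return at the top of inTransaction is what makes the allocation idempotent per (sequenceName, previousSegmentId) pair. A small illustrative sketch (the coordinator variable, dataSource, and interval values are made up): calling the method twice with the same sequence and a matching interval yields the same pending segment rather than a new partition:

// Hypothetical usage; identifiers and interval are illustrative only.
final Interval hour = new Interval("2017-01-01T00:00:00Z/2017-01-01T01:00:00Z");
final SegmentIdentifier first = coordinator.allocatePendingSegment("wikipedia", "seq_0", null, hour, "2017-01-01T00:00:00.000Z");
final SegmentIdentifier second = coordinator.allocatePendingSegment("wikipedia", "seq_0", null, hour, "2017-01-01T00:00:00.000Z");
// `second` is read back from the pending segments table by the SELECT at the top of inTransaction,
// so first.getIdentifierAsString().equals(second.getIdentifierAsString()) holds.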
Use of io.druid.segment.realtime.appenderator.SegmentIdentifier in project druid by druid-io.
The class IndexerSQLMetadataStorageCoordinator, method getPendingSegmentsForIntervalWithHandle.
private List<SegmentIdentifier> getPendingSegmentsForIntervalWithHandle(
    final Handle handle,
    final String dataSource,
    final Interval interval
) throws IOException {
  final List<SegmentIdentifier> identifiers = Lists.newArrayList();
  final ResultIterator<byte[]> dbSegments = handle
      .createQuery(
          String.format(
              "SELECT payload FROM %1$s WHERE dataSource = :dataSource AND start <= :end and %2$send%2$s >= :start",
              dbTables.getPendingSegmentsTable(),
              connector.getQuoteString()
          )
      )
      .bind("dataSource", dataSource)
      .bind("start", interval.getStart().toString())
      .bind("end", interval.getEnd().toString())
      .map(ByteArrayMapper.FIRST)
      .iterator();
  while (dbSegments.hasNext()) {
    final byte[] payload = dbSegments.next();
    final SegmentIdentifier identifier = jsonMapper.readValue(payload, SegmentIdentifier.class);
    if (interval.overlaps(identifier.getInterval())) {
      identifiers.add(identifier);
    }
  }
  dbSegments.close();
  return identifiers;
}
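Note that the SQL predicate (start <= :end AND end >= :start) also matches rows whose interval merely abuts the requested one, which is why the loop re-checks interval.overlaps before keeping an identifier. A small Joda-Time sketch of the distinction (the intervals are illustrative):

// Abutting intervals share an endpoint but do not overlap.
final Interval a = new Interval("2017-01-01T00:00:00Z/2017-01-01T01:00:00Z");
final Interval b = new Interval("2017-01-01T01:00:00Z/2017-01-01T02:00:00Z");
// a.abuts(b) is true, a.overlaps(b) is false, so a pending segment covering `b` would be
// returned by the query for `a` but filtered out by the overlaps() check.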
Use of io.druid.segment.realtime.appenderator.SegmentIdentifier in project druid by druid-io.
The class ActionBasedUsedSegmentCheckerTest, method testBasic.
@Test
public void testBasic() throws IOException {
  final TaskActionClient taskActionClient = EasyMock.createMock(TaskActionClient.class);
  EasyMock.expect(
      taskActionClient.submit(new SegmentListUsedAction("bar", null, ImmutableList.of(new Interval("2002/P1D"))))
  ).andReturn(
      ImmutableList.of(
          DataSegment.builder().dataSource("bar").interval(new Interval("2002/P1D")).shardSpec(new LinearShardSpec(0)).version("b").build(),
          DataSegment.builder().dataSource("bar").interval(new Interval("2002/P1D")).shardSpec(new LinearShardSpec(1)).version("b").build()
      )
  );
  EasyMock.expect(
      taskActionClient.submit(new SegmentListUsedAction("foo", null, ImmutableList.of(new Interval("2000/P1D"), new Interval("2001/P1D"))))
  ).andReturn(
      ImmutableList.of(
          DataSegment.builder().dataSource("foo").interval(new Interval("2000/P1D")).shardSpec(new LinearShardSpec(0)).version("a").build(),
          DataSegment.builder().dataSource("foo").interval(new Interval("2000/P1D")).shardSpec(new LinearShardSpec(1)).version("a").build(),
          DataSegment.builder().dataSource("foo").interval(new Interval("2001/P1D")).shardSpec(new LinearShardSpec(1)).version("b").build(),
          DataSegment.builder().dataSource("foo").interval(new Interval("2002/P1D")).shardSpec(new LinearShardSpec(1)).version("b").build()
      )
  );
  EasyMock.replay(taskActionClient);
  final UsedSegmentChecker checker = new ActionBasedUsedSegmentChecker(taskActionClient);
  final Set<DataSegment> segments = checker.findUsedSegments(
      ImmutableSet.of(
          new SegmentIdentifier("foo", new Interval("2000/P1D"), "a", new LinearShardSpec(1)),
          new SegmentIdentifier("foo", new Interval("2001/P1D"), "b", new LinearShardSpec(0)),
          new SegmentIdentifier("bar", new Interval("2002/P1D"), "b", new LinearShardSpec(0))
      )
  );
  Assert.assertEquals(
      ImmutableSet.of(
          DataSegment.builder().dataSource("foo").interval(new Interval("2000/P1D")).shardSpec(new LinearShardSpec(1)).version("a").build(),
          DataSegment.builder().dataSource("bar").interval(new Interval("2002/P1D")).shardSpec(new LinearShardSpec(0)).version("b").build()
      ),
      segments
  );
  EasyMock.verify(taskActionClient);
}