Use of io.druid.timeline.DataSegment in project druid by druid-io.
From the class IndexerSQLMetadataStorageCoordinator, method announceHistoricalSegments:
/**
* {@inheritDoc}
*/
@Override
public SegmentPublishResult announceHistoricalSegments(final Set<DataSegment> segments, final DataSourceMetadata startMetadata, final DataSourceMetadata endMetadata) throws IOException {
if (segments.isEmpty()) {
throw new IllegalArgumentException("segment set must not be empty");
}
final String dataSource = segments.iterator().next().getDataSource();
for (DataSegment segment : segments) {
if (!dataSource.equals(segment.getDataSource())) {
throw new IllegalArgumentException("segments must all be from the same dataSource");
}
}
if ((startMetadata == null && endMetadata != null) || (startMetadata != null && endMetadata == null)) {
throw new IllegalArgumentException("start/end metadata pair must be either null or non-null");
}
// Find which segments are used (i.e. not overshadowed).
final Set<DataSegment> usedSegments = Sets.newHashSet();
for (TimelineObjectHolder<String, DataSegment> holder : VersionedIntervalTimeline.forSegments(segments).lookup(JodaUtils.ETERNITY)) {
for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
usedSegments.add(chunk.getObject());
}
}
final AtomicBoolean txnFailure = new AtomicBoolean(false);
try {
return connector.retryTransaction(new TransactionCallback<SegmentPublishResult>() {
@Override
public SegmentPublishResult inTransaction(final Handle handle, final TransactionStatus transactionStatus) throws Exception {
final Set<DataSegment> inserted = Sets.newHashSet();
if (startMetadata != null) {
final DataSourceMetadataUpdateResult result = updateDataSourceMetadataWithHandle(handle, dataSource, startMetadata, endMetadata);
if (result != DataSourceMetadataUpdateResult.SUCCESS) {
transactionStatus.setRollbackOnly();
txnFailure.set(true);
if (result == DataSourceMetadataUpdateResult.FAILURE) {
throw new RuntimeException("Aborting transaction!");
} else if (result == DataSourceMetadataUpdateResult.TRY_AGAIN) {
throw new RetryTransactionException("Aborting transaction!");
}
}
}
for (final DataSegment segment : segments) {
if (announceHistoricalSegment(handle, segment, usedSegments.contains(segment))) {
inserted.add(segment);
}
}
return new SegmentPublishResult(ImmutableSet.copyOf(inserted), true);
}
}, 3, SQLMetadataConnector.DEFAULT_MAX_TRIES);
} catch (CallbackFailedException e) {
if (txnFailure.get()) {
return new SegmentPublishResult(ImmutableSet.<DataSegment>of(), false);
} else {
throw e;
}
}
}
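A minimal caller sketch for the method above. The coordinator instance and the segment set are assumed to be constructed elsewhere, and the class name and import paths are illustrative rather than taken from the source; passing null for both metadata arguments takes the non-transactional branch permitted by the null-pair check at the top of the method.
import java.io.IOException;
import java.util.Set;

import io.druid.indexing.overlord.SegmentPublishResult;
import io.druid.metadata.IndexerSQLMetadataStorageCoordinator;
import io.druid.timeline.DataSegment;

public class SegmentPublishExample
{
  // Sketch only: publish a pre-built set of segments without transactional
  // commit metadata. Passing null for both startMetadata and endMetadata
  // skips the DataSourceMetadata comparison shown above.
  public static SegmentPublishResult publishNonTransactionally(
      IndexerSQLMetadataStorageCoordinator coordinator,
      Set<DataSegment> segments
  ) throws IOException
  {
    // The result carries the subset of segments that were newly inserted plus
    // a success flag, mirroring the SegmentPublishResult constructed above.
    return coordinator.announceHistoricalSegments(segments, null, null);
  }
}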
Use of io.druid.timeline.DataSegment in project druid by druid-io.
From the class SQLMetadataSegmentManager, method enableDatasource:
@Override
public boolean enableDatasource(final String ds) {
try {
final IDBI dbi = connector.getDBI();
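// Rebuild this datasource's timeline so that only segments not overshadowed by a newer version are re-enabled below.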
VersionedIntervalTimeline<String, DataSegment> segmentTimeline = connector.inReadOnlyTransaction(new TransactionCallback<VersionedIntervalTimeline<String, DataSegment>>() {
@Override
public VersionedIntervalTimeline<String, DataSegment> inTransaction(Handle handle, TransactionStatus status) throws Exception {
return handle.createQuery(String.format("SELECT payload FROM %s WHERE dataSource = :dataSource", getSegmentsTable()))
    .setFetchSize(connector.getStreamingFetchSize())
    .bind("dataSource", ds)
    .map(ByteArrayMapper.FIRST)
    .fold(
        new VersionedIntervalTimeline<String, DataSegment>(Ordering.natural()),
        new Folder3<VersionedIntervalTimeline<String, DataSegment>, byte[]>() {
@Override
public VersionedIntervalTimeline<String, DataSegment> fold(VersionedIntervalTimeline<String, DataSegment> timeline, byte[] payload, FoldController foldController, StatementContext statementContext) throws SQLException {
try {
final DataSegment segment = DATA_SEGMENT_INTERNER.intern(jsonMapper.readValue(payload, DataSegment.class));
timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
return timeline;
} catch (Exception e) {
throw new SQLException(e.toString());
}
}
});
}
});
final List<DataSegment> segments = Lists.newArrayList();
for (TimelineObjectHolder<String, DataSegment> objectHolder : segmentTimeline.lookup(new Interval("0000-01-01/3000-01-01"))) {
for (PartitionChunk<DataSegment> partitionChunk : objectHolder.getObject()) {
segments.add(partitionChunk.getObject());
}
}
if (segments.isEmpty()) {
log.warn("No segments found in the database!");
return false;
}
dbi.withHandle(new HandleCallback<Void>() {
@Override
public Void withHandle(Handle handle) throws Exception {
Batch batch = handle.createBatch();
for (DataSegment segment : segments) {
batch.add(String.format("UPDATE %s SET used=true WHERE id = '%s'", getSegmentsTable(), segment.getIdentifier()));
}
batch.execute();
return null;
}
});
} catch (Exception e) {
log.error(e, "Exception enabling datasource %s", ds);
return false;
}
return true;
}
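Both this method and announceHistoricalSegments above lean on VersionedIntervalTimeline to drop overshadowed segments: only the chunks returned by lookup() are treated as used. The following self-contained sketch demonstrates that behavior; the datasource name, interval, and versions are made up, and the DataSegment.builder() / NoneShardSpec usage follows common Druid test patterns that may differ slightly by version.
import com.google.common.collect.Ordering;
import io.druid.timeline.DataSegment;
import io.druid.timeline.TimelineObjectHolder;
import io.druid.timeline.VersionedIntervalTimeline;
import io.druid.timeline.partition.NoneShardSpec;
import io.druid.timeline.partition.PartitionChunk;
import org.joda.time.Interval;

public class OvershadowExample
{
  public static void main(String[] args)
  {
    final Interval day = new Interval("2017-01-01/2017-01-02");

    // Two versions of the same interval; "v2" overshadows "v1".
    final DataSegment v1 = DataSegment.builder()
        .dataSource("example")
        .interval(day)
        .version("v1")
        .shardSpec(NoneShardSpec.instance())
        .size(0)
        .build();
    final DataSegment v2 = DataSegment.builder()
        .dataSource("example")
        .interval(day)
        .version("v2")
        .shardSpec(NoneShardSpec.instance())
        .size(0)
        .build();

    final VersionedIntervalTimeline<String, DataSegment> timeline =
        new VersionedIntervalTimeline<String, DataSegment>(Ordering.natural());
    timeline.add(v1.getInterval(), v1.getVersion(), v1.getShardSpec().createChunk(v1));
    timeline.add(v2.getInterval(), v2.getVersion(), v2.getShardSpec().createChunk(v2));

    // lookup() returns only the overshadowing version, so v1 is filtered out.
    for (TimelineObjectHolder<String, DataSegment> holder : timeline.lookup(day)) {
      for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
        System.out.println(chunk.getObject().getIdentifier()); // prints the v2 identifier only
      }
    }
  }
}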
Use of io.druid.timeline.DataSegment in project druid by druid-io.
From the class SQLMetadataSegmentManager, method poll:
@Override
public void poll() {
try {
if (!started) {
return;
}
ConcurrentHashMap<String, DruidDataSource> newDataSources = new ConcurrentHashMap<String, DruidDataSource>();
log.debug("Starting polling of segment table");
// some databases such as PostgreSQL require auto-commit turned off
// to stream results back, enabling transactions disables auto-commit
//
// setting connection to read-only will allow some database such as MySQL
// to automatically use read-only transaction mode, further optimizing the query
final List<DataSegment> segments = connector.inReadOnlyTransaction(new TransactionCallback<List<DataSegment>>() {
@Override
public List<DataSegment> inTransaction(Handle handle, TransactionStatus status) throws Exception {
return handle.createQuery(String.format("SELECT payload FROM %s WHERE used=true", getSegmentsTable()))
    .setFetchSize(connector.getStreamingFetchSize())
    .map(new ResultSetMapper<DataSegment>() {
@Override
public DataSegment map(int index, ResultSet r, StatementContext ctx) throws SQLException {
try {
return DATA_SEGMENT_INTERNER.intern(jsonMapper.readValue(r.getBytes("payload"), DataSegment.class));
} catch (IOException e) {
log.makeAlert(e, "Failed to read segment from db.").emit();
return null;
}
}
}).list();
}
});
if (segments == null || segments.isEmpty()) {
log.warn("No segments found in the database!");
return;
}
final Collection<DataSegment> segmentsFinal = Collections2.filter(segments, Predicates.notNull());
log.info("Polled and found %,d segments in the database", segments.size());
for (final DataSegment segment : segmentsFinal) {
String datasourceName = segment.getDataSource();
DruidDataSource dataSource = newDataSources.get(datasourceName);
if (dataSource == null) {
dataSource = new DruidDataSource(datasourceName, ImmutableMap.of("created", new DateTime().toString()));
Object shouldBeNull = newDataSources.put(datasourceName, dataSource);
if (shouldBeNull != null) {
log.warn("Just put key[%s] into dataSources and what was there wasn't null!? It was[%s]", datasourceName, shouldBeNull);
}
}
if (!dataSource.getSegments().contains(segment)) {
dataSource.addSegment(segment.getIdentifier(), segment);
}
}
synchronized (lock) {
if (started) {
dataSources.set(newDataSources);
}
}
} catch (Exception e) {
log.makeAlert(e, "Problem polling DB.").emit();
}
}
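The DATA_SEGMENT_INTERNER used in both database reads above deduplicates DataSegment instances across polls. Below is a sketch of that pattern with Guava's weak interner; the field name and the assumption that the interner is built this way are inferred from the usage, not confirmed by the excerpt.
import com.google.common.collect.Interner;
import com.google.common.collect.Interners;
import io.druid.timeline.DataSegment;

public class SegmentInterningSketch
{
  // Assumed to mirror DATA_SEGMENT_INTERNER: a weak interner returns a single
  // shared instance for equal segments, so repeated polls of an unchanged
  // segment table do not pile up duplicate DataSegment objects on the heap.
  private static final Interner<DataSegment> INTERNER = Interners.newWeakInterner();

  public static DataSegment intern(DataSegment segment)
  {
    return INTERNER.intern(segment);
  }
}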
Use of io.druid.timeline.DataSegment in project druid by druid-io.
From the class AppenderatorPlumber, method mergeAndPush:
private void mergeAndPush() {
final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity();
final Period windowPeriod = config.getWindowPeriod();
final long windowMillis = windowPeriod.toStandardDuration().getMillis();
log.info("Starting merge and push.");
DateTime minTimestampAsDate = segmentGranularity.bucketStart(new DateTime(Math.max(windowMillis, rejectionPolicy.getCurrMaxTime().getMillis()) - windowMillis));
long minTimestamp = minTimestampAsDate.getMillis();
final List<SegmentIdentifier> appenderatorSegments = appenderator.getSegments();
final List<SegmentIdentifier> segmentsToPush = Lists.newArrayList();
if (shuttingDown) {
log.info("Found [%,d] segments. Attempting to hand off all of them.", appenderatorSegments.size());
segmentsToPush.addAll(appenderatorSegments);
} else {
log.info("Found [%,d] segments. Attempting to hand off segments that start before [%s].", appenderatorSegments.size(), minTimestampAsDate);
for (SegmentIdentifier segment : appenderatorSegments) {
final Long intervalStart = segment.getInterval().getStartMillis();
if (intervalStart < minTimestamp) {
log.info("Adding entry [%s] for merge and push.", segment);
segmentsToPush.add(segment);
} else {
log.info("Skipping persist and merge for entry [%s] : Start time [%s] >= [%s] min timestamp required in this run. Segment will be picked up in a future run.", segment, new DateTime(intervalStart), minTimestampAsDate);
}
}
}
log.info("Found [%,d] segments to persist and merge", segmentsToPush.size());
final Function<Throwable, Void> errorHandler = new Function<Throwable, Void>() {
@Override
public Void apply(Throwable throwable) {
final List<String> segmentIdentifierStrings = Lists.transform(segmentsToPush, new Function<SegmentIdentifier, String>() {
@Override
public String apply(SegmentIdentifier input) {
return input.getIdentifierAsString();
}
});
log.makeAlert(throwable, "Failed to publish merged indexes[%s]", schema.getDataSource()).addData("segments", segmentIdentifierStrings).emit();
if (shuttingDown) {
// We're trying to shut down, and these segments failed to push. Let's just get rid of them.
// This call will also delete possibly-partially-written files, so we don't need to do it explicitly.
cleanShutdown = false;
for (SegmentIdentifier identifier : segmentsToPush) {
dropSegment(identifier);
}
}
return null;
}
};
// WARNING: Committers.nil() here means that on-disk data can get out of sync with committing.
Futures.addCallback(appenderator.push(segmentsToPush, Committers.nil()), new FutureCallback<SegmentsAndMetadata>() {
@Override
public void onSuccess(SegmentsAndMetadata result) {
// Immediately publish after pushing
for (DataSegment pushedSegment : result.getSegments()) {
try {
segmentPublisher.publishSegment(pushedSegment);
} catch (Exception e) {
errorHandler.apply(e);
}
}
log.info("Published [%,d] sinks.", segmentsToPush.size());
}
@Override
public void onFailure(Throwable e) {
log.warn(e, "Failed to push [%,d] segments.", segmentsToPush.size());
errorHandler.apply(e);
}
});
}
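A worked example of the cutoff computed at the top of mergeAndPush(), using plain Joda-Time instead of the Granularity class so the sketch stays self-contained: with a ten-minute window, an hourly segment granularity, and a current max time of 13:27 UTC, the cutoff is 13:00, so only segments whose interval starts before 13:00 are pushed. All values are hypothetical.
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Period;

public class CutoffSketch
{
  public static void main(String[] args)
  {
    final long windowMillis = new Period("PT10M").toStandardDuration().getMillis();
    final DateTime currMaxTime = new DateTime("2017-01-01T13:27:00Z", DateTimeZone.UTC);

    // Same arithmetic as mergeAndPush(): step back by the window period...
    final DateTime rawCutoff =
        new DateTime(Math.max(windowMillis, currMaxTime.getMillis()) - windowMillis, DateTimeZone.UTC);

    // ...then truncate to the segment granularity (hourly here), standing in
    // for segmentGranularity.bucketStart().
    final DateTime cutoff = rawCutoff.hourOfDay().roundFloorCopy();
    System.out.println(cutoff); // 2017-01-01T13:00:00.000Z
  }
}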
Use of io.druid.timeline.DataSegment in project druid by druid-io.
From the class FiniteAppenderatorDriver, method publishAll:
/**
* Push and publish all segments to the metadata store.
*
* @param publisher segment publisher
* @param wrappedCommitter wrapped committer (from wrapCommitter)
*
* @return published segments and metadata, or null if segments could not be published due to transaction failure
* with commit metadata.
*/
private SegmentsAndMetadata publishAll(final TransactionalSegmentPublisher publisher, final Committer wrappedCommitter) throws InterruptedException {
final List<SegmentIdentifier> theSegments = ImmutableList.copyOf(appenderator.getSegments());
long nTry = 0;
while (true) {
try {
log.info("Pushing segments: [%s]", Joiner.on(", ").join(theSegments));
final SegmentsAndMetadata segmentsAndMetadata = appenderator.push(theSegments, wrappedCommitter).get();
// Sanity check
if (!segmentsToIdentifiers(segmentsAndMetadata.getSegments()).equals(Sets.newHashSet(theSegments))) {
throw new ISE("WTF?! Pushed different segments than requested. Pushed[%s], requested[%s].", Joiner.on(", ").join(identifiersToStrings(segmentsToIdentifiers(segmentsAndMetadata.getSegments()))), Joiner.on(", ").join(identifiersToStrings(theSegments)));
}
log.info("Publishing segments with commitMetadata[%s]: [%s]", segmentsAndMetadata.getCommitMetadata(), Joiner.on(", ").join(segmentsAndMetadata.getSegments()));
if (segmentsAndMetadata.getSegments().isEmpty()) {
log.info("Nothing to publish, skipping publish step.");
} else {
final boolean published = publisher.publishSegments(ImmutableSet.copyOf(segmentsAndMetadata.getSegments()), ((FiniteAppenderatorDriverMetadata) segmentsAndMetadata.getCommitMetadata()).getCallerMetadata());
if (published) {
log.info("Published segments, awaiting handoff.");
} else {
log.info("Transaction failure while publishing segments, checking if someone else beat us to it.");
if (usedSegmentChecker.findUsedSegments(segmentsToIdentifiers(segmentsAndMetadata.getSegments())).equals(Sets.newHashSet(segmentsAndMetadata.getSegments()))) {
log.info("Our segments really do exist, awaiting handoff.");
} else {
log.warn("Our segments don't exist, giving up.");
return null;
}
}
}
for (final DataSegment dataSegment : segmentsAndMetadata.getSegments()) {
handoffNotifier.registerSegmentHandoffCallback(
    new SegmentDescriptor(dataSegment.getInterval(), dataSegment.getVersion(), dataSegment.getShardSpec().getPartitionNum()),
    MoreExecutors.sameThreadExecutor(),
    new Runnable() {
@Override
public void run() {
final SegmentIdentifier identifier = SegmentIdentifier.fromDataSegment(dataSegment);
log.info("Segment[%s] successfully handed off, dropping.", identifier);
metrics.incrementHandOffCount();
final ListenableFuture<?> dropFuture = appenderator.drop(identifier);
Futures.addCallback(dropFuture, new FutureCallback<Object>() {
@Override
public void onSuccess(Object result) {
synchronized (handoffMonitor) {
handoffMonitor.notifyAll();
}
}
@Override
public void onFailure(Throwable e) {
log.warn(e, "Failed to drop segment[%s]?!", identifier);
synchronized (handoffMonitor) {
handoffMonitor.notifyAll();
}
}
});
}
});
}
return segmentsAndMetadata;
} catch (InterruptedException e) {
throw e;
} catch (Exception e) {
final long sleepMillis = computeNextRetrySleep(++nTry);
log.warn(e, "Failed publishAll (try %d), retrying in %,dms.", nTry, sleepMillis);
Thread.sleep(sleepMillis);
}
}
}
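The publisher.publishSegments() call above is where a TransactionalSegmentPublisher would typically delegate to IndexerSQLMetadataStorageCoordinator.announceHistoricalSegments() from the first example. The sketch below shows one way to wire that up; the interface's package, the exact shape of its single method, the isSuccess() accessor, and the handling of commitMetadata are assumptions based only on the call sites visible here.
import java.io.IOException;
import java.util.Set;

import io.druid.metadata.IndexerSQLMetadataStorageCoordinator;
import io.druid.segment.realtime.appenderator.TransactionalSegmentPublisher;
import io.druid.timeline.DataSegment;

public class CoordinatorBackedPublisher implements TransactionalSegmentPublisher
{
  private final IndexerSQLMetadataStorageCoordinator coordinator;

  public CoordinatorBackedPublisher(IndexerSQLMetadataStorageCoordinator coordinator)
  {
    this.coordinator = coordinator;
  }

  @Override
  public boolean publishSegments(Set<DataSegment> segments, Object commitMetadata) throws IOException
  {
    // Sketch only: a real publisher would translate commitMetadata into the
    // start/end DataSourceMetadata pair; null/null keeps this non-transactional.
    // isSuccess() is assumed from the (segments, success) constructor used above.
    return coordinator.announceHistoricalSegments(segments, null, null).isSuccess();
  }
}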