Use of io.druid.timeline.DataSegment in project druid by druid-io.
The class SQLMetadataSegmentManager, method enableDatasource:
@Override
public boolean enableDatasource(final String ds)
{
  try {
    final IDBI dbi = connector.getDBI();
    VersionedIntervalTimeline<String, DataSegment> segmentTimeline = connector.inReadOnlyTransaction(
        new TransactionCallback<VersionedIntervalTimeline<String, DataSegment>>()
        {
          @Override
          public VersionedIntervalTimeline<String, DataSegment> inTransaction(Handle handle, TransactionStatus status) throws Exception
          {
            return handle
                .createQuery(String.format("SELECT payload FROM %s WHERE dataSource = :dataSource", getSegmentsTable()))
                .setFetchSize(connector.getStreamingFetchSize())
                .bind("dataSource", ds)
                .map(ByteArrayMapper.FIRST)
                .fold(
                    new VersionedIntervalTimeline<String, DataSegment>(Ordering.natural()),
                    new Folder3<VersionedIntervalTimeline<String, DataSegment>, byte[]>()
                    {
                      @Override
                      public VersionedIntervalTimeline<String, DataSegment> fold(
                          VersionedIntervalTimeline<String, DataSegment> timeline,
                          byte[] payload,
                          FoldController foldController,
                          StatementContext statementContext
                      ) throws SQLException
                      {
                        try {
                          final DataSegment segment = DATA_SEGMENT_INTERNER.intern(jsonMapper.readValue(payload, DataSegment.class));
                          timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
                          return timeline;
                        }
                        catch (Exception e) {
                          throw new SQLException(e.toString());
                        }
                      }
                    }
                );
          }
        }
    );

    final List<DataSegment> segments = Lists.newArrayList();
    for (TimelineObjectHolder<String, DataSegment> objectHolder : segmentTimeline.lookup(new Interval("0000-01-01/3000-01-01"))) {
      for (PartitionChunk<DataSegment> partitionChunk : objectHolder.getObject()) {
        segments.add(partitionChunk.getObject());
      }
    }

    if (segments.isEmpty()) {
      log.warn("No segments found in the database!");
      return false;
    }

    dbi.withHandle(
        new HandleCallback<Void>()
        {
          @Override
          public Void withHandle(Handle handle) throws Exception
          {
            Batch batch = handle.createBatch();
            for (DataSegment segment : segments) {
              batch.add(String.format("UPDATE %s SET used=true WHERE id = '%s'", getSegmentsTable(), segment.getIdentifier()));
            }
            batch.execute();
            return null;
          }
        }
    );
  }
  catch (Exception e) {
    log.error(e, "Exception enabling datasource %s", ds);
    return false;
  }
  return true;
}
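enableDatasource reads every segment payload for the datasource, rebuilds a VersionedIntervalTimeline from them, and then marks each surviving segment used=true in one JDBI batch. For readers who want the update step in isolation, here is a minimal plain-JDBC sketch of the same batched update; the druid_segments table name, the connection, and the list of ids are assumptions, and a parameterized statement stands in for the string-formatted SQL used above:

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.List;

// Hypothetical helper: batched "mark segments used" update with a prepared
// statement. Table name and the source of segmentIds are illustrative.
final class EnableSegmentsSketch
{
  static int[] markUsed(Connection conn, List<String> segmentIds) throws SQLException
  {
    try (PreparedStatement ps = conn.prepareStatement(
        "UPDATE druid_segments SET used = true WHERE id = ?")) {
      for (String id : segmentIds) {
        ps.setString(1, id);
        ps.addBatch();
      }
      return ps.executeBatch(); // one round trip for the whole batch
    }
  }
}

A prepared statement also sidesteps the quoting issues the string-formatted variant would hit if a segment identifier ever contained a quote character.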
Use of io.druid.timeline.DataSegment in project druid by druid-io.
The class SQLMetadataSegmentManager, method poll:
@Override
public void poll()
{
  try {
    if (!started) {
      return;
    }

    ConcurrentHashMap<String, DruidDataSource> newDataSources = new ConcurrentHashMap<String, DruidDataSource>();

    log.debug("Starting polling of segment table");

    // some databases such as PostgreSQL require auto-commit turned off
    // to stream results back; enabling transactions disables auto-commit
    //
    // setting the connection to read-only will allow some databases such as MySQL
    // to automatically use read-only transaction mode, further optimizing the query
    final List<DataSegment> segments = connector.inReadOnlyTransaction(
        new TransactionCallback<List<DataSegment>>()
        {
          @Override
          public List<DataSegment> inTransaction(Handle handle, TransactionStatus status) throws Exception
          {
            return handle
                .createQuery(String.format("SELECT payload FROM %s WHERE used=true", getSegmentsTable()))
                .setFetchSize(connector.getStreamingFetchSize())
                .map(
                    new ResultSetMapper<DataSegment>()
                    {
                      @Override
                      public DataSegment map(int index, ResultSet r, StatementContext ctx) throws SQLException
                      {
                        try {
                          return DATA_SEGMENT_INTERNER.intern(jsonMapper.readValue(r.getBytes("payload"), DataSegment.class));
                        }
                        catch (IOException e) {
                          log.makeAlert(e, "Failed to read segment from db.").emit();
                          return null;
                        }
                      }
                    }
                )
                .list();
          }
        }
    );

    if (segments == null || segments.isEmpty()) {
      log.warn("No segments found in the database!");
      return;
    }

    final Collection<DataSegment> segmentsFinal = Collections2.filter(segments, Predicates.notNull());

    log.info("Polled and found %,d segments in the database", segments.size());

    for (final DataSegment segment : segmentsFinal) {
      String datasourceName = segment.getDataSource();
      DruidDataSource dataSource = newDataSources.get(datasourceName);
      if (dataSource == null) {
        dataSource = new DruidDataSource(datasourceName, ImmutableMap.of("created", new DateTime().toString()));
        Object shouldBeNull = newDataSources.put(datasourceName, dataSource);
        if (shouldBeNull != null) {
          log.warn("Just put key[%s] into dataSources and what was there wasn't null!? It was[%s]", datasourceName, shouldBeNull);
        }
      }
      if (!dataSource.getSegments().contains(segment)) {
        dataSource.addSegment(segment.getIdentifier(), segment);
      }
    }

    synchronized (lock) {
      if (started) {
        dataSources.set(newDataSources);
      }
    }
  }
  catch (Exception e) {
    log.makeAlert(e, "Problem polling DB.").emit();
  }
}
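The comments inside poll() explain why the query runs in a read-only transaction with a streaming fetch size. The same setup in plain JDBC looks roughly like the sketch below; the connection URL, credentials, and the druid_segments table name are illustrative assumptions:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

// Streaming read of segment payloads with plain JDBC; URL, credentials and
// table name are illustrative.
final class StreamingPollSketch
{
  public static void main(String[] args) throws SQLException
  {
    try (Connection conn = DriverManager.getConnection("jdbc:postgresql://localhost:5432/druid", "user", "password")) {
      conn.setAutoCommit(false); // PostgreSQL only streams results when auto-commit is off
      conn.setReadOnly(true);    // lets MySQL use a read-only transaction for the query
      try (PreparedStatement ps = conn.prepareStatement("SELECT payload FROM druid_segments WHERE used = true")) {
        ps.setFetchSize(100);    // fetch in small batches instead of buffering the whole result set
        int rows = 0;
        try (ResultSet rs = ps.executeQuery()) {
          while (rs.next()) {
            byte[] payload = rs.getBytes("payload"); // poll() deserializes this to a DataSegment
            rows++;
          }
        }
        System.out.printf("streamed %,d segment payloads%n", rows);
      }
      conn.commit();
    }
  }
}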
Use of io.druid.timeline.DataSegment in project druid by druid-io.
The class AppenderatorPlumber, method mergeAndPush:
private void mergeAndPush()
{
  final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity();
  final Period windowPeriod = config.getWindowPeriod();
  final long windowMillis = windowPeriod.toStandardDuration().getMillis();

  log.info("Starting merge and push.");

  DateTime minTimestampAsDate = segmentGranularity.bucketStart(
      new DateTime(Math.max(windowMillis, rejectionPolicy.getCurrMaxTime().getMillis()) - windowMillis)
  );
  long minTimestamp = minTimestampAsDate.getMillis();

  final List<SegmentIdentifier> appenderatorSegments = appenderator.getSegments();
  final List<SegmentIdentifier> segmentsToPush = Lists.newArrayList();

  if (shuttingDown) {
    log.info("Found [%,d] segments. Attempting to hand off all of them.", appenderatorSegments.size());
    segmentsToPush.addAll(appenderatorSegments);
  } else {
    log.info("Found [%,d] segments. Attempting to hand off segments that start before [%s].", appenderatorSegments.size(), minTimestampAsDate);
    for (SegmentIdentifier segment : appenderatorSegments) {
      final Long intervalStart = segment.getInterval().getStartMillis();
      if (intervalStart < minTimestamp) {
        log.info("Adding entry [%s] for merge and push.", segment);
        segmentsToPush.add(segment);
      } else {
        log.info(
            "Skipping persist and merge for entry [%s] : Start time [%s] >= [%s] min timestamp required in this run. Segment will be picked up in a future run.",
            segment,
            new DateTime(intervalStart),
            minTimestampAsDate
        );
      }
    }
  }

  log.info("Found [%,d] segments to persist and merge", segmentsToPush.size());

  final Function<Throwable, Void> errorHandler = new Function<Throwable, Void>()
  {
    @Override
    public Void apply(Throwable throwable)
    {
      final List<String> segmentIdentifierStrings = Lists.transform(
          segmentsToPush,
          new Function<SegmentIdentifier, String>()
          {
            @Override
            public String apply(SegmentIdentifier input)
            {
              return input.getIdentifierAsString();
            }
          }
      );

      log.makeAlert(throwable, "Failed to publish merged indexes[%s]", schema.getDataSource())
         .addData("segments", segmentIdentifierStrings)
         .emit();

      if (shuttingDown) {
        // We're trying to shut down, and these segments failed to push. Let's just get rid of them.
        // This call will also delete possibly-partially-written files, so we don't need to do it explicitly.
        cleanShutdown = false;
        for (SegmentIdentifier identifier : segmentsToPush) {
          dropSegment(identifier);
        }
      }

      return null;
    }
  };

  // WARNING: Committers.nil() here means that on-disk data can get out of sync with committing.
  Futures.addCallback(
      appenderator.push(segmentsToPush, Committers.nil()),
      new FutureCallback<SegmentsAndMetadata>()
      {
        @Override
        public void onSuccess(SegmentsAndMetadata result)
        {
          // Immediately publish after pushing
          for (DataSegment pushedSegment : result.getSegments()) {
            try {
              segmentPublisher.publishSegment(pushedSegment);
            }
            catch (Exception e) {
              errorHandler.apply(e);
            }
          }

          log.info("Published [%,d] sinks.", segmentsToPush.size());
        }

        @Override
        public void onFailure(Throwable e)
        {
          log.warn(e, "Failed to push [%,d] segments.", segmentsToPush.size());
          errorHandler.apply(e);
        }
      }
  );
}
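The first few lines of mergeAndPush() compute the handoff cutoff: take the rejection policy's current max time, subtract the window period, and truncate to the segment granularity. A small Joda-Time sketch with illustrative values (hourly granularity, 10-minute window) makes the arithmetic concrete; Granularity.bucketStart is modeled here as a plain floor to the hour:

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Period;

// Reproduces the cutoff arithmetic with illustrative values: hourly segment
// granularity (modeled as a floor to the hour) and a 10-minute windowPeriod.
final class HandoffCutoffSketch
{
  public static void main(String[] args)
  {
    DateTime currMaxTime = new DateTime("2017-01-01T08:07:00Z", DateTimeZone.UTC);
    long windowMillis = Period.minutes(10).toStandardDuration().getMillis();

    DateTime beforeBucketing = new DateTime(
        Math.max(windowMillis, currMaxTime.getMillis()) - windowMillis,
        DateTimeZone.UTC
    );
    // Stand-in for segmentGranularity.bucketStart(...) with HOUR granularity:
    DateTime minTimestampAsDate = beforeBucketing.hourOfDay().roundFloorCopy();

    // Prints 2017-01-01T07:00:00.000Z: segments whose interval starts before
    // this instant are handed off in this run; later ones wait for a future run.
    System.out.println(minTimestampAsDate);
  }
}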
Use of io.druid.timeline.DataSegment in project druid by druid-io.
The class FiniteAppenderatorDriver, method publishAll:
/**
 * Push and publish all segments to the metadata store.
 *
 * @param publisher        segment publisher
 * @param wrappedCommitter wrapped committer (from wrapCommitter)
 *
 * @return published segments and metadata, or null if segments could not be published due to transaction failure
 * with commit metadata.
 */
private SegmentsAndMetadata publishAll(final TransactionalSegmentPublisher publisher, final Committer wrappedCommitter) throws InterruptedException
{
  final List<SegmentIdentifier> theSegments = ImmutableList.copyOf(appenderator.getSegments());

  long nTry = 0;
  while (true) {
    try {
      log.info("Pushing segments: [%s]", Joiner.on(", ").join(theSegments));
      final SegmentsAndMetadata segmentsAndMetadata = appenderator.push(theSegments, wrappedCommitter).get();

      // Sanity check
      if (!segmentsToIdentifiers(segmentsAndMetadata.getSegments()).equals(Sets.newHashSet(theSegments))) {
        throw new ISE(
            "WTF?! Pushed different segments than requested. Pushed[%s], requested[%s].",
            Joiner.on(", ").join(identifiersToStrings(segmentsToIdentifiers(segmentsAndMetadata.getSegments()))),
            Joiner.on(", ").join(identifiersToStrings(theSegments))
        );
      }

      log.info(
          "Publishing segments with commitMetadata[%s]: [%s]",
          segmentsAndMetadata.getCommitMetadata(),
          Joiner.on(", ").join(segmentsAndMetadata.getSegments())
      );

      if (segmentsAndMetadata.getSegments().isEmpty()) {
        log.info("Nothing to publish, skipping publish step.");
      } else {
        final boolean published = publisher.publishSegments(
            ImmutableSet.copyOf(segmentsAndMetadata.getSegments()),
            ((FiniteAppenderatorDriverMetadata) segmentsAndMetadata.getCommitMetadata()).getCallerMetadata()
        );

        if (published) {
          log.info("Published segments, awaiting handoff.");
        } else {
          log.info("Transaction failure while publishing segments, checking if someone else beat us to it.");
          if (usedSegmentChecker.findUsedSegments(segmentsToIdentifiers(segmentsAndMetadata.getSegments()))
                                .equals(Sets.newHashSet(segmentsAndMetadata.getSegments()))) {
            log.info("Our segments really do exist, awaiting handoff.");
          } else {
            log.warn("Our segments don't exist, giving up.");
            return null;
          }
        }
      }

      for (final DataSegment dataSegment : segmentsAndMetadata.getSegments()) {
        handoffNotifier.registerSegmentHandoffCallback(
            new SegmentDescriptor(dataSegment.getInterval(), dataSegment.getVersion(), dataSegment.getShardSpec().getPartitionNum()),
            MoreExecutors.sameThreadExecutor(),
            new Runnable()
            {
              @Override
              public void run()
              {
                final SegmentIdentifier identifier = SegmentIdentifier.fromDataSegment(dataSegment);
                log.info("Segment[%s] successfully handed off, dropping.", identifier);
                metrics.incrementHandOffCount();

                final ListenableFuture<?> dropFuture = appenderator.drop(identifier);
                Futures.addCallback(
                    dropFuture,
                    new FutureCallback<Object>()
                    {
                      @Override
                      public void onSuccess(Object result)
                      {
                        synchronized (handoffMonitor) {
                          handoffMonitor.notifyAll();
                        }
                      }

                      @Override
                      public void onFailure(Throwable e)
                      {
                        log.warn(e, "Failed to drop segment[%s]?!", identifier);
                        synchronized (handoffMonitor) {
                          handoffMonitor.notifyAll();
                        }
                      }
                    }
                );
              }
            }
        );
      }

      return segmentsAndMetadata;
    }
    catch (InterruptedException e) {
      throw e;
    }
    catch (Exception e) {
      final long sleepMillis = computeNextRetrySleep(++nTry);
      log.warn(e, "Failed publishAll (try %d), retrying in %,dms.", nTry, sleepMillis);
      Thread.sleep(sleepMillis);
    }
  }
}
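On any failure other than an interrupt, publishAll() sleeps for computeNextRetrySleep(++nTry) and tries again. That method is internal to the driver; the sketch below shows a typical capped exponential backoff with jitter as an illustration of the shape of the delay, not Druid's exact formula:

import java.util.concurrent.ThreadLocalRandom;

// Illustrative capped exponential backoff with jitter; the parameter values
// and the exact formula are assumptions, not Druid's implementation.
final class RetrySleepSketch
{
  private static final long BASE_SLEEP_MILLIS = 1_000;
  private static final long MAX_SLEEP_MILLIS = 60_000;

  static long computeNextRetrySleep(long nTry)
  {
    long shift = Math.min(nTry, 16); // cap the exponent to avoid overflow
    long backoff = Math.min(MAX_SLEEP_MILLIS, BASE_SLEEP_MILLIS << shift);
    long jitter = ThreadLocalRandom.current().nextLong(backoff / 4 + 1); // up to +25%
    return backoff + jitter;
  }

  public static void main(String[] args)
  {
    for (long nTry = 1; nTry <= 6; nTry++) {
      System.out.printf("try %d -> sleep about %,d ms%n", nTry, computeNextRetrySleep(nTry));
    }
  }
}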
Use of io.druid.timeline.DataSegment in project druid by druid-io.
The class AppenderatorImpl, method mergeAndPush:
/**
 * Merge segment, push to deep storage. Should only be used on segments that have been fully persisted. Must only
 * be run in the single-threaded pushExecutor.
 *
 * @param identifier sink identifier
 * @param sink       sink to push
 *
 * @return segment descriptor, or null if the sink is no longer valid
 */
private DataSegment mergeAndPush(final SegmentIdentifier identifier, final Sink sink)
{
  // Bail out if this sink is null or otherwise not what we expect.
  if (sinks.get(identifier) != sink) {
    log.warn("Sink for segment[%s] no longer valid, bailing out of mergeAndPush.", identifier);
    return null;
  }

  // Use a descriptor file to indicate that pushing has completed.
  final File persistDir = computePersistDir(identifier);
  final File mergedTarget = new File(persistDir, "merged");
  final File descriptorFile = computeDescriptorFile(identifier);

  // Sanity checks
  for (FireHydrant hydrant : sink) {
    if (sink.isWritable()) {
      throw new ISE("WTF?! Expected sink to be no longer writable before mergeAndPush. Segment[%s].", identifier);
    }

    synchronized (hydrant) {
      if (!hydrant.hasSwapped()) {
        throw new ISE("WTF?! Expected sink to be fully persisted before mergeAndPush. Segment[%s].", identifier);
      }
    }
  }

  try {
    if (descriptorFile.exists()) {
      // Already pushed.
      log.info("Segment[%s] already pushed.", identifier);
      return objectMapper.readValue(descriptorFile, DataSegment.class);
    }

    log.info("Pushing merged index for segment[%s].", identifier);

    removeDirectory(mergedTarget);

    if (mergedTarget.exists()) {
      throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
    }

    List<QueryableIndex> indexes = Lists.newArrayList();
    for (FireHydrant fireHydrant : sink) {
      Segment segment = fireHydrant.getSegment();
      final QueryableIndex queryableIndex = segment.asQueryableIndex();
      log.info("Adding hydrant[%s]", fireHydrant);
      indexes.add(queryableIndex);
    }

    final File mergedFile = indexMerger.mergeQueryableIndex(
        indexes,
        schema.getGranularitySpec().isRollup(),
        schema.getAggregators(),
        mergedTarget,
        tuningConfig.getIndexSpec()
    );

    QueryableIndex index = indexIO.loadIndex(mergedFile);

    DataSegment segment = dataSegmentPusher.push(
        mergedFile,
        sink.getSegment().withDimensions(Lists.newArrayList(index.getAvailableDimensions()))
    );

    objectMapper.writeValue(descriptorFile, segment);

    log.info("Pushed merged index for segment[%s], descriptor is: %s", identifier, segment);
    return segment;
  }
  catch (Exception e) {
    metrics.incrementFailedHandoffs();
    log.warn(e, "Failed to push merged index for segment[%s].", identifier);
    throw Throwables.propagate(e);
  }
}
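A detail worth pulling out of mergeAndPush() above is the descriptor file: if it already exists, the previously recorded DataSegment is read back and returned instead of merging and pushing again, so the method is safe to re-run after a crash. A generic sketch of that pattern with Jackson follows; the helper name, method signature, and payload type are illustrative, not part of the Druid API:

import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.File;
import java.util.concurrent.Callable;

// Generic "do the work once, record the result in a descriptor file" pattern;
// names are illustrative, not part of the Druid API.
final class DescriptorFileSketch
{
  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

  static <T> T runOnce(File descriptorFile, Class<T> resultType, Callable<T> work) throws Exception
  {
    if (descriptorFile.exists()) {
      // Already done on a previous attempt; reuse the recorded result.
      return OBJECT_MAPPER.readValue(descriptorFile, resultType);
    }
    T result = work.call();                           // do the expensive work (e.g. push to deep storage)
    OBJECT_MAPPER.writeValue(descriptorFile, result); // record completion for future attempts
    return result;
  }
}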