use of com.google.common.base.Function in project druid by druid-io.
the class AppenderatorPlumber method mergeAndPush.
private void mergeAndPush() {
final Granularity segmentGranularity = schema.getGranularitySpec().getSegmentGranularity();
final Period windowPeriod = config.getWindowPeriod();
final long windowMillis = windowPeriod.toStandardDuration().getMillis();
log.info("Starting merge and push.");
DateTime minTimestampAsDate = segmentGranularity.bucketStart(new DateTime(Math.max(windowMillis, rejectionPolicy.getCurrMaxTime().getMillis()) - windowMillis));
long minTimestamp = minTimestampAsDate.getMillis();
final List<SegmentIdentifier> appenderatorSegments = appenderator.getSegments();
final List<SegmentIdentifier> segmentsToPush = Lists.newArrayList();
if (shuttingDown) {
log.info("Found [%,d] segments. Attempting to hand off all of them.", appenderatorSegments.size());
segmentsToPush.addAll(appenderatorSegments);
} else {
log.info("Found [%,d] segments. Attempting to hand off segments that start before [%s].", appenderatorSegments.size(), minTimestampAsDate);
for (SegmentIdentifier segment : appenderatorSegments) {
final Long intervalStart = segment.getInterval().getStartMillis();
if (intervalStart < minTimestamp) {
log.info("Adding entry [%s] for merge and push.", segment);
segmentsToPush.add(segment);
} else {
log.info("Skipping persist and merge for entry [%s] : Start time [%s] >= [%s] min timestamp required in this run. Segment will be picked up in a future run.", segment, new DateTime(intervalStart), minTimestampAsDate);
}
}
}
log.info("Found [%,d] segments to persist and merge", segmentsToPush.size());
final Function<Throwable, Void> errorHandler = new Function<Throwable, Void>() {
@Override
public Void apply(Throwable throwable) {
final List<String> segmentIdentifierStrings = Lists.transform(segmentsToPush, new Function<SegmentIdentifier, String>() {
@Override
public String apply(SegmentIdentifier input) {
return input.getIdentifierAsString();
}
});
log.makeAlert(throwable, "Failed to publish merged indexes[%s]", schema.getDataSource()).addData("segments", segmentIdentifierStrings).emit();
if (shuttingDown) {
// We're trying to shut down, and these segments failed to push. Let's just get rid of them.
// This call will also delete possibly-partially-written files, so we don't need to do it explicitly.
cleanShutdown = false;
for (SegmentIdentifier identifier : segmentsToPush) {
dropSegment(identifier);
}
}
return null;
}
};
// WARNING: Committers.nil() here means that on-disk data can get out of sync with committing.
Futures.addCallback(appenderator.push(segmentsToPush, Committers.nil()), new FutureCallback<SegmentsAndMetadata>() {
@Override
public void onSuccess(SegmentsAndMetadata result) {
// Immediately publish after pushing
for (DataSegment pushedSegment : result.getSegments()) {
try {
segmentPublisher.publishSegment(pushedSegment);
} catch (Exception e) {
errorHandler.apply(e);
}
}
log.info("Published [%,d] sinks.", segmentsToPush.size());
}
@Override
public void onFailure(Throwable e) {
log.warn(e, "Failed to push [%,d] segments.", segmentsToPush.size());
errorHandler.apply(e);
}
});
}
use of com.google.common.base.Function in project druid by druid-io.
the class RealtimeManager method getQueryRunnerForSegments.
@Override
public <T> QueryRunner<T> getQueryRunnerForSegments(final Query<T> query, final Iterable<SegmentDescriptor> specs) {
final QueryRunnerFactory<T, Query<T>> factory = conglomerate.findFactory(query);
final Map<Integer, FireChief> partitionChiefs = chiefs.get(Iterables.getOnlyElement(query.getDataSource().getNames()));
return partitionChiefs == null ? new NoopQueryRunner<T>() : factory.getToolchest().mergeResults(factory.mergeRunners(MoreExecutors.sameThreadExecutor(), Iterables.transform(specs, new Function<SegmentDescriptor, QueryRunner<T>>() {
@Override
public QueryRunner<T> apply(SegmentDescriptor spec) {
final FireChief retVal = partitionChiefs.get(spec.getPartitionNumber());
return retVal == null ? new NoopQueryRunner<T>() : retVal.getQueryRunner(query.withQuerySegmentSpec(new SpecificSegmentSpec(spec)));
}
})));
}
use of com.google.common.base.Function in project druid by druid-io.
the class AppenderatorImpl method bootstrapSinksFromDisk.
/**
* Populate "sinks" and "sinkTimeline" with committed segments, and announce them with the segmentAnnouncer.
*
* @return persisted commit metadata
*/
private Object bootstrapSinksFromDisk() {
Preconditions.checkState(sinks.isEmpty(), "Already bootstrapped?!");
final File baseDir = tuningConfig.getBasePersistDirectory();
if (!baseDir.exists()) {
return null;
}
final File[] files = baseDir.listFiles();
if (files == null) {
return null;
}
final File commitFile = computeCommitFile();
final Committed committed;
try {
if (commitFile.exists()) {
committed = objectMapper.readValue(commitFile, Committed.class);
} else {
committed = Committed.nil();
}
} catch (Exception e) {
throw new ISE(e, "Failed to read commitFile: %s", commitFile);
}
log.info("Loading sinks from[%s]: %s", baseDir, committed.getHydrants().keySet());
for (File sinkDir : files) {
final File identifierFile = new File(sinkDir, IDENTIFIER_FILE_NAME);
if (!identifierFile.isFile()) {
// No identifier in this sinkDir; it must not actually be a sink directory. Skip it.
continue;
}
try {
final SegmentIdentifier identifier = objectMapper.readValue(new File(sinkDir, "identifier.json"), SegmentIdentifier.class);
final int committedHydrants = committed.getCommittedHydrants(identifier.getIdentifierAsString());
if (committedHydrants <= 0) {
log.info("Removing uncommitted sink at [%s]", sinkDir);
FileUtils.deleteDirectory(sinkDir);
continue;
}
// To avoid reading and listing of "merged" dir and other special files
final File[] sinkFiles = sinkDir.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String fileName) {
return !(Ints.tryParse(fileName) == null);
}
});
Arrays.sort(sinkFiles, new Comparator<File>() {
@Override
public int compare(File o1, File o2) {
return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName()));
}
});
List<FireHydrant> hydrants = Lists.newArrayList();
for (File hydrantDir : sinkFiles) {
final int hydrantNumber = Integer.parseInt(hydrantDir.getName());
if (hydrantNumber >= committedHydrants) {
log.info("Removing uncommitted segment at [%s]", hydrantDir);
FileUtils.deleteDirectory(hydrantDir);
} else {
log.info("Loading previously persisted segment at [%s]", hydrantDir);
if (hydrantNumber != hydrants.size()) {
throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
}
hydrants.add(new FireHydrant(new QueryableIndexSegment(identifier.getIdentifierAsString(), indexIO.loadIndex(hydrantDir)), hydrantNumber));
}
}
// Make sure we loaded enough hydrants.
if (committedHydrants != hydrants.size()) {
throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
}
Sink currSink = new Sink(identifier.getInterval(), schema, identifier.getShardSpec(), identifier.getVersion(), tuningConfig.getMaxRowsInMemory(), tuningConfig.isReportParseExceptions(), hydrants);
sinks.put(identifier, currSink);
sinkTimeline.add(currSink.getInterval(), currSink.getVersion(), identifier.getShardSpec().createChunk(currSink));
segmentAnnouncer.announceSegment(currSink.getSegment());
} catch (IOException e) {
log.makeAlert(e, "Problem loading sink[%s] from disk.", schema.getDataSource()).addData("sinkDir", sinkDir).emit();
}
}
// Make sure we loaded all committed sinks.
final Set<String> loadedSinks = Sets.newHashSet(Iterables.transform(sinks.keySet(), new Function<SegmentIdentifier, String>() {
@Override
public String apply(SegmentIdentifier input) {
return input.getIdentifierAsString();
}
}));
final Set<String> missingSinks = Sets.difference(committed.getHydrants().keySet(), loadedSinks);
if (!missingSinks.isEmpty()) {
throw new ISE("Missing committed sinks [%s]", Joiner.on(", ").join(missingSinks));
}
return committed.getMetadata();
}
use of com.google.common.base.Function in project druid by druid-io.
the class AppenderatorImpl method persistAll.
@Override
public ListenableFuture<Object> persistAll(final Committer committer) {
// Submit persistAll task to the persistExecutor
final Map<SegmentIdentifier, Integer> commitHydrants = Maps.newHashMap();
final List<Pair<FireHydrant, SegmentIdentifier>> indexesToPersist = Lists.newArrayList();
final Set<SegmentIdentifier> identifiers = sinks.keySet();
for (SegmentIdentifier identifier : identifiers) {
final Sink sink = sinks.get(identifier);
final List<FireHydrant> hydrants = Lists.newArrayList(sink);
commitHydrants.put(identifier, hydrants.size());
final int limit = sink.isWritable() ? hydrants.size() - 1 : hydrants.size();
for (FireHydrant hydrant : hydrants.subList(0, limit)) {
if (!hydrant.hasSwapped()) {
log.info("Hydrant[%s] hasn't persisted yet, persisting. Segment[%s]", hydrant, identifier);
indexesToPersist.add(Pair.of(hydrant, identifier));
}
}
if (sink.swappable()) {
indexesToPersist.add(Pair.of(sink.swap(), identifier));
}
}
log.info("Submitting persist runnable for dataSource[%s]", schema.getDataSource());
final String threadName = String.format("%s-incremental-persist", schema.getDataSource());
final Object commitMetadata = committer.getMetadata();
final Stopwatch runExecStopwatch = Stopwatch.createStarted();
final Stopwatch persistStopwatch = Stopwatch.createStarted();
final ListenableFuture<Object> future = persistExecutor.submit(new ThreadRenamingCallable<Object>(threadName) {
@Override
public Object doCall() {
try {
for (Pair<FireHydrant, SegmentIdentifier> pair : indexesToPersist) {
metrics.incrementRowOutputCount(persistHydrant(pair.lhs, pair.rhs));
}
log.info("Committing metadata[%s] for sinks[%s].", commitMetadata, Joiner.on(", ").join(Iterables.transform(commitHydrants.entrySet(), new Function<Map.Entry<SegmentIdentifier, Integer>, String>() {
@Override
public String apply(Map.Entry<SegmentIdentifier, Integer> entry) {
return String.format("%s:%d", entry.getKey().getIdentifierAsString(), entry.getValue());
}
})));
committer.run();
objectMapper.writeValue(computeCommitFile(), Committed.create(commitHydrants, commitMetadata));
return commitMetadata;
} catch (Exception e) {
metrics.incrementFailedPersists();
throw Throwables.propagate(e);
} finally {
metrics.incrementNumPersists();
metrics.incrementPersistTimeMillis(persistStopwatch.elapsed(TimeUnit.MILLISECONDS));
persistStopwatch.stop();
}
}
});
final long startDelay = runExecStopwatch.elapsed(TimeUnit.MILLISECONDS);
metrics.incrementPersistBackPressureMillis(startDelay);
if (startDelay > WARN_DELAY) {
log.warn("Ingestion was throttled for [%,d] millis because persists were pending.", startDelay);
}
runExecStopwatch.stop();
resetNextFlush();
// NB: The rows are still in memory until they're done persisting, but we only count rows in active indexes.
rowsCurrentlyInMemory.set(0);
return future;
}
use of com.google.common.base.Function in project druid by druid-io.
the class DatasourcesResource method getSegmentDataSourceSpecificInterval.
/**
* Provides serverView for a datasource and Interval which gives details about servers hosting segments for an interval
* Used by the realtime tasks to fetch a view of the interval they are interested in.
*/
@GET
@Path("/{dataSourceName}/intervals/{interval}/serverview")
@Produces(MediaType.APPLICATION_JSON)
@ResourceFilters(DatasourceResourceFilter.class)
public Response getSegmentDataSourceSpecificInterval(@PathParam("dataSourceName") String dataSourceName, @PathParam("interval") String interval, @QueryParam("partial") final boolean partial) {
TimelineLookup<String, SegmentLoadInfo> timeline = serverInventoryView.getTimeline(new TableDataSource(dataSourceName));
final Interval theInterval = new Interval(interval.replace("_", "/"));
if (timeline == null) {
log.debug("No timeline found for datasource[%s]", dataSourceName);
return Response.ok(Lists.<ImmutableSegmentLoadInfo>newArrayList()).build();
}
Iterable<TimelineObjectHolder<String, SegmentLoadInfo>> lookup = timeline.lookupWithIncompletePartitions(theInterval);
FunctionalIterable<ImmutableSegmentLoadInfo> retval = FunctionalIterable.create(lookup).transformCat(new Function<TimelineObjectHolder<String, SegmentLoadInfo>, Iterable<ImmutableSegmentLoadInfo>>() {
@Override
public Iterable<ImmutableSegmentLoadInfo> apply(TimelineObjectHolder<String, SegmentLoadInfo> input) {
return Iterables.transform(input.getObject(), new Function<PartitionChunk<SegmentLoadInfo>, ImmutableSegmentLoadInfo>() {
@Override
public ImmutableSegmentLoadInfo apply(PartitionChunk<SegmentLoadInfo> chunk) {
return chunk.getObject().toImmutableSegmentLoadInfo();
}
});
}
});
return Response.ok(retval).build();
}
Aggregations