Use of io.druid.segment.QueryableIndex in project druid by druid-io.
The class RealtimePlumber, method persistAndMerge:
// Submits persist-n-merge task for a Sink to the mergeExecutor
private void persistAndMerge(final long truncatedTime, final Sink sink) {
  final String threadName = String.format(
      "%s-%s-persist-n-merge",
      schema.getDataSource(),
      new DateTime(truncatedTime)
  );
  mergeExecutor.execute(
      new ThreadRenamingRunnable(threadName) {
        final Interval interval = sink.getInterval();

        Stopwatch mergeStopwatch = null;

        @Override
        public void doRun() {
          try {
            // Bail out if this sink has been abandoned by a previously-executed task.
            if (sinks.get(truncatedTime) != sink) {
              log.info("Sink[%s] was abandoned, bailing out of persist-n-merge.", sink);
              return;
            }
            // Use a file to indicate that pushing has completed.
            final File persistDir = computePersistDir(schema, interval);
            final File mergedTarget = new File(persistDir, "merged");
            final File isPushedMarker = new File(persistDir, "isPushedMarker");
            if (!isPushedMarker.exists()) {
              removeSegment(sink, mergedTarget);
              if (mergedTarget.exists()) {
                log.wtf("Merged target[%s] exists?!", mergedTarget);
                return;
              }
            } else {
              log.info("Already pushed sink[%s]", sink);
              return;
            }
            /*
             * Note: if the plumber crashes after persisting a subset of hydrants, data might be
             * duplicated, since those hydrants will be re-read while the older commitMetadata is
             * used. Fixing this likely requires structural changes to the plumber.
             */
            for (FireHydrant hydrant : sink) {
              synchronized (hydrant) {
                if (!hydrant.hasSwapped()) {
                  log.info("Hydrant[%s] hasn't swapped yet, swapping. Sink[%s]", hydrant, sink);
                  final int rowCount = persistHydrant(hydrant, schema, interval, null);
                  metrics.incrementRowOutputCount(rowCount);
                }
              }
            }
            final long mergeThreadCpuTime = VMUtils.safeGetThreadCpuTime();
            mergeStopwatch = Stopwatch.createStarted();
            List<QueryableIndex> indexes = Lists.newArrayList();
            for (FireHydrant fireHydrant : sink) {
              Segment segment = fireHydrant.getSegment();
              final QueryableIndex queryableIndex = segment.asQueryableIndex();
              log.info("Adding hydrant[%s]", fireHydrant);
              indexes.add(queryableIndex);
            }
            final File mergedFile = indexMerger.mergeQueryableIndex(
                indexes,
                schema.getGranularitySpec().isRollup(),
                schema.getAggregators(),
                mergedTarget,
                config.getIndexSpec()
            );
            // Emit merge metrics before publishing the segment.
            metrics.incrementMergeCpuTime(VMUtils.safeGetThreadCpuTime() - mergeThreadCpuTime);
            metrics.incrementMergeTimeMillis(mergeStopwatch.elapsed(TimeUnit.MILLISECONDS));
            QueryableIndex index = indexIO.loadIndex(mergedFile);
            log.info("Pushing [%s] to deep storage", sink.getSegment().getIdentifier());
            DataSegment segment = dataSegmentPusher.push(
                mergedFile,
                sink.getSegment().withDimensions(Lists.newArrayList(index.getAvailableDimensions()))
            );
            log.info("Inserting [%s] to the metadata store", sink.getSegment().getIdentifier());
            segmentPublisher.publishSegment(segment);
            if (!isPushedMarker.createNewFile()) {
              log.makeAlert("Failed to create marker file for [%s]", schema.getDataSource())
                 .addData("interval", sink.getInterval())
                 .addData("partitionNum", segment.getShardSpec().getPartitionNum())
                 .addData("marker", isPushedMarker)
                 .emit();
            }
          } catch (Exception e) {
            metrics.incrementFailedHandoffs();
            log.makeAlert(e, "Failed to persist merged index[%s]", schema.getDataSource())
               .addData("interval", interval)
               .emit();
            if (shuttingDown) {
              // We're trying to shut down, and this segment failed to push. Let's just get rid of it.
              // This call will also delete possibly-partially-written files, so we don't need to do it explicitly.
              cleanShutdown = false;
              abandonSegment(truncatedTime, sink);
            }
          } finally {
            if (mergeStopwatch != null) {
              mergeStopwatch.stop();
            }
          }
        }
      }
  );
  handoffNotifier.registerSegmentHandoffCallback(
      new SegmentDescriptor(sink.getInterval(), sink.getVersion(), config.getShardSpec().getPartitionNum()),
      mergeExecutor,
      new Runnable() {
        @Override
        public void run() {
          abandonSegment(sink.getInterval().getStartMillis(), sink);
          metrics.incrementHandOffCount();
        }
      }
  );
}
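The QueryableIndex round trip above is the heart of the task: each swapped hydrant exposes its on-disk index through Segment.asQueryableIndex(), the indexes are merged into the "merged" directory, and the result is mapped back with indexIO.loadIndex() before being pushed. Below is a minimal sketch of that round trip in isolation; the helper name mergeTwoSegmentDirs and its parameters are illustrative stand-ins (not Druid API), assuming injected IndexMerger and IndexIO instances and two already-persisted segment directories.

import java.io.File;
import java.io.IOException;
import java.util.List;
import com.google.common.collect.Lists;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.segment.IndexIO;
import io.druid.segment.IndexMerger;
import io.druid.segment.IndexSpec;
import io.druid.segment.QueryableIndex;

// Hypothetical helper mirroring persistAndMerge's core steps.
static QueryableIndex mergeTwoSegmentDirs(
    IndexMerger indexMerger,
    IndexIO indexIO,
    AggregatorFactory[] aggregators,
    File dirA,
    File dirB,
    File mergedDir
) throws IOException {
  // Map each persisted directory as a read-only QueryableIndex.
  List<QueryableIndex> indexes = Lists.newArrayList(indexIO.loadIndex(dirA), indexIO.loadIndex(dirB));
  // mergeQueryableIndex writes the merged index under mergedDir and returns that location.
  File merged = indexMerger.mergeQueryableIndex(indexes, true, aggregators, mergedDir, new IndexSpec());
  // Load the merged directory back so it can be queried or handed to a DataSegmentPusher.
  return indexIO.loadIndex(merged);
}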
Use of io.druid.segment.QueryableIndex in project druid by druid-io.
The class RealtimePlumber, method bootstrapSinksFromDisk:
protected Object bootstrapSinksFromDisk() {
  final VersioningPolicy versioningPolicy = config.getVersioningPolicy();
  File baseDir = computeBaseDir(schema);
  if (baseDir == null || !baseDir.exists()) {
    return null;
  }
  File[] files = baseDir.listFiles();
  if (files == null) {
    return null;
  }
  Object metadata = null;
  long latestCommitTime = 0;
  for (File sinkDir : files) {
    final Interval sinkInterval = new Interval(sinkDir.getName().replace("_", "/"));
    // List only the numbered hydrant directories, to avoid reading and listing the "merged" dir.
    final File[] sinkFiles = sinkDir.listFiles(
        new FilenameFilter() {
          @Override
          public boolean accept(File dir, String fileName) {
            return Ints.tryParse(fileName) != null;
          }
        }
    );
    Arrays.sort(
        sinkFiles,
        new Comparator<File>() {
          @Override
          public int compare(File o1, File o2) {
            try {
              return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName()));
            } catch (NumberFormatException e) {
              log.error(e, "Couldn't compare as numbers? [%s][%s]", o1, o2);
              return o1.compareTo(o2);
            }
          }
        }
    );
    boolean isCorrupted = false;
    List<FireHydrant> hydrants = Lists.newArrayList();
    for (File segmentDir : sinkFiles) {
      log.info("Loading previously persisted segment at [%s]", segmentDir);
      // The filename filter above should make this redundant; if 100% sure that it is not needed, this check can be removed.
      if (Ints.tryParse(segmentDir.getName()) == null) {
        continue;
      }
      QueryableIndex queryableIndex = null;
      try {
        queryableIndex = indexIO.loadIndex(segmentDir);
      } catch (IOException e) {
        log.error(e, "Problem loading segmentDir from disk.");
        isCorrupted = true;
      }
      if (isCorrupted) {
        try {
          File corruptSegmentDir = computeCorruptedFileDumpDir(segmentDir, schema);
          log.info("Renaming %s to %s", segmentDir.getAbsolutePath(), corruptSegmentDir.getAbsolutePath());
          FileUtils.copyDirectory(segmentDir, corruptSegmentDir);
          FileUtils.deleteDirectory(segmentDir);
        } catch (Exception e1) {
          log.error(e1, "Failed to rename %s", segmentDir.getAbsolutePath());
        }
        // Skipping a corrupted segment may drop data; this strategy should be revisited at some point.
        continue;
      }
      Metadata segmentMetadata = queryableIndex.getMetadata();
      if (segmentMetadata != null) {
        Object timestampObj = segmentMetadata.get(COMMIT_METADATA_TIMESTAMP_KEY);
        if (timestampObj != null) {
          long timestamp = ((Long) timestampObj).longValue();
          if (timestamp > latestCommitTime) {
            log.info(
                "Found metaData [%s] with latestCommitTime [%s] greater than previous recorded [%s]",
                queryableIndex.getMetadata(),
                timestamp,
                latestCommitTime
            );
            latestCommitTime = timestamp;
            metadata = queryableIndex.getMetadata().get(COMMIT_METADATA_KEY);
          }
        }
      }
      hydrants.add(
          new FireHydrant(
              new QueryableIndexSegment(
                  DataSegment.makeDataSegmentIdentifier(
                      schema.getDataSource(),
                      sinkInterval.getStart(),
                      sinkInterval.getEnd(),
                      versioningPolicy.getVersion(sinkInterval),
                      config.getShardSpec()
                  ),
                  queryableIndex
              ),
              Integer.parseInt(segmentDir.getName())
          )
      );
    }
    if (hydrants.isEmpty()) {
      // Probably encountered a corrupt sink directory.
      log.warn("Found persisted segment directory with no intermediate segments present at %s, skipping sink creation.", sinkDir.getAbsolutePath());
      continue;
    }
    final Sink currSink = new Sink(
        sinkInterval,
        schema,
        config.getShardSpec(),
        versioningPolicy.getVersion(sinkInterval),
        config.getMaxRowsInMemory(),
        config.isReportParseExceptions(),
        hydrants
    );
    addSink(currSink);
  }
  return metadata;
}
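Bootstrap's only look inside the index contents is the commit-metadata scan: QueryableIndex.getMetadata() returns the Metadata persisted with each hydrant, and the plumber keeps whichever commit payload carries the newest timestamp. A hedged sketch of that lookup against a single directory follows; the helper name readCommitMetadata is illustrative, and the key string is whatever constant the caller persisted under (the plumber uses COMMIT_METADATA_KEY).

import java.io.File;
import java.io.IOException;
import io.druid.segment.IndexIO;
import io.druid.segment.Metadata;
import io.druid.segment.QueryableIndex;

// Illustrative only: extract the committed metadata object from one persisted hydrant directory.
static Object readCommitMetadata(IndexIO indexIO, File segmentDir, String commitKey) throws IOException {
  QueryableIndex index = indexIO.loadIndex(segmentDir);
  // getMetadata() can return null, as the bootstrap code above also assumes.
  Metadata metadata = index.getMetadata();
  return metadata == null ? null : metadata.get(commitKey);
}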
Use of io.druid.segment.QueryableIndex in project druid by druid-io.
The class SpatialFilterBonusTest, method constructorFeeder:
@Parameterized.Parameters
public static Collection<?> constructorFeeder() throws IOException {
  final IndexSpec indexSpec = new IndexSpec();
  final IncrementalIndex rtIndex = makeIncrementalIndex();
  final QueryableIndex mMappedTestIndex = makeQueryableIndex(indexSpec);
  final QueryableIndex mergedRealtimeIndex = makeMergedQueryableIndex(indexSpec);
  return Arrays.asList(
      new Object[][]{
          {new IncrementalIndexSegment(rtIndex, null)},
          {new QueryableIndexSegment(null, mMappedTestIndex)},
          {new QueryableIndexSegment(null, mergedRealtimeIndex)}
      }
  );
}
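Each Object[] row returned above becomes one set of constructor arguments, so JUnit runs every test method once per segment flavor: the in-memory IncrementalIndexSegment, the memory-mapped QueryableIndexSegment, and the merged QueryableIndexSegment. A minimal sketch of the consuming side; the field and constructor shape are an assumption about how such a parameterized test is typically wired, not copied from the class.

import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import io.druid.segment.Segment;

@RunWith(Parameterized.class)
public class SpatialFilterBonusTest {
  private final Segment segment;

  // JUnit instantiates the class once for each row from constructorFeeder().
  public SpatialFilterBonusTest(Segment segment) {
    this.segment = segment;
  }
  // Test methods then build query runners against this.segment.
}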
Use of io.druid.segment.QueryableIndex in project druid by druid-io.
The class SpatialFilterTest, method makeMergedQueryableIndex:
private static QueryableIndex makeMergedQueryableIndex(IndexSpec indexSpec) {
  try {
    // All three incremental indexes share the same schema; only the max row count differs.
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withMinTimestamp(DATA_INTERVAL.getStartMillis())
        .withQueryGranularity(Granularities.DAY)
        .withMetrics(METRIC_AGGS)
        .withDimensionsSpec(
            new DimensionsSpec(
                null,
                null,
                Arrays.asList(
                    new SpatialDimensionSchema("dim.geo", Arrays.asList("lat", "long")),
                    new SpatialDimensionSchema("spatialIsRad", Arrays.asList("lat2", "long2"))
                )
            )
        )
        .build();
    IncrementalIndex first = new OnheapIncrementalIndex(schema, false, 1000);
    IncrementalIndex second = new OnheapIncrementalIndex(schema, false, 1000);
    IncrementalIndex third = new OnheapIncrementalIndex(schema, false, NUM_POINTS);
    first.add(new MapBasedInputRow(new DateTime("2013-01-01").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-01").toString(), "dim", "foo", "lat", 0.0f, "long", 0.0f, "val", 17L)));
    first.add(new MapBasedInputRow(new DateTime("2013-01-02").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-02").toString(), "dim", "foo", "lat", 1.0f, "long", 3.0f, "val", 29L)));
    first.add(new MapBasedInputRow(new DateTime("2013-01-03").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-03").toString(), "dim", "foo", "lat", 4.0f, "long", 2.0f, "val", 13L)));
    first.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "dim", "foo", "lat", "_mmx.unknown", "long", "_mmx.unknown", "val", 101L)));
    first.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "dim", "foo", "dim.geo", "_mmx.unknown", "val", 501L)));
    second.add(new MapBasedInputRow(new DateTime("2013-01-04").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-04").toString(), "dim", "foo", "lat", 7.0f, "long", 3.0f, "val", 91L)));
    second.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "dim", "foo", "lat", 8.0f, "long", 6.0f, "val", 47L)));
    second.add(new MapBasedInputRow(new DateTime("2013-01-05").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-05").toString(), "lat2", 0.0f, "long2", 0.0f, "val", 13L)));
    // Add a bunch of random points to the third index.
    Random rand = new Random();
    for (int i = 8; i < NUM_POINTS; i++) {
      third.add(new MapBasedInputRow(new DateTime("2013-01-01").getMillis(), DIMS, ImmutableMap.<String, Object>of("timestamp", new DateTime("2013-01-01").toString(), "dim", "boo", "lat", (float) (rand.nextFloat() * 10 + 10.0), "long", (float) (rand.nextFloat() * 10 + 10.0), "val", i)));
    }
    // Reserve a unique scratch path, then create the working directories under it.
    File tmpFile = File.createTempFile("yay", "who");
    tmpFile.delete();
    File firstFile = new File(tmpFile, "first");
    File secondFile = new File(tmpFile, "second");
    File thirdFile = new File(tmpFile, "third");
    File mergedFile = new File(tmpFile, "merged");
    firstFile.mkdirs();
    firstFile.deleteOnExit();
    secondFile.mkdirs();
    secondFile.deleteOnExit();
    thirdFile.mkdirs();
    thirdFile.deleteOnExit();
    mergedFile.mkdirs();
    mergedFile.deleteOnExit();
    INDEX_MERGER.persist(first, DATA_INTERVAL, firstFile, indexSpec);
    INDEX_MERGER.persist(second, DATA_INTERVAL, secondFile, indexSpec);
    INDEX_MERGER.persist(third, DATA_INTERVAL, thirdFile, indexSpec);
    QueryableIndex mergedRealtime = INDEX_IO.loadIndex(
        INDEX_MERGER.mergeQueryableIndex(
            Arrays.asList(INDEX_IO.loadIndex(firstFile), INDEX_IO.loadIndex(secondFile), INDEX_IO.loadIndex(thirdFile)),
            true,
            METRIC_AGGS,
            mergedFile,
            indexSpec
        )
    );
    return mergedRealtime;
  } catch (IOException e) {
    throw Throwables.propagate(e);
  }
}
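One detail worth flagging in the setup above: File.createTempFile("yay", "who") followed by delete() is an old idiom for reserving a unique scratch path before creating directories under it. A sketch of an equivalent setup with Guava's Files.createTempDir(), which Druid already has on its classpath; this is an alternative, not what the test actually does.

import java.io.File;
import com.google.common.io.Files;

// Alternative scratch-directory setup (sketch): createTempDir() makes the directory directly.
File tmpDir = Files.createTempDir();
File firstFile = new File(tmpDir, "first");
File secondFile = new File(tmpDir, "second");
File thirdFile = new File(tmpDir, "third");
File mergedFile = new File(tmpDir, "merged");
// Each subdirectory still needs mkdirs() and deleteOnExit(), as in the test body above.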
Use of io.druid.segment.QueryableIndex in project druid by druid-io.
The class SpatialFilterTest, method constructorFeeder:
@Parameterized.Parameters
public static Collection<?> constructorFeeder() throws IOException {
  final IndexSpec indexSpec = new IndexSpec();
  final IncrementalIndex rtIndex = makeIncrementalIndex();
  final QueryableIndex mMappedTestIndex = makeQueryableIndex(indexSpec);
  final QueryableIndex mergedRealtimeIndex = makeMergedQueryableIndex(indexSpec);
  return Arrays.asList(
      new Object[][]{
          {new IncrementalIndexSegment(rtIndex, null)},
          {new QueryableIndexSegment(null, mMappedTestIndex)},
          {new QueryableIndexSegment(null, mergedRealtimeIndex)}
      }
  );
}