Use of org.apache.druid.segment.QueryableIndexSegment in project druid by druid-io.
Class AppenderatorImpl, method persistHydrant.
/**
* Persists the given hydrant and returns the number of rows persisted. Must only be called in the single-threaded
* persistExecutor.
*
* @param indexToPersist hydrant to persist
* @param identifier the segment this hydrant is going to be part of
*
* @return the number of rows persisted
*/
private int persistHydrant(FireHydrant indexToPersist, SegmentIdWithShardSpec identifier) {
synchronized (indexToPersist) {
if (indexToPersist.hasSwapped()) {
log.info("Segment[%s] hydrant[%s] already swapped. Ignoring request to persist.", identifier, indexToPersist);
return 0;
}
log.debug("Segment[%s], persisting Hydrant[%s]", identifier, indexToPersist);
try {
final long startTime = System.nanoTime();
int numRows = indexToPersist.getIndex().size();
final File persistedFile;
final File persistDir = createPersistDirIfNeeded(identifier);
persistedFile = indexMerger.persist(
    indexToPersist.getIndex(),
    identifier.getInterval(),
    new File(persistDir, String.valueOf(indexToPersist.getCount())),
    tuningConfig.getIndexSpecForIntermediatePersists(),
    tuningConfig.getSegmentWriteOutMediumFactory()
);
log.info("Flushed in-memory data for segment[%s] spill[%s] to disk in [%,d] ms (%,d rows).", indexToPersist.getSegmentId(), indexToPersist.getCount(), (System.nanoTime() - startTime) / 1000000, numRows);
// Map only when this appenderator is being driven by a real time task:
Segment segmentToSwap = null;
if (isOpenSegments()) {
segmentToSwap = new QueryableIndexSegment(indexIO.loadIndex(persistedFile), indexToPersist.getSegmentId());
} else {
// remember file path & segment id to rebuild the queryable index for merge:
persistedHydrantMetadata.put(indexToPersist, new Pair<>(persistedFile, indexToPersist.getSegmentId()));
}
indexToPersist.swapSegment(segmentToSwap);
return numRows;
} catch (IOException e) {
log.makeAlert("Incremental persist failed").addData("segment", identifier.toString()).addData("dataSource", schema.getDataSource()).addData("count", indexToPersist.getCount()).emit();
throw new RuntimeException(e);
}
}
}
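Whether the appenderator runs with open segments (real-time) or not (batch), the persisted spill directory is eventually turned back into a QueryableIndexSegment through indexIO.loadIndex; the only difference is whether that happens here or later, at merge time. A minimal sketch of that step as a standalone helper, assuming an already-constructed IndexIO instance (the class and method names below are illustrative, not part of Druid):

import java.io.File;
import java.io.IOException;
import org.apache.druid.segment.IndexIO;
import org.apache.druid.segment.QueryableIndexSegment;
import org.apache.druid.timeline.SegmentId;

public class PersistedHydrantLoader {
  // Rebuild a queryable segment from a directory previously written by indexMerger.persist().
  // loadIndex memory-maps the persisted columns; the caller owns the lifecycle of the returned
  // segment, just as the appenderator does in persistHydrant above.
  public static QueryableIndexSegment load(IndexIO indexIO, File persistedDir, SegmentId segmentId) throws IOException {
    return new QueryableIndexSegment(indexIO.loadIndex(persistedDir), segmentId);
  }
}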
Use of org.apache.druid.segment.QueryableIndexSegment in project druid by druid-io.
Class AppenderatorImpl, method mergeAndPush.
/**
* Merge segment, push to deep storage. Should only be used on segments that have been fully persisted. Must only
* be run in the single-threaded pushExecutor.
*
* @param identifier sink identifier
* @param sink sink to push
* @param useUniquePath true if the segment should be written to a path with a unique identifier
*
* @return segment descriptor, or null if the sink is no longer valid
*/
@Nullable
private DataSegment mergeAndPush(final SegmentIdWithShardSpec identifier, final Sink sink, final boolean useUniquePath) {
// noinspection ObjectEquality
if (sinks.get(identifier) != sink) {
log.warn("Sink for segment[%s] no longer valid, bailing out of mergeAndPush.", identifier);
return null;
}
// Use a descriptor file to indicate that pushing has completed.
final File persistDir = computePersistDir(identifier);
final File mergedTarget = new File(persistDir, "merged");
final File descriptorFile = computeDescriptorFile(identifier);
// Sanity checks
for (FireHydrant hydrant : sink) {
if (sink.isWritable()) {
throw new ISE("Expected sink to be no longer writable before mergeAndPush for segment[%s].", identifier);
}
synchronized (hydrant) {
if (!hydrant.hasSwapped()) {
throw new ISE("Expected sink to be fully persisted before mergeAndPush for segment[%s].", identifier);
}
}
}
try {
if (descriptorFile.exists()) {
if (useUniquePath) {
// Don't reuse the descriptor, because the caller asked for a unique path. Leave the old one as-is, since
// it might serve some unknown purpose.
log.debug("Segment[%s] already pushed, but we want a unique path, so will push again with a new path.", identifier);
} else {
log.info("Segment[%s] already pushed, skipping.", identifier);
return objectMapper.readValue(descriptorFile, DataSegment.class);
}
}
removeDirectory(mergedTarget);
if (mergedTarget.exists()) {
throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
}
final File mergedFile;
final long mergeFinishTime;
final long startTime = System.nanoTime();
List<QueryableIndex> indexes = new ArrayList<>();
Closer closer = Closer.create();
try {
for (FireHydrant fireHydrant : sink) {
// if batch, swap/persist did not memory map the incremental index, we need it mapped now:
if (!isOpenSegments()) {
// sanity
Pair<File, SegmentId> persistedMetadata = persistedHydrantMetadata.get(fireHydrant);
if (persistedMetadata == null) {
throw new ISE("Persisted metadata for batch hydrant [%s] is null!", fireHydrant);
}
File persistedFile = persistedMetadata.lhs;
SegmentId persistedSegmentId = persistedMetadata.rhs;
// sanity:
if (persistedFile == null) {
throw new ISE("Persisted file for batch hydrant [%s] is null!", fireHydrant);
} else if (persistedSegmentId == null) {
throw new ISE("Persisted segmentId for batch hydrant in file [%s] is null!", persistedFile.getPath());
}
fireHydrant.swapSegment(new QueryableIndexSegment(indexIO.loadIndex(persistedFile), persistedSegmentId));
}
Pair<ReferenceCountingSegment, Closeable> segmentAndCloseable = fireHydrant.getAndIncrementSegment();
final QueryableIndex queryableIndex = segmentAndCloseable.lhs.asQueryableIndex();
log.debug("Segment[%s] adding hydrant[%s]", identifier, fireHydrant);
indexes.add(queryableIndex);
closer.register(segmentAndCloseable.rhs);
}
mergedFile = indexMerger.mergeQueryableIndex(
    indexes,
    schema.getGranularitySpec().isRollup(),
    schema.getAggregators(),
    schema.getDimensionsSpec(),
    mergedTarget,
    tuningConfig.getIndexSpec(),
    tuningConfig.getIndexSpecForIntermediatePersists(),
    new BaseProgressIndicator(),
    tuningConfig.getSegmentWriteOutMediumFactory(),
    tuningConfig.getMaxColumnsToMerge()
);
mergeFinishTime = System.nanoTime();
log.debug("Segment[%s] built in %,dms.", identifier, (mergeFinishTime - startTime) / 1000000);
} catch (Throwable t) {
throw closer.rethrow(t);
} finally {
closer.close();
}
final DataSegment segmentToPush = sink.getSegment().withDimensions(IndexMerger.getMergedDimensionsFromQueryableIndexes(indexes, schema.getDimensionsSpec()));
// Retry pushing segments because uploading to deep storage might fail especially for cloud storage types
final DataSegment segment = RetryUtils.retry(
    () -> dataSegmentPusher.push(mergedFile, segmentToPush, useUniquePath),
    exception -> exception instanceof Exception,
    5
);
if (!isOpenSegments()) {
// Drop the queryable indexes behind the hydrants; they are not needed anymore and their
// memory-mapped file references can generate OOMs during merge if enough of them are held back.
for (FireHydrant fireHydrant : sink) {
fireHydrant.swapSegment(null);
}
}
final long pushFinishTime = System.nanoTime();
objectMapper.writeValue(descriptorFile, segment);
log.info("Segment[%s] of %,d bytes " + "built from %d incremental persist(s) in %,dms; " + "pushed to deep storage in %,dms. " + "Load spec is: %s", identifier, segment.getSize(), indexes.size(), (mergeFinishTime - startTime) / 1000000, (pushFinishTime - mergeFinishTime) / 1000000, objectMapper.writeValueAsString(segment.getLoadSpec()));
return segment;
} catch (Exception e) {
metrics.incrementFailedHandoffs();
log.warn(e, "Failed to push merged index for segment[%s].", identifier);
throw new RuntimeException(e);
}
}
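The push is wrapped in RetryUtils.retry because uploads to deep storage, especially cloud object stores, can fail transiently; any Exception counts as retryable and at most five attempts are made. Below is a simplified plain-Java sketch of that contract, with a fixed attempt budget and no delay between attempts (assumptions of this sketch; Druid's own RetryUtils also applies backoff between tries):

import java.util.concurrent.Callable;
import java.util.function.Predicate;

public final class SimpleRetry {
  // Run the task up to maxTries times, retrying only when shouldRetry accepts the thrown exception.
  public static <T> T retry(Callable<T> task, Predicate<Throwable> shouldRetry, int maxTries) throws Exception {
    for (int attempt = 1; ; attempt++) {
      try {
        return task.call();
      } catch (Exception e) {
        if (!shouldRetry.test(e) || attempt >= maxTries) {
          throw e;
        }
        // otherwise fall through and try again
      }
    }
  }
}

Substituted into the call above, it would read SimpleRetry.retry(() -> dataSegmentPusher.push(mergedFile, segmentToPush, useUniquePath), e -> e instanceof Exception, 5).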
Use of org.apache.druid.segment.QueryableIndexSegment in project druid by druid-io.
Class AppenderatorImpl, method bootstrapSinksFromDisk.
/**
* Populate "sinks" and "sinkTimeline" with committed segments, and announce them with the segmentAnnouncer.
*
* @return persisted commit metadata
*/
private Object bootstrapSinksFromDisk() {
Preconditions.checkState(sinks.isEmpty(), "Already bootstrapped?!");
final File baseDir = tuningConfig.getBasePersistDirectory();
if (!baseDir.exists()) {
return null;
}
final File[] files = baseDir.listFiles();
if (files == null) {
return null;
}
final Committed committed;
File commitFile = null;
try {
commitLock.lock();
commitFile = computeCommitFile();
if (commitFile.exists()) {
committed = objectMapper.readValue(commitFile, Committed.class);
} else {
committed = Committed.nil();
}
} catch (Exception e) {
throw new ISE(e, "Failed to read commitFile: %s", commitFile);
} finally {
commitLock.unlock();
}
int rowsSoFar = 0;
if (committed.equals(Committed.nil())) {
log.debug("No previously committed metadata.");
} else {
log.info("Loading partially-persisted segments[%s] from[%s] with commit metadata: %s", String.join(", ", committed.getHydrants().keySet()), baseDir, committed.getMetadata());
}
for (File sinkDir : files) {
final File identifierFile = new File(sinkDir, IDENTIFIER_FILE_NAME);
if (!identifierFile.isFile()) {
// No identifier in this sinkDir; it must not actually be a sink directory. Skip it.
continue;
}
try {
final SegmentIdWithShardSpec identifier = objectMapper.readValue(identifierFile, SegmentIdWithShardSpec.class);
final int committedHydrants = committed.getCommittedHydrants(identifier.toString());
if (committedHydrants <= 0) {
log.info("Removing uncommitted segment at [%s].", sinkDir);
FileUtils.deleteDirectory(sinkDir);
continue;
}
// To avoid reading and listing of "merged" dir and other special files
final File[] sinkFiles = sinkDir.listFiles((dir, fileName) -> !(Ints.tryParse(fileName) == null));
Arrays.sort(sinkFiles, (o1, o2) -> Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName())));
List<FireHydrant> hydrants = new ArrayList<>();
for (File hydrantDir : sinkFiles) {
final int hydrantNumber = Integer.parseInt(hydrantDir.getName());
if (hydrantNumber >= committedHydrants) {
log.info("Removing uncommitted partial segment at [%s]", hydrantDir);
FileUtils.deleteDirectory(hydrantDir);
} else {
log.debug("Loading previously persisted partial segment at [%s]", hydrantDir);
if (hydrantNumber != hydrants.size()) {
throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
}
hydrants.add(new FireHydrant(new QueryableIndexSegment(indexIO.loadIndex(hydrantDir), identifier.asSegmentId()), hydrantNumber));
}
}
// Make sure we loaded enough hydrants.
if (committedHydrants != hydrants.size()) {
throw new ISE("Missing hydrant [%,d] in sinkDir [%s].", hydrants.size(), sinkDir);
}
Sink currSink = new Sink(
    identifier.getInterval(),
    schema,
    identifier.getShardSpec(),
    identifier.getVersion(),
    tuningConfig.getAppendableIndexSpec(),
    tuningConfig.getMaxRowsInMemory(),
    maxBytesTuningConfig,
    useMaxMemoryEstimates,
    null,
    hydrants
);
rowsSoFar += currSink.getNumRows();
sinks.put(identifier, currSink);
sinkTimeline.add(currSink.getInterval(), currSink.getVersion(), identifier.getShardSpec().createChunk(currSink));
segmentAnnouncer.announceSegment(currSink.getSegment());
} catch (IOException e) {
log.makeAlert(e, "Problem loading sink[%s] from disk.", schema.getDataSource()).addData("sinkDir", sinkDir).emit();
}
}
// Make sure we loaded all committed sinks.
final Set<String> loadedSinks = Sets.newHashSet(Iterables.transform(sinks.keySet(), SegmentIdWithShardSpec::toString));
final Set<String> missingSinks = Sets.difference(committed.getHydrants().keySet(), loadedSinks);
if (!missingSinks.isEmpty()) {
throw new ISE("Missing committed sinks [%s]", Joiner.on(", ").join(missingSinks));
}
totalRows.set(rowsSoFar);
return committed.getMetadata();
}
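The hydrant loop above enforces a strict layout invariant: spill directories are named 0 through N-1, only those numbered below the committed count survive bootstrap, and any gap in the numbering is an error. The following small helper (illustrative only, not part of Druid) restates that selection rule in isolation:

import java.util.ArrayList;
import java.util.List;

public final class HydrantBootstrapCheck {
  // Given hydrant numbers in ascending order, keep only those below committedHydrants and require
  // them to be contiguous starting at 0; the real code deletes the directories of the uncommitted
  // ones and throws ISE on a gap or a short count.
  public static List<Integer> selectCommitted(int[] sortedHydrantNumbers, int committedHydrants) {
    final List<Integer> kept = new ArrayList<>();
    for (int hydrantNumber : sortedHydrantNumbers) {
      if (hydrantNumber >= committedHydrants) {
        continue; // uncommitted partial spill, dropped
      }
      if (hydrantNumber != kept.size()) {
        throw new IllegalStateException("Missing hydrant " + kept.size());
      }
      kept.add(hydrantNumber);
    }
    if (kept.size() != committedHydrants) {
      throw new IllegalStateException("Expected " + committedHydrants + " hydrants, found " + kept.size());
    }
    return kept;
  }
}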
Use of org.apache.druid.segment.QueryableIndexSegment in project druid by druid-io.
Class BatchAppenderator, method getSinkForIdentifierPath.
private Sink getSinkForIdentifierPath(SegmentIdWithShardSpec identifier, File identifierPath) throws IOException {
// To avoid reading and listing of "merged" dir and other special files
final File[] sinkFiles = identifierPath.listFiles((dir, fileName) -> !(Ints.tryParse(fileName) == null));
if (sinkFiles == null) {
throw new ISE("Problem reading persisted sinks in path[%s]", identifierPath);
}
Arrays.sort(sinkFiles, (o1, o2) -> Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName())));
List<FireHydrant> hydrants = new ArrayList<>();
for (File hydrantDir : sinkFiles) {
final int hydrantNumber = Integer.parseInt(hydrantDir.getName());
log.debug("Loading previously persisted partial segment at [%s]", hydrantDir);
if (hydrantNumber != hydrants.size()) {
throw new ISE("Missing hydrant [%,d] in identifier [%s].", hydrants.size(), identifier);
}
hydrants.add(new FireHydrant(new QueryableIndexSegment(indexIO.loadIndex(hydrantDir), identifier.asSegmentId()), hydrantNumber));
}
Sink retVal = new Sink(
    identifier.getInterval(),
    schema,
    identifier.getShardSpec(),
    identifier.getVersion(),
    tuningConfig.getAppendableIndexSpec(),
    tuningConfig.getMaxRowsInMemory(),
    maxBytesTuningConfig,
    useMaxMemoryEstimates,
    null,
    hydrants
);
// this sink is not writable
retVal.finishWriting();
return retVal;
}
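This method and bootstrapSinksFromDisk above share the same terse listFiles/sort idiom for locating spill directories. A more explicit, standalone form of that idiom is sketched below (the class is hypothetical; Guava's Ints.tryParse is the only non-JDK dependency):

import java.io.File;
import java.util.Arrays;
import java.util.Comparator;
import com.google.common.primitives.Ints;

public final class HydrantDirs {
  // Keep only entries whose names are plain integers (hydrant spill numbers, skipping "merged" and
  // other special files) and return them sorted by that number in ascending order.
  public static File[] listHydrantDirs(File sinkDir) {
    final File[] hydrantDirs = sinkDir.listFiles((dir, fileName) -> Ints.tryParse(fileName) != null);
    if (hydrantDirs == null) {
      throw new IllegalStateException("Unable to list files in " + sinkDir);
    }
    Arrays.sort(hydrantDirs, Comparator.comparingInt((File f) -> Integer.parseInt(f.getName())));
    return hydrantDirs;
  }
}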
Use of org.apache.druid.segment.QueryableIndexSegment in project druid by druid-io.
Class SegmentAnalyzerTest, method testMappedWorksHelper.
private void testMappedWorksHelper(EnumSet<SegmentMetadataQuery.AnalysisType> analyses) {
final List<SegmentAnalysis> results = getSegmentAnalysises(new QueryableIndexSegment(TestIndex.getMMappedTestIndex(), SegmentId.dummy("test_1")), analyses);
Assert.assertEquals(1, results.size());
final SegmentAnalysis analysis = results.get(0);
Assert.assertEquals(SegmentId.dummy("test_1").toString(), analysis.getId());
final Map<String, ColumnAnalysis> columns = analysis.getColumns();
Assert.assertEquals(TestIndex.COLUMNS.length + 3 - 1, columns.size());
for (DimensionSchema schema : TestIndex.DIMENSION_SCHEMAS) {
final String dimension = schema.getName();
final ColumnAnalysis columnAnalysis = columns.get(dimension);
if ("null_column".equals(dimension)) {
Assert.assertNull(columnAnalysis);
} else {
final boolean isString = schema.getColumnType().is(ValueType.STRING);
Assert.assertEquals(dimension, schema.getColumnType().toString(), columnAnalysis.getType());
Assert.assertEquals(dimension, 0, columnAnalysis.getSize());
if (isString) {
if (analyses == null) {
Assert.assertTrue(dimension, columnAnalysis.getCardinality() > 0);
} else {
Assert.assertEquals(dimension, 0, columnAnalysis.getCardinality().longValue());
}
} else {
Assert.assertNull(dimension, columnAnalysis.getCardinality());
}
}
}
for (String metric : TestIndex.DOUBLE_METRICS) {
final ColumnAnalysis columnAnalysis = columns.get(metric);
Assert.assertEquals(metric, ValueType.DOUBLE.name(), columnAnalysis.getType());
Assert.assertEquals(metric, 0, columnAnalysis.getSize());
Assert.assertNull(metric, columnAnalysis.getCardinality());
}
for (String metric : TestIndex.FLOAT_METRICS) {
final ColumnAnalysis columnAnalysis = columns.get(metric);
Assert.assertEquals(metric, ValueType.FLOAT.name(), columnAnalysis.getType());
Assert.assertEquals(metric, 0, columnAnalysis.getSize());
Assert.assertNull(metric, columnAnalysis.getCardinality());
}
}