Use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.
The class BatchAppenderator, method mergeAndPush.
/**
* Merge the segment and push it to deep storage. Should only be used on segments that have been fully persisted.
*
* @param identifier sink identifier
* @param sink sink to push
* @return segment descriptor, or null if the sink is no longer valid
*/
@Nullable
private DataSegment mergeAndPush(final SegmentIdWithShardSpec identifier, final Sink sink) {
// Use a descriptor file to indicate that pushing has completed.
final File persistDir = computePersistDir(identifier);
final File mergedTarget = new File(persistDir, "merged");
final File descriptorFile = computeDescriptorFile(identifier);
// Sanity checks
if (sink.isWritable()) {
throw new ISE("Expected sink to be no longer writable before mergeAndPush for segment[%s].", identifier);
}
int numHydrants = 0;
for (FireHydrant hydrant : sink) {
if (!hydrant.hasSwapped()) {
throw new ISE("Expected sink to be fully persisted before mergeAndPush for segment[%s].", identifier);
}
numHydrants++;
}
SinkMetadata sm = sinksMetadata.get(identifier);
if (sm == null) {
log.warn("Sink metadata not found just before merge for identifier [%s]", identifier);
} else if (numHydrants != sm.getNumHydrants()) {
throw new ISE("Number of restored hydrants[%d] for identifier[%s] does not match expected value[%d]", numHydrants, identifier, sm.getNumHydrants());
}
try {
if (descriptorFile.exists()) {
// Already pushed.
log.info("Segment[%s] already pushed, skipping.", identifier);
return objectMapper.readValue(descriptorFile, DataSegment.class);
}
removeDirectory(mergedTarget);
if (mergedTarget.exists()) {
throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
}
final File mergedFile;
final long mergeFinishTime;
final long startTime = System.nanoTime();
List<QueryableIndex> indexes = new ArrayList<>();
Closer closer = Closer.create();
try {
for (FireHydrant fireHydrant : sink) {
Pair<ReferenceCountingSegment, Closeable> segmentAndCloseable = fireHydrant.getAndIncrementSegment();
final QueryableIndex queryableIndex = segmentAndCloseable.lhs.asQueryableIndex();
log.debug("Segment[%s] adding hydrant[%s]", identifier, fireHydrant);
indexes.add(queryableIndex);
closer.register(segmentAndCloseable.rhs);
}
mergedFile = indexMerger.mergeQueryableIndex(
    indexes,
    schema.getGranularitySpec().isRollup(),
    schema.getAggregators(),
    schema.getDimensionsSpec(),
    mergedTarget,
    tuningConfig.getIndexSpec(),
    tuningConfig.getIndexSpecForIntermediatePersists(),
    new BaseProgressIndicator(),
    tuningConfig.getSegmentWriteOutMediumFactory(),
    tuningConfig.getMaxColumnsToMerge()
);
mergeFinishTime = System.nanoTime();
log.debug("Segment[%s] built in %,dms.", identifier, (mergeFinishTime - startTime) / 1000000);
} catch (Throwable t) {
throw closer.rethrow(t);
} finally {
closer.close();
}
// Retry pushing segments because uploading to deep storage might fail especially for cloud storage types
final DataSegment segment = RetryUtils.retry(
    // This appenderator is used only for the local indexing task so unique paths are not required
    () -> dataSegmentPusher.push(
        mergedFile,
        sink.getSegment().withDimensions(
            IndexMerger.getMergedDimensionsFromQueryableIndexes(indexes, schema.getDimensionsSpec())
        ),
        false
    ),
    exception -> exception instanceof Exception,
    5
);
// Drop the queryable indexes behind the hydrants; holding them in memory can generate OOMs during merge if enough of them are held back.
for (FireHydrant fireHydrant : sink) {
fireHydrant.swapSegment(null);
}
// cleanup, sink no longer needed
removeDirectory(computePersistDir(identifier));
final long pushFinishTime = System.nanoTime();
log.info("Segment[%s] of %,d bytes " + "built from %d incremental persist(s) in %,dms; " + "pushed to deep storage in %,dms. " + "Load spec is: %s", identifier, segment.getSize(), indexes.size(), (mergeFinishTime - startTime) / 1000000, (pushFinishTime - mergeFinishTime) / 1000000, objectMapper.writeValueAsString(segment.getLoadSpec()));
return segment;
} catch (Exception e) {
metrics.incrementFailedHandoffs();
log.warn(e, "Failed to push merged index for segment[%s].", identifier);
throw new RuntimeException(e);
}
}
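The merge block above follows the canonical Closer pattern: each per-hydrant closeable is registered as it is acquired, any failure is routed through rethrow so the original exception stays primary, and close() always runs in the finally block. Below is a minimal, standalone sketch of that pattern; the class, method, and file path are illustrative, not part of the Druid code.

import org.apache.druid.java.util.common.io.Closer;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

public class CloserRethrowSketch {
    // Counts the bytes in a file; the resource lifecycle mirrors the merge block above.
    static long countBytes(final String path) throws IOException {
        final Closer closer = Closer.create();
        try {
            // register() returns its argument, so acquisition and registration combine into one step.
            final InputStream in = closer.register(new FileInputStream(path));
            long total = 0;
            while (in.read() != -1) {
                total++;
            }
            return total;
        } catch (Throwable t) {
            // rethrow() records t as the primary exception; later failures from close() are suppressed.
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
    }
}

The same shape appears in mergeAndPush: the closer outlives the loop that acquires per-hydrant segments, so a failure inside mergeQueryableIndex still releases every registered reference.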
Use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.
The class InputSourceSampler, method sample.
public SamplerResponse sample(
    final InputSource inputSource,
    // inputFormat can be null only if inputSource.needsFormat() = false or parser is specified.
    @Nullable final InputFormat inputFormat,
    @Nullable final DataSchema dataSchema,
    @Nullable final SamplerConfig samplerConfig
) {
Preconditions.checkNotNull(inputSource, "inputSource required");
if (inputSource.needsFormat()) {
Preconditions.checkNotNull(inputFormat, "inputFormat required");
}
final DataSchema nonNullDataSchema = dataSchema == null ? DEFAULT_DATA_SCHEMA : dataSchema;
final SamplerConfig nonNullSamplerConfig = samplerConfig == null ? SamplerConfig.empty() : samplerConfig;
final Closer closer = Closer.create();
final File tempDir = FileUtils.createTempDir();
closer.register(() -> FileUtils.deleteDirectory(tempDir));
try {
final InputSourceReader reader = buildReader(nonNullSamplerConfig, nonNullDataSchema, inputSource, inputFormat, tempDir);
try (final CloseableIterator<InputRowListPlusRawValues> iterator = reader.sample();
final IncrementalIndex index = buildIncrementalIndex(nonNullSamplerConfig, nonNullDataSchema);
// closer1 aliases the outer closer so the temp-directory cleanup registered above runs when this try block exits.
final Closer closer1 = closer) {
List<SamplerResponseRow> responseRows = new ArrayList<>(nonNullSamplerConfig.getNumRows());
int numRowsIndexed = 0;
while (responseRows.size() < nonNullSamplerConfig.getNumRows() && iterator.hasNext()) {
final InputRowListPlusRawValues inputRowListPlusRawValues = iterator.next();
final List<Map<String, Object>> rawColumnsList = inputRowListPlusRawValues.getRawValuesList();
final ParseException parseException = inputRowListPlusRawValues.getParseException();
if (parseException != null) {
if (rawColumnsList != null) {
// add all rows to response
responseRows.addAll(rawColumnsList.stream().map(rawColumns -> new SamplerResponseRow(rawColumns, null, true, parseException.getMessage())).collect(Collectors.toList()));
} else {
// no data parsed, add one response row
responseRows.add(new SamplerResponseRow(null, null, true, parseException.getMessage()));
}
continue;
}
List<InputRow> inputRows = inputRowListPlusRawValues.getInputRows();
if (inputRows == null) {
continue;
}
for (int i = 0; i < inputRows.size(); i++) {
// InputRowListPlusRawValues guarantees the size of rawColumnsList and inputRows are the same
Map<String, Object> rawColumns = rawColumnsList == null ? null : rawColumnsList.get(i);
InputRow row = inputRows.get(i);
// keep the index of the row to be added to responseRows for further use
final int rowIndex = responseRows.size();
IncrementalIndexAddResult addResult = index.add(new SamplerInputRow(row, rowIndex), true);
if (addResult.hasParseException()) {
responseRows.add(new SamplerResponseRow(rawColumns, null, true, addResult.getParseException().getMessage()));
} else {
// store the raw value; will be merged with the data from the IncrementalIndex later
responseRows.add(new SamplerResponseRow(rawColumns, null, null, null));
numRowsIndexed++;
}
}
}
final List<String> columnNames = index.getColumnNames();
columnNames.remove(SamplerInputRow.SAMPLER_ORDERING_COLUMN);
for (Row row : index) {
Map<String, Object> parsed = new LinkedHashMap<>();
parsed.put(ColumnHolder.TIME_COLUMN_NAME, row.getTimestampFromEpoch());
columnNames.forEach(k -> parsed.put(k, row.getRaw(k)));
Number sortKey = row.getMetric(SamplerInputRow.SAMPLER_ORDERING_COLUMN);
if (sortKey != null) {
responseRows.set(sortKey.intValue(), responseRows.get(sortKey.intValue()).withParsed(parsed));
}
}
// Trim responseRows so it does not exceed the requested number of rows.
if (responseRows.size() > nonNullSamplerConfig.getNumRows()) {
responseRows = responseRows.subList(0, nonNullSamplerConfig.getNumRows());
}
int numRowsRead = responseRows.size();
return new SamplerResponse(
    numRowsRead,
    numRowsIndexed,
    responseRows.stream()
                .filter(Objects::nonNull)
                .filter(x -> x.getParsed() != null || x.isUnparseable() != null)
                .collect(Collectors.toList())
);
}
} catch (Exception e) {
throw new SamplerException(e, "Failed to sample data: %s", e.getMessage());
}
}
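Two properties of Closer carry the cleanup logic in sample(): register() accepts any Closeable, including a lambda such as the temp-directory deletion above, and Closer itself implements Closeable, so it can sit in the same try-with-resources header as the reader and the incremental index. The following is a minimal sketch of that shape, using a hypothetical JDK temp file instead of Druid's temp-directory helper.

import org.apache.druid.java.util.common.io.Closer;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

public class CloserLambdaCleanupSketch {
    static void withTempFile() throws IOException {
        final Closer closer = Closer.create();
        final Path tempFile = Files.createTempFile("sampler-sketch", ".tmp");
        // Cleanup is expressed as a Closeable lambda; it runs when the closer is closed.
        closer.register(() -> Files.deleteIfExists(tempFile));
        // Closer implements Closeable, so try-with-resources closes everything registered so far,
        // even if the work inside the block throws.
        try (final Closer resources = closer) {
            // ... read sample rows and spill intermediate data next to tempFile ...
        }
    }
}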
Use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.
The class QueryableIndexCursorSequenceBuilder, method build.
public Sequence<Cursor> build(final Granularity gran) {
final Offset baseOffset;
if (filterBitmap == null) {
baseOffset = descending ? new SimpleDescendingOffset(index.getNumRows()) : new SimpleAscendingOffset(index.getNumRows());
} else {
baseOffset = BitmapOffset.of(filterBitmap, descending, index.getNumRows());
}
// Column caches shared amongst all cursors in this sequence.
final Map<String, BaseColumn> columnCache = new HashMap<>();
final NumericColumn timestamps = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn();
final Closer closer = Closer.create();
closer.register(timestamps);
Iterable<Interval> iterable = gran.getIterable(interval);
if (descending) {
iterable = Lists.reverse(ImmutableList.copyOf(iterable));
}
return Sequences.withBaggage(Sequences.map(Sequences.simple(iterable), new Function<Interval, Cursor>() {
@Override
public Cursor apply(final Interval inputInterval) {
final long timeStart = Math.max(interval.getStartMillis(), inputInterval.getStartMillis());
final long timeEnd = Math.min(interval.getEndMillis(), gran.increment(inputInterval.getStartMillis()));
if (descending) {
for (; baseOffset.withinBounds(); baseOffset.increment()) {
if (timestamps.getLongSingleValueRow(baseOffset.getOffset()) < timeEnd) {
break;
}
}
} else {
for (; baseOffset.withinBounds(); baseOffset.increment()) {
if (timestamps.getLongSingleValueRow(baseOffset.getOffset()) >= timeStart) {
break;
}
}
}
final Offset offset = descending
                      ? new DescendingTimestampCheckingOffset(baseOffset, timestamps, timeStart, minDataTimestamp >= timeStart)
                      : new AscendingTimestampCheckingOffset(baseOffset, timestamps, timeEnd, maxDataTimestamp < timeEnd);
final Offset baseCursorOffset = offset.clone();
final ColumnSelectorFactory columnSelectorFactory = new QueryableIndexColumnSelectorFactory(
    index,
    virtualColumns,
    descending,
    closer,
    baseCursorOffset.getBaseReadableOffset(),
    columnCache
);
final DateTime myBucket = gran.toDateTime(inputInterval.getStartMillis());
if (postFilter == null) {
return new QueryableIndexCursor(baseCursorOffset, columnSelectorFactory, myBucket);
} else {
FilteredOffset filteredOffset = new FilteredOffset(baseCursorOffset, columnSelectorFactory, descending, postFilter, bitmapIndexSelector);
return new QueryableIndexCursor(filteredOffset, columnSelectorFactory, myBucket);
}
}
}), closer);
}
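The important detail in build() is that a single Closer is shared by every cursor the sequence produces and is attached as the sequence's baggage, so registered resources such as the timestamp column are released exactly once, when the sequence is fully consumed or abandoned. Below is a hedged, minimal sketch of that wiring; the shared resource and row values are illustrative.

import org.apache.druid.java.util.common.guava.Sequence;
import org.apache.druid.java.util.common.guava.Sequences;
import org.apache.druid.java.util.common.io.Closer;

import java.io.Closeable;
import java.util.Arrays;

public class SequenceBaggageSketch {
    static Sequence<String> buildRows(final Closeable sharedResource) {
        final Closer closer = Closer.create();
        // Every row produced by the sequence may read from sharedResource,
        // so its lifetime must cover the whole iteration.
        closer.register(sharedResource);
        final Sequence<String> rows = Sequences.simple(Arrays.asList("a", "b", "c"));
        // Closer is itself Closeable, so it can serve as the sequence's baggage:
        // it is closed after the last row is read, or when the sequence is closed early.
        return Sequences.withBaggage(rows, closer);
    }
}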
Use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.
The class ReferenceCountingCloseableObject, method incrementReferenceAndDecrementOnceCloseable.
/**
* Returns an {@link Optional} of a {@link Closeable} from {@link #decrementOnceCloseable}, if it is able to
* successfully {@link #increment}, else nothing indicating that the reference could not be acquired.
*/
public Optional<Closeable> incrementReferenceAndDecrementOnceCloseable() {
final Closer closer;
if (increment()) {
closer = Closer.create();
closer.register(decrementOnceCloseable());
} else {
closer = null;
}
return Optional.ofNullable(closer);
}
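A caller would typically feed the returned Optional straight into try-with-resources, so the reference acquired by increment() is always released. Here is a sketch of that usage against a hypothetical stand-in interface (the real class is generic over the wrapped object); the method and interface names are illustrative.

import java.io.Closeable;
import java.io.IOException;
import java.util.Optional;

public class ReferenceAcquisitionSketch {
    // Stand-in for any object exposing incrementReferenceAndDecrementOnceCloseable().
    interface RefCounted {
        Optional<Closeable> incrementReferenceAndDecrementOnceCloseable();
    }

    static boolean tryUse(final RefCounted target) throws IOException {
        final Optional<Closeable> reference = target.incrementReferenceAndDecrementOnceCloseable();
        if (!reference.isPresent()) {
            // The object is already closed; the reference could not be acquired.
            return false;
        }
        // The returned Closeable is a Closer wrapping decrementOnceCloseable(),
        // so closing it releases the reference.
        try (Closeable release = reference.get()) {
            // ... safely use the referenced resource here ...
            return true;
        }
    }
}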
Use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.
The class AggregationTestHelper, method createTopNQueryAggregationTestHelper.
public static AggregationTestHelper createTopNQueryAggregationTestHelper(List<? extends Module> jsonModulesToRegister, TemporaryFolder tempFolder) {
ObjectMapper mapper = TestHelper.makeJsonMapper();
TopNQueryQueryToolChest toolchest = new TopNQueryQueryToolChest(new TopNQueryConfig());
final CloseableStupidPool<ByteBuffer> pool = new CloseableStupidPool<>("TopNQueryRunnerFactory-bufferPool", new Supplier<ByteBuffer>() {
@Override
public ByteBuffer get() {
return ByteBuffer.allocate(10 * 1024 * 1024);
}
});
final Closer resourceCloser = Closer.create();
TopNQueryRunnerFactory factory = new TopNQueryRunnerFactory(pool, toolchest, QueryRunnerTestHelper.NOOP_QUERYWATCHER);
IndexIO indexIO = new IndexIO(mapper, new ColumnConfig() {
@Override
public int columnCacheSizeBytes() {
return 0;
}
});
return new AggregationTestHelper(
    mapper,
    new IndexMergerV9(mapper, indexIO, OffHeapMemorySegmentWriteOutMediumFactory.instance()),
    indexIO,
    toolchest,
    factory,
    tempFolder,
    jsonModulesToRegister,
    resourceCloser,
    Collections.emptyMap()
);
}
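The resourceCloser built above travels into the AggregationTestHelper so pooled resources can be released during test teardown. Below is a hedged sketch of that lifecycle, assuming the buffer pool is registered with the closer and the closer is closed once the test finishes; the pool name and buffer size are illustrative.

import org.apache.druid.collections.CloseableStupidPool;
import org.apache.druid.java.util.common.io.Closer;

import java.io.IOException;
import java.nio.ByteBuffer;

public class ResourceCloserTeardownSketch {
    static void runWithPool() throws IOException {
        final Closer resourceCloser = Closer.create();
        // register() hands back the pool, so creation and registration combine into one step.
        final CloseableStupidPool<ByteBuffer> pool = resourceCloser.register(
            new CloseableStupidPool<>("test-bufferPool", () -> ByteBuffer.allocate(1024))
        );
        try {
            // ... build a query runner factory on top of the pool and run queries ...
        } finally {
            // Registered resources are closed exactly once, in reverse registration order.
            resourceCloser.close();
        }
    }
}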