Use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.
The class ExpressionVectorSelectorsTest, method sanityTestVectorizedExpressionSelectors:
public static void sanityTestVectorizedExpressionSelectors(
    String expression,
    @Nullable ExpressionType outputType,
    QueryableIndex index,
    Closer closer,
    int rowsPerSegment
) {
  final List<Object> results = new ArrayList<>(rowsPerSegment);
  final VirtualColumns virtualColumns = VirtualColumns.create(
      ImmutableList.of(
          new ExpressionVirtualColumn(
              "v",
              expression,
              ExpressionType.toColumnType(outputType),
              TestExprMacroTable.INSTANCE
          )
      )
  );
  final QueryableIndexStorageAdapter storageAdapter = new QueryableIndexStorageAdapter(index);
  VectorCursor cursor = storageAdapter.makeVectorCursor(
      null,
      index.getDataInterval(),
      virtualColumns,
      false,
      512,
      null
  );
  ColumnCapabilities capabilities = virtualColumns.getColumnCapabilities(storageAdapter, "v");
  int rowCount = 0;
  if (capabilities.isDictionaryEncoded().isTrue()) {
    SingleValueDimensionVectorSelector selector =
        cursor.getColumnSelectorFactory().makeSingleValueDimensionSelector(DefaultDimensionSpec.of("v"));
    while (!cursor.isDone()) {
      int[] row = selector.getRowVector();
      for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
        results.add(selector.lookupName(row[i]));
      }
      cursor.advance();
    }
  } else {
    VectorValueSelector selector = null;
    VectorObjectSelector objectSelector = null;
    if (outputType != null && outputType.isNumeric()) {
      selector = cursor.getColumnSelectorFactory().makeValueSelector("v");
    } else {
      objectSelector = cursor.getColumnSelectorFactory().makeObjectSelector("v");
    }
    while (!cursor.isDone()) {
      boolean[] nulls;
      switch (outputType.getType()) {
        case LONG:
          nulls = selector.getNullVector();
          long[] longs = selector.getLongVector();
          for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
            results.add(nulls != null && nulls[i] ? null : longs[i]);
          }
          break;
        case DOUBLE:
          // special case to test floats just to get coverage on getFloatVector
          if ("float2".equals(expression)) {
            nulls = selector.getNullVector();
            float[] floats = selector.getFloatVector();
            for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
              results.add(nulls != null && nulls[i] ? null : (double) floats[i]);
            }
          } else {
            nulls = selector.getNullVector();
            double[] doubles = selector.getDoubleVector();
            for (int i = 0; i < selector.getCurrentVectorSize(); i++, rowCount++) {
              results.add(nulls != null && nulls[i] ? null : doubles[i]);
            }
          }
          break;
        case STRING:
          Object[] objects = objectSelector.getObjectVector();
          for (int i = 0; i < objectSelector.getCurrentVectorSize(); i++, rowCount++) {
            results.add(objects[i]);
          }
          break;
      }
      cursor.advance();
    }
  }
  closer.register(cursor);
  Sequence<Cursor> cursors = new QueryableIndexStorageAdapter(index).makeCursors(
      null,
      index.getDataInterval(),
      virtualColumns,
      Granularities.ALL,
      false,
      null
  );
  int rowCountCursor = cursors.map(nonVectorized -> {
    final ColumnValueSelector nonSelector = nonVectorized.getColumnSelectorFactory().makeColumnValueSelector("v");
    int rows = 0;
    while (!nonVectorized.isDone()) {
      Assert.assertEquals(StringUtils.format("Failed at row %s", rows), nonSelector.getObject(), results.get(rows));
      rows++;
      nonVectorized.advance();
    }
    return rows;
  }).accumulate(0, (acc, in) -> acc + in);
  Assert.assertTrue(rowCountCursor > 0);
  Assert.assertEquals(rowCountCursor, rowCount);
}
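The helper never closes the cursor itself; it registers it on the caller-supplied Closer. A minimal calling sketch (not taken from the Druid test suite; the expression, output type, and fixture names below are assumptions, and the enclosing test method is assumed to declare throws Exception) showing that lifecycle:

Closer closer = Closer.create();
try {
  // "index" and ROWS_PER_SEGMENT stand in for whatever QueryableIndex fixture the test builds.
  sanityTestVectorizedExpressionSelectors("long1 + long2", ExpressionType.LONG, index, closer, ROWS_PER_SEGMENT);
} finally {
  closer.close(); // releases the VectorCursor registered inside the helper
}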
Use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.
The class QueryStackTests, method createQueryRunnerFactoryConglomerate:
public static QueryRunnerFactoryConglomerate createQueryRunnerFactoryConglomerate(
    final Closer closer,
    final DruidProcessingConfig processingConfig,
    final Supplier<Integer> minTopNThresholdSupplier
) {
  final CloseableStupidPool<ByteBuffer> stupidPool = new CloseableStupidPool<>(
      "TopNQueryRunnerFactory-bufferPool",
      () -> ByteBuffer.allocate(COMPUTE_BUFFER_SIZE)
  );
  closer.register(stupidPool);
  final Pair<GroupByQueryRunnerFactory, Closer> factoryCloserPair = GroupByQueryRunnerTest.makeQueryRunnerFactory(
      GroupByQueryRunnerTest.DEFAULT_MAPPER,
      new GroupByQueryConfig() {
        @Override
        public String getDefaultStrategy() {
          return GroupByStrategySelector.STRATEGY_V2;
        }
      },
      processingConfig
  );
  final GroupByQueryRunnerFactory groupByQueryRunnerFactory = factoryCloserPair.lhs;
  closer.register(factoryCloserPair.rhs);
  final QueryRunnerFactoryConglomerate conglomerate = new DefaultQueryRunnerFactoryConglomerate(
      ImmutableMap.<Class<? extends Query>, QueryRunnerFactory>builder()
          .put(
              SegmentMetadataQuery.class,
              new SegmentMetadataQueryRunnerFactory(
                  new SegmentMetadataQueryQueryToolChest(new SegmentMetadataQueryConfig("P1W")),
                  QueryRunnerTestHelper.NOOP_QUERYWATCHER
              )
          )
          .put(
              ScanQuery.class,
              new ScanQueryRunnerFactory(
                  new ScanQueryQueryToolChest(new ScanQueryConfig(), new DefaultGenericQueryMetricsFactory()),
                  new ScanQueryEngine(),
                  new ScanQueryConfig()
              )
          )
          .put(
              TimeseriesQuery.class,
              new TimeseriesQueryRunnerFactory(
                  new TimeseriesQueryQueryToolChest(),
                  new TimeseriesQueryEngine(),
                  QueryRunnerTestHelper.NOOP_QUERYWATCHER
              )
          )
          .put(
              TopNQuery.class,
              new TopNQueryRunnerFactory(
                  stupidPool,
                  new TopNQueryQueryToolChest(
                      new TopNQueryConfig() {
                        @Override
                        public int getMinTopNThreshold() {
                          return minTopNThresholdSupplier.get();
                        }
                      }
                  ),
                  QueryRunnerTestHelper.NOOP_QUERYWATCHER
              )
          )
          .put(GroupByQuery.class, groupByQueryRunnerFactory)
          .build()
  );
  return conglomerate;
}
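A hedged usage sketch (inside a test method assumed to declare throws IOException; the processing config and threshold below are placeholders, not values taken from QueryStackTests): every pooled resource the conglomerate depends on is registered on the caller's Closer, so the test controls teardown by closing that one object.

Closer closer = Closer.create();
QueryRunnerFactoryConglomerate conglomerate = QueryStackTests.createQueryRunnerFactoryConglomerate(
    closer,
    processingConfig,   // a DruidProcessingConfig supplied by the test (placeholder)
    () -> 1000          // fixed minTopNThreshold for the test (placeholder value)
);
// ... run queries against the conglomerate ...
closer.close();         // releases the TopN buffer pool and the GroupBy factory resources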
Use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.
The class TimeseriesQueryEngine, method processVectorized:
private Sequence<Result<TimeseriesResultValue>> processVectorized(
    final TimeseriesQuery query,
    final StorageAdapter adapter,
    @Nullable final Filter filter,
    final Interval queryInterval,
    final Granularity gran,
    final boolean descending
) {
  final boolean skipEmptyBuckets = query.isSkipEmptyBuckets();
  final List<AggregatorFactory> aggregatorSpecs = query.getAggregatorSpecs();
  final VectorCursor cursor = adapter.makeVectorCursor(
      filter,
      queryInterval,
      query.getVirtualColumns(),
      descending,
      QueryContexts.getVectorSize(query),
      null
  );
  if (cursor == null) {
    return Sequences.empty();
  }
  final Closer closer = Closer.create();
  closer.register(cursor);
  try {
    final VectorCursorGranularizer granularizer = VectorCursorGranularizer.create(adapter, cursor, gran, queryInterval);
    if (granularizer == null) {
      return Sequences.empty();
    }
    final VectorColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory();
    final AggregatorAdapters aggregators = closer.register(
        AggregatorAdapters.factorizeVector(columnSelectorFactory, query.getAggregatorSpecs())
    );
    final ResourceHolder<ByteBuffer> bufferHolder = closer.register(bufferPool.take());
    final ByteBuffer buffer = bufferHolder.get();
    if (aggregators.spaceNeeded() > buffer.remaining()) {
      throw new ISE(
          "Not enough space for aggregators, needed [%,d] bytes but have only [%,d].",
          aggregators.spaceNeeded(),
          buffer.remaining()
      );
    }
    return Sequences.withBaggage(
        Sequences.simple(granularizer.getBucketIterable()).map(bucketInterval -> {
          // Whether or not the current bucket is empty
          boolean emptyBucket = true;
          while (!cursor.isDone()) {
            granularizer.setCurrentOffsets(bucketInterval);
            if (granularizer.getEndOffset() > granularizer.getStartOffset()) {
              if (emptyBucket) {
                aggregators.init(buffer, 0);
              }
              aggregators.aggregateVector(buffer, 0, granularizer.getStartOffset(), granularizer.getEndOffset());
              emptyBucket = false;
            }
            if (!granularizer.advanceCursorWithinBucket()) {
              break;
            }
          }
          if (emptyBucket && skipEmptyBuckets) {
            // Return null, will get filtered out later by the Objects::nonNull filter.
            return null;
          }
          final TimeseriesResultBuilder bob = new TimeseriesResultBuilder(gran.toDateTime(bucketInterval.getStartMillis()));
          if (emptyBucket) {
            aggregators.init(buffer, 0);
          }
          for (int i = 0; i < aggregatorSpecs.size(); i++) {
            bob.addMetric(aggregatorSpecs.get(i).getName(), aggregators.get(buffer, 0, i));
          }
          return bob.build();
        }).filter(Objects::nonNull),
        closer
    );
  } catch (Throwable t1) {
    try {
      closer.close();
    } catch (Throwable t2) {
      t1.addSuppressed(t2);
    }
    throw t1;
  }
}
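The Closer hand-off above follows a single pattern: register every resource on the Closer, close it immediately (with suppression) if building the result fails, and otherwise attach it to the returned Sequence as baggage so it is closed only after the sequence has been consumed. A simplified, self-contained sketch of that pattern, using stand-in resources and results rather than the engine's real types:

Sequence<String> buildWithBaggage() {
  final Closer closer = Closer.create();
  try {
    closer.register(new ByteArrayInputStream(new byte[0])); // stand-in for the cursor, aggregators, and buffer holder
    Sequence<String> results = Sequences.simple(ImmutableList.of("a", "b")); // stand-in for the per-bucket results
    // Ownership is handed off: the Closer is closed after the sequence has been fully consumed.
    return Sequences.withBaggage(results, closer);
  } catch (Throwable t1) {
    // Building the sequence failed before the hand-off, so clean up here and keep any close() failure as suppressed.
    try {
      closer.close();
    } catch (Throwable t2) {
      t1.addSuppressed(t2);
    }
    throw t1;
  }
}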
Use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.
The class QueryableIndexCursorSequenceBuilder, method buildVectorized:
public VectorCursor buildVectorized(final int vectorSize) {
  // Sanity check - matches QueryableIndexStorageAdapter.canVectorize
  Preconditions.checkState(!descending, "!descending");
  final Map<String, BaseColumn> columnCache = new HashMap<>();
  final Closer closer = Closer.create();
  NumericColumn timestamps = null;
  final int startOffset;
  final int endOffset;
  if (interval.getStartMillis() > minDataTimestamp) {
    timestamps = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn();
    closer.register(timestamps);
    startOffset = timeSearch(timestamps, interval.getStartMillis(), 0, index.getNumRows());
  } else {
    startOffset = 0;
  }
  if (interval.getEndMillis() <= maxDataTimestamp) {
    if (timestamps == null) {
      timestamps = (NumericColumn) index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME).getColumn();
      closer.register(timestamps);
    }
    endOffset = timeSearch(timestamps, interval.getEndMillis(), startOffset, index.getNumRows());
  } else {
    endOffset = index.getNumRows();
  }
  final VectorOffset baseOffset = filterBitmap == null
      ? new NoFilterVectorOffset(vectorSize, startOffset, endOffset)
      : new BitmapVectorOffset(vectorSize, filterBitmap, startOffset, endOffset);
  // baseColumnSelectorFactory using baseOffset is the column selector for filtering.
  final VectorColumnSelectorFactory baseColumnSelectorFactory =
      makeVectorColumnSelectorFactoryForOffset(columnCache, baseOffset, closer);
  if (postFilter == null) {
    return new QueryableIndexVectorCursor(baseColumnSelectorFactory, baseOffset, vectorSize, closer);
  } else {
    final VectorOffset filteredOffset = FilteredVectorOffset.create(baseOffset, baseColumnSelectorFactory, postFilter);
    // Now create the cursor and column selector that will be returned to the caller.
    //
    // There is an inefficiency with how we do things here: this cursor (the one that will be provided to the
    // caller) does share a columnCache with "baseColumnSelectorFactory", but it *doesn't* share vector data. This
    // means that if the caller wants to read from a column that is also used for filtering, the underlying column
    // object will get hit twice for some of the values (anything that matched the filter). This is probably most
    // noticeable if it causes thrashing of decompression buffers due to out-of-order reads. I haven't observed
    // this directly but it seems possible in principle.
    final VectorColumnSelectorFactory filteredColumnSelectorFactory =
        makeVectorColumnSelectorFactoryForOffset(columnCache, filteredOffset, closer);
    return new QueryableIndexVectorCursor(filteredColumnSelectorFactory, filteredOffset, vectorSize, closer);
  }
}
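A hedged usage sketch (assuming "builder" is an already-configured QueryableIndexCursorSequenceBuilder): the returned QueryableIndexVectorCursor owns the Closer assembled above, so closing the cursor is what releases the time column and any cached BaseColumn handles.

void scanOnce(QueryableIndexCursorSequenceBuilder builder) throws IOException {
  VectorCursor cursor = builder.buildVectorized(512);
  try {
    while (!cursor.isDone()) {
      // read vectors through cursor.getColumnSelectorFactory() ...
      cursor.advance();
    }
  } finally {
    cursor.close(); // closes the internal Closer: the time column and any cached columns
  }
}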
Use of org.apache.druid.java.util.common.io.Closer in project druid by druid-io.
The class IndexMergerV9, method makeIndexFiles:
private File makeIndexFiles(
    final List<IndexableAdapter> adapters,
    @Nullable final AggregatorFactory[] metricAggs,
    final File outDir,
    final ProgressIndicator progress,
    final List<String> mergedDimensions,
    final List<String> mergedMetrics,
    final Function<List<TransformableRowIterator>, TimeAndDimsIterator> rowMergerFn,
    final boolean fillRowNumConversions,
    final IndexSpec indexSpec,
    @Nullable final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory
) throws IOException {
  progress.start();
  progress.progress();
  List<Metadata> metadataList = Lists.transform(adapters, IndexableAdapter::getMetadata);
  final Metadata segmentMetadata;
  if (metricAggs != null) {
    AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
    for (int i = 0; i < metricAggs.length; i++) {
      combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
    }
    segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
  } else {
    segmentMetadata = Metadata.merge(metadataList, null);
  }
  Closer closer = Closer.create();
  try {
    final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
    FileUtils.mkdirp(outDir);
    SegmentWriteOutMediumFactory omf = segmentWriteOutMediumFactory != null
        ? segmentWriteOutMediumFactory
        : defaultSegmentWriteOutMediumFactory;
    log.debug("Using SegmentWriteOutMediumFactory[%s]", omf.getClass().getSimpleName());
    SegmentWriteOutMedium segmentWriteOutMedium = omf.makeSegmentWriteOutMedium(outDir);
    closer.register(segmentWriteOutMedium);
    long startTime = System.currentTimeMillis();
    Files.asByteSink(new File(outDir, "version.bin")).write(Ints.toByteArray(IndexIO.V9_VERSION));
    log.debug("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);
    progress.progress();
    startTime = System.currentTimeMillis();
    try (FileOutputStream fos = new FileOutputStream(new File(outDir, "factory.json"))) {
      SegmentizerFactory customSegmentLoader = indexSpec.getSegmentLoader();
      if (customSegmentLoader != null) {
        mapper.writeValue(fos, customSegmentLoader);
      } else {
        mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
      }
    }
    log.debug("Completed factory.json in %,d millis", System.currentTimeMillis() - startTime);
    progress.progress();
    final Map<String, ValueType> metricsValueTypes = new TreeMap<>(Comparators.naturalNullsFirst());
    final Map<String, String> metricTypeNames = new TreeMap<>(Comparators.naturalNullsFirst());
    final List<ColumnCapabilities> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
    mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);
    final Map<String, DimensionHandler> handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
    final List<DimensionMergerV9> mergers = new ArrayList<>();
    for (int i = 0; i < mergedDimensions.size(); i++) {
      DimensionHandler handler = handlers.get(mergedDimensions.get(i));
      mergers.add(handler.makeMerger(indexSpec, segmentWriteOutMedium, dimCapabilities.get(i), progress, closer));
    }
    /**
     *********** Setup Dim Conversions *************
     */
    progress.progress();
    startTime = System.currentTimeMillis();
    writeDimValuesAndSetupDimConversion(adapters, progress, mergedDimensions, mergers);
    log.debug("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);
    /**
     *********** Walk through data sets, merge them, and write merged columns ************
     */
    progress.progress();
    final TimeAndDimsIterator timeAndDimsIterator = makeMergedTimeAndDimsIterator(
        adapters,
        mergedDimensions,
        mergedMetrics,
        rowMergerFn,
        handlers,
        mergers
    );
    closer.register(timeAndDimsIterator);
    final GenericColumnSerializer timeWriter = setupTimeWriter(segmentWriteOutMedium, indexSpec);
    final ArrayList<GenericColumnSerializer> metricWriters =
        setupMetricsWriters(segmentWriteOutMedium, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec);
    List<IntBuffer> rowNumConversions =
        mergeIndexesAndWriteColumns(adapters, progress, timeAndDimsIterator, timeWriter, metricWriters, mergers, fillRowNumConversions);
    /**
     ********** Create Inverted Indexes and Finalize Build Columns ************
     */
    final String section = "build inverted index and columns";
    progress.startSection(section);
    makeTimeColumn(v9Smoosher, progress, timeWriter, indexSpec);
    makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metricWriters, indexSpec);
    for (int i = 0; i < mergedDimensions.size(); i++) {
      DimensionMergerV9 merger = mergers.get(i);
      merger.writeIndexes(rowNumConversions);
      if (merger.canSkip()) {
        continue;
      }
      ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
      makeColumn(v9Smoosher, mergedDimensions.get(i), columnDesc);
    }
    progress.stopSection(section);
    /**
     *********** Make index.drd & metadata.drd files *************
     */
    progress.progress();
    makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, progress, indexSpec, mergers);
    makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
    v9Smoosher.close();
    progress.stop();
    return outDir;
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }
}
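The frame around the whole method is the standard Closer idiom (the same contract documented on Guava's Closer, which this class mirrors): register resources as they are opened, route any failure through rethrow() so exceptions thrown during close() become suppressed exceptions on the original failure, and close exactly once in finally. A minimal, self-contained sketch of that idiom with a hypothetical output file and payload:

File writeSomething(File outDir) throws IOException {
  Closer closer = Closer.create();
  try {
    // Register resources as they are created; close() releases them in reverse order.
    OutputStream out = closer.register(new FileOutputStream(new File(outDir, "example.bin"))); // hypothetical file
    out.write(new byte[]{1, 2, 3});
    return outDir;
  } catch (Throwable t) {
    // rethrow() records t; any exception thrown while closing is added to it as suppressed.
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }
}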