Use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.
The class ColumnProcessors, method makeProcessorInternal.
/**
 * Creates "column processors", which are objects that wrap a single input column and provide some
 * functionality on top of it.
 *
 * @param inputCapabilitiesFn   function that returns capabilities of the column being processed. The type provided
 *                              by these capabilities will be used to determine what kind of selector to create. If
 *                              this function returns null, then processorFactory.defaultType() will be
 *                              used to construct a set of assumed capabilities.
 * @param dimensionSelectorFn   function that creates a DimensionSelector for the column being processed. Will be
 *                              called if the column type is string.
 * @param valueSelectorFunction function that creates a ColumnValueSelector for the column being processed. Will be
 *                              called if the column type is long, float, double, or complex.
 * @param processorFactory      object that encapsulates the knowledge about how to create processors
 * @param selectorFactory       column selector factory used for creating the processor
 */
private static <T> T makeProcessorInternal(
    final Function<ColumnSelectorFactory, ColumnCapabilities> inputCapabilitiesFn,
    final Function<ColumnSelectorFactory, DimensionSelector> dimensionSelectorFn,
    final Function<ColumnSelectorFactory, ColumnValueSelector<?>> valueSelectorFunction,
    final ColumnProcessorFactory<T> processorFactory,
    final ColumnSelectorFactory selectorFactory
)
{
  final ColumnCapabilities capabilities = inputCapabilitiesFn.apply(selectorFactory);
  final TypeSignature<ValueType> effectiveType = capabilities != null ? capabilities : processorFactory.defaultType();
  switch (effectiveType.getType()) {
    case STRING:
      return processorFactory.makeDimensionProcessor(dimensionSelectorFn.apply(selectorFactory), mayBeMultiValue(capabilities));
    case LONG:
      return processorFactory.makeLongProcessor(valueSelectorFunction.apply(selectorFactory));
    case FLOAT:
      return processorFactory.makeFloatProcessor(valueSelectorFunction.apply(selectorFactory));
    case DOUBLE:
      return processorFactory.makeDoubleProcessor(valueSelectorFunction.apply(selectorFactory));
    case COMPLEX:
      return processorFactory.makeComplexProcessor(valueSelectorFunction.apply(selectorFactory));
    default:
      throw new ISE("Unsupported type[%s]", effectiveType.asTypeString());
  }
}
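The method above dispatches on the column's effective type: string columns get a DimensionSelector-based processor, while numeric and complex columns get a ColumnValueSelector-based one, with processorFactory.defaultType() used as a fallback when the capabilities are unknown. The following standalone sketch mirrors that dispatch pattern; the names here (SimpleType, makeDescriber) are hypothetical and are not Druid APIs.

import java.util.function.Function;

public class TypeDispatchSketch
{
  enum SimpleType { STRING, LONG, FLOAT, DOUBLE, COMPLEX }

  static Function<Object, String> makeDescriber(SimpleType reportedType, SimpleType defaultType)
  {
    // Mirror of makeProcessorInternal: fall back to a default type when the
    // column's reported type is unknown (e.g. the column does not exist).
    final SimpleType effectiveType = reportedType != null ? reportedType : defaultType;
    switch (effectiveType) {
      case STRING:
        return v -> "string: " + v;
      case LONG:
      case FLOAT:
      case DOUBLE:
        return v -> "numeric: " + v;
      case COMPLEX:
        return v -> "complex: " + v;
      default:
        throw new IllegalStateException("Unsupported type[" + effectiveType + "]");
    }
  }
}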
Use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.
The class IndexMergerV9, method makeIndexFiles.
private File makeIndexFiles(
    final List<IndexableAdapter> adapters,
    @Nullable final AggregatorFactory[] metricAggs,
    final File outDir,
    final ProgressIndicator progress,
    final List<String> mergedDimensions,
    final List<String> mergedMetrics,
    final Function<List<TransformableRowIterator>, TimeAndDimsIterator> rowMergerFn,
    final boolean fillRowNumConversions,
    final IndexSpec indexSpec,
    @Nullable final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory
) throws IOException
{
  progress.start();
  progress.progress();
  List<Metadata> metadataList = Lists.transform(adapters, IndexableAdapter::getMetadata);
  final Metadata segmentMetadata;
  if (metricAggs != null) {
    AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
    for (int i = 0; i < metricAggs.length; i++) {
      combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
    }
    segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
  } else {
    segmentMetadata = Metadata.merge(metadataList, null);
  }
  Closer closer = Closer.create();
  try {
    final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
    FileUtils.mkdirp(outDir);
    SegmentWriteOutMediumFactory omf =
        segmentWriteOutMediumFactory != null ? segmentWriteOutMediumFactory : defaultSegmentWriteOutMediumFactory;
    log.debug("Using SegmentWriteOutMediumFactory[%s]", omf.getClass().getSimpleName());
    SegmentWriteOutMedium segmentWriteOutMedium = omf.makeSegmentWriteOutMedium(outDir);
    closer.register(segmentWriteOutMedium);
    long startTime = System.currentTimeMillis();
    Files.asByteSink(new File(outDir, "version.bin")).write(Ints.toByteArray(IndexIO.V9_VERSION));
    log.debug("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);
    progress.progress();
    startTime = System.currentTimeMillis();
    try (FileOutputStream fos = new FileOutputStream(new File(outDir, "factory.json"))) {
      SegmentizerFactory customSegmentLoader = indexSpec.getSegmentLoader();
      if (customSegmentLoader != null) {
        mapper.writeValue(fos, customSegmentLoader);
      } else {
        mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
      }
    }
    log.debug("Completed factory.json in %,d millis", System.currentTimeMillis() - startTime);
    progress.progress();
    final Map<String, ValueType> metricsValueTypes = new TreeMap<>(Comparators.naturalNullsFirst());
    final Map<String, String> metricTypeNames = new TreeMap<>(Comparators.naturalNullsFirst());
    final List<ColumnCapabilities> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
    mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);
    final Map<String, DimensionHandler> handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
    final List<DimensionMergerV9> mergers = new ArrayList<>();
    for (int i = 0; i < mergedDimensions.size(); i++) {
      DimensionHandler handler = handlers.get(mergedDimensions.get(i));
      mergers.add(handler.makeMerger(indexSpec, segmentWriteOutMedium, dimCapabilities.get(i), progress, closer));
    }
    /************ Setup Dim Conversions *************/
    progress.progress();
    startTime = System.currentTimeMillis();
    writeDimValuesAndSetupDimConversion(adapters, progress, mergedDimensions, mergers);
    log.debug("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);
    /************ Walk through data sets, merge them, and write merged columns *************/
    progress.progress();
    final TimeAndDimsIterator timeAndDimsIterator =
        makeMergedTimeAndDimsIterator(adapters, mergedDimensions, mergedMetrics, rowMergerFn, handlers, mergers);
    closer.register(timeAndDimsIterator);
    final GenericColumnSerializer timeWriter = setupTimeWriter(segmentWriteOutMedium, indexSpec);
    final ArrayList<GenericColumnSerializer> metricWriters =
        setupMetricsWriters(segmentWriteOutMedium, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec);
    List<IntBuffer> rowNumConversions =
        mergeIndexesAndWriteColumns(adapters, progress, timeAndDimsIterator, timeWriter, metricWriters, mergers, fillRowNumConversions);
    /************ Create Inverted Indexes and Finalize Build Columns *************/
    final String section = "build inverted index and columns";
    progress.startSection(section);
    makeTimeColumn(v9Smoosher, progress, timeWriter, indexSpec);
    makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metricWriters, indexSpec);
    for (int i = 0; i < mergedDimensions.size(); i++) {
      DimensionMergerV9 merger = mergers.get(i);
      merger.writeIndexes(rowNumConversions);
      if (merger.canSkip()) {
        continue;
      }
      ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
      makeColumn(v9Smoosher, mergedDimensions.get(i), columnDesc);
    }
    progress.stopSection(section);
    /************ Make index.drd & metadata.drd files *************/
    progress.progress();
    makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, progress, indexSpec, mergers);
    makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
    v9Smoosher.close();
    progress.stop();
    return outDir;
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }
}
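The resource handling in makeIndexFiles follows the Guava Closer idiom: every resource is registered with the Closer, the catch block routes any Throwable through closer.rethrow() so it cannot be masked by a failure during cleanup, and the finally block closes everything. A minimal sketch of the same pattern, with the resource and body left generic:

import com.google.common.io.Closer;
import java.io.Closeable;
import java.io.IOException;

public class CloserSketch
{
  static void useResource(Closeable resource) throws IOException
  {
    final Closer closer = Closer.create();
    try {
      // Registered resources are closed (in reverse order) by closer.close().
      closer.register(resource);
      // ... use the resource ...
    } catch (Throwable t) {
      // Record t as the primary exception; anything thrown while closing is
      // suppressed rather than replacing it.
      throw closer.rethrow(t);
    } finally {
      closer.close();
    }
  }
}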
Use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.
The class IndexMergerV9, method makeDimensionHandlers.
private Map<String, DimensionHandler> makeDimensionHandlers(
    final List<String> mergedDimensions,
    final List<ColumnCapabilities> dimCapabilities
)
{
  Map<String, DimensionHandler> handlers = new LinkedHashMap<>();
  for (int i = 0; i < mergedDimensions.size(); i++) {
    ColumnCapabilities capabilities = ColumnCapabilitiesImpl.snapshot(dimCapabilities.get(i), DIMENSION_CAPABILITY_MERGE_LOGIC);
    String dimName = mergedDimensions.get(i);
    DimensionHandler handler = DimensionHandlerUtils.getHandlerFromCapabilities(dimName, capabilities, null);
    handlers.put(dimName, handler);
  }
  return handlers;
}
Use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.
The class QueryableIndexColumnSelectorFactory, method makeDimensionSelectorUndecorated.
private DimensionSelector makeDimensionSelectorUndecorated(DimensionSpec dimensionSpec)
{
  final String dimension = dimensionSpec.getDimension();
  final ExtractionFn extractionFn = dimensionSpec.getExtractionFn();
  final ColumnHolder columnHolder = index.getColumnHolder(dimension);
  if (columnHolder == null) {
    return DimensionSelector.constant(null, extractionFn);
  }
  if (dimension.equals(ColumnHolder.TIME_COLUMN_NAME)) {
    return new SingleScanTimeDimensionSelector(makeColumnValueSelector(dimension), extractionFn, descending);
  }
  ColumnCapabilities capabilities = columnHolder.getCapabilities();
  if (capabilities.isNumeric()) {
    return ValueTypes.makeNumericWrappingDimensionSelector(capabilities.getType(), makeColumnValueSelector(dimension), extractionFn);
  }
  final DictionaryEncodedColumn column = getCachedColumn(dimension, DictionaryEncodedColumn.class);
  if (column != null) {
    return column.makeDimensionSelector(offset, extractionFn);
  } else {
    return DimensionSelector.constant(null, extractionFn);
  }
}
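Callers normally do not reach makeDimensionSelectorUndecorated directly; they call ColumnSelectorFactory.makeDimensionSelector with a DimensionSpec, and the spec's decoration is applied on top of the undecorated selector. A hedged usage sketch, assuming a ColumnSelectorFactory positioned on a cursor (readDimension and the column name "myDim" are hypothetical):

import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.data.IndexedInts;

public class DimensionSelectorUsageSketch
{
  // Hypothetical helper: reads the current row's values for "myDim".
  static void readDimension(ColumnSelectorFactory factory)
  {
    final DimensionSelector selector = factory.makeDimensionSelector(DefaultDimensionSpec.of("myDim"));
    final IndexedInts row = selector.getRow();
    for (int i = 0; i < row.size(); i++) {
      // lookupName() decodes a dictionary id; the decoded value may be null.
      String value = selector.lookupName(row.get(i));
    }
  }
}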
Use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.
The class IncrementalIndexAdapter, method processRows.
/**
 * Sometimes it's hard to tell whether a dimension contains null values or not.
 * If a dimension has shown a null or empty value explicitly, then yes, it contains
 * nulls. But if all of a dimension's observed values are non-null, it is still too
 * early to say the dimension contains no nulls. Consider a two-row case: the first
 * row has "dimA=1" and "dimB=2", while the second row has only "dimA=3". dimB's only
 * value is "2", and it never showed a null or empty value explicitly. But when we
 * combine the two rows, dimB is null in row 2. So we must iterate over all rows to
 * determine whether a dimension contains null values.
 */
private void processRows(IncrementalIndex index, BitmapFactory bitmapFactory, List<IncrementalIndex.DimensionDesc> dimensions)
{
  int rowNum = 0;
  for (IncrementalIndexRow row : index.getFacts().persistIterable()) {
    final Object[] dims = row.getDims();
    for (IncrementalIndex.DimensionDesc dimension : dimensions) {
      final int dimIndex = dimension.getIndex();
      DimensionAccessor accessor = accessors.get(dimension.getName());
      // Add 'null' to the dimension's dictionary.
      if (dimIndex >= dims.length || dims[dimIndex] == null) {
        accessor.indexer.processRowValsToUnsortedEncodedKeyComponent(null, true);
        continue;
      }
      final ColumnCapabilities capabilities = dimension.getCapabilities();
      if (capabilities.hasBitmapIndexes()) {
        final MutableBitmap[] bitmapIndexes = accessor.invertedIndexes;
        final DimensionIndexer indexer = accessor.indexer;
        indexer.fillBitmapsFromUnsortedEncodedKeyComponent(dims[dimIndex], rowNum, bitmapIndexes, bitmapFactory);
      }
    }
    ++rowNum;
  }
}
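The javadoc's point is that null-ness can arise by absence rather than by an explicit null value, so only a full pass over the rows reveals which dimensions contain nulls. The following self-contained sketch (plain Java, not Druid code) reproduces the two-row dimA/dimB example:

import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class NullDetectionSketch
{
  static Set<String> dimensionsWithNulls(List<Map<String, String>> rows, Set<String> allDims)
  {
    final Set<String> hasNull = new HashSet<>();
    for (Map<String, String> row : rows) {
      for (String dim : allDims) {
        // A dimension absent from a row "contains" a null in that row.
        if (row.get(dim) == null) {
          hasNull.add(dim);
        }
      }
    }
    return hasNull;
  }

  public static void main(String[] args)
  {
    List<Map<String, String>> rows = List.of(
        Map.of("dimA", "1", "dimB", "2"),   // row 1: both dimensions present
        Map.of("dimA", "3")                 // row 2: dimB is missing
    );
    // Prints [dimB]: it never showed an explicit null, but combining the rows
    // makes it null in row 2.
    System.out.println(dimensionsWithNulls(rows, Set.of("dimA", "dimB")));
  }
}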