use of org.apache.druid.segment.column.ColumnHolder in project druid by druid-io.
the class CompactionTask method createDimensionsSpec.
private static DimensionsSpec createDimensionsSpec(List<NonnullPair<QueryableIndex, DataSegment>> queryableIndices) {
final BiMap<String, Integer> uniqueDims = HashBiMap.create();
final Map<String, DimensionSchema> dimensionSchemaMap = new HashMap<>();
// Here, we try to retain the order of dimensions as they were specified since the order of dimensions may be
// optimized for performance.
// Dimensions are extracted from the recent segments to olders because recent segments are likely to be queried more
// frequently, and thus the performance should be optimized for recent ones rather than old ones.
// sort timelineSegments in order of interval, see https://github.com/apache/druid/pull/9905
queryableIndices.sort((o1, o2) -> Comparators.intervalsByStartThenEnd().compare(o1.rhs.getInterval(), o2.rhs.getInterval()));
int index = 0;
for (NonnullPair<QueryableIndex, DataSegment> pair : Lists.reverse(queryableIndices)) {
final QueryableIndex queryableIndex = pair.lhs;
final Map<String, DimensionHandler> dimensionHandlerMap = queryableIndex.getDimensionHandlers();
for (String dimension : queryableIndex.getAvailableDimensions()) {
final ColumnHolder columnHolder = Preconditions.checkNotNull(queryableIndex.getColumnHolder(dimension), "Cannot find column for dimension[%s]", dimension);
if (!uniqueDims.containsKey(dimension)) {
final DimensionHandler dimensionHandler = Preconditions.checkNotNull(dimensionHandlerMap.get(dimension), "Cannot find dimensionHandler for dimension[%s]", dimension);
uniqueDims.put(dimension, index++);
dimensionSchemaMap.put(dimension, createDimensionSchema(dimension, columnHolder.getCapabilities(), dimensionHandler.getMultivalueHandling()));
}
}
}
final BiMap<Integer, String> orderedDims = uniqueDims.inverse();
final List<DimensionSchema> dimensionSchemas = IntStream.range(0, orderedDims.size()).mapToObj(i -> {
final String dimName = orderedDims.get(i);
return Preconditions.checkNotNull(dimensionSchemaMap.get(dimName), "Cannot find dimension[%s] from dimensionSchemaMap", dimName);
}).collect(Collectors.toList());
return new DimensionsSpec(dimensionSchemas);
}
use of org.apache.druid.segment.column.ColumnHolder in project druid by druid-io.
the class ColumnarLongsEncodeDataFromSegmentBenchmark method initializeSegmentValueIntermediaryFile.
/**
* writes column values to an intermediary text file, 1 per line, encoders read from this file as input to write
* encoded column files.
*/
private void initializeSegmentValueIntermediaryFile() throws IOException {
File dir = getTmpDir();
File dataFile = new File(dir, getColumnDataFileName(segmentName, columnName));
if (!dataFile.exists()) {
final IndexIO indexIO = new IndexIO(new DefaultObjectMapper(), () -> 0);
try (final QueryableIndex index = indexIO.loadIndex(new File(segmentPath))) {
final Set<String> columnNames = new LinkedHashSet<>();
columnNames.add(ColumnHolder.TIME_COLUMN_NAME);
Iterables.addAll(columnNames, index.getColumnNames());
final ColumnHolder column = index.getColumnHolder(columnName);
final ColumnCapabilities capabilities = column.getCapabilities();
try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) {
if (!capabilities.is(ValueType.LONG)) {
throw new RuntimeException("Invalid column type, expected 'Long'");
}
LongsColumn theColumn = (LongsColumn) column.getColumn();
for (int i = 0; i < theColumn.length(); i++) {
long value = theColumn.getLongSingleValueRow(i);
writer.write(value + "\n");
}
}
}
}
}
use of org.apache.druid.segment.column.ColumnHolder in project druid by druid-io.
the class SegmentAnalyzer method analyze.
public Map<String, ColumnAnalysis> analyze(Segment segment) {
Preconditions.checkNotNull(segment, "segment");
// index is null for incremental-index-based segments, but storageAdapter is always available
final QueryableIndex index = segment.asQueryableIndex();
final StorageAdapter storageAdapter = segment.asStorageAdapter();
// get length and column names from storageAdapter
final int length = storageAdapter.getNumRows();
Map<String, ColumnAnalysis> columns = new TreeMap<>();
final RowSignature rowSignature = storageAdapter.getRowSignature();
for (String columnName : rowSignature.getColumnNames()) {
final ColumnCapabilities capabilities;
if (storageAdapter instanceof IncrementalIndexStorageAdapter) {
// See javadocs for getSnapshotColumnCapabilities for a discussion of why we need to do this.
capabilities = ((IncrementalIndexStorageAdapter) storageAdapter).getSnapshotColumnCapabilities(columnName);
} else {
capabilities = storageAdapter.getColumnCapabilities(columnName);
}
final ColumnAnalysis analysis;
switch(capabilities.getType()) {
case LONG:
final int bytesPerRow = ColumnHolder.TIME_COLUMN_NAME.equals(columnName) ? NUM_BYTES_IN_TIMESTAMP : Long.BYTES;
analysis = analyzeNumericColumn(capabilities, length, bytesPerRow);
break;
case FLOAT:
analysis = analyzeNumericColumn(capabilities, length, NUM_BYTES_IN_TEXT_FLOAT);
break;
case DOUBLE:
analysis = analyzeNumericColumn(capabilities, length, Double.BYTES);
break;
case STRING:
if (index != null) {
analysis = analyzeStringColumn(capabilities, index.getColumnHolder(columnName));
} else {
analysis = analyzeStringColumn(capabilities, storageAdapter, columnName);
}
break;
case COMPLEX:
final ColumnHolder columnHolder = index != null ? index.getColumnHolder(columnName) : null;
analysis = analyzeComplexColumn(capabilities, columnHolder);
break;
default:
log.warn("Unknown column type[%s].", capabilities.asTypeString());
analysis = ColumnAnalysis.error(StringUtils.format("unknown_type_%s", capabilities.asTypeString()));
}
columns.put(columnName, analysis);
}
return columns;
}
use of org.apache.druid.segment.column.ColumnHolder in project druid by druid-io.
the class UseIndexesStrategy method makeTimeFilteredBitmap.
static ImmutableBitmap makeTimeFilteredBitmap(final QueryableIndex index, final Segment segment, final Filter filter, final Interval interval) {
final BitmapFactory bitmapFactory = index.getBitmapFactoryForDimensions();
final ImmutableBitmap baseFilter;
if (filter == null) {
baseFilter = null;
} else {
final BitmapIndexSelector selector = new ColumnSelectorBitmapIndexSelector(index.getBitmapFactoryForDimensions(), VirtualColumns.EMPTY, index);
Preconditions.checkArgument(filter.supportsBitmapIndex(selector), "filter[%s] should support bitmap", filter);
baseFilter = filter.getBitmapIndex(selector);
}
final ImmutableBitmap timeFilteredBitmap;
if (!interval.contains(segment.getDataInterval())) {
final MutableBitmap timeBitmap = bitmapFactory.makeEmptyMutableBitmap();
final ColumnHolder timeColumnHolder = index.getColumnHolder(ColumnHolder.TIME_COLUMN_NAME);
try (final NumericColumn timeValues = (NumericColumn) timeColumnHolder.getColumn()) {
int startIndex = Math.max(0, getStartIndexOfTime(timeValues, interval.getStartMillis(), true));
int endIndex = Math.min(timeValues.length() - 1, getStartIndexOfTime(timeValues, interval.getEndMillis(), false));
for (int i = startIndex; i <= endIndex; i++) {
timeBitmap.add(i);
}
final ImmutableBitmap finalTimeBitmap = bitmapFactory.makeImmutableBitmap(timeBitmap);
timeFilteredBitmap = (baseFilter == null) ? finalTimeBitmap : finalTimeBitmap.intersection(baseFilter);
}
} else {
timeFilteredBitmap = baseFilter;
}
return timeFilteredBitmap;
}
use of org.apache.druid.segment.column.ColumnHolder in project druid by druid-io.
the class ListFilteredVirtualColumn method getBitmapIndex.
@Override
@Nullable
public BitmapIndex getBitmapIndex(String columnName, ColumnSelector selector) {
final ColumnHolder holder = selector.getColumnHolder(delegate.getDimension());
if (holder == null) {
return null;
}
final BitmapIndex underlyingIndex = holder.getBitmapIndex();
if (underlyingIndex == null) {
return null;
}
final IdMapping idMapping;
if (allowList) {
idMapping = ListFilteredDimensionSpec.buildAllowListIdMapping(values, underlyingIndex.getCardinality(), null, underlyingIndex::getValue);
} else {
idMapping = ListFilteredDimensionSpec.buildDenyListIdMapping(values, underlyingIndex.getCardinality(), underlyingIndex::getValue);
}
return new ListFilteredBitmapIndex(underlyingIndex, idMapping);
}
Aggregations