use of org.apache.druid.segment.column.ColumnDescriptor in project druid by druid-io.
the class LongDimensionMergerV9 method makeColumnDescriptor.
@Override
public ColumnDescriptor makeColumnDescriptor() {
final ColumnDescriptor.Builder builder = ColumnDescriptor.builder();
builder.setValueType(ValueType.LONG);
ColumnPartSerde serde = IndexMergerV9.createLongColumnPartSerde(serializer, indexSpec);
builder.addSerde(serde);
return builder.build();
}
use of org.apache.druid.segment.column.ColumnDescriptor in project druid by druid-io.
the class IndexMergerV9 method makeIndexFiles.
private File makeIndexFiles(final List<IndexableAdapter> adapters, @Nullable final AggregatorFactory[] metricAggs, final File outDir, final ProgressIndicator progress, final List<String> mergedDimensions, final List<String> mergedMetrics, final Function<List<TransformableRowIterator>, TimeAndDimsIterator> rowMergerFn, final boolean fillRowNumConversions, final IndexSpec indexSpec, @Nullable final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory) throws IOException {
progress.start();
progress.progress();
List<Metadata> metadataList = Lists.transform(adapters, IndexableAdapter::getMetadata);
final Metadata segmentMetadata;
if (metricAggs != null) {
AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
for (int i = 0; i < metricAggs.length; i++) {
combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
}
segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
} else {
segmentMetadata = Metadata.merge(metadataList, null);
}
Closer closer = Closer.create();
try {
final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
FileUtils.mkdirp(outDir);
SegmentWriteOutMediumFactory omf = segmentWriteOutMediumFactory != null ? segmentWriteOutMediumFactory : defaultSegmentWriteOutMediumFactory;
log.debug("Using SegmentWriteOutMediumFactory[%s]", omf.getClass().getSimpleName());
SegmentWriteOutMedium segmentWriteOutMedium = omf.makeSegmentWriteOutMedium(outDir);
closer.register(segmentWriteOutMedium);
long startTime = System.currentTimeMillis();
Files.asByteSink(new File(outDir, "version.bin")).write(Ints.toByteArray(IndexIO.V9_VERSION));
log.debug("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);
progress.progress();
startTime = System.currentTimeMillis();
try (FileOutputStream fos = new FileOutputStream(new File(outDir, "factory.json"))) {
SegmentizerFactory customSegmentLoader = indexSpec.getSegmentLoader();
if (customSegmentLoader != null) {
mapper.writeValue(fos, customSegmentLoader);
} else {
mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
}
}
log.debug("Completed factory.json in %,d millis", System.currentTimeMillis() - startTime);
progress.progress();
final Map<String, ValueType> metricsValueTypes = new TreeMap<>(Comparators.naturalNullsFirst());
final Map<String, String> metricTypeNames = new TreeMap<>(Comparators.naturalNullsFirst());
final List<ColumnCapabilities> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);
final Map<String, DimensionHandler> handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
final List<DimensionMergerV9> mergers = new ArrayList<>();
for (int i = 0; i < mergedDimensions.size(); i++) {
DimensionHandler handler = handlers.get(mergedDimensions.get(i));
mergers.add(handler.makeMerger(indexSpec, segmentWriteOutMedium, dimCapabilities.get(i), progress, closer));
}
/**
*********** Setup Dim Conversions *************
*/
progress.progress();
startTime = System.currentTimeMillis();
writeDimValuesAndSetupDimConversion(adapters, progress, mergedDimensions, mergers);
log.debug("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);
/**
*********** Walk through data sets, merge them, and write merged columns ************
*/
progress.progress();
final TimeAndDimsIterator timeAndDimsIterator = makeMergedTimeAndDimsIterator(adapters, mergedDimensions, mergedMetrics, rowMergerFn, handlers, mergers);
closer.register(timeAndDimsIterator);
final GenericColumnSerializer timeWriter = setupTimeWriter(segmentWriteOutMedium, indexSpec);
final ArrayList<GenericColumnSerializer> metricWriters = setupMetricsWriters(segmentWriteOutMedium, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec);
List<IntBuffer> rowNumConversions = mergeIndexesAndWriteColumns(adapters, progress, timeAndDimsIterator, timeWriter, metricWriters, mergers, fillRowNumConversions);
/**
********** Create Inverted Indexes and Finalize Build Columns ************
*/
final String section = "build inverted index and columns";
progress.startSection(section);
makeTimeColumn(v9Smoosher, progress, timeWriter, indexSpec);
makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metricWriters, indexSpec);
for (int i = 0; i < mergedDimensions.size(); i++) {
DimensionMergerV9 merger = mergers.get(i);
merger.writeIndexes(rowNumConversions);
if (merger.canSkip()) {
continue;
}
ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
makeColumn(v9Smoosher, mergedDimensions.get(i), columnDesc);
}
progress.stopSection(section);
/**
*********** Make index.drd & metadata.drd files *************
*/
progress.progress();
makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, progress, indexSpec, mergers);
makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
v9Smoosher.close();
progress.stop();
return outDir;
} catch (Throwable t) {
throw closer.rethrow(t);
} finally {
closer.close();
}
}
use of org.apache.druid.segment.column.ColumnDescriptor in project druid by druid-io.
the class IndexMergerV9 method makeTimeColumn.
private void makeTimeColumn(final FileSmoosher v9Smoosher, final ProgressIndicator progress, final GenericColumnSerializer timeWriter, final IndexSpec indexSpec) throws IOException {
final String section = "make time column";
progress.startSection(section);
long startTime = System.currentTimeMillis();
final ColumnDescriptor serdeficator = ColumnDescriptor.builder().setValueType(ValueType.LONG).addSerde(createLongColumnPartSerde(timeWriter, indexSpec)).build();
makeColumn(v9Smoosher, ColumnHolder.TIME_COLUMN_NAME, serdeficator);
log.debug("Completed time column in %,d millis.", System.currentTimeMillis() - startTime);
progress.stopSection(section);
}
use of org.apache.druid.segment.column.ColumnDescriptor in project presto by prestodb.
the class V9SegmentIndexSource method createColumnHolder.
private ColumnHolder createColumnHolder(String columnName) {
try {
ByteBuffer columnData = ByteBuffer.wrap(segmentColumnSource.getColumnData(columnName));
ColumnDescriptor columnDescriptor = readColumnDescriptor(columnData);
return columnDescriptor.read(columnData, () -> 0, null);
} catch (IOException e) {
throw new PrestoException(DRUID_SEGMENT_LOAD_ERROR, e);
}
}
use of org.apache.druid.segment.column.ColumnDescriptor in project druid by druid-io.
the class StringDimensionMergerV9 method makeColumnDescriptor.
@Override
public ColumnDescriptor makeColumnDescriptor() {
// Now write everything
boolean hasMultiValue = capabilities.hasMultipleValues().isTrue();
final CompressionStrategy compressionStrategy = indexSpec.getDimensionCompression();
final BitmapSerdeFactory bitmapSerdeFactory = indexSpec.getBitmapSerdeFactory();
final ColumnDescriptor.Builder builder = ColumnDescriptor.builder();
builder.setValueType(ValueType.STRING);
builder.setHasMultipleValues(hasMultiValue);
final DictionaryEncodedColumnPartSerde.SerializerBuilder partBuilder = DictionaryEncodedColumnPartSerde.serializerBuilder().withDictionary(dictionaryWriter).withValue(encodedValueSerializer, hasMultiValue, compressionStrategy != CompressionStrategy.UNCOMPRESSED).withBitmapSerdeFactory(bitmapSerdeFactory).withBitmapIndex(bitmapWriter).withSpatialIndex(spatialWriter).withByteOrder(IndexIO.BYTE_ORDER);
return builder.addSerde(partBuilder.build()).build();
}
Aggregations