Search in sources :

Example 1 with ColumnDescriptor

use of org.apache.druid.segment.column.ColumnDescriptor in project druid by druid-io.

the class LongDimensionMergerV9 method makeColumnDescriptor.

/**
 * Builds the {@link ColumnDescriptor} for this dimension: a LONG-typed column carrying a single
 * part serde obtained from {@code IndexMergerV9.createLongColumnPartSerde} for this merger's
 * serializer and index spec.
 *
 * @return the assembled column descriptor
 */
@Override
public ColumnDescriptor makeColumnDescriptor() {
    // Single fluent chain; the serde is created after the value type is set, as before.
    return ColumnDescriptor.builder()
        .setValueType(ValueType.LONG)
        .addSerde(IndexMergerV9.createLongColumnPartSerde(serializer, indexSpec))
        .build();
}
Also used : ColumnPartSerde(org.apache.druid.segment.serde.ColumnPartSerde) ColumnDescriptor(org.apache.druid.segment.column.ColumnDescriptor)

Example 2 with ColumnDescriptor

use of org.apache.druid.segment.column.ColumnDescriptor in project druid by druid-io.

the class IndexMergerV9 method makeIndexFiles.

/**
 * Merges the given adapters and writes the resulting V9 segment files into {@code outDir}.
 *
 * <p>Steps performed, in order:
 * <ol>
 *   <li>Merge the adapters' metadata; when {@code metricAggs} is non-null each aggregator is
 *       replaced by its combining factory first.</li>
 *   <li>Write {@code version.bin} (the bytes of {@code IndexIO.V9_VERSION}) and
 *       {@code factory.json} (the index spec's segment loader, or a
 *       {@code MMappedQueryableSegmentizerFactory} when none is configured).</li>
 *   <li>Merge column capabilities and create one {@code DimensionMergerV9} per merged dimension.</li>
 *   <li>Write dimension values and set up dimension conversions.</li>
 *   <li>Iterate the merged rows and write the time, metric and dimension columns.</li>
 *   <li>Write indexes for each dimension and emit its column unless the merger reports it can be
 *       skipped.</li>
 *   <li>Write the index and metadata binaries, close the smoosher and stop the progress
 *       indicator.</li>
 * </ol>
 *
 * @param adapters                     source adapters to merge
 * @param metricAggs                   aggregators for the metrics; may be null
 * @param outDir                       directory the segment files are written to
 * @param progress                     progress indicator notified throughout the merge
 * @param mergedDimensions             names of the dimensions in the merged output
 * @param mergedMetrics                names of the metrics in the merged output
 * @param rowMergerFn                  function passed to {@code makeMergedTimeAndDimsIterator}
 * @param fillRowNumConversions        flag forwarded to {@code mergeIndexesAndWriteColumns}
 * @param indexSpec                    index spec used for segment loader and column writers
 * @param segmentWriteOutMediumFactory factory for the write-out medium; when null the default
 *                                     factory of this merger is used
 * @return {@code outDir}
 * @throws IOException declared by this method; any {@code Throwable} raised in the body is
 *                     rethrown through {@code closer.rethrow}
 */
private File makeIndexFiles(final List<IndexableAdapter> adapters, @Nullable final AggregatorFactory[] metricAggs, final File outDir, final ProgressIndicator progress, final List<String> mergedDimensions, final List<String> mergedMetrics, final Function<List<TransformableRowIterator>, TimeAndDimsIterator> rowMergerFn, final boolean fillRowNumConversions, final IndexSpec indexSpec, @Nullable final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory) throws IOException {
    progress.start();
    progress.progress();
    // Collect per-adapter metadata and merge it into the metadata of the output segment.
    List<Metadata> metadataList = Lists.transform(adapters, IndexableAdapter::getMetadata);
    final Metadata segmentMetadata;
    if (metricAggs != null) {
        // Metadata is merged with the combining form of every aggregator.
        AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
        for (int i = 0; i < metricAggs.length; i++) {
            combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
        }
        segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
    } else {
        segmentMetadata = Metadata.merge(metadataList, null);
    }
    // Resources registered here are closed in the finally block below.
    Closer closer = Closer.create();
    try {
        // NOTE(review): v9Smoosher is not registered with closer; in this method it is only
        // closed explicitly on the success path further down - confirm this is intended.
        final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
        FileUtils.mkdirp(outDir);
        // Fall back to the merger's default write-out medium factory when none was supplied.
        SegmentWriteOutMediumFactory omf = segmentWriteOutMediumFactory != null ? segmentWriteOutMediumFactory : defaultSegmentWriteOutMediumFactory;
        log.debug("Using SegmentWriteOutMediumFactory[%s]", omf.getClass().getSimpleName());
        SegmentWriteOutMedium segmentWriteOutMedium = omf.makeSegmentWriteOutMedium(outDir);
        closer.register(segmentWriteOutMedium);
        long startTime = System.currentTimeMillis();
        // version.bin holds the V9 version number as a byte array.
        Files.asByteSink(new File(outDir, "version.bin")).write(Ints.toByteArray(IndexIO.V9_VERSION));
        log.debug("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);
        progress.progress();
        startTime = System.currentTimeMillis();
        // factory.json records the segment loader: the custom one from the index spec if present,
        // otherwise a MMappedQueryableSegmentizerFactory.
        try (FileOutputStream fos = new FileOutputStream(new File(outDir, "factory.json"))) {
            SegmentizerFactory customSegmentLoader = indexSpec.getSegmentLoader();
            if (customSegmentLoader != null) {
                mapper.writeValue(fos, customSegmentLoader);
            } else {
                mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
            }
        }
        log.debug("Completed factory.json in %,d millis", System.currentTimeMillis() - startTime);
        progress.progress();
        // Output collections filled by mergeCapabilities; the metric maps are sorted with nulls first.
        final Map<String, ValueType> metricsValueTypes = new TreeMap<>(Comparators.naturalNullsFirst());
        final Map<String, String> metricTypeNames = new TreeMap<>(Comparators.naturalNullsFirst());
        final List<ColumnCapabilities> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
        mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);
        final Map<String, DimensionHandler> handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
        // One merger per merged dimension, in the same order as mergedDimensions.
        final List<DimensionMergerV9> mergers = new ArrayList<>();
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionHandler handler = handlers.get(mergedDimensions.get(i));
            mergers.add(handler.makeMerger(indexSpec, segmentWriteOutMedium, dimCapabilities.get(i), progress, closer));
        }
        // ---------- Set up dimension conversions ----------
        progress.progress();
        startTime = System.currentTimeMillis();
        writeDimValuesAndSetupDimConversion(adapters, progress, mergedDimensions, mergers);
        log.debug("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);
        // ---------- Walk through data sets, merge them, and write merged columns ----------
        progress.progress();
        final TimeAndDimsIterator timeAndDimsIterator = makeMergedTimeAndDimsIterator(adapters, mergedDimensions, mergedMetrics, rowMergerFn, handlers, mergers);
        closer.register(timeAndDimsIterator);
        final GenericColumnSerializer timeWriter = setupTimeWriter(segmentWriteOutMedium, indexSpec);
        final ArrayList<GenericColumnSerializer> metricWriters = setupMetricsWriters(segmentWriteOutMedium, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec);
        List<IntBuffer> rowNumConversions = mergeIndexesAndWriteColumns(adapters, progress, timeAndDimsIterator, timeWriter, metricWriters, mergers, fillRowNumConversions);
        // ---------- Create inverted indexes and finalize build columns ----------
        final String section = "build inverted index and columns";
        progress.startSection(section);
        makeTimeColumn(v9Smoosher, progress, timeWriter, indexSpec);
        makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metricWriters, indexSpec);
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerV9 merger = mergers.get(i);
            // Indexes are written for every merger, including those whose column is skipped below.
            merger.writeIndexes(rowNumConversions);
            if (merger.canSkip()) {
                continue;
            }
            ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
            makeColumn(v9Smoosher, mergedDimensions.get(i), columnDesc);
        }
        progress.stopSection(section);
        // ---------- Make index.drd & metadata.drd files ----------
        progress.progress();
        makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, progress, indexSpec, mergers);
        makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
        v9Smoosher.close();
        progress.stop();
        return outDir;
    } catch (Throwable t) {
        // Hand the failure to the closer so it is rethrown from here.
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used : ArrayList(java.util.ArrayList) SegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) SegmentWriteOutMedium(org.apache.druid.segment.writeout.SegmentWriteOutMedium) FileSmoosher(org.apache.druid.java.util.common.io.smoosh.FileSmoosher) Closer(org.apache.druid.java.util.common.io.Closer) ValueType(org.apache.druid.segment.column.ValueType) MMappedQueryableSegmentizerFactory(org.apache.druid.segment.loading.MMappedQueryableSegmentizerFactory) ColumnDescriptor(org.apache.druid.segment.column.ColumnDescriptor) SegmentizerFactory(org.apache.druid.segment.loading.SegmentizerFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TreeMap(java.util.TreeMap) FileOutputStream(java.io.FileOutputStream) IntBuffer(java.nio.IntBuffer) File(java.io.File)

Example 3 with ColumnDescriptor

use of org.apache.druid.segment.column.ColumnDescriptor in project druid by druid-io.

the class IndexMergerV9 method makeTimeColumn.

/**
 * Writes the time column into the smoosher as a LONG column whose single part serde is created
 * from {@code timeWriter} and {@code indexSpec}, reporting the work as the
 * "make time column" progress section.
 *
 * @param v9Smoosher smoosher the column is written to
 * @param progress   progress indicator that brackets this step
 * @param timeWriter serializer holding the time values
 * @param indexSpec  index spec passed to the long column part serde factory
 * @throws IOException declared by this method
 */
private void makeTimeColumn(final FileSmoosher v9Smoosher, final ProgressIndicator progress, final GenericColumnSerializer timeWriter, final IndexSpec indexSpec) throws IOException {
    final String sectionName = "make time column";
    progress.startSection(sectionName);
    final long beganAtMillis = System.currentTimeMillis();

    // Assemble the descriptor step by step instead of in one chained expression.
    final ColumnDescriptor.Builder timeBuilder = ColumnDescriptor.builder();
    timeBuilder.setValueType(ValueType.LONG);
    timeBuilder.addSerde(createLongColumnPartSerde(timeWriter, indexSpec));
    final ColumnDescriptor timeDescriptor = timeBuilder.build();

    makeColumn(v9Smoosher, ColumnHolder.TIME_COLUMN_NAME, timeDescriptor);

    final long elapsedMillis = System.currentTimeMillis() - beganAtMillis;
    log.debug("Completed time column in %,d millis.", elapsedMillis);
    progress.stopSection(sectionName);
}
Also used : ColumnDescriptor(org.apache.druid.segment.column.ColumnDescriptor)

Example 4 with ColumnDescriptor

use of org.apache.druid.segment.column.ColumnDescriptor in project presto by prestodb.

the class V9SegmentIndexSource method createColumnHolder.

/**
 * Loads the named column: fetches its raw bytes from the segment column source, reads the
 * {@link ColumnDescriptor} from the front of the buffer, and lets the descriptor read the
 * column holder from the same buffer.
 *
 * @param columnName name of the column to load
 * @return the column holder read from the column's data
 * @throws PrestoException with {@code DRUID_SEGMENT_LOAD_ERROR} when an {@link IOException} occurs
 */
private ColumnHolder createColumnHolder(String columnName) {
    try {
        final byte[] rawColumnBytes = segmentColumnSource.getColumnData(columnName);
        final ByteBuffer buffer = ByteBuffer.wrap(rawColumnBytes);
        final ColumnDescriptor descriptor = readColumnDescriptor(buffer);
        return descriptor.read(buffer, () -> 0, null);
    } catch (IOException ioFailure) {
        throw new PrestoException(DRUID_SEGMENT_LOAD_ERROR, ioFailure);
    }
}
Also used : ColumnDescriptor(org.apache.druid.segment.column.ColumnDescriptor) PrestoException(com.facebook.presto.spi.PrestoException) IOException(java.io.IOException) ByteBuffer(java.nio.ByteBuffer)

Example 5 with ColumnDescriptor

use of org.apache.druid.segment.column.ColumnDescriptor in project druid by druid-io.

the class StringDimensionMergerV9 method makeColumnDescriptor.

/**
 * Builds the {@link ColumnDescriptor} for this string dimension. The descriptor is STRING-typed,
 * carries the multi-value flag taken from the column capabilities, and has one
 * dictionary-encoded part serde wired to the dictionary, encoded-value, bitmap and spatial
 * writers of this merger.
 *
 * @return the assembled column descriptor
 */
@Override
public ColumnDescriptor makeColumnDescriptor() {
    final boolean multiValued = capabilities.hasMultipleValues().isTrue();
    final CompressionStrategy dimCompression = indexSpec.getDimensionCompression();
    final BitmapSerdeFactory bitmapFactory = indexSpec.getBitmapSerdeFactory();

    final ColumnDescriptor.Builder descriptorBuilder = ColumnDescriptor.builder();
    descriptorBuilder.setValueType(ValueType.STRING);
    descriptorBuilder.setHasMultipleValues(multiValued);

    // Values count as compressed for any strategy other than UNCOMPRESSED.
    final boolean compressed = dimCompression != CompressionStrategy.UNCOMPRESSED;
    final DictionaryEncodedColumnPartSerde.SerializerBuilder serdeBuilder =
        DictionaryEncodedColumnPartSerde.serializerBuilder()
            .withDictionary(dictionaryWriter)
            .withValue(encodedValueSerializer, multiValued, compressed)
            .withBitmapSerdeFactory(bitmapFactory)
            .withBitmapIndex(bitmapWriter)
            .withSpatialIndex(spatialWriter)
            .withByteOrder(IndexIO.BYTE_ORDER);

    return descriptorBuilder.addSerde(serdeBuilder.build()).build();
}
Also used : ColumnDescriptor(org.apache.druid.segment.column.ColumnDescriptor) CompressionStrategy(org.apache.druid.segment.data.CompressionStrategy) DictionaryEncodedColumnPartSerde(org.apache.druid.segment.serde.DictionaryEncodedColumnPartSerde) BitmapSerdeFactory(org.apache.druid.segment.data.BitmapSerdeFactory)

Aggregations

ColumnDescriptor (org.apache.druid.segment.column.ColumnDescriptor)7 ColumnPartSerde (org.apache.druid.segment.serde.ColumnPartSerde)3 PrestoException (com.facebook.presto.spi.PrestoException)1 File (java.io.File)1 FileOutputStream (java.io.FileOutputStream)1 IOException (java.io.IOException)1 ByteBuffer (java.nio.ByteBuffer)1 IntBuffer (java.nio.IntBuffer)1 ArrayList (java.util.ArrayList)1 TreeMap (java.util.TreeMap)1 Closer (org.apache.druid.java.util.common.io.Closer)1 FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher)1 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)1 ColumnCapabilities (org.apache.druid.segment.column.ColumnCapabilities)1 ValueType (org.apache.druid.segment.column.ValueType)1 BitmapSerdeFactory (org.apache.druid.segment.data.BitmapSerdeFactory)1 CompressionStrategy (org.apache.druid.segment.data.CompressionStrategy)1 MMappedQueryableSegmentizerFactory (org.apache.druid.segment.loading.MMappedQueryableSegmentizerFactory)1 SegmentizerFactory (org.apache.druid.segment.loading.SegmentizerFactory)1 DictionaryEncodedColumnPartSerde (org.apache.druid.segment.serde.DictionaryEncodedColumnPartSerde)1