Example 6 with FileSmoosher

Use of org.apache.druid.java.util.common.io.smoosh.FileSmoosher in project druid by druid-io.

From class IndexMergerV9, method makeIndexFiles.

private File makeIndexFiles(
        final List<IndexableAdapter> adapters,
        @Nullable final AggregatorFactory[] metricAggs,
        final File outDir,
        final ProgressIndicator progress,
        final List<String> mergedDimensions,
        final List<String> mergedMetrics,
        final Function<List<TransformableRowIterator>, TimeAndDimsIterator> rowMergerFn,
        final boolean fillRowNumConversions,
        final IndexSpec indexSpec,
        @Nullable final SegmentWriteOutMediumFactory segmentWriteOutMediumFactory
) throws IOException {
    progress.start();
    progress.progress();
    List<Metadata> metadataList = Lists.transform(adapters, IndexableAdapter::getMetadata);
    final Metadata segmentMetadata;
    if (metricAggs != null) {
        AggregatorFactory[] combiningMetricAggs = new AggregatorFactory[metricAggs.length];
        for (int i = 0; i < metricAggs.length; i++) {
            combiningMetricAggs[i] = metricAggs[i].getCombiningFactory();
        }
        segmentMetadata = Metadata.merge(metadataList, combiningMetricAggs);
    } else {
        segmentMetadata = Metadata.merge(metadataList, null);
    }
    Closer closer = Closer.create();
    try {
        final FileSmoosher v9Smoosher = new FileSmoosher(outDir);
        FileUtils.mkdirp(outDir);
        SegmentWriteOutMediumFactory omf = segmentWriteOutMediumFactory != null ? segmentWriteOutMediumFactory : defaultSegmentWriteOutMediumFactory;
        log.debug("Using SegmentWriteOutMediumFactory[%s]", omf.getClass().getSimpleName());
        SegmentWriteOutMedium segmentWriteOutMedium = omf.makeSegmentWriteOutMedium(outDir);
        closer.register(segmentWriteOutMedium);
        long startTime = System.currentTimeMillis();
        Files.asByteSink(new File(outDir, "version.bin")).write(Ints.toByteArray(IndexIO.V9_VERSION));
        log.debug("Completed version.bin in %,d millis.", System.currentTimeMillis() - startTime);
        progress.progress();
        startTime = System.currentTimeMillis();
        try (FileOutputStream fos = new FileOutputStream(new File(outDir, "factory.json"))) {
            SegmentizerFactory customSegmentLoader = indexSpec.getSegmentLoader();
            if (customSegmentLoader != null) {
                mapper.writeValue(fos, customSegmentLoader);
            } else {
                mapper.writeValue(fos, new MMappedQueryableSegmentizerFactory(indexIO));
            }
        }
        log.debug("Completed factory.json in %,d millis", System.currentTimeMillis() - startTime);
        progress.progress();
        final Map<String, ValueType> metricsValueTypes = new TreeMap<>(Comparators.naturalNullsFirst());
        final Map<String, String> metricTypeNames = new TreeMap<>(Comparators.naturalNullsFirst());
        final List<ColumnCapabilities> dimCapabilities = Lists.newArrayListWithCapacity(mergedDimensions.size());
        mergeCapabilities(adapters, mergedDimensions, metricsValueTypes, metricTypeNames, dimCapabilities);
        final Map<String, DimensionHandler> handlers = makeDimensionHandlers(mergedDimensions, dimCapabilities);
        final List<DimensionMergerV9> mergers = new ArrayList<>();
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionHandler handler = handlers.get(mergedDimensions.get(i));
            mergers.add(handler.makeMerger(indexSpec, segmentWriteOutMedium, dimCapabilities.get(i), progress, closer));
        }
        /**
         *********** Setup Dim Conversions *************
         */
        progress.progress();
        startTime = System.currentTimeMillis();
        writeDimValuesAndSetupDimConversion(adapters, progress, mergedDimensions, mergers);
        log.debug("Completed dim conversions in %,d millis.", System.currentTimeMillis() - startTime);
        /**
         *********** Walk through data sets, merge them, and write merged columns ************
         */
        progress.progress();
        final TimeAndDimsIterator timeAndDimsIterator = makeMergedTimeAndDimsIterator(adapters, mergedDimensions, mergedMetrics, rowMergerFn, handlers, mergers);
        closer.register(timeAndDimsIterator);
        final GenericColumnSerializer timeWriter = setupTimeWriter(segmentWriteOutMedium, indexSpec);
        final ArrayList<GenericColumnSerializer> metricWriters = setupMetricsWriters(segmentWriteOutMedium, mergedMetrics, metricsValueTypes, metricTypeNames, indexSpec);
        List<IntBuffer> rowNumConversions = mergeIndexesAndWriteColumns(adapters, progress, timeAndDimsIterator, timeWriter, metricWriters, mergers, fillRowNumConversions);
        /**
         ********** Create Inverted Indexes and Finalize Build Columns ************
         */
        final String section = "build inverted index and columns";
        progress.startSection(section);
        makeTimeColumn(v9Smoosher, progress, timeWriter, indexSpec);
        makeMetricsColumns(v9Smoosher, progress, mergedMetrics, metricsValueTypes, metricTypeNames, metricWriters, indexSpec);
        for (int i = 0; i < mergedDimensions.size(); i++) {
            DimensionMergerV9 merger = mergers.get(i);
            merger.writeIndexes(rowNumConversions);
            if (merger.canSkip()) {
                continue;
            }
            ColumnDescriptor columnDesc = merger.makeColumnDescriptor();
            makeColumn(v9Smoosher, mergedDimensions.get(i), columnDesc);
        }
        progress.stopSection(section);
        /**
         *********** Make index.drd & metadata.drd files *************
         */
        progress.progress();
        makeIndexBinary(v9Smoosher, adapters, outDir, mergedDimensions, mergedMetrics, progress, indexSpec, mergers);
        makeMetadataBinary(v9Smoosher, progress, segmentMetadata);
        v9Smoosher.close();
        progress.stop();
        return outDir;
    } catch (Throwable t) {
        throw closer.rethrow(t);
    } finally {
        closer.close();
    }
}
Also used: ArrayList (java.util.ArrayList), SegmentWriteOutMediumFactory (org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory), ColumnCapabilities (org.apache.druid.segment.column.ColumnCapabilities), SegmentWriteOutMedium (org.apache.druid.segment.writeout.SegmentWriteOutMedium), FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher), Closer (org.apache.druid.java.util.common.io.Closer), ValueType (org.apache.druid.segment.column.ValueType), ColumnDescriptor (org.apache.druid.segment.column.ColumnDescriptor), MMappedQueryableSegmentizerFactory (org.apache.druid.segment.loading.MMappedQueryableSegmentizerFactory), SegmentizerFactory (org.apache.druid.segment.loading.SegmentizerFactory), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory), TreeMap (java.util.TreeMap), FileOutputStream (java.io.FileOutputStream), IntBuffer (java.nio.IntBuffer), File (java.io.File)
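The try/catch/finally shape around the Closer above is the standard idiom for composite resources: everything registered with the Closer is closed in the finally block, and rethrow records the primary failure so close-time exceptions become suppressed rather than masking it. A minimal sketch of just that skeleton (the resource parameter is illustrative, not from IndexMergerV9):

import java.io.Closeable;
import java.io.IOException;
import org.apache.druid.java.util.common.io.Closer;

public class CloserIdiom {
    static void runWithCloser(Closeable resource) throws IOException {
        Closer closer = Closer.create();
        try {
            // Registered resources are closed in reverse registration order.
            closer.register(resource);
            // ... work that may throw ...
        } catch (Throwable t) {
            // rethrow() records t; exceptions thrown later by close() are suppressed.
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
    }
}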

Example 7 with FileSmoosher

Use of org.apache.druid.java.util.common.io.smoosh.FileSmoosher in project druid by druid-io.

From class GenericIndexedBenchmark, method createGenericIndexed.

@Setup(Level.Trial)
public void createGenericIndexed() throws IOException {
    GenericIndexedWriter<byte[]> genericIndexedWriter = new GenericIndexedWriter<>(new OffHeapMemorySegmentWriteOutMedium(), "genericIndexedBenchmark", BYTE_ARRAY_STRATEGY);
    genericIndexedWriter.open();
    // GenericIndexedWriter caches prevObject for comparison, so two alternating buffers are needed for a correct objectsSorted computation.
    ByteBuffer[] elements = new ByteBuffer[2];
    elements[0] = ByteBuffer.allocate(elementSize);
    elements[1] = ByteBuffer.allocate(elementSize);
    for (int i = 0; i < n; i++) {
        ByteBuffer element = elements[i & 1];
        element.putInt(0, i);
        genericIndexedWriter.write(element.array());
    }
    smooshDir = FileUtils.createTempDir();
    file = File.createTempFile("genericIndexedBenchmark", "meta");
    try (FileChannel fileChannel = FileChannel.open(file.toPath(), StandardOpenOption.CREATE, StandardOpenOption.WRITE);
        FileSmoosher fileSmoosher = new FileSmoosher(smooshDir)) {
        genericIndexedWriter.writeTo(fileChannel, fileSmoosher);
    }
    FileChannel fileChannel = FileChannel.open(file.toPath());
    MappedByteBuffer byteBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, file.length());
    genericIndexed = GenericIndexed.read(byteBuffer, BYTE_ARRAY_STRATEGY, SmooshedFileMapper.load(smooshDir));
}
Also used: GenericIndexedWriter (org.apache.druid.segment.data.GenericIndexedWriter), MappedByteBuffer (java.nio.MappedByteBuffer), OffHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium), FileChannel (java.nio.channels.FileChannel), FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher), ByteBuffer (java.nio.ByteBuffer), Setup (org.openjdk.jmh.annotations.Setup)
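For payloads that already fit in memory, FileSmoosher can also take whole buffers instead of going through a SmooshedWriter. A minimal round-trip sketch, assuming FileSmoosher's add(String, ByteBuffer) overload (the "payload" name is illustrative):

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.druid.java.util.common.FileUtils;
import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper;

public class SmooshRoundTrip {
    public static void main(String[] args) throws IOException {
        File dir = FileUtils.createTempDir();
        // FileSmoosher is Closeable; closing it finalizes the smoosh metadata.
        try (FileSmoosher smoosher = new FileSmoosher(dir)) {
            smoosher.add("payload", ByteBuffer.wrap(new byte[]{1, 2, 3}));
        }
        SmooshedFileMapper mapper = SmooshedFileMapper.load(dir);
        ByteBuffer payload = mapper.mapFile("payload");
        System.out.println(payload.remaining()); // prints 3
        mapper.close();
    }
}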

Example 8 with FileSmoosher

Use of org.apache.druid.java.util.common.io.smoosh.FileSmoosher in project druid by druid-io.

From class CompressedVSizeColumnarIntsSerializerTest, method checkV2SerializedSizeAndData.

private void checkV2SerializedSizeAndData(int chunkSize) throws Exception {
    File tmpDirectory = temporaryFolder.newFolder();
    FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
    final String columnName = "test";
    GenericIndexedWriter genericIndexed = GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "test", compressionStrategy, Long.BYTES * 10000);
    CompressedVSizeColumnarIntsSerializer writer = new CompressedVSizeColumnarIntsSerializer(
        columnName,
        segmentWriteOutMedium,
        vals.length > 0 ? Ints.max(vals) : 0,
        chunkSize,
        byteOrder,
        compressionStrategy,
        genericIndexed
    );
    writer.open();
    for (int val : vals) {
        writer.addValue(val);
    }
    final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
    writer.writeTo(channel, smoosher);
    channel.close();
    smoosher.close();
    SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
    CompressedVSizeColumnarIntsSupplier supplierFromByteBuffer = CompressedVSizeColumnarIntsSupplier.fromByteBuffer(mapper.mapFile("test"), byteOrder);
    ColumnarInts columnarInts = supplierFromByteBuffer.get();
    for (int i = 0; i < vals.length; ++i) {
        Assert.assertEquals(vals[i], columnarInts.get(i));
    }
    CloseableUtils.closeAll(columnarInts, mapper);
}
Also used: SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter), FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher), File (java.io.File), SmooshedFileMapper (org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper)
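The serializer tests in Examples 8 through 10 reference fields (temporaryFolder, segmentWriteOutMedium, vals, byteOrder, compressionStrategy) that their test classes set up elsewhere. A hedged sketch of that scaffolding, with illustrative values (vals is an int[] in Examples 8 and 10; Example 9 uses a list of int[] instead):

import java.nio.ByteOrder;
import org.apache.druid.segment.data.CompressionStrategy;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium;
import org.apache.druid.segment.writeout.SegmentWriteOutMedium;
import org.junit.Rule;
import org.junit.rules.TemporaryFolder;

public class SerializerTestScaffold {
    @Rule
    public final TemporaryFolder temporaryFolder = new TemporaryFolder();

    // The off-heap medium keeps intermediate column bytes out of temp files.
    private final SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium();
    private final ByteOrder byteOrder = ByteOrder.nativeOrder();
    private final CompressionStrategy compressionStrategy = CompressionStrategy.LZ4;
    private final int[] vals = {0, 1, 1, 2, 3, 5, 8, 13};
}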

Example 9 with FileSmoosher

Use of org.apache.druid.java.util.common.io.smoosh.FileSmoosher in project druid by druid-io.

From class V3CompressedVSizeColumnarMultiIntsSerializerTest, method checkV2SerializedSizeAndData.

private void checkV2SerializedSizeAndData(int offsetChunkFactor, int valueChunkFactor) throws Exception {
    File tmpDirectory = FileUtils.createTempDir(StringUtils.format("CompressedVSizeIndexedV3WriterTest_%d_%d", offsetChunkFactor, valueChunkFactor));
    FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
    int maxValue = vals.size() > 0 ? getMaxValue(vals) : 0;
    try (SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium()) {
        CompressedColumnarIntsSerializer offsetWriter = new CompressedColumnarIntsSerializer(
            TEST_COLUMN_NAME,
            segmentWriteOutMedium,
            offsetChunkFactor,
            byteOrder,
            compressionStrategy,
            GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "offset", compressionStrategy, Long.BYTES * 250000)
        );
        GenericIndexedWriter genericIndexed = GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "value", compressionStrategy, Long.BYTES * 250000);
        CompressedVSizeColumnarIntsSerializer valueWriter = new CompressedVSizeColumnarIntsSerializer(
            TEST_COLUMN_NAME,
            segmentWriteOutMedium,
            maxValue,
            valueChunkFactor,
            byteOrder,
            compressionStrategy,
            genericIndexed
        );
        V3CompressedVSizeColumnarMultiIntsSerializer writer = new V3CompressedVSizeColumnarMultiIntsSerializer(TEST_COLUMN_NAME, offsetWriter, valueWriter);
        writer.open();
        for (int[] val : vals) {
            writer.addValues(new ArrayBasedIndexedInts(val));
        }
        final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
        writer.writeTo(channel, smoosher);
        channel.close();
        smoosher.close();
        SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
        V3CompressedVSizeColumnarMultiIntsSupplier supplierFromByteBuffer = V3CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(mapper.mapFile("test"), byteOrder);
        ColumnarMultiInts columnarMultiInts = supplierFromByteBuffer.get();
        Assert.assertEquals(columnarMultiInts.size(), vals.size());
        for (int i = 0; i < vals.size(); ++i) {
            IndexedInts subVals = columnarMultiInts.get(i);
            Assert.assertEquals(subVals.size(), vals.get(i).length);
            for (int j = 0, size = subVals.size(); j < size; ++j) {
                Assert.assertEquals(subVals.get(j), vals.get(i)[j]);
            }
        }
        CloseableUtils.closeAll(columnarMultiInts, mapper);
    }
}
Also used: SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter), OffHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium), SegmentWriteOutMedium (org.apache.druid.segment.writeout.SegmentWriteOutMedium), FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher), File (java.io.File), SmooshedFileMapper (org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper)
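Examples 8 through 10 repeat one pattern: reserve exactly getSerializedSize() bytes under a name with addWithSmooshedWriter, stream the column in with writeTo, then close the writer. A hedged generic helper capturing that pattern, assuming Druid's org.apache.druid.segment.serde.Serializer interface (which these serializers implement):

import java.io.IOException;
import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter;
import org.apache.druid.segment.serde.Serializer;

public final class SmooshHelper {
    private SmooshHelper() {}

    // Reserves the serializer's exact size under `name`, streams it in, and
    // closes the SmooshedWriter even if writeTo throws.
    public static void smoosh(FileSmoosher smoosher, String name, Serializer serializer) throws IOException {
        try (SmooshedWriter channel = smoosher.addWithSmooshedWriter(name, serializer.getSerializedSize())) {
            serializer.writeTo(channel, smoosher);
        }
    }
}

With such a helper, each test body would reduce to writer setup, smoosh(smoosher, "test", writer), and the read-back assertions.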

Example 10 with FileSmoosher

Use of org.apache.druid.java.util.common.io.smoosh.FileSmoosher in project druid by druid-io.

From class CompressedColumnarIntsSerializerTest, method checkV2SerializedSizeAndData.

private void checkV2SerializedSizeAndData(int chunkFactor) throws Exception {
    File tmpDirectory = FileUtils.createTempDir(StringUtils.format("CompressedIntsIndexedWriterTest_%d", chunkFactor));
    FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
    CompressedColumnarIntsSerializer writer = new CompressedColumnarIntsSerializer(
        "test",
        segmentWriteOutMedium,
        chunkFactor,
        byteOrder,
        compressionStrategy,
        GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "test", compressionStrategy, Long.BYTES * 10000)
    );
    writer.open();
    for (int val : vals) {
        writer.addValue(val);
    }
    final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
    writer.writeTo(channel, smoosher);
    channel.close();
    smoosher.close();
    SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
    // read from ByteBuffer and check values
    CompressedColumnarIntsSupplier supplierFromByteBuffer = CompressedColumnarIntsSupplier.fromByteBuffer(mapper.mapFile("test"), byteOrder);
    ColumnarInts columnarInts = supplierFromByteBuffer.get();
    Assert.assertEquals(vals.length, columnarInts.size());
    for (int i = 0; i < vals.length; ++i) {
        Assert.assertEquals(vals[i], columnarInts.get(i));
    }
    CloseableUtils.closeAndWrapExceptions(columnarInts);
    mapper.close();
}
Also used: SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter), FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher), File (java.io.File), SmooshedFileMapper (org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper)
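Note the two close styles across these tests: Examples 8 and 9 close several resources at once with CloseableUtils.closeAll, while this one wraps the checked exception per resource. A hedged sketch of the distinction (the import path for CloseableUtils is an assumption; only the two method names are confirmed by the tests above):

import java.io.Closeable;
import java.io.IOException;
import org.apache.druid.utils.CloseableUtils;

public class CloseStyles {
    static void closeBoth(Closeable a, Closeable b) throws IOException {
        // Closes every argument; the first failure propagates, later ones are suppressed.
        CloseableUtils.closeAll(a, b);
    }

    static void closeUnchecked(Closeable a) {
        // Closes one resource, rethrowing any IOException as an unchecked exception.
        CloseableUtils.closeAndWrapExceptions(a);
    }
}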

Aggregations

FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher): 11
File (java.io.File): 6
SmooshedFileMapper (org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper): 5
SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter): 5
OffHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium): 5
SegmentWriteOutMedium (org.apache.druid.segment.writeout.SegmentWriteOutMedium): 5
WriteOutBytes (org.apache.druid.segment.writeout.WriteOutBytes): 3
Test (org.junit.Test): 2
FileOutputStream (java.io.FileOutputStream): 1
ByteBuffer (java.nio.ByteBuffer): 1
IntBuffer (java.nio.IntBuffer): 1
MappedByteBuffer (java.nio.MappedByteBuffer): 1
FileChannel (java.nio.channels.FileChannel): 1
ArrayList (java.util.ArrayList): 1
Random (java.util.Random): 1
TreeMap (java.util.TreeMap): 1
Nullable (javax.annotation.Nullable): 1
HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector): 1
Closer (org.apache.druid.java.util.common.io.Closer): 1
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 1