
Example 6 with SmooshedWriter

Use of org.apache.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io: class GenericIndexedWriter, method writeToMultiFiles.

private void writeToMultiFiles(WritableByteChannel channel, FileSmoosher smoosher) throws IOException {
    // Sanity checks: one header offset per written row, and the header's total byte size must stay
    // safely below Integer.MAX_VALUE (rounded down to a page boundary) so it can be addressed as a single file.
    Preconditions.checkState(headerOutLong.size() == numWritten, "numWritten[%s] number of rows doesn't match headerOutLong's size[%s]", numWritten, headerOutLong.size());
    Preconditions.checkState((((long) headerOutLong.size()) * Long.BYTES) < (Integer.MAX_VALUE & ~PAGE_SIZE), "Wrote[%s] bytes in header, which is too many.", (((long) headerOutLong.size()) * Long.BYTES));
    if (smoosher == null) {
        throw new IAE("version 2 GenericIndexedWriter requires FileSmoosher.");
    }
    int bagSizePower = bagSizePower();
    MULTI_FILE_META_SERDE_HELPER.writeTo(channel, this);
    long previousValuePosition = 0;
    // Each "bag" holds 2^bagSizePower values; every full bag gets its own smoosh value file.
    int bagSize = 1 << bagSizePower;
    int numberOfFilesRequired = GenericIndexed.getNumberOfFilesRequired(bagSize, numWritten);
    byte[] buffer = new byte[1 << 16];
    try (InputStream is = valuesOut.asInputStream()) {
        int counter = -1;
        for (int i = 0; i < numberOfFilesRequired; i++) {
            long valuePosition;
            if (i != numberOfFilesRequired - 1) {
                // End offset of the last value in this bag, taken from the header of value offsets.
                valuePosition = headerOutLong.getLong(bagSize + counter);
                counter = counter + bagSize;
            } else {
                // The final file takes whatever values remain, up to the last written offset.
                valuePosition = headerOutLong.getLong(numWritten - 1);
            }
            // Number of value bytes that belong to this file.
            long numBytesToPutInFile = valuePosition - previousValuePosition;
            try (SmooshedWriter smooshChannel = smoosher.addWithSmooshedWriter(generateValueFileName(filenameBase, i), numBytesToPutInFile)) {
                writeBytesIntoSmooshedChannel(numBytesToPutInFile, buffer, smooshChannel, is);
                previousValuePosition = valuePosition;
            }
        }
    }
    writeHeaderLong(smoosher, bagSizePower);
}
Also used: SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter), DataInputStream (java.io.DataInputStream), InputStream (java.io.InputStream), IAE (org.apache.druid.java.util.common.IAE)
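
The snippet above calls writeBytesIntoSmooshedChannel, a private helper of GenericIndexedWriter whose body is not shown here. A minimal sketch of what such a copy helper might look like, assuming it simply streams the requested number of bytes from the InputStream into the SmooshedWriter through the shared buffer (the signature is inferred from the call site, java.nio.ByteBuffer is assumed as an extra import, and the body is an illustration rather than the actual Druid implementation):

private static void writeBytesIntoSmooshedChannel(
    long numBytesToPutInFile,
    final byte[] buffer,
    final SmooshedWriter smooshChannel,
    final InputStream is
) throws IOException
{
    while (numBytesToPutInFile > 0) {
        // Never read more than this value file is supposed to hold, even if the stream has more data.
        int bytesToRead = Math.toIntExact(Math.min(buffer.length, numBytesToPutInFile));
        int bytesRead = is.read(buffer, 0, bytesToRead);
        if (bytesRead < 0) {
            throw new IOException("Stream ended before the expected number of bytes was copied");
        }
        ByteBuffer holder = ByteBuffer.wrap(buffer, 0, bytesRead);
        // SmooshedWriter is a WritableByteChannel; loop until the buffer is drained,
        // since a channel may accept fewer bytes per call.
        while (holder.hasRemaining()) {
            smooshChannel.write(holder);
        }
        numBytesToPutInFile -= bytesRead;
    }
}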

Example 7 with SmooshedWriter

Use of org.apache.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io: class CompressedVSizeColumnarIntsSerializerTest, method checkV2SerializedSizeAndData.

private void checkV2SerializedSizeAndData(int chunkSize) throws Exception {
    File tmpDirectory = temporaryFolder.newFolder();
    FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
    final String columnName = "test";
    GenericIndexedWriter genericIndexed = GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "test", compressionStrategy, Long.BYTES * 10000);
    CompressedVSizeColumnarIntsSerializer writer = new CompressedVSizeColumnarIntsSerializer(columnName, segmentWriteOutMedium, vals.length > 0 ? Ints.max(vals) : 0, chunkSize, byteOrder, compressionStrategy, genericIndexed);
    writer.open();
    for (int val : vals) {
        writer.addValue(val);
    }
    final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
    writer.writeTo(channel, smoosher);
    channel.close();
    smoosher.close();
    SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
    CompressedVSizeColumnarIntsSupplier supplierFromByteBuffer = CompressedVSizeColumnarIntsSupplier.fromByteBuffer(mapper.mapFile("test"), byteOrder);
    ColumnarInts columnarInts = supplierFromByteBuffer.get();
    for (int i = 0; i < vals.length; ++i) {
        Assert.assertEquals(vals[i], columnarInts.get(i));
    }
    CloseableUtils.closeAll(columnarInts, mapper);
}
Also used: SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter), FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher), File (java.io.File), SmooshedFileMapper (org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper)

Example 8 with SmooshedWriter

Use of org.apache.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io: class V3CompressedVSizeColumnarMultiIntsSerializerTest, method checkV2SerializedSizeAndData.

private void checkV2SerializedSizeAndData(int offsetChunkFactor, int valueChunkFactor) throws Exception {
    File tmpDirectory = FileUtils.createTempDir(StringUtils.format("CompressedVSizeIndexedV3WriterTest_%d_%d", offsetChunkFactor, valueChunkFactor));
    FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
    int maxValue = vals.size() > 0 ? getMaxValue(vals) : 0;
    try (SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium()) {
        CompressedColumnarIntsSerializer offsetWriter = new CompressedColumnarIntsSerializer(TEST_COLUMN_NAME, segmentWriteOutMedium, offsetChunkFactor, byteOrder, compressionStrategy, GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "offset", compressionStrategy, Long.BYTES * 250000));
        GenericIndexedWriter genericIndexed = GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "value", compressionStrategy, Long.BYTES * 250000);
        CompressedVSizeColumnarIntsSerializer valueWriter = new CompressedVSizeColumnarIntsSerializer(TEST_COLUMN_NAME, segmentWriteOutMedium, maxValue, valueChunkFactor, byteOrder, compressionStrategy, genericIndexed);
        V3CompressedVSizeColumnarMultiIntsSerializer writer = new V3CompressedVSizeColumnarMultiIntsSerializer(TEST_COLUMN_NAME, offsetWriter, valueWriter);
        writer.open();
        for (int[] val : vals) {
            writer.addValues(new ArrayBasedIndexedInts(val));
        }
        final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
        writer.writeTo(channel, smoosher);
        channel.close();
        smoosher.close();
        SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
        V3CompressedVSizeColumnarMultiIntsSupplier supplierFromByteBuffer = V3CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(mapper.mapFile("test"), byteOrder);
        ColumnarMultiInts columnarMultiInts = supplierFromByteBuffer.get();
        Assert.assertEquals(columnarMultiInts.size(), vals.size());
        for (int i = 0; i < vals.size(); ++i) {
            IndexedInts subVals = columnarMultiInts.get(i);
            Assert.assertEquals(subVals.size(), vals.get(i).length);
            for (int j = 0, size = subVals.size(); j < size; ++j) {
                Assert.assertEquals(subVals.get(j), vals.get(i)[j]);
            }
        }
        CloseableUtils.closeAll(columnarMultiInts, mapper);
    }
}
Also used: SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter), OffHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium), SegmentWriteOutMedium (org.apache.druid.segment.writeout.SegmentWriteOutMedium), FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher), File (java.io.File), SmooshedFileMapper (org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper)

Example 9 with SmooshedWriter

Use of org.apache.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io: class CompressedColumnarIntsSerializerTest, method checkV2SerializedSizeAndData.

private void checkV2SerializedSizeAndData(int chunkFactor) throws Exception {
    File tmpDirectory = FileUtils.createTempDir(StringUtils.format("CompressedIntsIndexedWriterTest_%d", chunkFactor));
    FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
    CompressedColumnarIntsSerializer writer = new CompressedColumnarIntsSerializer("test", segmentWriteOutMedium, chunkFactor, byteOrder, compressionStrategy, GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "test", compressionStrategy, Long.BYTES * 10000));
    writer.open();
    for (int val : vals) {
        writer.addValue(val);
    }
    final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
    writer.writeTo(channel, smoosher);
    channel.close();
    smoosher.close();
    SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
    // read from ByteBuffer and check values
    CompressedColumnarIntsSupplier supplierFromByteBuffer = CompressedColumnarIntsSupplier.fromByteBuffer(mapper.mapFile("test"), byteOrder);
    ColumnarInts columnarInts = supplierFromByteBuffer.get();
    Assert.assertEquals(vals.length, columnarInts.size());
    for (int i = 0; i < vals.length; ++i) {
        Assert.assertEquals(vals[i], columnarInts.get(i));
    }
    CloseableUtils.closeAndWrapExceptions(columnarInts);
    mapper.close();
}
Also used: SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter), FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher), File (java.io.File), SmooshedFileMapper (org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper)
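
Examples 7 through 9 all reduce to the same write-then-map-back round trip. A minimal, self-contained sketch of that pattern, using only calls that appear in the snippets above (the method name, the entry name "test", and the payload are placeholders for this sketch; java.nio.ByteBuffer and java.nio.charset.StandardCharsets are assumed in addition to the smoosh imports listed in the "Also used" lines):

private void checkSmooshRoundTrip() throws Exception {
    File tmpDirectory = FileUtils.createTempDir("SmooshRoundTripSketch");
    byte[] payload = "hello smoosh".getBytes(StandardCharsets.UTF_8);
    FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
    // The entry's size must be declared up front; the SmooshedWriter is then used as a plain WritableByteChannel.
    try (SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", payload.length)) {
        channel.write(ByteBuffer.wrap(payload));
    }
    smoosher.close();
    // Map the directory back and look the entry up by the name it was written under.
    SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
    ByteBuffer mapped = mapper.mapFile("test");
    byte[] roundTripped = new byte[mapped.remaining()];
    mapped.get(roundTripped);
    Assert.assertArrayEquals(payload, roundTripped);
    mapper.close();
}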

Aggregations

SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter): 9
File (java.io.File): 6
FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher): 5
SmooshedFileMapper (org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper): 5
OffHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium): 3
SegmentWriteOutMedium (org.apache.druid.segment.writeout.SegmentWriteOutMedium): 3
DataInputStream (java.io.DataInputStream): 1
InputStream (java.io.InputStream): 1
ByteBuffer (java.nio.ByteBuffer): 1
LinkedHashSet (java.util.LinkedHashSet): 1
Random (java.util.Random): 1
Nullable (javax.annotation.Nullable): 1
HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector): 1
ZeroCopyByteArrayOutputStream (org.apache.druid.io.ZeroCopyByteArrayOutputStream): 1
IAE (org.apache.druid.java.util.common.IAE): 1
RuntimeShapeInspector (org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector): 1
ObjectColumnSelector (org.apache.druid.segment.ObjectColumnSelector): 1
ColumnBuilder (org.apache.druid.segment.column.ColumnBuilder): 1
ColumnHolder (org.apache.druid.segment.column.ColumnHolder): 1
ComplexColumn (org.apache.druid.segment.column.ComplexColumn): 1