
Example 1 with SmooshedWriter

Use of org.apache.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.

From the class V3CompressedVSizeColumnarMultiIntsSerializerTest, method generateV2SerializedSizeAndData:

private void generateV2SerializedSizeAndData(long numRows, int maxValue, int maxValuesPerRow, int offsetChunkFactor, int valueChunkFactor) throws Exception {
    File tmpDirectory = FileUtils.createTempDir(StringUtils.format("CompressedVSizeIndexedV3WriterTest_%d_%d", offsetChunkFactor, valueChunkFactor));
    FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
    try (SegmentWriteOutMedium segmentWriteOutMedium = TmpFileSegmentWriteOutMediumFactory.instance().makeSegmentWriteOutMedium(temporaryFolder.newFolder())) {
        // The V3 multi-value layout pairs a compressed offsets column with a compressed values column.
        CompressedColumnarIntsSerializer offsetWriter = new CompressedColumnarIntsSerializer(
            TEST_COLUMN_NAME, segmentWriteOutMedium, offsetChunkFactor, byteOrder, compressionStrategy,
            GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "offset", compressionStrategy, Long.BYTES * 250000));
        GenericIndexedWriter genericIndexed = GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "value", compressionStrategy, Long.BYTES * 250000);
        CompressedVSizeColumnarIntsSerializer valueWriter = new CompressedVSizeColumnarIntsSerializer(
            TEST_COLUMN_NAME, segmentWriteOutMedium, maxValue, valueChunkFactor, byteOrder, compressionStrategy, genericIndexed);
        V3CompressedVSizeColumnarMultiIntsSerializer writer = new V3CompressedVSizeColumnarMultiIntsSerializer(TEST_COLUMN_NAME, offsetWriter, valueWriter);
        writer.open();
        for (long l = 0L; l < numRows; l++) {
            writer.addValues(new ArrayBasedIndexedInts(generateRow(rand, maxValue, maxValuesPerRow)));
        }
        final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
        writer.writeTo(channel, smoosher);
        channel.close();
        smoosher.close();
        SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
        V3CompressedVSizeColumnarMultiIntsSupplier supplierFromByteBuffer = V3CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(mapper.mapFile("test"), byteOrder, mapper);
        ColumnarMultiInts columnarMultiInts = supplierFromByteBuffer.get();
        Assert.assertEquals(numRows, columnarMultiInts.size());
        // Replay the generator's seeded Random so each expected row can be re-derived.
        Random verifier = new Random(0);
        for (int i = 0; i < numRows; ++i) {
            IndexedInts subVals = columnarMultiInts.get(i);
            int[] expected = generateRow(verifier, maxValue, maxValuesPerRow);
            Assert.assertEquals(expected.length, subVals.size());
            for (int j = 0, size = subVals.size(); j < size; ++j) {
                Assert.assertEquals(expected[j], subVals.get(j));
            }
        }
        CloseableUtils.closeAll(columnarMultiInts, mapper);
    }
}
Also used: SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter), OffHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium), SegmentWriteOutMedium (org.apache.druid.segment.writeout.SegmentWriteOutMedium), Random (java.util.Random), FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher), File (java.io.File), SmooshedFileMapper (org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper)
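
The example above relies on the core SmooshedWriter contract: addWithSmooshedWriter(name, size) reserves exactly size bytes inside the smoosh file, so the serializer's getSerializedSize() must be known before writeTo() runs, and the bytes actually written should match the declared size. A minimal, self-contained sketch of that lifecycle, using only the APIs shown in these examples (the directory and entry names are made up for illustration):

import java.io.File;
import java.nio.ByteBuffer;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.io.smoosh.FileSmoosher;
import org.apache.druid.java.util.common.io.smoosh.Smoosh;
import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper;
import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter;

public class SmooshRoundTrip {
    public static void main(String[] args) throws Exception {
        File dir = new File("smoosh-demo");  // hypothetical scratch directory
        dir.mkdirs();
        byte[] payload = StringUtils.toUtf8("hello smoosh");
        try (FileSmoosher smoosher = new FileSmoosher(dir)) {
            // Declare the exact size up front, then write exactly that many bytes.
            try (SmooshedWriter channel = smoosher.addWithSmooshedWriter("demo", payload.length)) {
                channel.write(ByteBuffer.wrap(payload));
            }
        }
        // Map the directory back and look the entry up by name, as the test does with "test".
        SmooshedFileMapper mapper = Smoosh.map(dir);
        ByteBuffer mapped = mapper.mapFile("demo");
        byte[] out = new byte[mapped.remaining()];
        mapped.get(out);
        System.out.println(StringUtils.fromUtf8(out));
        mapper.close();
    }
}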

Example 2 with SmooshedWriter

Use of org.apache.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.

From the class LargeColumnSupportedComplexColumnSerializerTest, method testSanity:

@Test
public void testSanity() throws IOException {
    HyperUniquesSerdeForTest serde = new HyperUniquesSerdeForTest(Hashing.murmur3_128());
    int[] cases = { 1000, 5000, 10000, 20000 };
    int[] columnSizes = { Integer.MAX_VALUE, Integer.MAX_VALUE / 2, Integer.MAX_VALUE / 4, 5000 * Long.BYTES, 2500 * Long.BYTES };
    for (int columnSize : columnSizes) {
        for (int aCase : cases) {
            File tmpFile = temporaryFolder.newFolder();
            HyperLogLogCollector baseCollector = HyperLogLogCollector.makeLatestCollector();
            try (SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium();
                FileSmoosher v9Smoosher = new FileSmoosher(tmpFile)) {
                LargeColumnSupportedComplexColumnSerializer serializer = LargeColumnSupportedComplexColumnSerializer.createWithColumnSize(segmentWriteOutMedium, "test", serde.getObjectStrategy(), columnSize);
                serializer.open();
                for (int i = 0; i < aCase; i++) {
                    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
                    // 'fn' is the test class's HashFunction field (murmur3_128, matching the serde above).
                    byte[] hashBytes = fn.hashLong(i).asBytes();
                    collector.add(hashBytes);
                    baseCollector.fold(collector);
                    serializer.serialize(new ObjectColumnSelector() {

                        @Nullable
                        @Override
                        public Object getObject() {
                            return collector;
                        }

                        @Override
                        public Class classOfObject() {
                            return HyperLogLogCollector.class;
                        }

                        @Override
                        public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
                            // doesn't matter in tests
                        }
                    });
                }
                try (final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter("test", serializer.getSerializedSize())) {
                    serializer.writeTo(channel, v9Smoosher);
                }
            }
            SmooshedFileMapper mapper = Smoosh.map(tmpFile);
            final ColumnBuilder builder = new ColumnBuilder().setType(ValueType.COMPLEX).setHasMultipleValues(false).setFileMapper(mapper);
            serde.deserializeColumn(mapper.mapFile("test"), builder, null);
            ColumnHolder columnHolder = builder.build();
            ComplexColumn complexColumn = (ComplexColumn) columnHolder.getColumn();
            HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
            for (int i = 0; i < aCase; i++) {
                collector.fold((HyperLogLogCollector) complexColumn.getRowValue(i));
            }
            Assert.assertEquals(baseCollector.estimateCardinality(), collector.estimateCardinality(), 0.0);
        }
    }
}
Also used: SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter), ColumnHolder (org.apache.druid.segment.column.ColumnHolder), HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector), OffHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium), RuntimeShapeInspector (org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector), SegmentWriteOutMedium (org.apache.druid.segment.writeout.SegmentWriteOutMedium), FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher), ColumnBuilder (org.apache.druid.segment.column.ColumnBuilder), File (java.io.File), Nullable (javax.annotation.Nullable), SmooshedFileMapper (org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper), ComplexColumn (org.apache.druid.segment.column.ComplexColumn), ObjectColumnSelector (org.apache.druid.segment.ObjectColumnSelector), Test (org.junit.Test)
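
The columnSizes array is what exercises the "large column" behavior: the serializer splits the column across multiple internal smoosh parts once the serialized data outgrows columnSize, so the two smallest budgets (5000 * Long.BYTES and 2500 * Long.BYTES) force the multi-part path even at 1000 rows, while Integer.MAX_VALUE keeps everything in a single part. A rough, hypothetical estimate of the resulting part count (the per-row sketch size below is an assumption for illustration, not the exact HyperLogLogCollector footprint):

public class ColumnSplitEstimate {
    public static void main(String[] args) {
        long columnSize = 2500L * Long.BYTES;  // 20,000 bytes, the smallest budget in the test
        long assumedBytesPerRow = 1_024;       // assumption: rough serialized size of one HLL sketch
        long rows = 1_000;                     // the smallest case in the test
        long parts = (rows * assumedBytesPerRow + columnSize - 1) / columnSize;  // ceiling division
        System.out.printf("~%d internal parts for %d rows%n", parts, rows);
    }
}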

Example 3 with SmooshedWriter

Use of org.apache.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.

From the class IndexMergerV9, method makeIndexBinary:

private void makeIndexBinary(final FileSmoosher v9Smoosher, final List<IndexableAdapter> adapters, final File outDir, final List<String> mergedDimensions, final List<String> mergedMetrics, final ProgressIndicator progress, final IndexSpec indexSpec, final List<DimensionMergerV9> mergers) throws IOException {
    final String section = "make index.drd";
    progress.startSection(section);
    long startTime = System.currentTimeMillis();
    final Set<String> finalDimensions = new LinkedHashSet<>();
    final Set<String> finalColumns = new LinkedHashSet<>(mergedMetrics);
    for (int i = 0; i < mergedDimensions.size(); ++i) {
        if (mergers.get(i).canSkip()) {
            continue;
        }
        finalColumns.add(mergedDimensions.get(i));
        finalDimensions.add(mergedDimensions.get(i));
    }
    GenericIndexed<String> cols = GenericIndexed.fromIterable(finalColumns, GenericIndexed.STRING_STRATEGY);
    GenericIndexed<String> dims = GenericIndexed.fromIterable(finalDimensions, GenericIndexed.STRING_STRATEGY);
    final String bitmapSerdeFactoryType = mapper.writeValueAsString(indexSpec.getBitmapSerdeFactory());
    // Two GenericIndexed blobs, two longs for the data interval (the "+ 16"), and one length-prefixed string.
    final long numBytes = cols.getSerializedSize() + dims.getSerializedSize() + 16 + SERIALIZER_UTILS.getSerializedStringByteSize(bitmapSerdeFactoryType);
    final SmooshedWriter writer = v9Smoosher.addWithSmooshedWriter("index.drd", numBytes);
    cols.writeTo(writer, v9Smoosher);
    dims.writeTo(writer, v9Smoosher);
    DateTime minTime = DateTimes.MAX;
    DateTime maxTime = DateTimes.MIN;
    for (IndexableAdapter index : adapters) {
        minTime = JodaUtils.minDateTime(minTime, index.getDataInterval().getStart());
        maxTime = JodaUtils.maxDateTime(maxTime, index.getDataInterval().getEnd());
    }
    final Interval dataInterval = new Interval(minTime, maxTime);
    SERIALIZER_UTILS.writeLong(writer, dataInterval.getStartMillis());
    SERIALIZER_UTILS.writeLong(writer, dataInterval.getEndMillis());
    SERIALIZER_UTILS.writeString(writer, bitmapSerdeFactoryType);
    writer.close();
    IndexIO.checkFileSize(new File(outDir, "index.drd"));
    log.debug("Completed index.drd in %,d millis.", System.currentTimeMillis() - startTime);
    progress.stopSection(section);
}
Also used: LinkedHashSet (java.util.LinkedHashSet), SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter), File (java.io.File), DateTime (org.joda.time.DateTime), Interval (org.joda.time.Interval)
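
The numBytes accounting has to match the writes that follow it byte for byte: two GenericIndexed blobs, two raw longs for the data interval, and one string for the bitmap serde type. A small JDK-only sketch of the same accounting, assuming SerializerUtils encodes a string as a 4-byte length prefix followed by its UTF-8 bytes (which is what getSerializedStringByteSize suggests; the JSON value is illustrative):

import java.nio.charset.StandardCharsets;

public class IndexDrdSizing {
    public static void main(String[] args) {
        String bitmapSerdeFactoryType = "{\"type\":\"roaring\"}";  // illustrative value only
        byte[] typeBytes = bitmapSerdeFactoryType.getBytes(StandardCharsets.UTF_8);
        long stringPart = Integer.BYTES + typeBytes.length;  // length prefix + UTF-8 payload
        long intervalPart = 2L * Long.BYTES;                 // start and end millis: the "+ 16"
        System.out.println("string bytes: " + stringPart + ", interval bytes: " + intervalPart);
    }
}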

Example 4 with SmooshedWriter

Use of org.apache.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.

From the class IndexMergerV9, method makeColumn:

private void makeColumn(final FileSmoosher v9Smoosher, final String columnName, final ColumnDescriptor serdeficator) throws IOException {
    ZeroCopyByteArrayOutputStream specBytes = new ZeroCopyByteArrayOutputStream();
    SERIALIZER_UTILS.writeString(specBytes, mapper.writeValueAsString(serdeficator));
    try (SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter(columnName, specBytes.size() + serdeficator.getSerializedSize())) {
        specBytes.writeTo(channel);
        serdeficator.writeTo(channel, v9Smoosher);
    }
}
Also used: ZeroCopyByteArrayOutputStream (org.apache.druid.io.ZeroCopyByteArrayOutputStream), SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter)
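
Every column gets this same two-part layout inside one smoosh entry: a length-prefixed JSON ColumnDescriptor, then the raw column payload. A minimal sketch of how a reader pulls the two parts back apart, assuming the 4-byte length prefix that SERIALIZER_UTILS.writeString appears to use (the descriptor JSON and payload are stand-ins):

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;

public class ColumnEntryLayout {
    public static void main(String[] args) {
        // Build a buffer laid out the way makeColumn writes it: [int length][spec JSON][payload].
        byte[] spec = "{\"valueType\":\"LONG\"}".getBytes(StandardCharsets.UTF_8);  // stand-in descriptor
        byte[] payload = {1, 2, 3, 4};                                              // stand-in column data
        ByteBuffer entry = ByteBuffer.allocate(Integer.BYTES + spec.length + payload.length);
        entry.putInt(spec.length).put(spec).put(payload);
        entry.flip();
        // A reader recovers the descriptor first, leaving the payload as the remainder.
        byte[] specOut = new byte[entry.getInt()];
        entry.get(specOut);
        System.out.println("spec: " + new String(specOut, StandardCharsets.UTF_8));
        System.out.println("payload bytes remaining: " + entry.remaining());
    }
}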

Example 5 with SmooshedWriter

Use of org.apache.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.

From the class GenericIndexedWriter, method writeHeaderLong:

private void writeHeaderLong(FileSmoosher smoosher, int bagSizePower) throws IOException {
    ByteBuffer helperBuffer = ByteBuffer.allocate(Integer.BYTES).order(ByteOrder.nativeOrder());
    int numberOfElementsPerValueFile = 1 << bagSizePower;
    long currentNumBytes = 0;
    long relativeRefBytes = 0;
    long relativeNumBytes;
    try (SmooshedWriter smooshChannel = smoosher.addWithSmooshedWriter(generateHeaderFileName(filenameBase), ((long) numWritten) * Integer.BYTES)) {
        // following block converts long header indexes into int header indexes.
        for (int pos = 0; pos < numWritten; pos++) {
            // At each value-file boundary, reset the reference so subsequent header entries are
            // stored relative to the current offset.
            if ((pos & (numberOfElementsPerValueFile - 1)) == 0) {
                relativeRefBytes = currentNumBytes;
            }
            currentNumBytes = headerOutLong.getLong(pos);
            relativeNumBytes = currentNumBytes - relativeRefBytes;
            helperBuffer.putInt(0, checkedCastNonnegativeLongToInt(relativeNumBytes));
            // putInt(0, ...) was an absolute write, so clear() only resets position/limit before the channel write.
            helperBuffer.clear();
            smooshChannel.write(helperBuffer);
        }
    }
}
Also used: SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter), ByteBuffer (java.nio.ByteBuffer)
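
The conversion is easiest to see with concrete numbers. A standalone replay of the same loop, with a made-up cumulative-offset array and bagSizePower = 2 (so each value file holds 1 << 2 = 4 entries); each header entry becomes an offset relative to the start of its own value file, which is what lets 4-byte ints replace 8-byte longs:

public class RelativeHeaderDemo {
    public static void main(String[] args) {
        int numberOfElementsPerValueFile = 1 << 2;  // bagSizePower = 2
        long[] cumulativeEnds = {100, 220, 310, 405, 512, 640, 700, 815};  // made-up absolute offsets
        long currentNumBytes = 0;
        long relativeRefBytes = 0;
        for (int pos = 0; pos < cumulativeEnds.length; pos++) {
            // At each value-file boundary the previous entry's end offset becomes the new reference.
            if ((pos & (numberOfElementsPerValueFile - 1)) == 0) {
                relativeRefBytes = currentNumBytes;
            }
            currentNumBytes = cumulativeEnds[pos];
            long relativeNumBytes = currentNumBytes - relativeRefBytes;
            System.out.printf("pos %d: absolute %d -> relative %d%n", pos, currentNumBytes, relativeNumBytes);
        }
    }
}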

Aggregations

SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter): 9 uses
File (java.io.File): 6 uses
FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher): 5 uses
SmooshedFileMapper (org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper): 5 uses
OffHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium): 3 uses
SegmentWriteOutMedium (org.apache.druid.segment.writeout.SegmentWriteOutMedium): 3 uses
DataInputStream (java.io.DataInputStream): 1 use
InputStream (java.io.InputStream): 1 use
ByteBuffer (java.nio.ByteBuffer): 1 use
LinkedHashSet (java.util.LinkedHashSet): 1 use
Random (java.util.Random): 1 use
Nullable (javax.annotation.Nullable): 1 use
HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector): 1 use
ZeroCopyByteArrayOutputStream (org.apache.druid.io.ZeroCopyByteArrayOutputStream): 1 use
IAE (org.apache.druid.java.util.common.IAE): 1 use
RuntimeShapeInspector (org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector): 1 use
ObjectColumnSelector (org.apache.druid.segment.ObjectColumnSelector): 1 use
ColumnBuilder (org.apache.druid.segment.column.ColumnBuilder): 1 use
ColumnHolder (org.apache.druid.segment.column.ColumnHolder): 1 use
ComplexColumn (org.apache.druid.segment.column.ComplexColumn): 1 use