Search in sources:

Example 1 with SmooshedWriter

Use of io.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.

From the class LargeColumnSupportedComplexColumnSerializerTest, method testSanity:

@Test
public void testSanity() throws IOException {
    HashFunction fn = Hashing.murmur3_128(); // hash function used below (defined outside this snippet in the full test)
    HyperUniquesSerdeForTest serde = new HyperUniquesSerdeForTest(fn);
    int[] cases = { 1000, 5000, 10000, 20000 };
    int[] columnSizes = { Integer.MAX_VALUE, Integer.MAX_VALUE / 2, Integer.MAX_VALUE / 4, 5000 * Longs.BYTES, 2500 * Longs.BYTES };
    for (int columnSize : columnSizes) {
        for (int aCase : cases) {
            // FileSmoosher and Smoosh.map below operate on this directory, not on a single file.
            File tmpDir = FileUtils.getTempDirectory();
            HyperLogLogCollector baseCollector = HyperLogLogCollector.makeLatestCollector();
            try (IOPeon peon = new TmpFileIOPeon();
                FileSmoosher v9Smoosher = new FileSmoosher(tmpDir)) {
                LargeColumnSupportedComplexColumnSerializer serializer = LargeColumnSupportedComplexColumnSerializer.createWithColumnSize(peon, "test", serde.getObjectStrategy(), columnSize);
                serializer.open();
                for (int i = 0; i < aCase; i++) {
                    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
                    byte[] hashBytes = fn.hashLong(i).asBytes();
                    collector.add(hashBytes);
                    baseCollector.fold(collector);
                    serializer.serialize(collector);
                }
                serializer.close();
                try (final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter("test", serializer.getSerializedSize())) {
                    serializer.writeToChannel(channel, v9Smoosher);
                }
            }
            SmooshedFileMapper mapper = Smoosh.map(tmpDir);
            final ColumnBuilder builder = new ColumnBuilder().setType(ValueType.COMPLEX).setHasMultipleValues(false).setFileMapper(mapper);
            serde.deserializeColumn(mapper.mapFile("test"), builder);
            Column column = builder.build();
            ComplexColumn complexColumn = column.getComplexColumn();
            HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
            for (int i = 0; i < aCase; i++) {
                collector.fold((HyperLogLogCollector) complexColumn.getRowValue(i));
            }
            Assert.assertEquals(baseCollector.estimateCardinality(), collector.estimateCardinality(), 0.0);
        }
    }
}
Also used: SmooshedWriter(io.druid.java.util.common.io.smoosh.SmooshedWriter) HyperLogLogCollector(io.druid.hll.HyperLogLogCollector) IOPeon(io.druid.segment.data.IOPeon) TmpFileIOPeon(io.druid.segment.data.TmpFileIOPeon) Column(io.druid.segment.column.Column) ComplexColumn(io.druid.segment.column.ComplexColumn) FileSmoosher(io.druid.java.util.common.io.smoosh.FileSmoosher) ColumnBuilder(io.druid.segment.column.ColumnBuilder) File(java.io.File) SmooshedFileMapper(io.druid.java.util.common.io.smoosh.SmooshedFileMapper) Test(org.junit.Test)
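
Distilled from the test above, a minimal sketch of the smoosh write/read round trip. It relies only on calls that appear in the examples on this page (FileSmoosher, addWithSmooshedWriter, Smoosh.map, mapFile); the directory path, entry name, and payload are illustrative, not from the Druid sources.

import java.io.File;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import io.druid.java.util.common.io.smoosh.FileSmoosher;
import io.druid.java.util.common.io.smoosh.Smoosh;
import io.druid.java.util.common.io.smoosh.SmooshedFileMapper;
import io.druid.java.util.common.io.smoosh.SmooshedWriter;

public class SmooshRoundTripSketch {
    public static void main(String[] args) throws Exception {
        // Illustrative scratch directory; FileSmoosher writes into a directory.
        File dir = new File("/tmp/smoosh-example");
        dir.mkdirs();
        byte[] payload = "hello smoosh".getBytes(StandardCharsets.UTF_8);
        // The size declared to addWithSmooshedWriter must match the bytes actually written.
        try (FileSmoosher smoosher = new FileSmoosher(dir);
            SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", payload.length)) {
            channel.write(ByteBuffer.wrap(payload));
        }
        // Map the smooshed directory back and look the entry up by name.
        SmooshedFileMapper mapper = Smoosh.map(dir);
        ByteBuffer mapped = mapper.mapFile("test");
        byte[] out = new byte[mapped.remaining()];
        mapped.get(out);
        System.out.println(new String(out, StandardCharsets.UTF_8));
        mapper.close();
    }
}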

Example 2 with SmooshedWriter

Use of io.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.

From the class IndexMergerV9, method makeColumn:

private void makeColumn(final FileSmoosher v9Smoosher, final String columnName, final ColumnDescriptor serdeficator) throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    serializerUtils.writeString(baos, mapper.writeValueAsString(serdeficator));
    byte[] specBytes = baos.toByteArray();
    try (SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter(columnName, serdeficator.numBytes() + specBytes.length)) {
        channel.write(ByteBuffer.wrap(specBytes));
        serdeficator.write(channel, v9Smoosher);
    }
}
Also used: SmooshedWriter(io.druid.java.util.common.io.smoosh.SmooshedWriter) ByteArrayOutputStream(java.io.ByteArrayOutputStream)
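
This method shows the size-first contract of the smoosh API: the exact payload size, serdeficator.numBytes() plus the length of the serialized column descriptor, is declared to addWithSmooshedWriter before any byte is written. Every example on this page follows the same pattern.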

Example 3 with SmooshedWriter

Use of io.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.

From the class GenericIndexedWriter, method writeToChannelVersionTwo:

private void writeToChannelVersionTwo(WritableByteChannel channel, FileSmoosher smoosher) throws IOException {
    if (smoosher == null) {
        throw new IAE("version 2 GenericIndexedWriter requires FileSmoosher.");
    }
    int bagSizePower = bagSizePower();
    OutputStream metaOut = Channels.newOutputStream(channel);
    metaOut.write(GenericIndexed.VERSION_TWO);
    metaOut.write(objectsSorted ? 0x1 : 0x0);
    metaOut.write(Ints.toByteArray(bagSizePower));
    metaOut.write(Ints.toByteArray(Ints.checkedCast(numWritten)));
    metaOut.write(Ints.toByteArray(fileNameByteArray.length));
    metaOut.write(fileNameByteArray);
    try (RandomAccessFile headerFile = new RandomAccessFile(ioPeon.getFile(makeFilename("headerLong")), "r")) {
        Preconditions.checkNotNull(headerFile, "header file missing.");
        long previousValuePosition = 0;
        int bagSize = 1 << bagSizePower;
        int numberOfFilesRequired = GenericIndexed.getNumberOfFilesRequired(bagSize, numWritten);
        byte[] buffer = new byte[1 << 16];
        try (InputStream is = new FileInputStream(ioPeon.getFile(makeFilename("values")))) {
            int counter = -1;
            for (int i = 0; i < numberOfFilesRequired; i++) {
                if (i != numberOfFilesRequired - 1) {
                    // Seek to the last header entry of this value file; each entry is an 8-byte long.
                    // Cast to long to avoid int overflow for very large files.
                    headerFile.seek((long) (bagSize + counter) * Longs.BYTES);
                    counter = counter + bagSize;
                } else {
                    // Last value file: seek to the final header entry covering the remaining items.
                    headerFile.seek((numWritten - 1) * Longs.BYTES);
                }
                long valuePosition = Long.reverseBytes(headerFile.readLong());
                long numBytesToPutInFile = valuePosition - previousValuePosition;
                try (SmooshedWriter smooshChannel = smoosher.addWithSmooshedWriter(generateValueFileName(filenameBase, i), numBytesToPutInFile)) {
                    writeBytesIntoSmooshedChannel(numBytesToPutInFile, buffer, smooshChannel, is);
                    previousValuePosition = valuePosition;
                }
            }
        }
        writeHeaderLong(smoosher, headerFile, bagSizePower, buffer);
    }
}
Also used: SmooshedWriter(io.druid.java.util.common.io.smoosh.SmooshedWriter) RandomAccessFile(java.io.RandomAccessFile) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) CountingOutputStream(com.google.common.io.CountingOutputStream) OutputStream(java.io.OutputStream) IAE(io.druid.java.util.common.IAE)
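
What writeToChannelVersionTwo illustrates: version 2 of GenericIndexed splits its values across multiple smooshed value files of 2^bagSizePower elements each. The header file of long offsets (read back with Long.reverseBytes to undo the byte order they were written in) supplies the byte range that goes into each value file.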

Example 4 with SmooshedWriter

Use of io.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.

From the class GenericIndexedWriter, method writeHeaderLong:

private void writeHeaderLong(FileSmoosher smoosher, RandomAccessFile headerFile, int bagSizePower, byte[] buffer) throws IOException {
    ByteBuffer helperBuffer = ByteBuffer.allocate(Ints.BYTES).order(ByteOrder.nativeOrder());
    try (CountingOutputStream finalHeaderOut = new CountingOutputStream(ioPeon.makeOutputStream(makeFilename("header_final")))) {
        int numberOfElementsPerValueFile = 1 << bagSizePower;
        long currentNumBytes = 0;
        long relativeRefBytes = 0;
        long relativeNumBytes;
        headerFile.seek(0);
        // The following loop converts the long header offsets into int offsets
        // that are relative to the start of each value file.
        for (int pos = 0; pos < numWritten; pos++) {
            // At a value-file boundary, rebase the reference offset to the current offset.
            if ((pos & (numberOfElementsPerValueFile - 1)) == 0) {
                relativeRefBytes = currentNumBytes;
            }
            currentNumBytes = Long.reverseBytes(headerFile.readLong());
            relativeNumBytes = currentNumBytes - relativeRefBytes;
            writeIntValueToOutputStream(helperBuffer, Ints.checkedCast(relativeNumBytes), finalHeaderOut);
        }
        long numBytesToPutInFile = finalHeaderOut.getCount();
        // Close explicitly so the converted header is flushed before it is re-read below;
        // the enclosing try-with-resources close is then harmless.
        finalHeaderOut.close();
        try (InputStream is = new FileInputStream(ioPeon.getFile(makeFilename("header_final")))) {
            try (SmooshedWriter smooshChannel = smoosher.addWithSmooshedWriter(generateHeaderFileName(filenameBase), numBytesToPutInFile)) {
                writeBytesIntoSmooshedChannel(numBytesToPutInFile, buffer, smooshChannel, is);
            }
        }
    }
}
Also used: SmooshedWriter(io.druid.java.util.common.io.smoosh.SmooshedWriter) CountingOutputStream(com.google.common.io.CountingOutputStream) FileInputStream(java.io.FileInputStream) InputStream(java.io.InputStream) ByteBuffer(java.nio.ByteBuffer)
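
This is the counterpart to writeToChannelVersionTwo above: because the reference offset is rebased at the start of every value file, each converted offset fits in an int, which is what lets the header shrink from Longs.BYTES to Ints.BYTES per entry.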

Example 5 with SmooshedWriter

Use of io.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.

From the class CompressedVSizeIndexedV3WriterTest, method checkV2SerializedSizeAndData:

private void checkV2SerializedSizeAndData(int offsetChunkFactor, int valueChunkFactor) throws Exception {
    File tmpDirectory = Files.createTempDirectory(String.format("CompressedVSizeIndexedV3WriterTest_%d_%d", offsetChunkFactor, valueChunkFactor)).toFile();
    FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
    final IOPeon ioPeon = new TmpFileIOPeon();
    int maxValue = vals.size() > 0 ? getMaxValue(vals) : 0;
    try {
        CompressedIntsIndexedWriter offsetWriter = new CompressedIntsIndexedWriter(offsetChunkFactor, compressionStrategy, new GenericIndexedWriter<>(ioPeon, "offset", CompressedIntBufferObjectStrategy.getBufferForOrder(byteOrder, compressionStrategy, offsetChunkFactor), Longs.BYTES * 250000));
        GenericIndexedWriter genericIndexed = new GenericIndexedWriter<>(ioPeon, "value", CompressedByteBufferObjectStrategy.getBufferForOrder(byteOrder, compressionStrategy, valueChunkFactor * VSizeIndexedInts.getNumBytesForMax(maxValue) + CompressedVSizeIntsIndexedSupplier.bufferPadding(VSizeIndexedInts.getNumBytesForMax(maxValue))), Longs.BYTES * 250000);
        CompressedVSizeIntsIndexedWriter valueWriter = new CompressedVSizeIntsIndexedWriter(ioPeon, "value", maxValue, valueChunkFactor, byteOrder, compressionStrategy, genericIndexed);
        CompressedVSizeIndexedV3Writer writer = new CompressedVSizeIndexedV3Writer(offsetWriter, valueWriter);
        writer.open();
        for (int[] val : vals) {
            writer.add(val);
        }
        writer.close();
        try (SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize())) {
            writer.writeToChannel(channel, smoosher);
        }
        smoosher.close();
        SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
        CompressedVSizeIndexedV3Supplier supplierFromByteBuffer = CompressedVSizeIndexedV3Supplier.fromByteBuffer(mapper.mapFile("test"), byteOrder, mapper);
        IndexedMultivalue<IndexedInts> indexedMultivalue = supplierFromByteBuffer.get();
        assertEquals(indexedMultivalue.size(), vals.size());
        for (int i = 0; i < vals.size(); ++i) {
            IndexedInts subVals = indexedMultivalue.get(i);
            assertEquals(subVals.size(), vals.get(i).length);
            for (int j = 0; j < subVals.size(); ++j) {
                assertEquals(subVals.get(j), vals.get(i)[j]);
            }
        }
        CloseQuietly.close(indexedMultivalue);
        mapper.close();
    } finally {
        ioPeon.close();
    }
}
Also used: SmooshedWriter(io.druid.java.util.common.io.smoosh.SmooshedWriter) FileSmoosher(io.druid.java.util.common.io.smoosh.FileSmoosher) CompressedVSizeIndexedV3Supplier(io.druid.segment.CompressedVSizeIndexedV3Supplier) File(java.io.File) SmooshedFileMapper(io.druid.java.util.common.io.smoosh.SmooshedFileMapper)
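
Taken together, the examples share one serializer contract: close() the writer first, then ask it for getSerializedSize(), open a SmooshedWriter of exactly that size, and call writeToChannel(channel, smoosher) so that version-2 GenericIndexed data can spill additional value files into the same smoosher.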

Aggregations

SmooshedWriter (io.druid.java.util.common.io.smoosh.SmooshedWriter): 8
File (java.io.File): 5
FileSmoosher (io.druid.java.util.common.io.smoosh.FileSmoosher): 4
SmooshedFileMapper (io.druid.java.util.common.io.smoosh.SmooshedFileMapper): 4
CountingOutputStream (com.google.common.io.CountingOutputStream): 2
FileInputStream (java.io.FileInputStream): 2
InputStream (java.io.InputStream): 2
HyperLogLogCollector (io.druid.hll.HyperLogLogCollector): 1
IAE (io.druid.java.util.common.IAE): 1
CompressedVSizeIndexedV3Supplier (io.druid.segment.CompressedVSizeIndexedV3Supplier): 1
Column (io.druid.segment.column.Column): 1
ColumnBuilder (io.druid.segment.column.ColumnBuilder): 1
ComplexColumn (io.druid.segment.column.ComplexColumn): 1
IOPeon (io.druid.segment.data.IOPeon): 1
TmpFileIOPeon (io.druid.segment.data.TmpFileIOPeon): 1
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1
OutputStream (java.io.OutputStream): 1
RandomAccessFile (java.io.RandomAccessFile): 1
ByteBuffer (java.nio.ByteBuffer): 1
DateTime (org.joda.time.DateTime): 1