
Example 6 with OffHeapMemorySegmentWriteOutMedium

Use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.

From the class LongCompressionBenchmarkFileGenerator, method main:

public static void main(String[] args) throws IOException {
    if (args.length >= 1) {
        dirPath = args[0];
    }
    GeneratorColumnSchema enumeratedSchema = GeneratorColumnSchema.makeEnumerated("", ValueType.LONG, true, 1, 0d, ImmutableList.of(0, 1, 2, 3, 4), ImmutableList.of(0.95, 0.001, 0.0189, 0.03, 0.0001));
    GeneratorColumnSchema zipfLowSchema = GeneratorColumnSchema.makeZipf("", ValueType.LONG, true, 1, 0d, -1, 1000, 1d);
    GeneratorColumnSchema zipfHighSchema = GeneratorColumnSchema.makeZipf("", ValueType.LONG, true, 1, 0d, -1, 1000, 3d);
    GeneratorColumnSchema sequentialSchema = GeneratorColumnSchema.makeSequential("", ValueType.LONG, true, 1, 0d, 1470187671, 2000000000);
    GeneratorColumnSchema uniformSchema = GeneratorColumnSchema.makeDiscreteUniform("", ValueType.LONG, true, 1, 0d, 0, 1000);
    Map<String, ColumnValueGenerator> generators = new HashMap<>();
    generators.put("enumerate", new ColumnValueGenerator(enumeratedSchema, 1));
    generators.put("zipfLow", new ColumnValueGenerator(zipfLowSchema, 1));
    generators.put("zipfHigh", new ColumnValueGenerator(zipfHighSchema, 1));
    generators.put("sequential", new ColumnValueGenerator(sequentialSchema, 1));
    generators.put("uniform", new ColumnValueGenerator(uniformSchema, 1));
    File dir = new File(dirPath);
    dir.mkdir();
    // create data files using ColumnValueGenerator
    for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
        final File dataFile = new File(dir, entry.getKey());
        dataFile.delete();
        try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) {
            for (int i = 0; i < ROW_NUM; i++) {
                writer.write((long) entry.getValue().generateRowValue() + "\n");
            }
        }
    }
    // create compressed files using all combinations of CompressionStrategy and LongEncoding provided
    for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
        for (CompressionStrategy compression : COMPRESSIONS) {
            for (CompressionFactory.LongEncodingStrategy encoding : ENCODINGS) {
                String name = entry.getKey() + "-" + compression + "-" + encoding;
                log.info("%s: ", name);
                File compFile = new File(dir, name);
                compFile.delete();
                File dataFile = new File(dir, entry.getKey());
                ColumnarLongsSerializer writer = CompressionFactory.getLongSerializer("long-benchmark", new OffHeapMemorySegmentWriteOutMedium(), "long", ByteOrder.nativeOrder(), encoding, compression);
                try (BufferedReader br = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8);
                    FileChannel output = FileChannel.open(compFile.toPath(), StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE)) {
                    writer.open();
                    String line;
                    while ((line = br.readLine()) != null) {
                        writer.add(Long.parseLong(line));
                    }
                    writer.writeTo(output, null);
                }
                log.info("%d", compFile.length() / 1024);
            }
        }
    }
}
Also used: ColumnarLongsSerializer (org.apache.druid.segment.data.ColumnarLongsSerializer), HashMap (java.util.HashMap), OffHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium), FileChannel (java.nio.channels.FileChannel), ColumnValueGenerator (org.apache.druid.segment.generator.ColumnValueGenerator), CompressionStrategy (org.apache.druid.segment.data.CompressionStrategy), GeneratorColumnSchema (org.apache.druid.segment.generator.GeneratorColumnSchema), BufferedReader (java.io.BufferedReader), CompressionFactory (org.apache.druid.segment.data.CompressionFactory), File (java.io.File), Map (java.util.Map), Writer (java.io.Writer)
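
Note that this benchmark creates a fresh OffHeapMemorySegmentWriteOutMedium per serializer but never closes it; the medium owns off-heap buffers, so outside a short-lived benchmark it is worth scoping it with try-with-resources, as the test examples below do with SegmentWriteOutMedium. A minimal sketch of the same write path with an explicitly closed medium, reusing compFile, encoding, and compression from the loop above:

try (SegmentWriteOutMedium medium = new OffHeapMemorySegmentWriteOutMedium();
     FileChannel output = FileChannel.open(compFile.toPath(), StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE)) {
    // the serializer accumulates its intermediate data in the medium's off-heap buffers
    ColumnarLongsSerializer writer = CompressionFactory.getLongSerializer("long-benchmark", medium, "long", ByteOrder.nativeOrder(), encoding, compression);
    writer.open();
    // add values exactly as in the read loop above, then flush the finished column
    writer.writeTo(output, null);
}
// closing the medium releases the off-heap buffers backing the serializer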

Example 7 with OffHeapMemorySegmentWriteOutMedium

Use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.

From the class V3CompressedVSizeColumnarMultiIntsSerializerTest, method checkV2SerializedSizeAndData:

private void checkV2SerializedSizeAndData(int offsetChunkFactor, int valueChunkFactor) throws Exception {
    File tmpDirectory = FileUtils.createTempDir(StringUtils.format("CompressedVSizeIndexedV3WriterTest_%d_%d", offsetChunkFactor, valueChunkFactor));
    FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
    int maxValue = vals.size() > 0 ? getMaxValue(vals) : 0;
    try (SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium()) {
        CompressedColumnarIntsSerializer offsetWriter = new CompressedColumnarIntsSerializer(TEST_COLUMN_NAME, segmentWriteOutMedium, offsetChunkFactor, byteOrder, compressionStrategy, GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "offset", compressionStrategy, Long.BYTES * 250000));
        GenericIndexedWriter genericIndexed = GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "value", compressionStrategy, Long.BYTES * 250000);
        CompressedVSizeColumnarIntsSerializer valueWriter = new CompressedVSizeColumnarIntsSerializer(TEST_COLUMN_NAME, segmentWriteOutMedium, maxValue, valueChunkFactor, byteOrder, compressionStrategy, genericIndexed);
        V3CompressedVSizeColumnarMultiIntsSerializer writer = new V3CompressedVSizeColumnarMultiIntsSerializer(TEST_COLUMN_NAME, offsetWriter, valueWriter);
        writer.open();
        for (int[] val : vals) {
            writer.addValues(new ArrayBasedIndexedInts(val));
        }
        final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
        writer.writeTo(channel, smoosher);
        channel.close();
        smoosher.close();
        SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
        V3CompressedVSizeColumnarMultiIntsSupplier supplierFromByteBuffer = V3CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(mapper.mapFile("test"), byteOrder);
        ColumnarMultiInts columnarMultiInts = supplierFromByteBuffer.get();
        Assert.assertEquals(columnarMultiInts.size(), vals.size());
        for (int i = 0; i < vals.size(); ++i) {
            IndexedInts subVals = columnarMultiInts.get(i);
            Assert.assertEquals(subVals.size(), vals.get(i).length);
            for (int j = 0, size = subVals.size(); j < size; ++j) {
                Assert.assertEquals(subVals.get(j), vals.get(i)[j]);
            }
        }
        CloseableUtils.closeAll(columnarMultiInts, mapper);
    }
}
Also used: SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter), OffHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium), SegmentWriteOutMedium (org.apache.druid.segment.writeout.SegmentWriteOutMedium), FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher), File (java.io.File), SmooshedFileMapper (org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper)
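
The smoosh round trip in this test is worth isolating: the column is written into a smooshed file through a SmooshedWriter, then read back by mapping the directory. A condensed sketch of that pattern, assuming a writer that has already been opened and filled as above:

// write: reserve getSerializedSize() bytes inside the smoosh file and serialize into it
FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
try (SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize())) {
    writer.writeTo(channel, smoosher);
}
smoosher.close();
// read: map the directory and fetch a ByteBuffer view of the named column
SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
ByteBuffer mapped = mapper.mapFile("test");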

Example 8 with OffHeapMemorySegmentWriteOutMedium

Use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.

From the class CompressedDoublesSerdeTest, method testWithValues:

public void testWithValues(double[] values) throws Exception {
    ColumnarDoublesSerializer serializer = CompressionFactory.getDoubleSerializer("test", new OffHeapMemorySegmentWriteOutMedium(), "test", order, compressionStrategy);
    serializer.open();
    for (double value : values) {
        serializer.add(value);
    }
    Assert.assertEquals(values.length, serializer.size());
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    serializer.writeTo(Channels.newChannel(baos), null);
    Assert.assertEquals(baos.size(), serializer.getSerializedSize());
    Supplier<ColumnarDoubles> supplier = CompressedColumnarDoublesSuppliers.fromByteBuffer(ByteBuffer.wrap(baos.toByteArray()), order);
    ColumnarDoubles doubles = supplier.get();
    assertIndexMatchesVals(doubles, values);
    // spot-check ten random subranges of the column with tryFill
    for (int i = 0; i < 10; i++) {
        int a = (int) (ThreadLocalRandom.current().nextDouble() * values.length);
        int b = (int) (ThreadLocalRandom.current().nextDouble() * values.length);
        int start = Math.min(a, b);
        int end = Math.max(a, b);
        tryFill(doubles, values, start, end - start);
    }
    testConcurrentThreadReads(supplier, doubles, values);
    doubles.close();
}
Also used: OffHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium), ByteArrayOutputStream (java.io.ByteArrayOutputStream)
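
The body of assertIndexMatchesVals is not part of this snippet; a hedged sketch of what such a check typically looks like, walking the column by index (assuming ColumnarDoubles exposes size() and get(int)):

// hedged sketch of an element-by-element check like assertIndexMatchesVals
// (the actual helper's body is not shown in this snippet)
Assert.assertEquals(values.length, doubles.size());
for (int i = 0; i < doubles.size(); i++) {
    Assert.assertEquals(values[i], doubles.get(i), 0.0);
}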

Example 9 with OffHeapMemorySegmentWriteOutMedium

Use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.

From the class CompressedLongsAutoEncodingSerdeTest, method testValues:

public void testValues(long[] values) throws Exception {
    ColumnarLongsSerializer serializer = CompressionFactory.getLongSerializer("test", new OffHeapMemorySegmentWriteOutMedium(), "test", order, encodingStrategy, compressionStrategy);
    serializer.open();
    for (long value : values) {
        serializer.add(value);
    }
    Assert.assertEquals(values.length, serializer.size());
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    serializer.writeTo(Channels.newChannel(baos), null);
    Assert.assertEquals(baos.size(), serializer.getSerializedSize());
    CompressedColumnarLongsSupplier supplier = CompressedColumnarLongsSupplier.fromByteBuffer(ByteBuffer.wrap(baos.toByteArray()), order);
    ColumnarLongs longs = supplier.get();
    assertIndexMatchesVals(longs, values);
    longs.close();
}
Also used: OffHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium), ByteArrayOutputStream (java.io.ByteArrayOutputStream)
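
The test's name suggests its encodingStrategy field is CompressionFactory.LongEncodingStrategy.AUTO, which inspects the observed values and picks a compact long encoding rather than always writing raw 8-byte longs. A sketch of constructing serializers for every encoding strategy; the compression and byte order here are illustrative assumptions, since the test's field values are not shown in the snippet:

// sketch: build a serializer for each LongEncodingStrategy the factory accepts
// (LZ4 and native byte order are illustrative choices, not the test's fields)
for (CompressionFactory.LongEncodingStrategy strategy : CompressionFactory.LongEncodingStrategy.values()) {
    ColumnarLongsSerializer s = CompressionFactory.getLongSerializer(
        "test", new OffHeapMemorySegmentWriteOutMedium(), "test",
        ByteOrder.nativeOrder(), strategy, CompressionStrategy.LZ4);
    // ... same open/add/writeTo round trip as above
}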

Example 10 with OffHeapMemorySegmentWriteOutMedium

Use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.

From the class V3CompressedVSizeColumnarMultiIntsSerializerTest, method checkSerializedSizeAndData:

private void checkSerializedSizeAndData(int offsetChunkFactor, int valueChunkFactor) throws Exception {
    FileSmoosher smoosher = new FileSmoosher(temporaryFolder.newFolder());
    try (SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium()) {
        int maxValue = vals.size() > 0 ? getMaxValue(vals) : 0;
        CompressedColumnarIntsSerializer offsetWriter = new CompressedColumnarIntsSerializer(TEST_COLUMN_NAME, segmentWriteOutMedium, "offset", offsetChunkFactor, byteOrder, compressionStrategy);
        CompressedVSizeColumnarIntsSerializer valueWriter = new CompressedVSizeColumnarIntsSerializer(TEST_COLUMN_NAME, segmentWriteOutMedium, "value", maxValue, valueChunkFactor, byteOrder, compressionStrategy);
        V3CompressedVSizeColumnarMultiIntsSerializer writer = new V3CompressedVSizeColumnarMultiIntsSerializer(TEST_COLUMN_NAME, offsetWriter, valueWriter);
        V3CompressedVSizeColumnarMultiIntsSupplier supplierFromIterable = V3CompressedVSizeColumnarMultiIntsSupplier.fromIterable(Iterables.transform(vals, ArrayBasedIndexedInts::new), offsetChunkFactor, maxValue, byteOrder, compressionStrategy, segmentWriteOutMedium.getCloser());
        writer.open();
        for (int[] val : vals) {
            writer.addValues(new ArrayBasedIndexedInts(val));
        }
        long writtenLength = writer.getSerializedSize();
        final WriteOutBytes writeOutBytes = segmentWriteOutMedium.makeWriteOutBytes();
        writer.writeTo(writeOutBytes, smoosher);
        smoosher.close();
        Assert.assertEquals(writtenLength, supplierFromIterable.getSerializedSize());
        // read from ByteBuffer and check values
        V3CompressedVSizeColumnarMultiIntsSupplier supplierFromByteBuffer = V3CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(ByteBuffer.wrap(IOUtils.toByteArray(writeOutBytes.asInputStream())), byteOrder);
        try (final ColumnarMultiInts columnarMultiInts = supplierFromByteBuffer.get()) {
            Assert.assertEquals(columnarMultiInts.size(), vals.size());
            for (int i = 0; i < vals.size(); ++i) {
                IndexedInts subVals = columnarMultiInts.get(i);
                Assert.assertEquals(subVals.size(), vals.get(i).length);
                for (int j = 0, size = subVals.size(); j < size; ++j) {
                    Assert.assertEquals(subVals.get(j), vals.get(i)[j]);
                }
            }
        }
    }
}
Also used: OffHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium), WriteOutBytes (org.apache.druid.segment.writeout.WriteOutBytes), SegmentWriteOutMedium (org.apache.druid.segment.writeout.SegmentWriteOutMedium), FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher)
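
Unlike the V2 variant in Example 7, this test never touches disk for the read-back: WriteOutBytes, obtained from the medium itself, serves as the destination channel and then exposes the written bytes as an InputStream. The core of that round trip, reusing the names from the test:

// serialize into the medium's own WriteOutBytes (a WritableByteChannel) ...
WriteOutBytes out = segmentWriteOutMedium.makeWriteOutBytes();
writer.writeTo(out, smoosher);
// ... then read the same bytes back without any file I/O
byte[] bytes = IOUtils.toByteArray(out.asInputStream());
V3CompressedVSizeColumnarMultiIntsSupplier supplier =
    V3CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(ByteBuffer.wrap(bytes), byteOrder);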

Aggregations

OffHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium) 10
ByteArrayOutputStream (java.io.ByteArrayOutputStream) 4
File (java.io.File) 4
FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher) 4
FileChannel (java.nio.channels.FileChannel) 3
SegmentWriteOutMedium (org.apache.druid.segment.writeout.SegmentWriteOutMedium) 3
BufferedReader (java.io.BufferedReader) 2
Writer (java.io.Writer) 2
HashMap (java.util.HashMap) 2
Map (java.util.Map) 2
SmooshedFileMapper (org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper) 2
SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter) 2
CompressionStrategy (org.apache.druid.segment.data.CompressionStrategy) 2
ColumnValueGenerator (org.apache.druid.segment.generator.ColumnValueGenerator) 2
GeneratorColumnSchema (org.apache.druid.segment.generator.GeneratorColumnSchema) 2
ByteBuffer (java.nio.ByteBuffer) 1
MappedByteBuffer (java.nio.MappedByteBuffer) 1
Nullable (javax.annotation.Nullable) 1
HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector) 1
RuntimeShapeInspector (org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector) 1