
Example 1 with OffHeapMemorySegmentWriteOutMedium

Use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.

From the class CompressedLongsSerdeTest, the helper method testValues (order, encodingStrategy, and compressionStrategy are fields of the test class):

public void testValues(long[] values) throws Exception {
    ColumnarLongsSerializer serializer = CompressionFactory.getLongSerializer("test", new OffHeapMemorySegmentWriteOutMedium(), "test", order, encodingStrategy, compressionStrategy);
    serializer.open();
    for (long value : values) {
        serializer.add(value);
    }
    Assert.assertEquals(values.length, serializer.size());
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    serializer.writeTo(Channels.newChannel(baos), null);
    Assert.assertEquals(baos.size(), serializer.getSerializedSize());
    CompressedColumnarLongsSupplier supplier = CompressedColumnarLongsSupplier.fromByteBuffer(ByteBuffer.wrap(baos.toByteArray()), order);
    ColumnarLongs longs = supplier.get();
    assertIndexMatchesVals(longs, values);
    // Spot-check ten random sub-ranges of the column against the source array.
    for (int i = 0; i < 10; i++) {
        int a = (int) (ThreadLocalRandom.current().nextDouble() * values.length);
        int b = (int) (ThreadLocalRandom.current().nextDouble() * values.length);
        int start = Math.min(a, b);
        int end = Math.max(a, b);
        tryFill(longs, values, start, end - start);
    }
    testSupplierSerde(supplier, values);
    testConcurrentThreadReads(supplier, longs, values);
    longs.close();
}
Also used: OffHeapMemorySegmentWriteOutMedium(org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium), ByteArrayOutputStream(java.io.ByteArrayOutputStream)
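
For reference, the same round trip can be written as a minimal, self-contained sketch. The column names and the concrete settings below (native byte order, AUTO long encoding, LZ4 compression) are illustrative assumptions, not the values the test exercises.

import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.Channels;
import org.apache.druid.segment.data.ColumnarLongs;
import org.apache.druid.segment.data.ColumnarLongsSerializer;
import org.apache.druid.segment.data.CompressedColumnarLongsSupplier;
import org.apache.druid.segment.data.CompressionFactory;
import org.apache.druid.segment.data.CompressionStrategy;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium;

public class LongColumnRoundTripSketch {

    public static void main(String[] args) throws Exception {
        long[] values = { 1L, 2L, 3L, 5L, 8L };
        // Write the values through an off-heap write-out medium (assumed settings: native order, AUTO encoding, LZ4).
        ColumnarLongsSerializer serializer = CompressionFactory.getLongSerializer("sketch", new OffHeapMemorySegmentWriteOutMedium(), "sketch", ByteOrder.nativeOrder(), CompressionFactory.LongEncodingStrategy.AUTO, CompressionStrategy.LZ4);
        serializer.open();
        for (long value : values) {
            serializer.add(value);
        }
        // Capture the serialized bytes, then read them back through the supplier.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        serializer.writeTo(Channels.newChannel(baos), null);
        CompressedColumnarLongsSupplier supplier = CompressedColumnarLongsSupplier.fromByteBuffer(ByteBuffer.wrap(baos.toByteArray()), ByteOrder.nativeOrder());
        ColumnarLongs longs = supplier.get();
        for (int i = 0; i < values.length; i++) {
            System.out.println(longs.get(i));
        }
        longs.close();
    }
}

The float example further down (Example 3) follows the same pattern with CompressionFactory.getFloatSerializer and CompressedColumnarFloatsSupplier.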

Example 2 with OffHeapMemorySegmentWriteOutMedium

Use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.

From the class LargeColumnSupportedComplexColumnSerializerTest, the test method testSanity (temporaryFolder and fn are fields of the test class):

@Test
public void testSanity() throws IOException {
    HyperUniquesSerdeForTest serde = new HyperUniquesSerdeForTest(Hashing.murmur3_128());
    int[] cases = { 1000, 5000, 10000, 20000 };
    int[] columnSizes = { Integer.MAX_VALUE, Integer.MAX_VALUE / 2, Integer.MAX_VALUE / 4, 5000 * Long.BYTES, 2500 * Long.BYTES };
    for (int columnSize : columnSizes) {
        for (int aCase : cases) {
            File tmpFile = temporaryFolder.newFolder();
            HyperLogLogCollector baseCollector = HyperLogLogCollector.makeLatestCollector();
            try (SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium();
                FileSmoosher v9Smoosher = new FileSmoosher(tmpFile)) {
                LargeColumnSupportedComplexColumnSerializer serializer = LargeColumnSupportedComplexColumnSerializer.createWithColumnSize(segmentWriteOutMedium, "test", serde.getObjectStrategy(), columnSize);
                serializer.open();
                for (int i = 0; i < aCase; i++) {
                    HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
                    byte[] hashBytes = fn.hashLong(i).asBytes();
                    collector.add(hashBytes);
                    baseCollector.fold(collector);
                    serializer.serialize(new ObjectColumnSelector<HyperLogLogCollector>() {

                        @Nullable
                        @Override
                        public HyperLogLogCollector getObject() {
                            return collector;
                        }

                        @Override
                        public Class<HyperLogLogCollector> classOfObject() {
                            return HyperLogLogCollector.class;
                        }

                        @Override
                        public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
                            // doesn't matter in tests
                        }
                    });
                }
                try (final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter("test", serializer.getSerializedSize())) {
                    serializer.writeTo(channel, v9Smoosher);
                }
            }
            SmooshedFileMapper mapper = Smoosh.map(tmpFile);
            final ColumnBuilder builder = new ColumnBuilder().setType(ValueType.COMPLEX).setHasMultipleValues(false).setFileMapper(mapper);
            serde.deserializeColumn(mapper.mapFile("test"), builder, null);
            ColumnHolder columnHolder = builder.build();
            ComplexColumn complexColumn = (ComplexColumn) columnHolder.getColumn();
            HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
            for (int i = 0; i < aCase; i++) {
                collector.fold((HyperLogLogCollector) complexColumn.getRowValue(i));
            }
            Assert.assertEquals(baseCollector.estimateCardinality(), collector.estimateCardinality(), 0.0);
        }
    }
}
Also used: SmooshedWriter(org.apache.druid.java.util.common.io.smoosh.SmooshedWriter), ColumnHolder(org.apache.druid.segment.column.ColumnHolder), HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector), OffHeapMemorySegmentWriteOutMedium(org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium), RuntimeShapeInspector(org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector), SegmentWriteOutMedium(org.apache.druid.segment.writeout.SegmentWriteOutMedium), FileSmoosher(org.apache.druid.java.util.common.io.smoosh.FileSmoosher), ColumnBuilder(org.apache.druid.segment.column.ColumnBuilder), File(java.io.File), Nullable(javax.annotation.Nullable), SmooshedFileMapper(org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper), ComplexColumn(org.apache.druid.segment.column.ComplexColumn), ObjectColumnSelector(org.apache.druid.segment.ObjectColumnSelector), Test(org.junit.Test)
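
Note that the medium is opened in try-with-resources above: OffHeapMemorySegmentWriteOutMedium backs its scratch space with direct (off-heap) buffers, and closing the medium is what releases them. A minimal sketch of that lifecycle, assuming makeWriteOutBytes() is the medium's factory method for scratch buffers:

import java.io.ByteArrayOutputStream;
import java.nio.channels.Channels;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium;
import org.apache.druid.segment.writeout.SegmentWriteOutMedium;
import org.apache.druid.segment.writeout.WriteOutBytes;

public class WriteOutMediumLifecycleSketch {

    public static void main(String[] args) throws Exception {
        // The try-with-resources block bounds the lifetime of the off-heap memory:
        // closing the medium frees every WriteOutBytes it handed out.
        try (SegmentWriteOutMedium medium = new OffHeapMemorySegmentWriteOutMedium()) {
            WriteOutBytes scratch = medium.makeWriteOutBytes();
            scratch.write(new byte[] { 1, 2, 3 });
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            scratch.writeTo(Channels.newChannel(out));
            System.out.println("bytes written: " + out.size());
        }
        // Do not use 'scratch' after this point; its backing buffers have been released.
    }
}

Serializers such as LargeColumnSupportedComplexColumnSerializer obtain their working buffers from the medium they are given, which is presumably why the test keeps the medium open until writeTo has completed.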

Example 3 with OffHeapMemorySegmentWriteOutMedium

Use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.

From the class CompressedFloatsSerdeTest, the helper method testWithValues (order and compressionStrategy are fields of the test class):

public void testWithValues(float[] values) throws Exception {
    ColumnarFloatsSerializer serializer = CompressionFactory.getFloatSerializer("test", new OffHeapMemorySegmentWriteOutMedium(), "test", order, compressionStrategy);
    serializer.open();
    for (float value : values) {
        serializer.add(value);
    }
    Assert.assertEquals(values.length, serializer.size());
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    serializer.writeTo(Channels.newChannel(baos), null);
    Assert.assertEquals(baos.size(), serializer.getSerializedSize());
    CompressedColumnarFloatsSupplier supplier = CompressedColumnarFloatsSupplier.fromByteBuffer(ByteBuffer.wrap(baos.toByteArray()), order);
    ColumnarFloats floats = supplier.get();
    assertIndexMatchesVals(floats, values);
    // Spot-check ten random sub-ranges of the column against the source array.
    for (int i = 0; i < 10; i++) {
        int a = (int) (ThreadLocalRandom.current().nextDouble() * values.length);
        int b = (int) (ThreadLocalRandom.current().nextDouble() * values.length);
        int start = Math.min(a, b);
        int end = Math.max(a, b);
        tryFill(floats, values, start, end - start);
    }
    testSupplierSerde(supplier, values);
    testConcurrentThreadReads(supplier, floats, values);
    floats.close();
}
Also used: OffHeapMemorySegmentWriteOutMedium(org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium), ByteArrayOutputStream(java.io.ByteArrayOutputStream)

Example 4 with OffHeapMemorySegmentWriteOutMedium

Use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.

From the class FloatCompressionBenchmarkFileGenerator, the method main (dirPath, ROW_NUM, COMPRESSIONS, and log are fields of the generator class):

public static void main(String[] args) throws IOException {
    if (args.length >= 1) {
        dirPath = args[0];
    }
    GeneratorColumnSchema enumeratedSchema = GeneratorColumnSchema.makeEnumerated("", ValueType.FLOAT, true, 1, 0d, ImmutableList.of(0f, 1.1f, 2.2f, 3.3f, 4.4f), ImmutableList.of(0.95, 0.001, 0.0189, 0.03, 0.0001));
    GeneratorColumnSchema zipfLowSchema = GeneratorColumnSchema.makeZipf("", ValueType.FLOAT, true, 1, 0d, -1, 1000, 1d);
    GeneratorColumnSchema zipfHighSchema = GeneratorColumnSchema.makeZipf("", ValueType.FLOAT, true, 1, 0d, -1, 1000, 3d);
    GeneratorColumnSchema sequentialSchema = GeneratorColumnSchema.makeSequential("", ValueType.FLOAT, true, 1, 0d, 1470187671, 2000000000);
    GeneratorColumnSchema uniformSchema = GeneratorColumnSchema.makeContinuousUniform("", ValueType.FLOAT, true, 1, 0d, 0, 1000);
    Map<String, ColumnValueGenerator> generators = new HashMap<>();
    generators.put("enumerate", new ColumnValueGenerator(enumeratedSchema, 1));
    generators.put("zipfLow", new ColumnValueGenerator(zipfLowSchema, 1));
    generators.put("zipfHigh", new ColumnValueGenerator(zipfHighSchema, 1));
    generators.put("sequential", new ColumnValueGenerator(sequentialSchema, 1));
    generators.put("uniform", new ColumnValueGenerator(uniformSchema, 1));
    File dir = new File(dirPath);
    dir.mkdir();
    // create data files using ColumnValueGenerator
    for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
        final File dataFile = new File(dir, entry.getKey());
        dataFile.delete();
        try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) {
            for (int i = 0; i < ROW_NUM; i++) {
                writer.write((Float) entry.getValue().generateRowValue() + "\n");
            }
        }
    }
    // create a compressed file for each (generator, CompressionStrategy) combination
    for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
        for (CompressionStrategy compression : COMPRESSIONS) {
            String name = entry.getKey() + "-" + compression;
            log.info("%s: ", name);
            File compFile = new File(dir, name);
            compFile.delete();
            File dataFile = new File(dir, entry.getKey());
            ColumnarFloatsSerializer writer = CompressionFactory.getFloatSerializer("float-benchmark", new OffHeapMemorySegmentWriteOutMedium(), "float", ByteOrder.nativeOrder(), compression);
            try (BufferedReader br = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8);
                FileChannel output = FileChannel.open(compFile.toPath(), StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE)) {
                writer.open();
                String line;
                while ((line = br.readLine()) != null) {
                    writer.add(Float.parseFloat(line));
                }
                writer.writeTo(output, null);
            }
            log.info("%d", compFile.length() / 1024);
        }
    }
}
Also used: ColumnarFloatsSerializer(org.apache.druid.segment.data.ColumnarFloatsSerializer), HashMap(java.util.HashMap), OffHeapMemorySegmentWriteOutMedium(org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium), FileChannel(java.nio.channels.FileChannel), ColumnValueGenerator(org.apache.druid.segment.generator.ColumnValueGenerator), CompressionStrategy(org.apache.druid.segment.data.CompressionStrategy), GeneratorColumnSchema(org.apache.druid.segment.generator.GeneratorColumnSchema), BufferedReader(java.io.BufferedReader), File(java.io.File), Map(java.util.Map), Writer(java.io.Writer)
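
To sanity-check a generated file, the compressed floats can be mapped back and read through CompressedColumnarFloatsSupplier, the same reader used in Example 3. A rough sketch, taking the path of one generated file (for example <dirPath>/<generator>-<compression>) as its argument:

import java.io.File;
import java.nio.ByteOrder;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import org.apache.druid.segment.data.ColumnarFloats;
import org.apache.druid.segment.data.CompressedColumnarFloatsSupplier;

public class CompressedFloatFileReaderSketch {

    public static void main(String[] args) throws Exception {
        // args[0] is the path of one file produced by the generator above.
        File compFile = new File(args[0]);
        try (FileChannel channel = FileChannel.open(compFile.toPath())) {
            MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, compFile.length());
            // The generator wrote with ByteOrder.nativeOrder(), so read with the same order.
            CompressedColumnarFloatsSupplier supplier = CompressedColumnarFloatsSupplier.fromByteBuffer(buffer, ByteOrder.nativeOrder());
            ColumnarFloats floats = supplier.get();
            System.out.println("rows: " + floats.size() + ", first value: " + floats.get(0));
            floats.close();
        }
    }
}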

Example 5 with OffHeapMemorySegmentWriteOutMedium

Use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.

From the class GenericIndexedBenchmark, the setup method createGenericIndexed (n, elementSize, BYTE_ARRAY_STRATEGY, smooshDir, file, and genericIndexed are fields of the benchmark class):

@Setup(Level.Trial)
public void createGenericIndexed() throws IOException {
    GenericIndexedWriter<byte[]> genericIndexedWriter = new GenericIndexedWriter<>(new OffHeapMemorySegmentWriteOutMedium(), "genericIndexedBenchmark", BYTE_ARRAY_STRATEGY);
    genericIndexedWriter.open();
    // GenericIndexedWriter caches prevObject for comparison, so two alternating buffers are needed for a correct objectsSorted computation.
    ByteBuffer[] elements = new ByteBuffer[2];
    elements[0] = ByteBuffer.allocate(elementSize);
    elements[1] = ByteBuffer.allocate(elementSize);
    for (int i = 0; i < n; i++) {
        ByteBuffer element = elements[i & 1];
        element.putInt(0, i);
        genericIndexedWriter.write(element.array());
    }
    smooshDir = FileUtils.createTempDir();
    file = File.createTempFile("genericIndexedBenchmark", "meta");
    try (FileChannel fileChannel = FileChannel.open(file.toPath(), StandardOpenOption.CREATE, StandardOpenOption.WRITE);
        FileSmoosher fileSmoosher = new FileSmoosher(smooshDir)) {
        genericIndexedWriter.writeTo(fileChannel, fileSmoosher);
    }
    // Map the serialized file for reading; the mapping remains valid after the channel is closed.
    try (FileChannel fileChannel = FileChannel.open(file.toPath())) {
        MappedByteBuffer byteBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, file.length());
        genericIndexed = GenericIndexed.read(byteBuffer, BYTE_ARRAY_STRATEGY, SmooshedFileMapper.load(smooshDir));
    }
}
Also used: GenericIndexedWriter(org.apache.druid.segment.data.GenericIndexedWriter), MappedByteBuffer(java.nio.MappedByteBuffer), OffHeapMemorySegmentWriteOutMedium(org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium), FileChannel(java.nio.channels.FileChannel), FileSmoosher(org.apache.druid.java.util.common.io.smoosh.FileSmoosher), ByteBuffer(java.nio.ByteBuffer), Setup(org.openjdk.jmh.annotations.Setup)
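
A GenericIndexed built this way is read back by index. Here is a small write-and-read round trip, under the assumptions that GenericIndexed.STRING_STRATEGY is available and that a small, single-file column can be written without a FileSmoosher (the benchmark passes one because its data may spill across multiple files):

import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import org.apache.druid.segment.data.GenericIndexed;
import org.apache.druid.segment.data.GenericIndexedWriter;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium;
import org.apache.druid.segment.writeout.SegmentWriteOutMedium;

public class GenericIndexedRoundTripSketch {

    public static void main(String[] args) throws Exception {
        try (SegmentWriteOutMedium medium = new OffHeapMemorySegmentWriteOutMedium()) {
            GenericIndexedWriter<String> writer = new GenericIndexedWriter<>(medium, "sketch", GenericIndexed.STRING_STRATEGY);
            writer.open();
            writer.write("apple");
            writer.write("banana");
            writer.write("cherry");
            // Assumption: a column this small fits in one file, so no FileSmoosher is needed here.
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            writer.writeTo(Channels.newChannel(baos), null);
            GenericIndexed<String> indexed = GenericIndexed.read(ByteBuffer.wrap(baos.toByteArray()), GenericIndexed.STRING_STRATEGY);
            for (int i = 0; i < indexed.size(); i++) {
                System.out.println(indexed.get(i));
            }
        }
    }
}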

Aggregations

OffHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium): 10 usages
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 4 usages
File (java.io.File): 4 usages
FileSmoosher (org.apache.druid.java.util.common.io.smoosh.FileSmoosher): 4 usages
FileChannel (java.nio.channels.FileChannel): 3 usages
SegmentWriteOutMedium (org.apache.druid.segment.writeout.SegmentWriteOutMedium): 3 usages
BufferedReader (java.io.BufferedReader): 2 usages
Writer (java.io.Writer): 2 usages
HashMap (java.util.HashMap): 2 usages
Map (java.util.Map): 2 usages
SmooshedFileMapper (org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper): 2 usages
SmooshedWriter (org.apache.druid.java.util.common.io.smoosh.SmooshedWriter): 2 usages
CompressionStrategy (org.apache.druid.segment.data.CompressionStrategy): 2 usages
ColumnValueGenerator (org.apache.druid.segment.generator.ColumnValueGenerator): 2 usages
GeneratorColumnSchema (org.apache.druid.segment.generator.GeneratorColumnSchema): 2 usages
ByteBuffer (java.nio.ByteBuffer): 1 usage
MappedByteBuffer (java.nio.MappedByteBuffer): 1 usage
Nullable (javax.annotation.Nullable): 1 usage
HyperLogLogCollector (org.apache.druid.hll.HyperLogLogCollector): 1 usage
RuntimeShapeInspector (org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector): 1 usage