Search in sources :

Example 1 with ColumnarLongsSerializer

use of org.apache.druid.segment.data.ColumnarLongsSerializer in project druid by druid-io.

From the class BaseColumnarLongsBenchmark, method encodeToFile.

/**
 * Serializes {@code vals} with the serializer configuration named by {@code encoding},
 * writes the result to {@code output}, and returns the serialized size in bytes.
 *
 * <p>The encoding name is "{compression}-{longEncoding}", e.g. "lz4-longs"; it is also
 * used as the serializer's column name and filename base (matching the original per-case
 * calls, where the literal always equaled {@code encoding}).
 *
 * @param vals     values to serialize
 * @param encoding one of "lz4-longs", "lz4-auto", "none-longs", "none-auto"
 * @param output   channel the serialized column is written to
 * @return serialized size in bytes (narrowed to int; benchmark data fits)
 * @throws IOException if the serializer or channel fails
 * @throws RuntimeException if {@code encoding} is not a recognized name
 */
static int encodeToFile(long[] vals, String encoding, FileChannel output) throws IOException {
    SegmentWriteOutMedium writeOutMedium = new OnHeapMemorySegmentWriteOutMedium();
    // Select the (long-encoding, compression) pair; the serializer call itself is
    // identical across cases, so it is made once below instead of in every branch.
    final CompressionFactory.LongEncodingStrategy longEncoding;
    final CompressionStrategy compression;
    switch (encoding) {
        case "lz4-longs":
            longEncoding = CompressionFactory.LongEncodingStrategy.LONGS;
            compression = CompressionStrategy.LZ4;
            break;
        case "lz4-auto":
            longEncoding = CompressionFactory.LongEncodingStrategy.AUTO;
            compression = CompressionStrategy.LZ4;
            break;
        case "none-longs":
            longEncoding = CompressionFactory.LongEncodingStrategy.LONGS;
            compression = CompressionStrategy.NONE;
            break;
        case "none-auto":
            longEncoding = CompressionFactory.LongEncodingStrategy.AUTO;
            compression = CompressionStrategy.NONE;
            break;
        default:
            // include the offending value so the failure is diagnosable
            throw new RuntimeException("unknown encoding: " + encoding);
    }
    ColumnarLongsSerializer serializer = CompressionFactory.getLongSerializer(
        encoding,
        writeOutMedium,
        encoding,
        ByteOrder.LITTLE_ENDIAN,
        longEncoding,
        compression
    );
    serializer.open();
    for (long val : vals) {
        serializer.add(val);
    }
    serializer.writeTo(output, null);
    return (int) serializer.getSerializedSize();
}
Also used : OnHeapMemorySegmentWriteOutMedium(org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium) SegmentWriteOutMedium(org.apache.druid.segment.writeout.SegmentWriteOutMedium) ColumnarLongsSerializer(org.apache.druid.segment.data.ColumnarLongsSerializer) OnHeapMemorySegmentWriteOutMedium(org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium)

Example 2 with ColumnarLongsSerializer

use of org.apache.druid.segment.data.ColumnarLongsSerializer in project druid by druid-io.

From the class LongCompressionBenchmarkFileGenerator, method main.

/**
 * Generates benchmark data files for several long-value distributions, then writes a
 * compressed copy of each data file for every (CompressionStrategy, LongEncodingStrategy)
 * combination in COMPRESSIONS x ENCODINGS, logging each compressed size in KiB.
 *
 * @param args optional; args[0] overrides the output directory path ({@code dirPath})
 * @throws IOException if reading or writing any of the files fails
 */
public static void main(String[] args) throws IOException {
    if (args.length >= 1) {
        dirPath = args[0];
    }
    GeneratorColumnSchema enumeratedSchema = GeneratorColumnSchema.makeEnumerated("", ValueType.LONG, true, 1, 0d, ImmutableList.of(0, 1, 2, 3, 4), ImmutableList.of(0.95, 0.001, 0.0189, 0.03, 0.0001));
    GeneratorColumnSchema zipfLowSchema = GeneratorColumnSchema.makeZipf("", ValueType.LONG, true, 1, 0d, -1, 1000, 1d);
    GeneratorColumnSchema zipfHighSchema = GeneratorColumnSchema.makeZipf("", ValueType.LONG, true, 1, 0d, -1, 1000, 3d);
    GeneratorColumnSchema sequentialSchema = GeneratorColumnSchema.makeSequential("", ValueType.LONG, true, 1, 0d, 1470187671, 2000000000);
    GeneratorColumnSchema uniformSchema = GeneratorColumnSchema.makeDiscreteUniform("", ValueType.LONG, true, 1, 0d, 0, 1000);
    Map<String, ColumnValueGenerator> generators = new HashMap<>();
    generators.put("enumerate", new ColumnValueGenerator(enumeratedSchema, 1));
    generators.put("zipfLow", new ColumnValueGenerator(zipfLowSchema, 1));
    generators.put("zipfHigh", new ColumnValueGenerator(zipfHighSchema, 1));
    generators.put("sequential", new ColumnValueGenerator(sequentialSchema, 1));
    generators.put("uniform", new ColumnValueGenerator(uniformSchema, 1));
    File dir = new File(dirPath);
    // best-effort: a false return just means the directory already exists
    dir.mkdir();
    // create data files using ColumnValueGenerator
    for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
        final File dataFile = new File(dir, entry.getKey());
        dataFile.delete();
        try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) {
            for (int i = 0; i < ROW_NUM; i++) {
                writer.write((long) entry.getValue().generateRowValue() + "\n");
            }
        }
    }
    // create compressed files using all combinations of CompressionStrategy and LongEncoding provided
    for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
        for (CompressionStrategy compression : COMPRESSIONS) {
            for (CompressionFactory.LongEncodingStrategy encoding : ENCODINGS) {
                String name = entry.getKey() + "-" + compression + "-" + encoding;
                log.info("%s: ", name);
                File compFile = new File(dir, name);
                compFile.delete();
                File dataFile = new File(dir, entry.getKey());
                // close the write-out medium after each combination so its off-heap
                // buffers are released; previously it was created and never closed,
                // leaking direct memory across the generators x compressions x encodings loop
                try (OffHeapMemorySegmentWriteOutMedium medium = new OffHeapMemorySegmentWriteOutMedium()) {
                    ColumnarLongsSerializer writer = CompressionFactory.getLongSerializer("long-benchmark", medium, "long", ByteOrder.nativeOrder(), encoding, compression);
                    try (BufferedReader br = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8);
                        FileChannel output = FileChannel.open(compFile.toPath(), StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE)) {
                        writer.open();
                        String line;
                        while ((line = br.readLine()) != null) {
                            writer.add(Long.parseLong(line));
                        }
                        writer.writeTo(output, null);
                    }
                }
                log.info("%d", compFile.length() / 1024);
            }
        }
    }
}
Also used : ColumnarLongsSerializer(org.apache.druid.segment.data.ColumnarLongsSerializer) HashMap(java.util.HashMap) OffHeapMemorySegmentWriteOutMedium(org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium) FileChannel(java.nio.channels.FileChannel) ColumnValueGenerator(org.apache.druid.segment.generator.ColumnValueGenerator) CompressionStrategy(org.apache.druid.segment.data.CompressionStrategy) GeneratorColumnSchema(org.apache.druid.segment.generator.GeneratorColumnSchema) BufferedReader(java.io.BufferedReader) CompressionFactory(org.apache.druid.segment.data.CompressionFactory) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map) Writer(java.io.Writer)

Aggregations

ColumnarLongsSerializer (org.apache.druid.segment.data.ColumnarLongsSerializer)2 BufferedReader (java.io.BufferedReader)1 File (java.io.File)1 Writer (java.io.Writer)1 FileChannel (java.nio.channels.FileChannel)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 CompressionFactory (org.apache.druid.segment.data.CompressionFactory)1 CompressionStrategy (org.apache.druid.segment.data.CompressionStrategy)1 ColumnValueGenerator (org.apache.druid.segment.generator.ColumnValueGenerator)1 GeneratorColumnSchema (org.apache.druid.segment.generator.GeneratorColumnSchema)1 OffHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium)1 OnHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OnHeapMemorySegmentWriteOutMedium)1 SegmentWriteOutMedium (org.apache.druid.segment.writeout.SegmentWriteOutMedium)1