Search in sources :

Example 1 with GeneratorColumnSchema

use of org.apache.druid.segment.generator.GeneratorColumnSchema in project druid by druid-io.

the class FloatCompressionBenchmarkFileGenerator method main.

public static void main(String[] args) throws IOException {
    if (args.length >= 1) {
        dirPath = args[0];
    }
    GeneratorColumnSchema enumeratedSchema = GeneratorColumnSchema.makeEnumerated("", ValueType.FLOAT, true, 1, 0d, ImmutableList.of(0f, 1.1f, 2.2f, 3.3f, 4.4f), ImmutableList.of(0.95, 0.001, 0.0189, 0.03, 0.0001));
    GeneratorColumnSchema zipfLowSchema = GeneratorColumnSchema.makeZipf("", ValueType.FLOAT, true, 1, 0d, -1, 1000, 1d);
    GeneratorColumnSchema zipfHighSchema = GeneratorColumnSchema.makeZipf("", ValueType.FLOAT, true, 1, 0d, -1, 1000, 3d);
    GeneratorColumnSchema sequentialSchema = GeneratorColumnSchema.makeSequential("", ValueType.FLOAT, true, 1, 0d, 1470187671, 2000000000);
    GeneratorColumnSchema uniformSchema = GeneratorColumnSchema.makeContinuousUniform("", ValueType.FLOAT, true, 1, 0d, 0, 1000);
    Map<String, ColumnValueGenerator> generators = new HashMap<>();
    generators.put("enumerate", new ColumnValueGenerator(enumeratedSchema, 1));
    generators.put("zipfLow", new ColumnValueGenerator(zipfLowSchema, 1));
    generators.put("zipfHigh", new ColumnValueGenerator(zipfHighSchema, 1));
    generators.put("sequential", new ColumnValueGenerator(sequentialSchema, 1));
    generators.put("uniform", new ColumnValueGenerator(uniformSchema, 1));
    File dir = new File(dirPath);
    dir.mkdir();
    // create data files using BenchmarkColunValueGenerator
    for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
        final File dataFile = new File(dir, entry.getKey());
        dataFile.delete();
        try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) {
            for (int i = 0; i < ROW_NUM; i++) {
                writer.write((Float) entry.getValue().generateRowValue() + "\n");
            }
        }
    }
    // create compressed files using all combinations of CompressionStrategy and FloatEncoding provided
    for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
        for (CompressionStrategy compression : COMPRESSIONS) {
            String name = entry.getKey() + "-" + compression;
            log.info("%s: ", name);
            File compFile = new File(dir, name);
            compFile.delete();
            File dataFile = new File(dir, entry.getKey());
            ColumnarFloatsSerializer writer = CompressionFactory.getFloatSerializer("float-benchmark", new OffHeapMemorySegmentWriteOutMedium(), "float", ByteOrder.nativeOrder(), compression);
            try (BufferedReader br = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8);
                FileChannel output = FileChannel.open(compFile.toPath(), StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE)) {
                writer.open();
                String line;
                while ((line = br.readLine()) != null) {
                    writer.add(Float.parseFloat(line));
                }
                writer.writeTo(output, null);
            }
            log.info("%d", compFile.length() / 1024);
        }
    }
}
Also used : ColumnarFloatsSerializer(org.apache.druid.segment.data.ColumnarFloatsSerializer) HashMap(java.util.HashMap) OffHeapMemorySegmentWriteOutMedium(org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium) FileChannel(java.nio.channels.FileChannel) ColumnValueGenerator(org.apache.druid.segment.generator.ColumnValueGenerator) CompressionStrategy(org.apache.druid.segment.data.CompressionStrategy) GeneratorColumnSchema(org.apache.druid.segment.generator.GeneratorColumnSchema) BufferedReader(java.io.BufferedReader) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map) Writer(java.io.Writer)

Example 2 with GeneratorColumnSchema

use of org.apache.druid.segment.generator.GeneratorColumnSchema in project druid by druid-io.

the class IndexedTableJoinCursorBenchmark method makeQueryableIndexSegment.

public static QueryableIndexSegment makeQueryableIndexSegment(Closer closer, String dataSource, int rowsPerSegment) {
    final List<GeneratorColumnSchema> schemaColumnsInfo = ImmutableList.of(GeneratorColumnSchema.makeSequential("stringKey", ValueType.STRING, false, 1, null, 0, rowsPerSegment), GeneratorColumnSchema.makeSequential("longKey", ValueType.LONG, false, 1, null, 0, rowsPerSegment), GeneratorColumnSchema.makeLazyZipf("string1", ValueType.STRING, false, 1, 0.1, 0, rowsPerSegment, 2.0), GeneratorColumnSchema.makeLazyZipf("string2", ValueType.STRING, false, 1, 0.3, 0, 1000000, 1.5), GeneratorColumnSchema.makeLazyZipf("string3", ValueType.STRING, false, 1, 0.12, 0, 1000, 1.25), GeneratorColumnSchema.makeLazyZipf("string4", ValueType.STRING, false, 1, 0.22, 0, 12000, 3.0), GeneratorColumnSchema.makeLazyZipf("string5", ValueType.STRING, false, 1, 0.05, 0, 33333, 1.8), GeneratorColumnSchema.makeLazyZipf("long1", ValueType.LONG, false, 1, 0.1, 0, 1001, 2.0), GeneratorColumnSchema.makeLazyZipf("long2", ValueType.LONG, false, 1, 0.01, 0, 666666, 2.2), GeneratorColumnSchema.makeLazyZipf("long3", ValueType.LONG, false, 1, 0.12, 0, 1000000, 2.5), GeneratorColumnSchema.makeLazyZipf("long4", ValueType.LONG, false, 1, 0.4, 0, 23, 1.2), GeneratorColumnSchema.makeLazyZipf("long5", ValueType.LONG, false, 1, 0.33, 0, 9999, 1.5), GeneratorColumnSchema.makeLazyZipf("double1", ValueType.DOUBLE, false, 1, 0.1, 0, 333, 2.2), GeneratorColumnSchema.makeLazyZipf("double2", ValueType.DOUBLE, false, 1, 0.01, 0, 4021, 2.5), GeneratorColumnSchema.makeLazyZipf("double3", ValueType.DOUBLE, false, 1, 0.41, 0, 90210, 4.0), GeneratorColumnSchema.makeLazyZipf("double4", ValueType.DOUBLE, false, 1, 0.5, 0, 5555555, 1.2), GeneratorColumnSchema.makeLazyZipf("double5", ValueType.DOUBLE, false, 1, 0.23, 0, 80, 1.8), GeneratorColumnSchema.makeLazyZipf("float1", ValueType.FLOAT, false, 1, 0.11, 0, 1000000, 1.7), GeneratorColumnSchema.makeLazyZipf("float2", ValueType.FLOAT, false, 1, 0.4, 0, 10, 1.5), GeneratorColumnSchema.makeLazyZipf("float3", ValueType.FLOAT, false, 1, 0.8, 0, 5000, 2.3), GeneratorColumnSchema.makeLazyZipf("float4", ValueType.FLOAT, false, 1, 0.999, 0, 14440, 2.0), GeneratorColumnSchema.makeLazyZipf("float5", ValueType.FLOAT, false, 1, 0.001, 0, 1029, 1.5));
    final List<AggregatorFactory> aggs = new ArrayList<>();
    aggs.add(new CountAggregatorFactory("rows"));
    final Interval interval = Intervals.of("2000-01-01/P1D");
    final GeneratorSchemaInfo schema = new GeneratorSchemaInfo(schemaColumnsInfo, aggs, interval, false);
    final DataSegment dataSegment = DataSegment.builder().dataSource(dataSource).interval(schema.getDataInterval()).version("1").shardSpec(new LinearShardSpec(0)).size(0).build();
    final QueryableIndex index = closer.register(new SegmentGenerator()).generate(dataSegment, schema, Granularities.NONE, rowsPerSegment);
    return closer.register(new QueryableIndexSegment(index, SegmentId.dummy(dataSource)));
}
Also used : QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) ArrayList(java.util.ArrayList) GeneratorColumnSchema(org.apache.druid.segment.generator.GeneratorColumnSchema) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DataSegment(org.apache.druid.timeline.DataSegment) SegmentGenerator(org.apache.druid.segment.generator.SegmentGenerator) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) QueryableIndex(org.apache.druid.segment.QueryableIndex) Interval(org.joda.time.Interval)

Example 3 with GeneratorColumnSchema

use of org.apache.druid.segment.generator.GeneratorColumnSchema in project druid by druid-io.

the class LongCompressionBenchmarkFileGenerator method main.

public static void main(String[] args) throws IOException {
    if (args.length >= 1) {
        dirPath = args[0];
    }
    GeneratorColumnSchema enumeratedSchema = GeneratorColumnSchema.makeEnumerated("", ValueType.LONG, true, 1, 0d, ImmutableList.of(0, 1, 2, 3, 4), ImmutableList.of(0.95, 0.001, 0.0189, 0.03, 0.0001));
    GeneratorColumnSchema zipfLowSchema = GeneratorColumnSchema.makeZipf("", ValueType.LONG, true, 1, 0d, -1, 1000, 1d);
    GeneratorColumnSchema zipfHighSchema = GeneratorColumnSchema.makeZipf("", ValueType.LONG, true, 1, 0d, -1, 1000, 3d);
    GeneratorColumnSchema sequentialSchema = GeneratorColumnSchema.makeSequential("", ValueType.LONG, true, 1, 0d, 1470187671, 2000000000);
    GeneratorColumnSchema uniformSchema = GeneratorColumnSchema.makeDiscreteUniform("", ValueType.LONG, true, 1, 0d, 0, 1000);
    Map<String, ColumnValueGenerator> generators = new HashMap<>();
    generators.put("enumerate", new ColumnValueGenerator(enumeratedSchema, 1));
    generators.put("zipfLow", new ColumnValueGenerator(zipfLowSchema, 1));
    generators.put("zipfHigh", new ColumnValueGenerator(zipfHighSchema, 1));
    generators.put("sequential", new ColumnValueGenerator(sequentialSchema, 1));
    generators.put("uniform", new ColumnValueGenerator(uniformSchema, 1));
    File dir = new File(dirPath);
    dir.mkdir();
    // create data files using BenchmarkColunValueGenerator
    for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
        final File dataFile = new File(dir, entry.getKey());
        dataFile.delete();
        try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) {
            for (int i = 0; i < ROW_NUM; i++) {
                writer.write((long) entry.getValue().generateRowValue() + "\n");
            }
        }
    }
    // create compressed files using all combinations of CompressionStrategy and LongEncoding provided
    for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
        for (CompressionStrategy compression : COMPRESSIONS) {
            for (CompressionFactory.LongEncodingStrategy encoding : ENCODINGS) {
                String name = entry.getKey() + "-" + compression + "-" + encoding;
                log.info("%s: ", name);
                File compFile = new File(dir, name);
                compFile.delete();
                File dataFile = new File(dir, entry.getKey());
                ColumnarLongsSerializer writer = CompressionFactory.getLongSerializer("long-benchmark", new OffHeapMemorySegmentWriteOutMedium(), "long", ByteOrder.nativeOrder(), encoding, compression);
                try (BufferedReader br = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8);
                    FileChannel output = FileChannel.open(compFile.toPath(), StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE)) {
                    writer.open();
                    String line;
                    while ((line = br.readLine()) != null) {
                        writer.add(Long.parseLong(line));
                    }
                    writer.writeTo(output, null);
                }
                log.info("%d", compFile.length() / 1024);
            }
        }
    }
}
Also used : ColumnarLongsSerializer(org.apache.druid.segment.data.ColumnarLongsSerializer) HashMap(java.util.HashMap) OffHeapMemorySegmentWriteOutMedium(org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium) FileChannel(java.nio.channels.FileChannel) ColumnValueGenerator(org.apache.druid.segment.generator.ColumnValueGenerator) CompressionStrategy(org.apache.druid.segment.data.CompressionStrategy) GeneratorColumnSchema(org.apache.druid.segment.generator.GeneratorColumnSchema) BufferedReader(java.io.BufferedReader) CompressionFactory(org.apache.druid.segment.data.CompressionFactory) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map) Writer(java.io.Writer)

Aggregations

GeneratorColumnSchema (org.apache.druid.segment.generator.GeneratorColumnSchema)3 BufferedReader (java.io.BufferedReader)2 File (java.io.File)2 Writer (java.io.Writer)2 FileChannel (java.nio.channels.FileChannel)2 HashMap (java.util.HashMap)2 Map (java.util.Map)2 CompressionStrategy (org.apache.druid.segment.data.CompressionStrategy)2 ColumnValueGenerator (org.apache.druid.segment.generator.ColumnValueGenerator)2 OffHeapMemorySegmentWriteOutMedium (org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium)2 ArrayList (java.util.ArrayList)1 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)1 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)1 QueryableIndex (org.apache.druid.segment.QueryableIndex)1 QueryableIndexSegment (org.apache.druid.segment.QueryableIndexSegment)1 ColumnarFloatsSerializer (org.apache.druid.segment.data.ColumnarFloatsSerializer)1 ColumnarLongsSerializer (org.apache.druid.segment.data.ColumnarLongsSerializer)1 CompressionFactory (org.apache.druid.segment.data.CompressionFactory)1 GeneratorSchemaInfo (org.apache.druid.segment.generator.GeneratorSchemaInfo)1 SegmentGenerator (org.apache.druid.segment.generator.SegmentGenerator)1