Search in sources :

Example 1 with BenchmarkColumnSchema

use of io.druid.benchmark.datagen.BenchmarkColumnSchema in project druid by druid-io.

the class LongCompressionBenchmarkFileGenerator method main.

/**
 * Generates long-typed sample data files for several value distributions, then writes a
 * compressed copy of each file for every combination of CompressionStrategy and
 * LongEncodingStrategy, printing the resulting size (in KiB) per combination.
 *
 * @param args optional; args[0] overrides the output directory path ({@code dirPath})
 * @throws IOException        on any file read/write failure
 * @throws URISyntaxException declared for signature compatibility with callers
 */
public static void main(String[] args) throws IOException, URISyntaxException {
    if (args.length >= 1) {
        dirPath = args[0];
    }
    // Schemas chosen to cover distributions with very different compressibility:
    // skewed enumerated, low/high-skew Zipf, sequential, and discrete uniform.
    BenchmarkColumnSchema enumeratedSchema = BenchmarkColumnSchema.makeEnumerated("", ValueType.LONG, true, 1, 0d, ImmutableList.<Object>of(0, 1, 2, 3, 4), ImmutableList.of(0.95, 0.001, 0.0189, 0.03, 0.0001));
    BenchmarkColumnSchema zipfLowSchema = BenchmarkColumnSchema.makeZipf("", ValueType.LONG, true, 1, 0d, -1, 1000, 1d);
    BenchmarkColumnSchema zipfHighSchema = BenchmarkColumnSchema.makeZipf("", ValueType.LONG, true, 1, 0d, -1, 1000, 3d);
    BenchmarkColumnSchema sequentialSchema = BenchmarkColumnSchema.makeSequential("", ValueType.LONG, true, 1, 0d, 1470187671, 2000000000);
    BenchmarkColumnSchema uniformSchema = BenchmarkColumnSchema.makeDiscreteUniform("", ValueType.LONG, true, 1, 0d, 0, 1000);
    Map<String, BenchmarkColumnValueGenerator> generators = new HashMap<>();
    generators.put("enumerate", new BenchmarkColumnValueGenerator(enumeratedSchema, 1));
    generators.put("zipfLow", new BenchmarkColumnValueGenerator(zipfLowSchema, 1));
    generators.put("zipfHigh", new BenchmarkColumnValueGenerator(zipfHighSchema, 1));
    generators.put("sequential", new BenchmarkColumnValueGenerator(sequentialSchema, 1));
    generators.put("uniform", new BenchmarkColumnValueGenerator(uniformSchema, 1));
    File dir = new File(dirPath);
    dir.mkdir();
    // create data files using BenchmarkColumnValueGenerator: one plain-text file per
    // distribution, one long value per line
    for (Map.Entry<String, BenchmarkColumnValueGenerator> entry : generators.entrySet()) {
        final File dataFile = new File(dir, entry.getKey());
        dataFile.delete();
        try (Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(dataFile)))) {
            for (int i = 0; i < ROW_NUM; i++) {
                writer.write((long) entry.getValue().generateRowValue() + "\n");
            }
        }
    }
    // create compressed files using all combinations of CompressionStrategy and LongEncoding provided
    for (Map.Entry<String, BenchmarkColumnValueGenerator> entry : generators.entrySet()) {
        for (CompressedObjectStrategy.CompressionStrategy compression : compressions) {
            for (CompressionFactory.LongEncodingStrategy encoding : encodings) {
                String name = entry.getKey() + "-" + compression.toString() + "-" + encoding.toString();
                System.out.print(name + ": ");
                File compFile = new File(dir, name);
                // delete any stale output so CREATE_NEW below does not fail
                compFile.delete();
                File dataFile = new File(dir, entry.getKey());
                TmpFileIOPeon iopeon = new TmpFileIOPeon(true);
                LongSupplierSerializer writer = CompressionFactory.getLongSerializer(iopeon, "long", ByteOrder.nativeOrder(), encoding, compression);
                // NOTE(review): the reader was previously opened before the try block,
                // leaking the stream if FileChannel.open threw; it is now a managed resource.
                try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(dataFile)));
                     FileChannel output = FileChannel.open(compFile.toPath(), StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE)) {
                    writer.open();
                    String line;
                    while ((line = br.readLine()) != null) {
                        writer.add(Long.parseLong(line));
                    }
                    // serialize into memory first, then write the whole buffer to the channel
                    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
                    writer.closeAndConsolidate(new ByteSink() {

                        @Override
                        public OutputStream openStream() throws IOException {
                            return baos;
                        }
                    });
                    output.write(ByteBuffer.wrap(baos.toByteArray()));
                } finally {
                    iopeon.close();
                }
                System.out.print(compFile.length() / 1024 + "\n");
            }
        }
    }
}
Also used : HashMap(java.util.HashMap) ByteArrayOutputStream(java.io.ByteArrayOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) CompressedObjectStrategy(io.druid.segment.data.CompressedObjectStrategy) BufferedWriter(java.io.BufferedWriter) ByteSink(com.google.common.io.ByteSink) TmpFileIOPeon(io.druid.segment.data.TmpFileIOPeon) InputStreamReader(java.io.InputStreamReader) FileChannel(java.nio.channels.FileChannel) BenchmarkColumnValueGenerator(io.druid.benchmark.datagen.BenchmarkColumnValueGenerator) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) FileOutputStream(java.io.FileOutputStream) BufferedReader(java.io.BufferedReader) OutputStreamWriter(java.io.OutputStreamWriter) CompressionFactory(io.druid.segment.data.CompressionFactory) BenchmarkColumnSchema(io.druid.benchmark.datagen.BenchmarkColumnSchema) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter) Writer(java.io.Writer) LongSupplierSerializer(io.druid.segment.data.LongSupplierSerializer)

Example 2 with BenchmarkColumnSchema

use of io.druid.benchmark.datagen.BenchmarkColumnSchema in project druid by druid-io.

the class BenchmarkDataGeneratorTest method testDiscreteUniform.

@Test
public void testDiscreteUniform() throws Exception {
    // Track value distributions produced by discrete-uniform (and enumerated-uniform) columns.
    final RowValueTracker valueTracker = new RowValueTracker();
    final List<BenchmarkColumnSchema> columnSchemas = new ArrayList<>();
    columnSchemas.add(BenchmarkColumnSchema.makeDiscreteUniform("dimA", ValueType.STRING, false, 1, null, 10, 20));
    columnSchemas.add(BenchmarkColumnSchema.makeEnumeratedDiscreteUniform("dimB", ValueType.STRING, false, 4, null, Arrays.<Object>asList("Hello", "World", "Foo", "Bar")));
    columnSchemas.add(BenchmarkColumnSchema.makeDiscreteUniform("dimC", ValueType.STRING, false, 1, 0.50, 10, 20));
    columnSchemas.add(BenchmarkColumnSchema.makeDiscreteUniform("dimD", ValueType.FLOAT, false, 1, null, 100, 120));
    final BenchmarkDataGenerator generator = new BenchmarkDataGenerator(columnSchemas, 9999, 0, 0, 1000.0);
    // Generate 100 rows and feed each into the tracker.
    for (int rowIndex = 0; rowIndex < 100; rowIndex++) {
        final InputRow generatedRow = generator.nextRow();
        valueTracker.addRow(generatedRow);
    }
    valueTracker.printStuff();
}
Also used : ArrayList(java.util.ArrayList) BenchmarkDataGenerator(io.druid.benchmark.datagen.BenchmarkDataGenerator) InputRow(io.druid.data.input.InputRow) BenchmarkColumnSchema(io.druid.benchmark.datagen.BenchmarkColumnSchema) Test(org.junit.Test)

Example 3 with BenchmarkColumnSchema

use of io.druid.benchmark.datagen.BenchmarkColumnSchema in project druid by druid-io.

the class BenchmarkDataGeneratorTest method testSequential.

@Test
public void testSequential() throws Exception {
    // Track value distributions produced by sequential (and enumerated-sequential) columns.
    final RowValueTracker valueTracker = new RowValueTracker();
    final List<BenchmarkColumnSchema> columnSchemas = new ArrayList<>();
    columnSchemas.add(BenchmarkColumnSchema.makeSequential("dimA", ValueType.STRING, false, 1, null, 10, 20));
    columnSchemas.add(BenchmarkColumnSchema.makeEnumeratedSequential("dimB", ValueType.STRING, false, 1, null, Arrays.<Object>asList("Hello", "World", "Foo", "Bar")));
    columnSchemas.add(BenchmarkColumnSchema.makeSequential("dimC", ValueType.STRING, false, 1, 0.50, 30, 40));
    final BenchmarkDataGenerator generator = new BenchmarkDataGenerator(columnSchemas, 9999, 0, 0, 1000.0);
    // Generate 100 rows and feed each into the tracker.
    for (int rowIndex = 0; rowIndex < 100; rowIndex++) {
        final InputRow generatedRow = generator.nextRow();
        valueTracker.addRow(generatedRow);
    }
    valueTracker.printStuff();
}
Also used : ArrayList(java.util.ArrayList) BenchmarkDataGenerator(io.druid.benchmark.datagen.BenchmarkDataGenerator) InputRow(io.druid.data.input.InputRow) BenchmarkColumnSchema(io.druid.benchmark.datagen.BenchmarkColumnSchema) Test(org.junit.Test)

Example 4 with BenchmarkColumnSchema

use of io.druid.benchmark.datagen.BenchmarkColumnSchema in project druid by druid-io.

the class FloatCompressionBenchmarkFileGenerator method main.

/**
 * Generates float-typed sample data files for several value distributions, then writes a
 * compressed copy of each file for every CompressionStrategy provided, printing the
 * resulting size (in KiB) per combination.
 *
 * @param args optional; args[0] overrides the output directory path ({@code dirPath})
 * @throws IOException        on any file read/write failure
 * @throws URISyntaxException declared for signature compatibility with callers
 */
public static void main(String[] args) throws IOException, URISyntaxException {
    if (args.length >= 1) {
        dirPath = args[0];
    }
    // Schemas chosen to cover distributions with very different compressibility:
    // skewed enumerated, low/high-skew Zipf, sequential, and continuous uniform.
    BenchmarkColumnSchema enumeratedSchema = BenchmarkColumnSchema.makeEnumerated("", ValueType.FLOAT, true, 1, 0d, ImmutableList.<Object>of(0f, 1.1f, 2.2f, 3.3f, 4.4f), ImmutableList.of(0.95, 0.001, 0.0189, 0.03, 0.0001));
    BenchmarkColumnSchema zipfLowSchema = BenchmarkColumnSchema.makeZipf("", ValueType.FLOAT, true, 1, 0d, -1, 1000, 1d);
    BenchmarkColumnSchema zipfHighSchema = BenchmarkColumnSchema.makeZipf("", ValueType.FLOAT, true, 1, 0d, -1, 1000, 3d);
    BenchmarkColumnSchema sequentialSchema = BenchmarkColumnSchema.makeSequential("", ValueType.FLOAT, true, 1, 0d, 1470187671, 2000000000);
    BenchmarkColumnSchema uniformSchema = BenchmarkColumnSchema.makeContinuousUniform("", ValueType.FLOAT, true, 1, 0d, 0, 1000);
    Map<String, BenchmarkColumnValueGenerator> generators = new HashMap<>();
    generators.put("enumerate", new BenchmarkColumnValueGenerator(enumeratedSchema, 1));
    generators.put("zipfLow", new BenchmarkColumnValueGenerator(zipfLowSchema, 1));
    generators.put("zipfHigh", new BenchmarkColumnValueGenerator(zipfHighSchema, 1));
    generators.put("sequential", new BenchmarkColumnValueGenerator(sequentialSchema, 1));
    generators.put("uniform", new BenchmarkColumnValueGenerator(uniformSchema, 1));
    File dir = new File(dirPath);
    dir.mkdir();
    // create data files using BenchmarkColumnValueGenerator: one plain-text file per
    // distribution, one float value per line
    for (Map.Entry<String, BenchmarkColumnValueGenerator> entry : generators.entrySet()) {
        final File dataFile = new File(dir, entry.getKey());
        dataFile.delete();
        try (Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(dataFile)))) {
            for (int i = 0; i < ROW_NUM; i++) {
                writer.write((Float) entry.getValue().generateRowValue() + "\n");
            }
        }
    }
    // create compressed files using all combinations of CompressionStrategy and FloatEncoding provided
    for (Map.Entry<String, BenchmarkColumnValueGenerator> entry : generators.entrySet()) {
        for (CompressedObjectStrategy.CompressionStrategy compression : compressions) {
            String name = entry.getKey() + "-" + compression.toString();
            System.out.print(name + ": ");
            File compFile = new File(dir, name);
            // delete any stale output so CREATE_NEW below does not fail
            compFile.delete();
            File dataFile = new File(dir, entry.getKey());
            TmpFileIOPeon iopeon = new TmpFileIOPeon(true);
            FloatSupplierSerializer writer = CompressionFactory.getFloatSerializer(iopeon, "float", ByteOrder.nativeOrder(), compression);
            // NOTE(review): the reader was previously opened before the try block,
            // leaking the stream if FileChannel.open threw; it is now a managed resource.
            try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(dataFile)));
                 FileChannel output = FileChannel.open(compFile.toPath(), StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE)) {
                writer.open();
                String line;
                while ((line = br.readLine()) != null) {
                    writer.add(Float.parseFloat(line));
                }
                // serialize into memory first, then write the whole buffer to the channel
                final ByteArrayOutputStream baos = new ByteArrayOutputStream();
                writer.closeAndConsolidate(new ByteSink() {

                    @Override
                    public OutputStream openStream() throws IOException {
                        return baos;
                    }
                });
                output.write(ByteBuffer.wrap(baos.toByteArray()));
            } finally {
                iopeon.close();
            }
            System.out.print(compFile.length() / 1024 + "\n");
        }
    }
}
Also used : HashMap(java.util.HashMap) ByteArrayOutputStream(java.io.ByteArrayOutputStream) OutputStream(java.io.OutputStream) FileOutputStream(java.io.FileOutputStream) CompressedObjectStrategy(io.druid.segment.data.CompressedObjectStrategy) BufferedWriter(java.io.BufferedWriter) ByteSink(com.google.common.io.ByteSink) TmpFileIOPeon(io.druid.segment.data.TmpFileIOPeon) FloatSupplierSerializer(io.druid.segment.data.FloatSupplierSerializer) InputStreamReader(java.io.InputStreamReader) FileChannel(java.nio.channels.FileChannel) BenchmarkColumnValueGenerator(io.druid.benchmark.datagen.BenchmarkColumnValueGenerator) ByteArrayOutputStream(java.io.ByteArrayOutputStream) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) FileOutputStream(java.io.FileOutputStream) BufferedReader(java.io.BufferedReader) OutputStreamWriter(java.io.OutputStreamWriter) BenchmarkColumnSchema(io.druid.benchmark.datagen.BenchmarkColumnSchema) File(java.io.File) HashMap(java.util.HashMap) Map(java.util.Map) OutputStreamWriter(java.io.OutputStreamWriter) BufferedWriter(java.io.BufferedWriter) Writer(java.io.Writer)

Example 5 with BenchmarkColumnSchema

use of io.druid.benchmark.datagen.BenchmarkColumnSchema in project druid by druid-io.

the class BenchmarkDataGeneratorTest method testEnumerated.

@Test
public void testEnumerated() throws Exception {
    // Track the observed frequency of each enumerated value against its weight.
    final RowValueTracker valueTracker = new RowValueTracker();
    final List<BenchmarkColumnSchema> columnSchemas = new ArrayList<>();
    columnSchemas.add(BenchmarkColumnSchema.makeEnumerated("dimA", ValueType.STRING, false, 1, null, Arrays.<Object>asList("Hello", "World", "Foo", "Bar"), Arrays.<Double>asList(0.5, 0.25, 0.15, 0.10)));
    final BenchmarkDataGenerator generator = new BenchmarkDataGenerator(columnSchemas, 9999, 0, 0, 1000.0);
    // Generate 10000 rows and feed each into the tracker.
    for (int rowIndex = 0; rowIndex < 10000; rowIndex++) {
        final InputRow generatedRow = generator.nextRow();
        valueTracker.addRow(generatedRow);
    }
    valueTracker.printStuff();
}
Also used : ArrayList(java.util.ArrayList) BenchmarkDataGenerator(io.druid.benchmark.datagen.BenchmarkDataGenerator) InputRow(io.druid.data.input.InputRow) BenchmarkColumnSchema(io.druid.benchmark.datagen.BenchmarkColumnSchema) Test(org.junit.Test)

Aggregations

BenchmarkColumnSchema (io.druid.benchmark.datagen.BenchmarkColumnSchema)10 BenchmarkDataGenerator (io.druid.benchmark.datagen.BenchmarkDataGenerator)8 InputRow (io.druid.data.input.InputRow)8 ArrayList (java.util.ArrayList)8 Test (org.junit.Test)8 ByteSink (com.google.common.io.ByteSink)2 BenchmarkColumnValueGenerator (io.druid.benchmark.datagen.BenchmarkColumnValueGenerator)2 CompressedObjectStrategy (io.druid.segment.data.CompressedObjectStrategy)2 TmpFileIOPeon (io.druid.segment.data.TmpFileIOPeon)2 BufferedReader (java.io.BufferedReader)2 BufferedWriter (java.io.BufferedWriter)2 ByteArrayOutputStream (java.io.ByteArrayOutputStream)2 File (java.io.File)2 FileInputStream (java.io.FileInputStream)2 FileOutputStream (java.io.FileOutputStream)2 IOException (java.io.IOException)2 InputStreamReader (java.io.InputStreamReader)2 OutputStream (java.io.OutputStream)2 OutputStreamWriter (java.io.OutputStreamWriter)2 Writer (java.io.Writer)2