Usage of io.druid.benchmark.datagen.BenchmarkColumnSchema in project druid by druid-io:
class LongCompressionBenchmarkFileGenerator, method main.
/**
 * Generates benchmark input for long-column compression: writes one plain-text data file
 * per value distribution, then one compressed file per (distribution, compression, encoding)
 * combination, printing each compressed file's size in KiB.
 *
 * @param args optional; args[0] overrides the output directory path
 * @throws IOException on any file read/write failure
 * @throws URISyntaxException declared for interface compatibility with callers/scripts
 */
public static void main(String[] args) throws IOException, URISyntaxException {
  // Optional first CLI argument overrides the default output directory.
  if (args.length >= 1) {
    dirPath = args[0];
  }

  // Schemas covering a spread of long-value distributions (skewed enumerated, two zipf
  // exponents, sequential timestamps-like values, discrete uniform).
  BenchmarkColumnSchema enumeratedSchema = BenchmarkColumnSchema.makeEnumerated(
      "", ValueType.LONG, true, 1, 0d,
      ImmutableList.<Object>of(0, 1, 2, 3, 4),
      ImmutableList.of(0.95, 0.001, 0.0189, 0.03, 0.0001)
  );
  BenchmarkColumnSchema zipfLowSchema = BenchmarkColumnSchema.makeZipf(
      "", ValueType.LONG, true, 1, 0d, -1, 1000, 1d
  );
  BenchmarkColumnSchema zipfHighSchema = BenchmarkColumnSchema.makeZipf(
      "", ValueType.LONG, true, 1, 0d, -1, 1000, 3d
  );
  BenchmarkColumnSchema sequentialSchema = BenchmarkColumnSchema.makeSequential(
      "", ValueType.LONG, true, 1, 0d, 1470187671, 2000000000
  );
  BenchmarkColumnSchema uniformSchema = BenchmarkColumnSchema.makeDiscreteUniform(
      "", ValueType.LONG, true, 1, 0d, 0, 1000
  );

  Map<String, BenchmarkColumnValueGenerator> generators = new HashMap<>();
  generators.put("enumerate", new BenchmarkColumnValueGenerator(enumeratedSchema, 1));
  generators.put("zipfLow", new BenchmarkColumnValueGenerator(zipfLowSchema, 1));
  generators.put("zipfHigh", new BenchmarkColumnValueGenerator(zipfHighSchema, 1));
  generators.put("sequential", new BenchmarkColumnValueGenerator(sequentialSchema, 1));
  generators.put("uniform", new BenchmarkColumnValueGenerator(uniformSchema, 1));

  File dir = new File(dirPath);
  dir.mkdir();

  // Create one plain-text data file per distribution using BenchmarkColumnValueGenerator,
  // one long value per line.
  for (Map.Entry<String, BenchmarkColumnValueGenerator> entry : generators.entrySet()) {
    final File dataFile = new File(dir, entry.getKey());
    dataFile.delete();
    try (Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(dataFile)))) {
      for (int i = 0; i < ROW_NUM; i++) {
        writer.write((long) entry.getValue().generateRowValue() + "\n");
      }
    }
  }

  // Create compressed files using all combinations of CompressionStrategy and LongEncoding provided.
  for (Map.Entry<String, BenchmarkColumnValueGenerator> entry : generators.entrySet()) {
    for (CompressedObjectStrategy.CompressionStrategy compression : compressions) {
      for (CompressionFactory.LongEncodingStrategy encoding : encodings) {
        String name = entry.getKey() + "-" + compression.toString() + "-" + encoding.toString();
        System.out.print(name + ": ");
        File compFile = new File(dir, name);
        compFile.delete();
        File dataFile = new File(dir, entry.getKey());

        // The peon owns temp files; guarantee cleanup even if serializer/reader
        // construction below throws (the original leaked it in that case).
        TmpFileIOPeon iopeon = new TmpFileIOPeon(true);
        try {
          LongSupplierSerializer writer = CompressionFactory.getLongSerializer(
              iopeon, "long", ByteOrder.nativeOrder(), encoding, compression
          );
          // Reader and output channel are both managed by try-with-resources.
          try (
              BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(dataFile)));
              FileChannel output = FileChannel.open(
                  compFile.toPath(), StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE
              )
          ) {
            writer.open();
            String line;
            while ((line = br.readLine()) != null) {
              writer.add(Long.parseLong(line));
            }
            // closeAndConsolidate writes the finished column into the sink; buffer it
            // in memory, then flush the bytes to the output channel in one write.
            final ByteArrayOutputStream baos = new ByteArrayOutputStream();
            writer.closeAndConsolidate(new ByteSink() {
              @Override
              public OutputStream openStream() throws IOException {
                return baos;
              }
            });
            output.write(ByteBuffer.wrap(baos.toByteArray()));
          }
        } finally {
          iopeon.close();
        }
        // Report compressed size in KiB.
        System.out.println(compFile.length() / 1024);
      }
    }
  }
}
Usage of io.druid.benchmark.datagen.BenchmarkColumnSchema in project druid by druid-io:
class BenchmarkDataGeneratorTest, method testDiscreteUniform.
@Test
public void testDiscreteUniform() throws Exception {
  // Columns exercising the discrete-uniform generators: plain ranges, an enumerated
  // value set, a nullable variant, and a FLOAT-typed range.
  final List<BenchmarkColumnSchema> columnSchemas = new ArrayList<>();
  columnSchemas.add(
      BenchmarkColumnSchema.makeDiscreteUniform("dimA", ValueType.STRING, false, 1, null, 10, 20)
  );
  columnSchemas.add(
      BenchmarkColumnSchema.makeEnumeratedDiscreteUniform(
          "dimB", ValueType.STRING, false, 4, null,
          Arrays.<Object>asList("Hello", "World", "Foo", "Bar")
      )
  );
  columnSchemas.add(
      BenchmarkColumnSchema.makeDiscreteUniform("dimC", ValueType.STRING, false, 1, 0.50, 10, 20)
  );
  columnSchemas.add(
      BenchmarkColumnSchema.makeDiscreteUniform("dimD", ValueType.FLOAT, false, 1, null, 100, 120)
  );

  final BenchmarkDataGenerator generator = new BenchmarkDataGenerator(columnSchemas, 9999, 0, 0, 1000.0);
  final RowValueTracker valueTracker = new RowValueTracker();
  for (int rowIndex = 0; rowIndex < 100; rowIndex++) {
    //System.out.println("U-ROW: " + row);
    valueTracker.addRow(generator.nextRow());
  }
  valueTracker.printStuff();
}
Usage of io.druid.benchmark.datagen.BenchmarkColumnSchema in project druid by druid-io:
class BenchmarkDataGeneratorTest, method testSequential.
@Test
public void testSequential() throws Exception {
  // Columns exercising the sequential generators: a numeric range, an enumerated
  // sequence, and a nullable numeric range.
  final List<BenchmarkColumnSchema> columnSchemas = new ArrayList<>();
  columnSchemas.add(
      BenchmarkColumnSchema.makeSequential("dimA", ValueType.STRING, false, 1, null, 10, 20)
  );
  columnSchemas.add(
      BenchmarkColumnSchema.makeEnumeratedSequential(
          "dimB", ValueType.STRING, false, 1, null,
          Arrays.<Object>asList("Hello", "World", "Foo", "Bar")
      )
  );
  columnSchemas.add(
      BenchmarkColumnSchema.makeSequential("dimC", ValueType.STRING, false, 1, 0.50, 30, 40)
  );

  final BenchmarkDataGenerator generator = new BenchmarkDataGenerator(columnSchemas, 9999, 0, 0, 1000.0);
  final RowValueTracker valueTracker = new RowValueTracker();
  for (int rowIndex = 0; rowIndex < 100; rowIndex++) {
    //System.out.println("S-ROW: " + row);
    valueTracker.addRow(generator.nextRow());
  }
  valueTracker.printStuff();
}
Usage of io.druid.benchmark.datagen.BenchmarkColumnSchema in project druid by druid-io:
class FloatCompressionBenchmarkFileGenerator, method main.
/**
 * Generates benchmark input for float-column compression: writes one plain-text data file
 * per value distribution, then one compressed file per (distribution, compression)
 * combination, printing each compressed file's size in KiB.
 *
 * @param args optional; args[0] overrides the output directory path
 * @throws IOException on any file read/write failure
 * @throws URISyntaxException declared for interface compatibility with callers/scripts
 */
public static void main(String[] args) throws IOException, URISyntaxException {
  // Optional first CLI argument overrides the default output directory.
  if (args.length >= 1) {
    dirPath = args[0];
  }

  // Schemas covering a spread of float-value distributions (skewed enumerated, two zipf
  // exponents, sequential values, continuous uniform).
  BenchmarkColumnSchema enumeratedSchema = BenchmarkColumnSchema.makeEnumerated(
      "", ValueType.FLOAT, true, 1, 0d,
      ImmutableList.<Object>of(0f, 1.1f, 2.2f, 3.3f, 4.4f),
      ImmutableList.of(0.95, 0.001, 0.0189, 0.03, 0.0001)
  );
  BenchmarkColumnSchema zipfLowSchema = BenchmarkColumnSchema.makeZipf(
      "", ValueType.FLOAT, true, 1, 0d, -1, 1000, 1d
  );
  BenchmarkColumnSchema zipfHighSchema = BenchmarkColumnSchema.makeZipf(
      "", ValueType.FLOAT, true, 1, 0d, -1, 1000, 3d
  );
  BenchmarkColumnSchema sequentialSchema = BenchmarkColumnSchema.makeSequential(
      "", ValueType.FLOAT, true, 1, 0d, 1470187671, 2000000000
  );
  BenchmarkColumnSchema uniformSchema = BenchmarkColumnSchema.makeContinuousUniform(
      "", ValueType.FLOAT, true, 1, 0d, 0, 1000
  );

  Map<String, BenchmarkColumnValueGenerator> generators = new HashMap<>();
  generators.put("enumerate", new BenchmarkColumnValueGenerator(enumeratedSchema, 1));
  generators.put("zipfLow", new BenchmarkColumnValueGenerator(zipfLowSchema, 1));
  generators.put("zipfHigh", new BenchmarkColumnValueGenerator(zipfHighSchema, 1));
  generators.put("sequential", new BenchmarkColumnValueGenerator(sequentialSchema, 1));
  generators.put("uniform", new BenchmarkColumnValueGenerator(uniformSchema, 1));

  File dir = new File(dirPath);
  dir.mkdir();

  // Create one plain-text data file per distribution using BenchmarkColumnValueGenerator,
  // one float value per line.
  for (Map.Entry<String, BenchmarkColumnValueGenerator> entry : generators.entrySet()) {
    final File dataFile = new File(dir, entry.getKey());
    dataFile.delete();
    try (Writer writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(dataFile)))) {
      for (int i = 0; i < ROW_NUM; i++) {
        writer.write((Float) entry.getValue().generateRowValue() + "\n");
      }
    }
  }

  // Create compressed files using all combinations of CompressionStrategy provided.
  for (Map.Entry<String, BenchmarkColumnValueGenerator> entry : generators.entrySet()) {
    for (CompressedObjectStrategy.CompressionStrategy compression : compressions) {
      String name = entry.getKey() + "-" + compression.toString();
      System.out.print(name + ": ");
      File compFile = new File(dir, name);
      compFile.delete();
      File dataFile = new File(dir, entry.getKey());

      // The peon owns temp files; guarantee cleanup even if serializer/reader
      // construction below throws (the original leaked it in that case).
      TmpFileIOPeon iopeon = new TmpFileIOPeon(true);
      try {
        FloatSupplierSerializer writer = CompressionFactory.getFloatSerializer(
            iopeon, "float", ByteOrder.nativeOrder(), compression
        );
        // Reader and output channel are both managed by try-with-resources.
        try (
            BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(dataFile)));
            FileChannel output = FileChannel.open(
                compFile.toPath(), StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE
            )
        ) {
          writer.open();
          String line;
          while ((line = br.readLine()) != null) {
            writer.add(Float.parseFloat(line));
          }
          // closeAndConsolidate writes the finished column into the sink; buffer it
          // in memory, then flush the bytes to the output channel in one write.
          final ByteArrayOutputStream baos = new ByteArrayOutputStream();
          writer.closeAndConsolidate(new ByteSink() {
            @Override
            public OutputStream openStream() throws IOException {
              return baos;
            }
          });
          output.write(ByteBuffer.wrap(baos.toByteArray()));
        }
      } finally {
        iopeon.close();
      }
      // Report compressed size in KiB.
      System.out.println(compFile.length() / 1024);
    }
  }
}
Usage of io.druid.benchmark.datagen.BenchmarkColumnSchema in project druid by druid-io:
class BenchmarkDataGeneratorTest, method testEnumerated.
@Test
public void testEnumerated() throws Exception {
  // Single enumerated STRING column whose values are drawn with fixed probabilities.
  final List<BenchmarkColumnSchema> columnSchemas = new ArrayList<>();
  columnSchemas.add(
      BenchmarkColumnSchema.makeEnumerated(
          "dimA", ValueType.STRING, false, 1, null,
          Arrays.<Object>asList("Hello", "World", "Foo", "Bar"),
          Arrays.<Double>asList(0.5, 0.25, 0.15, 0.10)
      )
  );

  final BenchmarkDataGenerator generator = new BenchmarkDataGenerator(columnSchemas, 9999, 0, 0, 1000.0);
  final RowValueTracker valueTracker = new RowValueTracker();
  for (int rowIndex = 0; rowIndex < 10000; rowIndex++) {
    //System.out.println("Z-ROW: " + row);
    valueTracker.addRow(generator.nextRow());
  }
  valueTracker.printStuff();
}
Aggregations