use of org.apache.druid.segment.data.CompressionStrategy in project druid by druid-io.
the class DictionaryEncodedColumnMerger method setupEncodedValueWriter.
/**
 * Picks the serializer used for the encoded (dictionary id) values of this dimension and opens it.
 *
 * <p>The choice depends on two things read from the merger's state: whether the column
 * capabilities report multiple values, and whether the index spec's dimension compression is
 * anything other than {@link CompressionStrategy#UNCOMPRESSED}.
 *
 * @throws IOException if creating or opening the chosen serializer fails
 */
protected void setupEncodedValueWriter() throws IOException {
  final CompressionStrategy strategy = indexSpec.getDimensionCompression();
  final String baseName = StringUtils.format("%s.forward_dim", dimensionName);
  final boolean multiValued = capabilities.hasMultipleValues().isTrue();
  final boolean uncompressed = strategy == CompressionStrategy.UNCOMPRESSED;

  if (multiValued && uncompressed) {
    encodedValueSerializer = new VSizeColumnarMultiIntsSerializer(
        dimensionName,
        segmentWriteOutMedium,
        cardinality
    );
  } else if (multiValued) {
    encodedValueSerializer = V3CompressedVSizeColumnarMultiIntsSerializer.create(
        dimensionName,
        segmentWriteOutMedium,
        baseName,
        cardinality,
        strategy
    );
  } else if (uncompressed) {
    encodedValueSerializer = new VSizeColumnarIntsSerializer(segmentWriteOutMedium, cardinality);
  } else {
    encodedValueSerializer = CompressedVSizeColumnarIntsSerializer.create(
        dimensionName,
        segmentWriteOutMedium,
        baseName,
        cardinality,
        strategy
    );
  }

  // Whichever serializer was selected must be opened before values are written to it.
  encodedValueSerializer.open();
}
use of org.apache.druid.segment.data.CompressionStrategy in project druid by druid-io.
the class FloatCompressionBenchmarkFileGenerator method main.
/**
 * Generates the float benchmark input files and one compressed file per compression strategy.
 *
 * <p>For each of the five value generators (enumerate, zipfLow, zipfHigh, sequential, uniform)
 * this writes {@code ROW_NUM} generated values, one per line, to a text file named after the
 * generator. It then re-reads each text file and serializes it once per entry of
 * {@code COMPRESSIONS} into a file named {@code <generator>-<compression>}, logging the
 * resulting size in KiB.
 *
 * @param args optional; when present, {@code args[0]} replaces {@code dirPath} as the output directory
 * @throws IOException if a data file or compressed file cannot be written or read
 */
public static void main(String[] args) throws IOException {
  if (args.length >= 1) {
    dirPath = args[0];
  }
  GeneratorColumnSchema enumeratedSchema = GeneratorColumnSchema.makeEnumerated("", ValueType.FLOAT, true, 1, 0d, ImmutableList.of(0f, 1.1f, 2.2f, 3.3f, 4.4f), ImmutableList.of(0.95, 0.001, 0.0189, 0.03, 0.0001));
  GeneratorColumnSchema zipfLowSchema = GeneratorColumnSchema.makeZipf("", ValueType.FLOAT, true, 1, 0d, -1, 1000, 1d);
  GeneratorColumnSchema zipfHighSchema = GeneratorColumnSchema.makeZipf("", ValueType.FLOAT, true, 1, 0d, -1, 1000, 3d);
  GeneratorColumnSchema sequentialSchema = GeneratorColumnSchema.makeSequential("", ValueType.FLOAT, true, 1, 0d, 1470187671, 2000000000);
  GeneratorColumnSchema uniformSchema = GeneratorColumnSchema.makeContinuousUniform("", ValueType.FLOAT, true, 1, 0d, 0, 1000);
  Map<String, ColumnValueGenerator> generators = new HashMap<>();
  generators.put("enumerate", new ColumnValueGenerator(enumeratedSchema, 1));
  generators.put("zipfLow", new ColumnValueGenerator(zipfLowSchema, 1));
  generators.put("zipfHigh", new ColumnValueGenerator(zipfHighSchema, 1));
  generators.put("sequential", new ColumnValueGenerator(sequentialSchema, 1));
  generators.put("uniform", new ColumnValueGenerator(uniformSchema, 1));
  File dir = new File(dirPath);
  dir.mkdir();
  // create data files using ColumnValueGenerator
  for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
    final File dataFile = new File(dir, entry.getKey());
    dataFile.delete();
    try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) {
      for (int i = 0; i < ROW_NUM; i++) {
        writer.write((Float) entry.getValue().generateRowValue() + "\n");
      }
    }
  }
  // create compressed files for every CompressionStrategy provided
  for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
    for (CompressionStrategy compression : COMPRESSIONS) {
      String name = entry.getKey() + "-" + compression;
      log.info("%s: ", name);
      File compFile = new File(dir, name);
      // CREATE_NEW below fails if the file already exists, so remove any previous output first.
      compFile.delete();
      File dataFile = new File(dir, entry.getKey());
      // The write-out medium is created per output file; closing it here releases what it holds
      // instead of leaking one medium per generator/compression pair.
      try (OffHeapMemorySegmentWriteOutMedium writeOutMedium = new OffHeapMemorySegmentWriteOutMedium()) {
        ColumnarFloatsSerializer writer = CompressionFactory.getFloatSerializer("float-benchmark", writeOutMedium, "float", ByteOrder.nativeOrder(), compression);
        try (BufferedReader br = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8);
             FileChannel output = FileChannel.open(compFile.toPath(), StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE)) {
          writer.open();
          String line;
          while ((line = br.readLine()) != null) {
            writer.add(Float.parseFloat(line));
          }
          writer.writeTo(output, null);
        }
      }
      log.info("%d", compFile.length() / 1024);
    }
  }
}
use of org.apache.druid.segment.data.CompressionStrategy in project druid by druid-io.
the class LongCompressionBenchmarkFileGenerator method main.
/**
 * Generates the long benchmark input files and one compressed file per compression/encoding pair.
 *
 * <p>For each of the five value generators (enumerate, zipfLow, zipfHigh, sequential, uniform)
 * this writes {@code ROW_NUM} generated values, one per line, to a text file named after the
 * generator. It then re-reads each text file and serializes it once per combination of
 * {@code COMPRESSIONS} and {@code ENCODINGS} into a file named
 * {@code <generator>-<compression>-<encoding>}, logging the resulting size in KiB.
 *
 * @param args optional; when present, {@code args[0]} replaces {@code dirPath} as the output directory
 * @throws IOException if a data file or compressed file cannot be written or read
 */
public static void main(String[] args) throws IOException {
  if (args.length >= 1) {
    dirPath = args[0];
  }
  GeneratorColumnSchema enumeratedSchema = GeneratorColumnSchema.makeEnumerated("", ValueType.LONG, true, 1, 0d, ImmutableList.of(0, 1, 2, 3, 4), ImmutableList.of(0.95, 0.001, 0.0189, 0.03, 0.0001));
  GeneratorColumnSchema zipfLowSchema = GeneratorColumnSchema.makeZipf("", ValueType.LONG, true, 1, 0d, -1, 1000, 1d);
  GeneratorColumnSchema zipfHighSchema = GeneratorColumnSchema.makeZipf("", ValueType.LONG, true, 1, 0d, -1, 1000, 3d);
  GeneratorColumnSchema sequentialSchema = GeneratorColumnSchema.makeSequential("", ValueType.LONG, true, 1, 0d, 1470187671, 2000000000);
  GeneratorColumnSchema uniformSchema = GeneratorColumnSchema.makeDiscreteUniform("", ValueType.LONG, true, 1, 0d, 0, 1000);
  Map<String, ColumnValueGenerator> generators = new HashMap<>();
  generators.put("enumerate", new ColumnValueGenerator(enumeratedSchema, 1));
  generators.put("zipfLow", new ColumnValueGenerator(zipfLowSchema, 1));
  generators.put("zipfHigh", new ColumnValueGenerator(zipfHighSchema, 1));
  generators.put("sequential", new ColumnValueGenerator(sequentialSchema, 1));
  generators.put("uniform", new ColumnValueGenerator(uniformSchema, 1));
  File dir = new File(dirPath);
  dir.mkdir();
  // create data files using ColumnValueGenerator
  for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
    final File dataFile = new File(dir, entry.getKey());
    dataFile.delete();
    try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) {
      for (int i = 0; i < ROW_NUM; i++) {
        writer.write((long) entry.getValue().generateRowValue() + "\n");
      }
    }
  }
  // create compressed files using all combinations of CompressionStrategy and LongEncoding provided
  for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
    for (CompressionStrategy compression : COMPRESSIONS) {
      for (CompressionFactory.LongEncodingStrategy encoding : ENCODINGS) {
        String name = entry.getKey() + "-" + compression + "-" + encoding;
        log.info("%s: ", name);
        File compFile = new File(dir, name);
        // CREATE_NEW below fails if the file already exists, so remove any previous output first.
        compFile.delete();
        File dataFile = new File(dir, entry.getKey());
        // The write-out medium is created per output file; closing it here releases what it holds
        // instead of leaking one medium per generator/compression/encoding combination.
        try (OffHeapMemorySegmentWriteOutMedium writeOutMedium = new OffHeapMemorySegmentWriteOutMedium()) {
          ColumnarLongsSerializer writer = CompressionFactory.getLongSerializer("long-benchmark", writeOutMedium, "long", ByteOrder.nativeOrder(), encoding, compression);
          try (BufferedReader br = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8);
               FileChannel output = FileChannel.open(compFile.toPath(), StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE)) {
            writer.open();
            String line;
            while ((line = br.readLine()) != null) {
              writer.add(Long.parseLong(line));
            }
            writer.writeTo(output, null);
          }
        }
        log.info("%d", compFile.length() / 1024);
      }
    }
  }
}
use of org.apache.druid.segment.data.CompressionStrategy in project druid by druid-io.
the class StringDimensionMergerV9 method makeColumnDescriptor.
/**
 * Assembles the {@link ColumnDescriptor} for this column from the writers held by the merger.
 *
 * <p>The descriptor is typed as {@code STRING}, carries the multi-value flag taken from the
 * column capabilities, and has a single dictionary-encoded part serde built from the dictionary
 * writer, the encoded value serializer, the bitmap writer and the spatial writer.
 *
 * @return the built column descriptor
 */
@Override
public ColumnDescriptor makeColumnDescriptor() {
  final boolean multiValued = capabilities.hasMultipleValues().isTrue();
  final CompressionStrategy strategy = indexSpec.getDimensionCompression();
  final BitmapSerdeFactory bitmapFactory = indexSpec.getBitmapSerdeFactory();
  // The value section is flagged as compressed for every strategy except UNCOMPRESSED.
  final boolean compressed = strategy != CompressionStrategy.UNCOMPRESSED;

  final ColumnDescriptor.Builder descriptorBuilder = ColumnDescriptor.builder();
  descriptorBuilder.setValueType(ValueType.STRING);
  descriptorBuilder.setHasMultipleValues(multiValued);

  final DictionaryEncodedColumnPartSerde.SerializerBuilder serdeBuilder =
      DictionaryEncodedColumnPartSerde
          .serializerBuilder()
          .withDictionary(dictionaryWriter)
          .withValue(encodedValueSerializer, multiValued, compressed)
          .withBitmapSerdeFactory(bitmapFactory)
          .withBitmapIndex(bitmapWriter)
          .withSpatialIndex(spatialWriter)
          .withByteOrder(IndexIO.BYTE_ORDER);

  return descriptorBuilder.addSerde(serdeBuilder.build()).build();
}
Aggregations