use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.
the class LongCompressionBenchmarkFileGenerator method main.
public static void main(String[] args) throws IOException {
  if (args.length >= 1) {
    dirPath = args[0];
  }

  GeneratorColumnSchema enumeratedSchema = GeneratorColumnSchema.makeEnumerated(
      "", ValueType.LONG, true, 1, 0d,
      ImmutableList.of(0, 1, 2, 3, 4),
      ImmutableList.of(0.95, 0.001, 0.0189, 0.03, 0.0001)
  );
  GeneratorColumnSchema zipfLowSchema =
      GeneratorColumnSchema.makeZipf("", ValueType.LONG, true, 1, 0d, -1, 1000, 1d);
  GeneratorColumnSchema zipfHighSchema =
      GeneratorColumnSchema.makeZipf("", ValueType.LONG, true, 1, 0d, -1, 1000, 3d);
  GeneratorColumnSchema sequentialSchema =
      GeneratorColumnSchema.makeSequential("", ValueType.LONG, true, 1, 0d, 1470187671, 2000000000);
  GeneratorColumnSchema uniformSchema =
      GeneratorColumnSchema.makeDiscreteUniform("", ValueType.LONG, true, 1, 0d, 0, 1000);

  Map<String, ColumnValueGenerator> generators = new HashMap<>();
  generators.put("enumerate", new ColumnValueGenerator(enumeratedSchema, 1));
  generators.put("zipfLow", new ColumnValueGenerator(zipfLowSchema, 1));
  generators.put("zipfHigh", new ColumnValueGenerator(zipfHighSchema, 1));
  generators.put("sequential", new ColumnValueGenerator(sequentialSchema, 1));
  generators.put("uniform", new ColumnValueGenerator(uniformSchema, 1));

  File dir = new File(dirPath);
  dir.mkdir();

  // create data files using ColumnValueGenerator
  for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
    final File dataFile = new File(dir, entry.getKey());
    dataFile.delete();
    try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) {
      for (int i = 0; i < ROW_NUM; i++) {
        writer.write((long) entry.getValue().generateRowValue() + "\n");
      }
    }
  }

  // create compressed files using all combinations of CompressionStrategy and LongEncoding provided
  for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
    for (CompressionStrategy compression : COMPRESSIONS) {
      for (CompressionFactory.LongEncodingStrategy encoding : ENCODINGS) {
        String name = entry.getKey() + "-" + compression + "-" + encoding;
        log.info("%s: ", name);
        File compFile = new File(dir, name);
        compFile.delete();
        File dataFile = new File(dir, entry.getKey());
        ColumnarLongsSerializer writer = CompressionFactory.getLongSerializer(
            "long-benchmark",
            new OffHeapMemorySegmentWriteOutMedium(),
            "long",
            ByteOrder.nativeOrder(),
            encoding,
            compression
        );
        try (
            BufferedReader br = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8);
            FileChannel output = FileChannel.open(compFile.toPath(), StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE)
        ) {
          writer.open();
          String line;
          while ((line = br.readLine()) != null) {
            writer.add(Long.parseLong(line));
          }
          writer.writeTo(output, null);
        }
        log.info("%d", compFile.length() / 1024);
      }
    }
  }
}
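
A hedged sketch of reading one of the generated files back: names follow the key-compression-encoding pattern built in the loop above, so the "enumerate-lz4-longs" file name below is an illustrative assumption, and dir is the same output directory.

  // Sketch only: decode one generated column file and fold over its values.
  File compFile = new File(dir, "enumerate-lz4-longs"); // assumed name; actual names vary by combination
  ByteBuffer buffer = ByteBuffer.wrap(Files.readAllBytes(compFile.toPath()));
  CompressedColumnarLongsSupplier supplier =
      CompressedColumnarLongsSupplier.fromByteBuffer(buffer, ByteOrder.nativeOrder());
  try (ColumnarLongs longs = supplier.get()) {
    long sum = 0;
    for (int i = 0; i < longs.size(); i++) {
      sum += longs.get(i);
    }
    System.out.println(longs.size() + " rows, sum = " + sum);
  }
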
use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.
the class V3CompressedVSizeColumnarMultiIntsSerializerTest method checkV2SerializedSizeAndData.
private void checkV2SerializedSizeAndData(int offsetChunkFactor, int valueChunkFactor) throws Exception {
  File tmpDirectory = FileUtils.createTempDir(
      StringUtils.format("CompressedVSizeIndexedV3WriterTest_%d_%d", offsetChunkFactor, valueChunkFactor)
  );
  FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
  int maxValue = vals.size() > 0 ? getMaxValue(vals) : 0;

  try (SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium()) {
    CompressedColumnarIntsSerializer offsetWriter = new CompressedColumnarIntsSerializer(
        TEST_COLUMN_NAME,
        segmentWriteOutMedium,
        offsetChunkFactor,
        byteOrder,
        compressionStrategy,
        GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "offset", compressionStrategy, Long.BYTES * 250000)
    );
    GenericIndexedWriter genericIndexed = GenericIndexedWriter.ofCompressedByteBuffers(
        segmentWriteOutMedium, "value", compressionStrategy, Long.BYTES * 250000
    );
    CompressedVSizeColumnarIntsSerializer valueWriter = new CompressedVSizeColumnarIntsSerializer(
        TEST_COLUMN_NAME, segmentWriteOutMedium, maxValue, valueChunkFactor, byteOrder, compressionStrategy, genericIndexed
    );
    V3CompressedVSizeColumnarMultiIntsSerializer writer =
        new V3CompressedVSizeColumnarMultiIntsSerializer(TEST_COLUMN_NAME, offsetWriter, valueWriter);

    writer.open();
    for (int[] val : vals) {
      writer.addValues(new ArrayBasedIndexedInts(val));
    }

    final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
    writer.writeTo(channel, smoosher);
    channel.close();
    smoosher.close();

    SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
    V3CompressedVSizeColumnarMultiIntsSupplier supplierFromByteBuffer =
        V3CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(mapper.mapFile("test"), byteOrder);
    ColumnarMultiInts columnarMultiInts = supplierFromByteBuffer.get();

    Assert.assertEquals(columnarMultiInts.size(), vals.size());
    for (int i = 0; i < vals.size(); ++i) {
      IndexedInts subVals = columnarMultiInts.get(i);
      Assert.assertEquals(subVals.size(), vals.get(i).length);
      for (int j = 0, size = subVals.size(); j < size; ++j) {
        Assert.assertEquals(subVals.get(j), vals.get(i)[j]);
      }
    }
    CloseableUtils.closeAll(columnarMultiInts, mapper);
  }
}
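
Everything the FileSmoosher receives above ends up in a small set of files under tmpDirectory (a meta.smoosh index plus numbered chunk files), and Smoosh.map() re-exposes each named entry as a mapped ByteBuffer. A minimal sketch of just the re-mapping step, assuming tmpDirectory already holds smooshed output and we only want the size of the "test" entry:

  // Sketch only: re-open previously smooshed output and inspect the "test" entry.
  SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
  ByteBuffer mapped = mapper.mapFile("test");
  System.out.println("entry 'test' occupies " + mapped.remaining() + " bytes");
  mapper.close();
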
use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.
the class CompressedDoublesSerdeTest method testWithValues.
public void testWithValues(double[] values) throws Exception {
  ColumnarDoublesSerializer serializer = CompressionFactory.getDoubleSerializer(
      "test", new OffHeapMemorySegmentWriteOutMedium(), "test", order, compressionStrategy
  );
  serializer.open();
  for (double value : values) {
    serializer.add(value);
  }
  Assert.assertEquals(values.length, serializer.size());

  final ByteArrayOutputStream baos = new ByteArrayOutputStream();
  serializer.writeTo(Channels.newChannel(baos), null);
  Assert.assertEquals(baos.size(), serializer.getSerializedSize());

  Supplier<ColumnarDoubles> supplier =
      CompressedColumnarDoublesSuppliers.fromByteBuffer(ByteBuffer.wrap(baos.toByteArray()), order);
  ColumnarDoubles doubles = supplier.get();
  assertIndexMatchesVals(doubles, values);
  for (int i = 0; i < 10; i++) {
    int a = (int) (ThreadLocalRandom.current().nextDouble() * values.length);
    int b = (int) (ThreadLocalRandom.current().nextDouble() * values.length);
    int start = a < b ? a : b;
    int end = a < b ? b : a;
    tryFill(doubles, values, start, end - start);
  }
  testConcurrentThreadReads(supplier, doubles, values);
  doubles.close();
}
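
assertIndexMatchesVals and tryFill are private helpers defined elsewhere in CompressedDoublesSerdeTest and are not shown on this page. A hypothetical equivalent of the index-by-index check might look like the sketch below; the method name and the exact 0.0 delta are assumptions, not the project's actual helper.

  // Hypothetical helper (illustrative only): compare a ColumnarDoubles column
  // against the expected primitive array, element by element.
  private static void checkIndexMatchesVals(ColumnarDoubles doubles, double[] expected) {
    Assert.assertEquals(expected.length, doubles.size());
    for (int i = 0; i < expected.length; i++) {
      Assert.assertEquals(expected[i], doubles.get(i), 0.0);
    }
  }
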
use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.
the class CompressedLongsAutoEncodingSerdeTest method testValues.
public void testValues(long[] values) throws Exception {
  ColumnarLongsSerializer serializer = CompressionFactory.getLongSerializer(
      "test", new OffHeapMemorySegmentWriteOutMedium(), "test", order, encodingStrategy, compressionStrategy
  );
  serializer.open();
  for (long value : values) {
    serializer.add(value);
  }
  Assert.assertEquals(values.length, serializer.size());

  final ByteArrayOutputStream baos = new ByteArrayOutputStream();
  serializer.writeTo(Channels.newChannel(baos), null);
  Assert.assertEquals(baos.size(), serializer.getSerializedSize());

  CompressedColumnarLongsSupplier supplier =
      CompressedColumnarLongsSupplier.fromByteBuffer(ByteBuffer.wrap(baos.toByteArray()), order);
  ColumnarLongs longs = supplier.get();
  assertIndexMatchesVals(longs, values);
  longs.close();
}
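
The encodingStrategy field is the variable under test here: CompressionFactory.LongEncodingStrategy.AUTO scans the values once and picks a more compact long encoding when the observed range allows it, while LONGS always stores plain 8-byte values before block compression. Below is a hedged sketch of constructing both variants over one write-out medium; the column/file name "col" and the LZ4 choice are placeholders.

  // Illustrative only: two long serializers that differ only in encoding strategy.
  SegmentWriteOutMedium medium = new OffHeapMemorySegmentWriteOutMedium();
  ColumnarLongsSerializer autoEncoded = CompressionFactory.getLongSerializer(
      "col", medium, "col", ByteOrder.nativeOrder(),
      CompressionFactory.LongEncodingStrategy.AUTO, CompressionStrategy.LZ4
  );
  ColumnarLongsSerializer fixedWidth = CompressionFactory.getLongSerializer(
      "col", medium, "col", ByteOrder.nativeOrder(),
      CompressionFactory.LongEncodingStrategy.LONGS, CompressionStrategy.LZ4
  );
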
use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.
the class V3CompressedVSizeColumnarMultiIntsSerializerTest method checkSerializedSizeAndData.
private void checkSerializedSizeAndData(int offsetChunkFactor, int valueChunkFactor) throws Exception {
  FileSmoosher smoosher = new FileSmoosher(temporaryFolder.newFolder());
  try (SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium()) {
    int maxValue = vals.size() > 0 ? getMaxValue(vals) : 0;
    CompressedColumnarIntsSerializer offsetWriter = new CompressedColumnarIntsSerializer(
        TEST_COLUMN_NAME, segmentWriteOutMedium, "offset", offsetChunkFactor, byteOrder, compressionStrategy
    );
    CompressedVSizeColumnarIntsSerializer valueWriter = new CompressedVSizeColumnarIntsSerializer(
        TEST_COLUMN_NAME, segmentWriteOutMedium, "value", maxValue, valueChunkFactor, byteOrder, compressionStrategy
    );
    V3CompressedVSizeColumnarMultiIntsSerializer writer =
        new V3CompressedVSizeColumnarMultiIntsSerializer(TEST_COLUMN_NAME, offsetWriter, valueWriter);
    V3CompressedVSizeColumnarMultiIntsSupplier supplierFromIterable =
        V3CompressedVSizeColumnarMultiIntsSupplier.fromIterable(
            Iterables.transform(vals, ArrayBasedIndexedInts::new),
            offsetChunkFactor,
            maxValue,
            byteOrder,
            compressionStrategy,
            segmentWriteOutMedium.getCloser()
        );

    writer.open();
    for (int[] val : vals) {
      writer.addValues(new ArrayBasedIndexedInts(val));
    }

    long writtenLength = writer.getSerializedSize();
    final WriteOutBytes writeOutBytes = segmentWriteOutMedium.makeWriteOutBytes();
    writer.writeTo(writeOutBytes, smoosher);
    smoosher.close();
    Assert.assertEquals(writtenLength, supplierFromIterable.getSerializedSize());

    // read from ByteBuffer and check values
    V3CompressedVSizeColumnarMultiIntsSupplier supplierFromByteBuffer =
        V3CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(
            ByteBuffer.wrap(IOUtils.toByteArray(writeOutBytes.asInputStream())),
            byteOrder
        );
    try (final ColumnarMultiInts columnarMultiInts = supplierFromByteBuffer.get()) {
      Assert.assertEquals(columnarMultiInts.size(), vals.size());
      for (int i = 0; i < vals.size(); ++i) {
        IndexedInts subVals = columnarMultiInts.get(i);
        Assert.assertEquals(subVals.size(), vals.get(i).length);
        for (int j = 0, size = subVals.size(); j < size; ++j) {
          Assert.assertEquals(subVals.get(j), vals.get(i)[j]);
        }
      }
    }
  }
}
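
Unlike checkV2SerializedSizeAndData above, this variant never touches smooshed files on disk: WriteOutBytes implements WritableByteChannel, so the serializer can write straight into off-heap buffers, and asInputStream() then reads the same bytes back for the fromByteBuffer comparison. A minimal sketch of that in-memory round trip, with an arbitrary three-byte payload:

  // Sketch only: write through the channel interface, read back through the stream interface.
  try (SegmentWriteOutMedium medium = new OffHeapMemorySegmentWriteOutMedium()) {
    WriteOutBytes bytes = medium.makeWriteOutBytes();
    bytes.write(ByteBuffer.wrap(new byte[]{1, 2, 3}));
    byte[] readBack = IOUtils.toByteArray(bytes.asInputStream());
    System.out.println("round-tripped " + readBack.length + " bytes without touching disk");
  } // closing the medium frees the off-heap buffers backing 'bytes'
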