Use of org.apache.druid.segment.writeout.SegmentWriteOutMedium in project druid by druid-io.
The class BaseColumnarLongsBenchmark, method encodeToFile.
// Serializes vals with the requested long encoding and compression strategy, writes the
// result to the output channel, and returns the serialized size in bytes.
static int encodeToFile(long[] vals, String encoding, FileChannel output) throws IOException {
  SegmentWriteOutMedium writeOutMedium = new OnHeapMemorySegmentWriteOutMedium();
  ColumnarLongsSerializer serializer;
  switch (encoding) {
    case "lz4-longs":
      serializer = CompressionFactory.getLongSerializer(encoding, writeOutMedium, "lz4-longs", ByteOrder.LITTLE_ENDIAN, CompressionFactory.LongEncodingStrategy.LONGS, CompressionStrategy.LZ4);
      break;
    case "lz4-auto":
      serializer = CompressionFactory.getLongSerializer(encoding, writeOutMedium, "lz4-auto", ByteOrder.LITTLE_ENDIAN, CompressionFactory.LongEncodingStrategy.AUTO, CompressionStrategy.LZ4);
      break;
    case "none-longs":
      serializer = CompressionFactory.getLongSerializer(encoding, writeOutMedium, "none-longs", ByteOrder.LITTLE_ENDIAN, CompressionFactory.LongEncodingStrategy.LONGS, CompressionStrategy.NONE);
      break;
    case "none-auto":
      serializer = CompressionFactory.getLongSerializer(encoding, writeOutMedium, "none-auto", ByteOrder.LITTLE_ENDIAN, CompressionFactory.LongEncodingStrategy.AUTO, CompressionStrategy.NONE);
      break;
    default:
      throw new RuntimeException("unknown encoding");
  }
  serializer.open();
  for (long val : vals) {
    serializer.add(val);
  }
  serializer.writeTo(output, null);
  return (int) serializer.getSerializedSize();
}
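
The helper above can be driven as in the following sketch to compare serialized sizes across the supported encoding names. This is illustrative only: the value set, the temp-file handling, and the printout are assumptions, not part of the Druid benchmark code.

static void compareEncodings(long[] vals) throws IOException {
  for (String encoding : new String[]{"lz4-longs", "lz4-auto", "none-longs", "none-auto"}) {
    // Write each encoding to its own temp file and report how many bytes it produced.
    File out = File.createTempFile("encode-" + encoding, ".bin");
    try (FileChannel channel = FileChannel.open(out.toPath(), StandardOpenOption.WRITE)) {
      int bytes = encodeToFile(vals, encoding, channel);
      System.out.println(encoding + ": " + bytes + " bytes");
    }
  }
}
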
Use of org.apache.druid.segment.writeout.SegmentWriteOutMedium in project druid by druid-io.
The class CompressedLongsSerdeTest, method testTooManyValues.
// this test takes ~50 minutes to run (even skipping 'auto')
@Ignore
@Test
public void testTooManyValues() throws IOException {
  // skip 'auto': remove this guard if 'auto' long encoding's unbounded heap usage gets put
  // in check and this case can actually pass
  if (encodingStrategy.equals(CompressionFactory.LongEncodingStrategy.AUTO)) {
    return;
  }
  expectedException.expect(ColumnCapacityExceededException.class);
  expectedException.expectMessage(ColumnCapacityExceededException.formatMessage("test"));
  try (SegmentWriteOutMedium segmentWriteOutMedium = TmpFileSegmentWriteOutMediumFactory.instance().makeSegmentWriteOutMedium(temporaryFolder.newFolder())) {
    ColumnarLongsSerializer serializer = CompressionFactory.getLongSerializer("test", segmentWriteOutMedium, "test", order, encodingStrategy, compressionStrategy);
    serializer.open();
    // one hundred rows past Integer.MAX_VALUE, which exceeds the capacity of a single column
    final long numRows = Integer.MAX_VALUE + 100L;
    for (long i = 0L; i < numRows; i++) {
      serializer.add(ThreadLocalRandom.current().nextLong());
    }
  }
}
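
For contrast, here is a small in-bounds sketch (not part of the test class) using the same factory call against a heap medium; the column and file names are placeholders, and it only uses calls that already appear in the snippets above.

static long writeSmallLongColumn(long[] vals) throws IOException {
  try (SegmentWriteOutMedium medium = new OnHeapMemorySegmentWriteOutMedium()) {
    ColumnarLongsSerializer serializer = CompressionFactory.getLongSerializer(
        "example", medium, "example", ByteOrder.LITTLE_ENDIAN,
        CompressionFactory.LongEncodingStrategy.LONGS, CompressionStrategy.LZ4);
    serializer.open();
    for (long val : vals) {
      serializer.add(val);
    }
    // Write into scratch space owned by the medium; no smoosher is needed for a lone column.
    WriteOutBytes scratch = medium.makeWriteOutBytes();
    serializer.writeTo(scratch, null);
    return serializer.getSerializedSize();
  }
}
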
Use of org.apache.druid.segment.writeout.SegmentWriteOutMedium in project druid by druid-io.
The class CompressedVSizeColumnarIntsSerializerTest, method testTooManyValues.
// this test takes ~18 minutes to run
@Ignore
@Test
public void testTooManyValues() throws IOException {
  final int maxValue = 0x0FFFFFFF;
  final int maxChunkSize = CompressedVSizeColumnarIntsSupplier.maxIntsInBufferForValue(maxValue);
  expectedException.expect(ColumnCapacityExceededException.class);
  expectedException.expectMessage(ColumnCapacityExceededException.formatMessage("test"));
  try (SegmentWriteOutMedium segmentWriteOutMedium = TmpFileSegmentWriteOutMediumFactory.instance().makeSegmentWriteOutMedium(temporaryFolder.newFolder())) {
    GenericIndexedWriter genericIndexed = GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "test", compressionStrategy, Long.BYTES * 10000);
    CompressedVSizeColumnarIntsSerializer serializer = new CompressedVSizeColumnarIntsSerializer(
        "test", segmentWriteOutMedium, maxValue, maxChunkSize, byteOrder, compressionStrategy, genericIndexed);
    serializer.open();
    final long numRows = Integer.MAX_VALUE + 100L;
    for (long i = 0L; i < numRows; i++) {
      serializer.addValue(ThreadLocalRandom.current().nextInt(0, Integer.MAX_VALUE));
    }
  }
}
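
The same constructor chain can be sketched for an ordinary, in-bounds column. The value domain, the "example" names, and the buffer size below are illustrative assumptions; the chunk size is derived from the largest value just as in the test above.

static long writeSmallIntColumn(int[] vals, int maxValue) throws IOException {
  try (SegmentWriteOutMedium medium = new OffHeapMemorySegmentWriteOutMedium()) {
    // Chunk size is bounded by the largest value so each compressed buffer stays within limits.
    int chunkSize = CompressedVSizeColumnarIntsSupplier.maxIntsInBufferForValue(maxValue);
    GenericIndexedWriter genericIndexed = GenericIndexedWriter.ofCompressedByteBuffers(
        medium, "example", CompressionStrategy.LZ4, Long.BYTES * 10000);
    CompressedVSizeColumnarIntsSerializer serializer = new CompressedVSizeColumnarIntsSerializer(
        "example", medium, maxValue, chunkSize, ByteOrder.nativeOrder(), CompressionStrategy.LZ4, genericIndexed);
    serializer.open();
    for (int val : vals) {
      serializer.addValue(val);
    }
    return serializer.getSerializedSize();
  }
}
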
Use of org.apache.druid.segment.writeout.SegmentWriteOutMedium in project druid by druid-io.
The class V3CompressedVSizeColumnarMultiIntsSerializerTest, method checkV2SerializedSizeAndData.
private void checkV2SerializedSizeAndData(int offsetChunkFactor, int valueChunkFactor) throws Exception {
  File tmpDirectory = FileUtils.createTempDir(StringUtils.format("CompressedVSizeIndexedV3WriterTest_%d_%d", offsetChunkFactor, valueChunkFactor));
  FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
  int maxValue = vals.size() > 0 ? getMaxValue(vals) : 0;
  try (SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium()) {
    CompressedColumnarIntsSerializer offsetWriter = new CompressedColumnarIntsSerializer(
        TEST_COLUMN_NAME, segmentWriteOutMedium, offsetChunkFactor, byteOrder, compressionStrategy,
        GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "offset", compressionStrategy, Long.BYTES * 250000));
    GenericIndexedWriter genericIndexed = GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "value", compressionStrategy, Long.BYTES * 250000);
    CompressedVSizeColumnarIntsSerializer valueWriter = new CompressedVSizeColumnarIntsSerializer(
        TEST_COLUMN_NAME, segmentWriteOutMedium, maxValue, valueChunkFactor, byteOrder, compressionStrategy, genericIndexed);
    V3CompressedVSizeColumnarMultiIntsSerializer writer = new V3CompressedVSizeColumnarMultiIntsSerializer(TEST_COLUMN_NAME, offsetWriter, valueWriter);
    writer.open();
    for (int[] val : vals) {
      writer.addValues(new ArrayBasedIndexedInts(val));
    }
    // write through the smoosher, then map the smooshed directory back in and verify the values
    final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
    writer.writeTo(channel, smoosher);
    channel.close();
    smoosher.close();
    SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
    V3CompressedVSizeColumnarMultiIntsSupplier supplierFromByteBuffer = V3CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(mapper.mapFile("test"), byteOrder);
    ColumnarMultiInts columnarMultiInts = supplierFromByteBuffer.get();
    Assert.assertEquals(columnarMultiInts.size(), vals.size());
    for (int i = 0; i < vals.size(); ++i) {
      IndexedInts subVals = columnarMultiInts.get(i);
      Assert.assertEquals(subVals.size(), vals.get(i).length);
      for (int j = 0, size = subVals.size(); j < size; ++j) {
        Assert.assertEquals(subVals.get(j), vals.get(i)[j]);
      }
    }
    CloseableUtils.closeAll(columnarMultiInts, mapper);
  }
}
Use of org.apache.druid.segment.writeout.SegmentWriteOutMedium in project druid by druid-io.
The class V3CompressedVSizeColumnarMultiIntsSerializerTest, method checkSerializedSizeAndData.
private void checkSerializedSizeAndData(int offsetChunkFactor, int valueChunkFactor) throws Exception {
  FileSmoosher smoosher = new FileSmoosher(temporaryFolder.newFolder());
  try (SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium()) {
    int maxValue = vals.size() > 0 ? getMaxValue(vals) : 0;
    CompressedColumnarIntsSerializer offsetWriter = new CompressedColumnarIntsSerializer(
        TEST_COLUMN_NAME, segmentWriteOutMedium, "offset", offsetChunkFactor, byteOrder, compressionStrategy);
    CompressedVSizeColumnarIntsSerializer valueWriter = new CompressedVSizeColumnarIntsSerializer(
        TEST_COLUMN_NAME, segmentWriteOutMedium, "value", maxValue, valueChunkFactor, byteOrder, compressionStrategy);
    V3CompressedVSizeColumnarMultiIntsSerializer writer = new V3CompressedVSizeColumnarMultiIntsSerializer(TEST_COLUMN_NAME, offsetWriter, valueWriter);
    V3CompressedVSizeColumnarMultiIntsSupplier supplierFromIterable = V3CompressedVSizeColumnarMultiIntsSupplier.fromIterable(
        Iterables.transform(vals, ArrayBasedIndexedInts::new), offsetChunkFactor, maxValue, byteOrder, compressionStrategy,
        segmentWriteOutMedium.getCloser());
    writer.open();
    for (int[] val : vals) {
      writer.addValues(new ArrayBasedIndexedInts(val));
    }
    long writtenLength = writer.getSerializedSize();
    final WriteOutBytes writeOutBytes = segmentWriteOutMedium.makeWriteOutBytes();
    writer.writeTo(writeOutBytes, smoosher);
    smoosher.close();
    Assert.assertEquals(writtenLength, supplierFromIterable.getSerializedSize());
    // read from ByteBuffer and check values
    V3CompressedVSizeColumnarMultiIntsSupplier supplierFromByteBuffer = V3CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(
        ByteBuffer.wrap(IOUtils.toByteArray(writeOutBytes.asInputStream())), byteOrder);
    try (final ColumnarMultiInts columnarMultiInts = supplierFromByteBuffer.get()) {
      Assert.assertEquals(columnarMultiInts.size(), vals.size());
      for (int i = 0; i < vals.size(); ++i) {
        IndexedInts subVals = columnarMultiInts.get(i);
        Assert.assertEquals(subVals.size(), vals.get(i).length);
        for (int j = 0, size = subVals.size(); j < size; ++j) {
          Assert.assertEquals(subVals.get(j), vals.get(i)[j]);
        }
      }
    }
  }
}
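
Across these examples the medium plays the same role: it hands out scratch buffers and owns their cleanup. The following condensed sketch assumes WriteOutBytes can be written to as the OutputStream it extends and that resources registered with the medium's Closer are released when the medium itself is closed; the byte payload is a placeholder.

static void mediumLifecycleSketch() throws IOException {
  try (SegmentWriteOutMedium medium = new OffHeapMemorySegmentWriteOutMedium()) {
    // Scratch space owned by the medium; freed when the medium is closed.
    WriteOutBytes scratch = medium.makeWriteOutBytes();
    scratch.write(new byte[]{1, 2, 3});
    // Re-read what was written; registering the stream with the medium's Closer ties its
    // lifetime to the medium's.
    InputStream reread = scratch.asInputStream();
    medium.getCloser().register(reread);
  }
}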