Use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.
The class CompressedLongsSerdeTest, method testValues: writes an array of longs through a compressed columnar serializer backed by the off-heap medium, reads the column back, and verifies sizes, per-index values, random-range fills, supplier serde, and concurrent reads.
public void testValues(long[] values) throws Exception
{
  ColumnarLongsSerializer serializer = CompressionFactory.getLongSerializer(
      "test",
      new OffHeapMemorySegmentWriteOutMedium(),
      "test",
      order,
      encodingStrategy,
      compressionStrategy
  );
  serializer.open();
  for (long value : values) {
    serializer.add(value);
  }
  Assert.assertEquals(values.length, serializer.size());

  // Serialize to an in-memory channel and check the reported size.
  final ByteArrayOutputStream baos = new ByteArrayOutputStream();
  serializer.writeTo(Channels.newChannel(baos), null);
  Assert.assertEquals(baos.size(), serializer.getSerializedSize());

  // Read the column back and verify every value.
  CompressedColumnarLongsSupplier supplier =
      CompressedColumnarLongsSupplier.fromByteBuffer(ByteBuffer.wrap(baos.toByteArray()), order);
  ColumnarLongs longs = supplier.get();
  assertIndexMatchesVals(longs, values);

  // Exercise bulk reads over ten random [start, end) ranges.
  for (int i = 0; i < 10; i++) {
    int a = (int) (ThreadLocalRandom.current().nextDouble() * values.length);
    int b = (int) (ThreadLocalRandom.current().nextDouble() * values.length);
    int start = Math.min(a, b);
    int end = Math.max(a, b);
    tryFill(longs, values, start, end - start);
  }
  testSupplierSerde(supplier, values);
  testConcurrentThreadReads(supplier, longs, values);
  longs.close();
}
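For reference outside the test harness, here is a minimal self-contained sketch of the same round trip. The test's parameterized fields (order, encodingStrategy, compressionStrategy) are replaced with illustrative concrete choices (little-endian byte order, LONGS encoding, LZ4 compression), and the class name is hypothetical:

import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.Channels;
import org.apache.druid.segment.data.ColumnarLongs;
import org.apache.druid.segment.data.ColumnarLongsSerializer;
import org.apache.druid.segment.data.CompressedColumnarLongsSupplier;
import org.apache.druid.segment.data.CompressionFactory;
import org.apache.druid.segment.data.CompressionStrategy;
import org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium;

public class LongColumnRoundTripSketch
{
  public static void main(String[] args) throws Exception
  {
    // Write a handful of longs through the off-heap write-out medium.
    ColumnarLongsSerializer serializer = CompressionFactory.getLongSerializer(
        "sketch",
        new OffHeapMemorySegmentWriteOutMedium(),
        "sketch",
        ByteOrder.LITTLE_ENDIAN,
        CompressionFactory.LongEncodingStrategy.LONGS,
        CompressionStrategy.LZ4
    );
    serializer.open();
    for (long v : new long[]{1L, 2L, 3L}) {
      serializer.add(v);
    }

    // Serialize to an in-memory buffer, then map it back into a readable column.
    // The byte order passed to fromByteBuffer must match the serializer's.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    serializer.writeTo(Channels.newChannel(baos), null);
    try (ColumnarLongs longs = CompressedColumnarLongsSupplier
        .fromByteBuffer(ByteBuffer.wrap(baos.toByteArray()), ByteOrder.LITTLE_ENDIAN)
        .get()) {
      System.out.println(longs.get(0)); // prints 1
    }
  }
}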
Use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.
The class LargeColumnSupportedComplexColumnSerializerTest, method testSanity: serializes HyperLogLog sketches into a complex column at several column-part sizes, smooshes the result to disk, deserializes it, and checks that the folded cardinality estimate survives the round trip.
@Test
public void testSanity() throws IOException
{
  HyperUniquesSerdeForTest serde = new HyperUniquesSerdeForTest(Hashing.murmur3_128());
  int[] cases = {1000, 5000, 10000, 20000};
  int[] columnSizes = {
      Integer.MAX_VALUE,
      Integer.MAX_VALUE / 2,
      Integer.MAX_VALUE / 4,
      5000 * Long.BYTES,
      2500 * Long.BYTES
  };
  for (int columnSize : columnSizes) {
    for (int aCase : cases) {
      File tmpFile = temporaryFolder.newFolder();
      HyperLogLogCollector baseCollector = HyperLogLogCollector.makeLatestCollector();
      try (SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium();
           FileSmoosher v9Smoosher = new FileSmoosher(tmpFile)) {
        LargeColumnSupportedComplexColumnSerializer serializer =
            LargeColumnSupportedComplexColumnSerializer.createWithColumnSize(
                segmentWriteOutMedium,
                "test",
                serde.getObjectStrategy(),
                columnSize
            );
        serializer.open();
        // Write aCase sketches, folding each into baseCollector as the expected result.
        for (int i = 0; i < aCase; i++) {
          HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
          // fn is the test class's murmur3 HashFunction field.
          byte[] hashBytes = fn.hashLong(i).asBytes();
          collector.add(hashBytes);
          baseCollector.fold(collector);
          serializer.serialize(new ObjectColumnSelector()
          {
            @Nullable
            @Override
            public Object getObject()
            {
              return collector;
            }

            @Override
            public Class classOfObject()
            {
              return HyperLogLogCollector.class;
            }

            @Override
            public void inspectRuntimeShape(RuntimeShapeInspector inspector)
            {
              // doesn't matter in tests
            }
          });
        }
        try (final SmooshedWriter channel =
                 v9Smoosher.addWithSmooshedWriter("test", serializer.getSerializedSize())) {
          serializer.writeTo(channel, v9Smoosher);
        }
      }

      // Map the smooshed files back and rebuild the complex column.
      SmooshedFileMapper mapper = Smoosh.map(tmpFile);
      final ColumnBuilder builder = new ColumnBuilder()
          .setType(ValueType.COMPLEX)
          .setHasMultipleValues(false)
          .setFileMapper(mapper);
      serde.deserializeColumn(mapper.mapFile("test"), builder, null);
      ColumnHolder columnHolder = builder.build();
      ComplexColumn complexColumn = (ComplexColumn) columnHolder.getColumn();

      // Folding every deserialized sketch must reproduce the original estimate exactly.
      HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
      for (int i = 0; i < aCase; i++) {
        collector.fold((HyperLogLogCollector) complexColumn.getRowValue(i));
      }
      Assert.assertEquals(baseCollector.estimateCardinality(), collector.estimateCardinality(), 0.0);
    }
  }
}
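A note on the design: columnSize caps (roughly) the bytes written to a single internal column part; when the serialized data would exceed it, LargeColumnSupportedComplexColumnSerializer rolls over into an additional smooshed part, which is how Druid supports complex columns larger than a single 2 GB buffer. The small entries in columnSizes (a few thousand longs' worth of bytes) force that multi-part path, while the Integer.MAX_VALUE entries keep everything in one part, so both layouts get covered by the same test.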
Use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.
The class CompressedFloatsSerdeTest, method testWithValues: the float counterpart of testValues above, exercising the same serialize/deserialize checks.
public void testWithValues(float[] values) throws Exception
{
  ColumnarFloatsSerializer serializer = CompressionFactory.getFloatSerializer(
      "test",
      new OffHeapMemorySegmentWriteOutMedium(),
      "test",
      order,
      compressionStrategy
  );
  serializer.open();
  for (float value : values) {
    serializer.add(value);
  }
  Assert.assertEquals(values.length, serializer.size());

  // Serialize to an in-memory channel and check the reported size.
  final ByteArrayOutputStream baos = new ByteArrayOutputStream();
  serializer.writeTo(Channels.newChannel(baos), null);
  Assert.assertEquals(baos.size(), serializer.getSerializedSize());

  // Read the column back and verify every value.
  CompressedColumnarFloatsSupplier supplier =
      CompressedColumnarFloatsSupplier.fromByteBuffer(ByteBuffer.wrap(baos.toByteArray()), order);
  ColumnarFloats floats = supplier.get();
  assertIndexMatchesVals(floats, values);

  // Exercise bulk reads over ten random [start, end) ranges.
  for (int i = 0; i < 10; i++) {
    int a = (int) (ThreadLocalRandom.current().nextDouble() * values.length);
    int b = (int) (ThreadLocalRandom.current().nextDouble() * values.length);
    int start = Math.min(a, b);
    int end = Math.max(a, b);
    tryFill(floats, values, start, end - start);
  }
  testSupplierSerde(supplier, values);
  testConcurrentThreadReads(supplier, floats, values);
  floats.close();
}
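Apart from dropping the encoding-strategy argument (the float path has no analogue of CompressionFactory.LongEncodingStrategy, so getFloatSerializer takes only a compression strategy), this mirrors testValues above: the same size assertions, per-index reads, random-range tryFill calls, supplier serde, and concurrent-read checks.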
Use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.
The class FloatCompressionBenchmarkFileGenerator, method main: generates benchmark input files from several value distributions, then writes one compressed column file per generator and compression strategy.
public static void main(String[] args) throws IOException
{
  if (args.length >= 1) {
    dirPath = args[0];
  }

  // One schema per value distribution to benchmark.
  GeneratorColumnSchema enumeratedSchema = GeneratorColumnSchema.makeEnumerated(
      "",
      ValueType.FLOAT,
      true,
      1,
      0d,
      ImmutableList.of(0f, 1.1f, 2.2f, 3.3f, 4.4f),
      ImmutableList.of(0.95, 0.001, 0.0189, 0.03, 0.0001)
  );
  GeneratorColumnSchema zipfLowSchema = GeneratorColumnSchema.makeZipf("", ValueType.FLOAT, true, 1, 0d, -1, 1000, 1d);
  GeneratorColumnSchema zipfHighSchema = GeneratorColumnSchema.makeZipf("", ValueType.FLOAT, true, 1, 0d, -1, 1000, 3d);
  GeneratorColumnSchema sequentialSchema = GeneratorColumnSchema.makeSequential("", ValueType.FLOAT, true, 1, 0d, 1470187671, 2000000000);
  GeneratorColumnSchema uniformSchema = GeneratorColumnSchema.makeContinuousUniform("", ValueType.FLOAT, true, 1, 0d, 0, 1000);

  Map<String, ColumnValueGenerator> generators = new HashMap<>();
  generators.put("enumerate", new ColumnValueGenerator(enumeratedSchema, 1));
  generators.put("zipfLow", new ColumnValueGenerator(zipfLowSchema, 1));
  generators.put("zipfHigh", new ColumnValueGenerator(zipfHighSchema, 1));
  generators.put("sequential", new ColumnValueGenerator(sequentialSchema, 1));
  generators.put("uniform", new ColumnValueGenerator(uniformSchema, 1));

  File dir = new File(dirPath);
  dir.mkdir();

  // Create plain-text data files, one row per line, using ColumnValueGenerator.
  for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
    final File dataFile = new File(dir, entry.getKey());
    dataFile.delete();
    try (Writer writer = Files.newBufferedWriter(dataFile.toPath(), StandardCharsets.UTF_8)) {
      for (int i = 0; i < ROW_NUM; i++) {
        writer.write((Float) entry.getValue().generateRowValue() + "\n");
      }
    }
  }

  // Create one compressed column file per generator for each CompressionStrategy provided.
  for (Map.Entry<String, ColumnValueGenerator> entry : generators.entrySet()) {
    for (CompressionStrategy compression : COMPRESSIONS) {
      String name = entry.getKey() + "-" + compression;
      log.info("%s: ", name);
      File compFile = new File(dir, name);
      compFile.delete();
      File dataFile = new File(dir, entry.getKey());
      ColumnarFloatsSerializer writer = CompressionFactory.getFloatSerializer(
          "float-benchmark",
          new OffHeapMemorySegmentWriteOutMedium(),
          "float",
          ByteOrder.nativeOrder(),
          compression
      );
      try (BufferedReader br = Files.newBufferedReader(dataFile.toPath(), StandardCharsets.UTF_8);
           FileChannel output = FileChannel.open(compFile.toPath(), StandardOpenOption.CREATE_NEW, StandardOpenOption.WRITE)) {
        writer.open();
        String line;
        while ((line = br.readLine()) != null) {
          writer.add(Float.parseFloat(line));
        }
        writer.writeTo(output, null);
      }
      // Log the compressed size in KiB.
      log.info("%d", compFile.length() / 1024);
    }
  }
}
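The generator only writes files; a hedged sketch of reading one back with the matching supplier follows. The directory and file name are illustrative (one of the generator-compression pairs produced above), and the byte order must match the nativeOrder() the generator used:

import java.io.File;
import java.nio.ByteOrder;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import org.apache.druid.segment.data.ColumnarFloats;
import org.apache.druid.segment.data.CompressedColumnarFloatsSupplier;

public class FloatBenchmarkFileReaderSketch
{
  public static void main(String[] args) throws Exception
  {
    // Hypothetical path: the generator's output dir plus a "<generator>-<compression>" file.
    File compFile = new File("float-bench-dir", "uniform-LZ4");
    try (FileChannel channel = FileChannel.open(compFile.toPath())) {
      MappedByteBuffer buffer = channel.map(FileChannel.MapMode.READ_ONLY, 0, compFile.length());
      CompressedColumnarFloatsSupplier supplier =
          CompressedColumnarFloatsSupplier.fromByteBuffer(buffer, ByteOrder.nativeOrder());
      try (ColumnarFloats floats = supplier.get()) {
        System.out.println(floats.get(0));
      }
    }
  }
}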
Use of org.apache.druid.segment.writeout.OffHeapMemorySegmentWriteOutMedium in project druid by druid-io.
The class GenericIndexedBenchmark, method createGenericIndexed: JMH trial setup that writes n fixed-size byte[] elements through a GenericIndexedWriter, smooshes them to disk, and maps the result back into a GenericIndexed for the benchmark to read.
@Setup(Level.Trial)
public void createGenericIndexed() throws IOException
{
  GenericIndexedWriter<byte[]> genericIndexedWriter = new GenericIndexedWriter<>(
      new OffHeapMemorySegmentWriteOutMedium(),
      "genericIndexedBenchmark",
      BYTE_ARRAY_STRATEGY
  );
  genericIndexedWriter.open();

  // The writer caches prevObject for comparison, so alternate between two buffers
  // to keep the objectsSorted computation correct.
  ByteBuffer[] elements = new ByteBuffer[2];
  elements[0] = ByteBuffer.allocate(elementSize);
  elements[1] = ByteBuffer.allocate(elementSize);
  for (int i = 0; i < n; i++) {
    ByteBuffer element = elements[i & 1];
    element.putInt(0, i);
    genericIndexedWriter.write(element.array());
  }

  // Smoosh the written index to disk, then map it back for the benchmark to read.
  smooshDir = FileUtils.createTempDir();
  file = File.createTempFile("genericIndexedBenchmark", "meta");
  try (FileChannel fileChannel = FileChannel.open(file.toPath(), StandardOpenOption.CREATE, StandardOpenOption.WRITE);
       FileSmoosher fileSmoosher = new FileSmoosher(smooshDir)) {
    genericIndexedWriter.writeTo(fileChannel, fileSmoosher);
  }
  FileChannel fileChannel = FileChannel.open(file.toPath());
  MappedByteBuffer byteBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, file.length());
  genericIndexed = GenericIndexed.read(byteBuffer, BYTE_ARRAY_STRATEGY, SmooshedFileMapper.load(smooshDir));
}
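The excerpt ends at setup; the measured methods are not shown. A hedged sketch of what a point-read benchmark over the mapped field might look like, reusing the excerpt's genericIndexed and n fields (the method name and body are illustrative, not the project's actual benchmark; assumes JMH's @Benchmark and java.util.concurrent.ThreadLocalRandom are imported):

@Benchmark
public int getElement()
{
  // Random point read through the mapped GenericIndexed; returning a byte of the
  // result keeps the JIT from eliminating the read.
  byte[] value = genericIndexed.get(ThreadLocalRandom.current().nextInt(n));
  return value[0];
}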