Use of io.druid.java.util.common.io.smoosh.SmooshedFileMapper in project druid by druid-io.
In class LargeColumnSupportedComplexColumnSerializerTest, method testSanity:
@Test
public void testSanity() throws IOException
{
  HyperUniquesSerdeForTest serde = new HyperUniquesSerdeForTest(Hashing.murmur3_128());
  int[] cases = {1000, 5000, 10000, 20000};
  int[] columnSizes = {
      Integer.MAX_VALUE,
      Integer.MAX_VALUE / 2,
      Integer.MAX_VALUE / 4,
      5000 * Longs.BYTES,
      2500 * Longs.BYTES
  };
  for (int columnSize : columnSizes) {
    for (int aCase : cases) {
      File tmpFile = FileUtils.getTempDirectory();
      HyperLogLogCollector baseCollector = HyperLogLogCollector.makeLatestCollector();
      try (IOPeon peon = new TmpFileIOPeon();
           FileSmoosher v9Smoosher = new FileSmoosher(tmpFile)) {
        LargeColumnSupportedComplexColumnSerializer serializer =
            LargeColumnSupportedComplexColumnSerializer.createWithColumnSize(
                peon, "test", serde.getObjectStrategy(), columnSize
            );
        serializer.open();
        for (int i = 0; i < aCase; i++) {
          HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
          // fn is a HashFunction field on the test class (presumably Hashing.murmur3_128(), matching the serde above).
          byte[] hashBytes = fn.hashLong(i).asBytes();
          collector.add(hashBytes);
          baseCollector.fold(collector);
          serializer.serialize(collector);
        }
        serializer.close();
        // Smoosh the serialized column into the directory under the name "test".
        try (final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter("test", serializer.getSerializedSize())) {
          serializer.writeToChannel(channel, v9Smoosher);
        }
      }
      // Re-open the smooshed directory and verify the column round-trips.
      SmooshedFileMapper mapper = Smoosh.map(tmpFile);
      final ColumnBuilder builder = new ColumnBuilder()
          .setType(ValueType.COMPLEX)
          .setHasMultipleValues(false)
          .setFileMapper(mapper);
      serde.deserializeColumn(mapper.mapFile("test"), builder);
      Column column = builder.build();
      ComplexColumn complexColumn = column.getComplexColumn();
      HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
      for (int i = 0; i < aCase; i++) {
        collector.fold((HyperLogLogCollector) complexColumn.getRowValue(i));
      }
      Assert.assertEquals(baseCollector.estimateCardinality(), collector.estimateCardinality(), 0.0);
    }
  }
}
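Distilled from the test above, the smoosh round trip itself is small. The following is a minimal, self-contained sketch using only the calls that appear in these examples (FileSmoosher, addWithSmooshedWriter, Smoosh.map, mapFile); the entry name and payload are illustrative, and it assumes a SmooshedWriter can be written to as an ordinary WritableByteChannel:

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;

import io.druid.java.util.common.io.smoosh.FileSmoosher;
import io.druid.java.util.common.io.smoosh.Smoosh;
import io.druid.java.util.common.io.smoosh.SmooshedFileMapper;
import io.druid.java.util.common.io.smoosh.SmooshedWriter;

public class SmooshRoundTripSketch
{
  public static void main(String[] args) throws IOException
  {
    File dir = Files.createTempDirectory("smoosh-sketch").toFile();
    byte[] payload = "hello smoosh".getBytes(StandardCharsets.UTF_8);

    // Write one named entry; addWithSmooshedWriter needs the exact size up front.
    try (FileSmoosher smoosher = new FileSmoosher(dir)) {
      try (SmooshedWriter channel = smoosher.addWithSmooshedWriter("example", payload.length)) {
        channel.write(ByteBuffer.wrap(payload));
      }
    }

    // Map the finished directory and read the entry back by name.
    SmooshedFileMapper mapper = Smoosh.map(dir);
    try {
      ByteBuffer mapped = mapper.mapFile("example");
      byte[] roundTripped = new byte[mapped.remaining()];
      mapped.get(roundTripped);
      System.out.println(new String(roundTripped, StandardCharsets.UTF_8));
    }
    finally {
      mapper.close();
    }
  }
}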
Use of io.druid.java.util.common.io.smoosh.SmooshedFileMapper in project druid by druid-io.
In class CompressedVSizeIndexedV3WriterTest, method checkV2SerializedSizeAndData:
private void checkV2SerializedSizeAndData(int offsetChunkFactor, int valueChunkFactor) throws Exception
{
  File tmpDirectory = Files.createTempDirectory(
      String.format("CompressedVSizeIndexedV3WriterTest_%d_%d", offsetChunkFactor, valueChunkFactor)
  ).toFile();
  FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
  final IOPeon ioPeon = new TmpFileIOPeon();
  int maxValue = vals.size() > 0 ? getMaxValue(vals) : 0;
  try {
    CompressedIntsIndexedWriter offsetWriter = new CompressedIntsIndexedWriter(
        offsetChunkFactor,
        compressionStrategy,
        new GenericIndexedWriter<>(
            ioPeon,
            "offset",
            CompressedIntBufferObjectStrategy.getBufferForOrder(byteOrder, compressionStrategy, offsetChunkFactor),
            Longs.BYTES * 250000
        )
    );
    GenericIndexedWriter genericIndexed = new GenericIndexedWriter<>(
        ioPeon,
        "value",
        CompressedByteBufferObjectStrategy.getBufferForOrder(
            byteOrder,
            compressionStrategy,
            valueChunkFactor * VSizeIndexedInts.getNumBytesForMax(maxValue)
                + CompressedVSizeIntsIndexedSupplier.bufferPadding(VSizeIndexedInts.getNumBytesForMax(maxValue))
        ),
        Longs.BYTES * 250000
    );
    CompressedVSizeIntsIndexedWriter valueWriter = new CompressedVSizeIntsIndexedWriter(
        ioPeon, "value", maxValue, valueChunkFactor, byteOrder, compressionStrategy, genericIndexed
    );
    CompressedVSizeIndexedV3Writer writer = new CompressedVSizeIndexedV3Writer(offsetWriter, valueWriter);
    writer.open();
    for (int[] val : vals) {
      writer.add(val);
    }
    writer.close();
    // Smoosh the serialized writer output into the directory under the name "test".
    final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
    writer.writeToChannel(channel, smoosher);
    channel.close();
    smoosher.close();
    // Map the smooshed directory back and verify the values survived the round trip.
    SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
    CompressedVSizeIndexedV3Supplier supplierFromByteBuffer =
        CompressedVSizeIndexedV3Supplier.fromByteBuffer(mapper.mapFile("test"), byteOrder, mapper);
    IndexedMultivalue<IndexedInts> indexedMultivalue = supplierFromByteBuffer.get();
    assertEquals(indexedMultivalue.size(), vals.size());
    for (int i = 0; i < vals.size(); ++i) {
      IndexedInts subVals = indexedMultivalue.get(i);
      assertEquals(subVals.size(), vals.get(i).length);
      for (int j = 0; j < subVals.size(); ++j) {
        assertEquals(subVals.get(j), vals.get(i)[j]);
      }
    }
    CloseQuietly.close(indexedMultivalue);
    mapper.close();
  } finally {
    ioPeon.close();
  }
}
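Note that writer.getSerializedSize() is only called after writer.close(), because addWithSmooshedWriter needs the exact entry size up front. The explicit channel.close() and smoosher.close() calls above are not exception-safe: if writeToChannel throws, the channel is never closed. Since the first example uses both SmooshedWriter and FileSmoosher in try-with-resources, the close sequence could be written that way instead; a sketch reusing the writer, smoosher and tmpDirectory variables from the test above:

    try (SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize())) {
      writer.writeToChannel(channel, smoosher);
    }
    smoosher.close();
    SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);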
Use of io.druid.java.util.common.io.smoosh.SmooshedFileMapper in project druid by druid-io.
In class CompressedVSizeIntsIndexedWriterTest, method checkV2SerializedSizeAndData:
private void checkV2SerializedSizeAndData(int chunkSize) throws Exception
{
  File tmpDirectory = FileUtils.getTempDirectory();
  FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
  int maxValue = vals.length > 0 ? Ints.max(vals) : 0;
  GenericIndexedWriter genericIndexed = new GenericIndexedWriter<>(
      ioPeon,
      "test",
      CompressedByteBufferObjectStrategy.getBufferForOrder(
          byteOrder,
          compressionStrategy,
          chunkSize * VSizeIndexedInts.getNumBytesForMax(maxValue)
              + CompressedVSizeIntsIndexedSupplier.bufferPadding(VSizeIndexedInts.getNumBytesForMax(maxValue))
      ),
      Longs.BYTES * 10000
  );
  CompressedVSizeIntsIndexedWriter writer = new CompressedVSizeIntsIndexedWriter(
      ioPeon, "test", maxValue, chunkSize, byteOrder, compressionStrategy, genericIndexed
  );
  writer.open();
  for (int val : vals) {
    writer.add(val);
  }
  writer.close();
  // Smoosh the serialized values into the directory, then map them back for verification.
  final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
  writer.writeToChannel(channel, smoosher);
  channel.close();
  smoosher.close();
  SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
  CompressedVSizeIntsIndexedSupplier supplierFromByteBuffer =
      CompressedVSizeIntsIndexedSupplier.fromByteBuffer(mapper.mapFile("test"), byteOrder, mapper);
  IndexedInts indexedInts = supplierFromByteBuffer.get();
  for (int i = 0; i < vals.length; ++i) {
    assertEquals(vals[i], indexedInts.get(i));
  }
  CloseQuietly.close(indexedInts);
  mapper.close();
}
Use of io.druid.java.util.common.io.smoosh.SmooshedFileMapper in project druid by druid-io.
In class CompressedIntsIndexedWriterTest, method checkV2SerializedSizeAndData:
private void checkV2SerializedSizeAndData(int chunkFactor) throws Exception
{
  File tmpDirectory = Files.createTempDirectory(
      String.format("CompressedIntsIndexedWriterTest_%d", chunkFactor)
  ).toFile();
  FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
  final IOPeon ioPeon = new TmpFileIOPeon();
  try {
    CompressedIntsIndexedWriter writer = new CompressedIntsIndexedWriter(
        chunkFactor,
        compressionStrategy,
        new GenericIndexedWriter<>(
            ioPeon,
            "test",
            CompressedIntBufferObjectStrategy.getBufferForOrder(byteOrder, compressionStrategy, chunkFactor),
            Longs.BYTES * 10000
        )
    );
    writer.open();
    for (int val : vals) {
      writer.add(val);
    }
    writer.close();
    final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
    writer.writeToChannel(channel, smoosher);
    channel.close();
    smoosher.close();
    SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
    // read from ByteBuffer and check values
    CompressedIntsIndexedSupplier supplierFromByteBuffer =
        CompressedIntsIndexedSupplier.fromByteBuffer(mapper.mapFile("test"), byteOrder, mapper);
    IndexedInts indexedInts = supplierFromByteBuffer.get();
    assertEquals(vals.length, indexedInts.size());
    for (int i = 0; i < vals.length; ++i) {
      assertEquals(vals[i], indexedInts.get(i));
    }
    CloseQuietly.close(indexedInts);
    mapper.close();
  } finally {
    ioPeon.close();
  }
}
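The three writer tests above share the same shape: build a writer, add values, close it, smoosh its serialized form under a name, then map the directory back and hand the named entry to a fromByteBuffer factory. Below is a sketch of that shared round trip using only the smoosh calls that appear in these tests; the WriteBody interface and the smooshRoundTrip helper are hypothetical conveniences, not part of Druid:

// Hypothetical functional interface matching the writeToChannel(channel, smoosher)
// shape used by the writers above.
interface WriteBody
{
  void writeTo(SmooshedWriter channel, FileSmoosher smoosher) throws IOException;
}

// Writes one named smoosh entry of a known size, then re-maps the directory.
// The caller reads the entry with mapFile(name) and is responsible for closing the mapper.
static SmooshedFileMapper smooshRoundTrip(File dir, String name, long serializedSize, WriteBody body)
    throws IOException
{
  try (FileSmoosher smoosher = new FileSmoosher(dir)) {
    try (SmooshedWriter channel = smoosher.addWithSmooshedWriter(name, serializedSize)) {
      body.writeTo(channel, smoosher);
    }
  }
  return Smoosh.map(dir);
}

A test could then call, for example, smooshRoundTrip(tmpDirectory, "test", writer.getSerializedSize(), (channel, smoosher) -> writer.writeToChannel(channel, smoosher)) and read the entry back with mapFile("test"), assuming the writer's writeToChannel accepts the channel type used here.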
Use of io.druid.java.util.common.io.smoosh.SmooshedFileMapper in project druid by druid-io.
In class IndexMergerTest, method testPersistNullColumnSkipping:
@Test
public void testPersistNullColumnSkipping() throws Exception
{
  // check that column d2 is skipped because it only has null values
  IncrementalIndex index1 = IncrementalIndexTest.createIndex(
      new AggregatorFactory[]{new LongSumAggregatorFactory("A", "A")}
  );
  index1.add(new MapBasedInputRow(
      1L,
      Lists.newArrayList("d1", "d2"),
      ImmutableMap.<String, Object>of("d1", "a", "d2", "", "A", 1)
  ));
  index1.add(new MapBasedInputRow(
      1L,
      Lists.newArrayList("d1", "d2"),
      ImmutableMap.<String, Object>of("d1", "b", "d2", "", "A", 1)
  ));
  final File tempDir = temporaryFolder.newFolder();
  QueryableIndex index = closer.closeLater(INDEX_IO.loadIndex(INDEX_MERGER.persist(index1, tempDir, indexSpec)));
  // Only the metric "A" and dimension "d1" should be reported as columns; d2 is dropped.
  List<String> expectedColumnNames = Arrays.asList("A", "d1");
  List<String> actualColumnNames = Lists.newArrayList(index.getColumnNames());
  Collections.sort(expectedColumnNames);
  Collections.sort(actualColumnNames);
  Assert.assertEquals(expectedColumnNames, actualColumnNames);
  // Inspect the smooshed segment directory directly: no file for d2 should have been written.
  SmooshedFileMapper sfm = closer.closeLater(SmooshedFileMapper.load(tempDir));
  List<String> expectedFilenames = Arrays.asList("A", "__time", "d1", "index.drd", "metadata.drd");
  List<String> actualFilenames = new ArrayList<>(sfm.getInternalFilenames());
  Collections.sort(expectedFilenames);
  Collections.sort(actualFilenames);
  Assert.assertEquals(expectedFilenames, actualFilenames);
}
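The same kind of inspection works outside a test: SmooshedFileMapper.load plus getInternalFilenames lists every column and metadata file packed into a persisted segment directory. A minimal sketch, assuming a directory path (illustrative) that already contains a smooshed segment:

import java.io.File;
import java.io.IOException;

import io.druid.java.util.common.io.smoosh.SmooshedFileMapper;

public class ListSmooshedFiles
{
  public static void main(String[] args) throws IOException
  {
    // Illustrative path; point this at a real persisted segment directory.
    File segmentDir = new File("/tmp/druid-segment");
    SmooshedFileMapper sfm = SmooshedFileMapper.load(segmentDir);
    try {
      for (String internalFile : sfm.getInternalFilenames()) {
        System.out.println(internalFile);
      }
    }
    finally {
      sfm.close();
    }
  }
}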