Use of io.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.
Class LargeColumnSupportedComplexColumnSerializerTest, method testSanity:
@Test
public void testSanity() throws IOException
{
  HyperUniquesSerdeForTest serde = new HyperUniquesSerdeForTest(Hashing.murmur3_128());
  int[] cases = {1000, 5000, 10000, 20000};
  int[] columnSizes = {
      Integer.MAX_VALUE,
      Integer.MAX_VALUE / 2,
      Integer.MAX_VALUE / 4,
      5000 * Longs.BYTES,
      2500 * Longs.BYTES
  };
  for (int columnSize : columnSizes) {
    for (int aCase : cases) {
      File tmpFile = FileUtils.getTempDirectory();
      HyperLogLogCollector baseCollector = HyperLogLogCollector.makeLatestCollector();
      try (IOPeon peon = new TmpFileIOPeon();
           FileSmoosher v9Smoosher = new FileSmoosher(tmpFile)) {
        LargeColumnSupportedComplexColumnSerializer serializer =
            LargeColumnSupportedComplexColumnSerializer.createWithColumnSize(peon, "test", serde.getObjectStrategy(), columnSize);
        serializer.open();
        for (int i = 0; i < aCase; i++) {
          HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
          // fn is a class-level HashFunction field of the test (e.g. Hashing.murmur3_128()).
          byte[] hashBytes = fn.hashLong(i).asBytes();
          collector.add(hashBytes);
          baseCollector.fold(collector);
          serializer.serialize(collector);
        }
        serializer.close();
        try (final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter("test", serializer.getSerializedSize())) {
          serializer.writeToChannel(channel, v9Smoosher);
        }
      }
      SmooshedFileMapper mapper = Smoosh.map(tmpFile);
      final ColumnBuilder builder = new ColumnBuilder()
          .setType(ValueType.COMPLEX)
          .setHasMultipleValues(false)
          .setFileMapper(mapper);
      serde.deserializeColumn(mapper.mapFile("test"), builder);
      Column column = builder.build();
      ComplexColumn complexColumn = column.getComplexColumn();
      HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
      for (int i = 0; i < aCase; i++) {
        collector.fold((HyperLogLogCollector) complexColumn.getRowValue(i));
      }
      Assert.assertEquals(baseCollector.estimateCardinality(), collector.estimateCardinality(), 0.0);
    }
  }
}
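The test above exercises the write/read pattern that recurs in every snippet on this page: open an entry with its exact final size via addWithSmooshedWriter, write into the returned SmooshedWriter, close everything, then map the directory back with Smoosh.map. A minimal sketch of just that round trip, independent of the HyperLogLog serde (the entry name and directory are illustrative, and the same smoosh, Guava, and java.nio imports as the snippets above are assumed):

File dir = Files.createTempDirectory("smoosh-example").toFile();
byte[] payload = "hello smoosh".getBytes(StandardCharsets.UTF_8);

// Write: the entry must be opened with the exact number of bytes that will be written.
try (FileSmoosher smoosher = new FileSmoosher(dir)) {
  try (SmooshedWriter out = smoosher.addWithSmooshedWriter("example", payload.length)) {
    out.write(ByteBuffer.wrap(payload));
  }
}

// Read back: map the smooshed directory and look the entry up by name.
SmooshedFileMapper mapper = Smoosh.map(dir);
ByteBuffer mapped = mapper.mapFile("example");
byte[] roundTripped = new byte[mapped.remaining()];
mapped.get(roundTripped);
mapper.close();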
Use of io.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.
Class IndexMergerV9, method makeColumn:
private void makeColumn(final FileSmoosher v9Smoosher, final String columnName, final ColumnDescriptor serdeficator)
    throws IOException
{
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  serializerUtils.writeString(baos, mapper.writeValueAsString(serdeficator));
  byte[] specBytes = baos.toByteArray();
  final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter(columnName, serdeficator.numBytes() + specBytes.length);
  try {
    channel.write(ByteBuffer.wrap(specBytes));
    serdeficator.write(channel, v9Smoosher);
  } finally {
    channel.close();
  }
}
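The constraint to notice here is that addWithSmooshedWriter is told the exact total size up front: the JSON-serialized ColumnDescriptor spec followed by the column payload reported by serdeficator.numBytes(). Since SmooshedWriter is closeable, the same method could be written with try-with-resources; a sketch of that equivalent form, reusing the fields and locals from the method above for illustration only:

// Equivalent form of makeColumn's body using try-with-resources (illustrative, not the project's code).
final long totalSize = serdeficator.numBytes() + specBytes.length;
try (SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter(columnName, totalSize)) {
  channel.write(ByteBuffer.wrap(specBytes));   // JSON spec first
  serdeficator.write(channel, v9Smoosher);     // then the column data
}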
Use of io.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.
Class GenericIndexedWriter, method writeToChannelVersionTwo:
private void writeToChannelVersionTwo(WritableByteChannel channel, FileSmoosher smoosher) throws IOException
{
  if (smoosher == null) {
    throw new IAE("version 2 GenericIndexedWriter requires FileSmoosher.");
  }
  int bagSizePower = bagSizePower();
  OutputStream metaOut = Channels.newOutputStream(channel);
  metaOut.write(GenericIndexed.VERSION_TWO);
  metaOut.write(objectsSorted ? 0x1 : 0x0);
  metaOut.write(Ints.toByteArray(bagSizePower));
  metaOut.write(Ints.toByteArray(Ints.checkedCast(numWritten)));
  metaOut.write(Ints.toByteArray(fileNameByteArray.length));
  metaOut.write(fileNameByteArray);
  try (RandomAccessFile headerFile = new RandomAccessFile(ioPeon.getFile(makeFilename("headerLong")), "r")) {
    Preconditions.checkNotNull(headerFile, "header file missing.");
    long previousValuePosition = 0;
    int bagSize = 1 << bagSizePower;
    int numberOfFilesRequired = GenericIndexed.getNumberOfFilesRequired(bagSize, numWritten);
    byte[] buffer = new byte[1 << 16];
    try (InputStream is = new FileInputStream(ioPeon.getFile(makeFilename("values")))) {
      int counter = -1;
      for (int i = 0; i < numberOfFilesRequired; i++) {
        if (i != numberOfFilesRequired - 1) {
          // 8 for long bytes.
          headerFile.seek((bagSize + counter) * Longs.BYTES);
          counter = counter + bagSize;
        } else {
          // for remaining items.
          headerFile.seek((numWritten - 1) * Longs.BYTES);
        }
        long valuePosition = Long.reverseBytes(headerFile.readLong());
        long numBytesToPutInFile = valuePosition - previousValuePosition;
        try (SmooshedWriter smooshChannel = smoosher.addWithSmooshedWriter(generateValueFileName(filenameBase, i), numBytesToPutInFile)) {
          writeBytesIntoSmooshedChannel(numBytesToPutInFile, buffer, smooshChannel, is);
          previousValuePosition = valuePosition;
        }
      }
    }
    writeHeaderLong(smoosher, headerFile, bagSizePower, buffer);
  }
}
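The loop above carves the concatenated values file into one smooshed part per "bag" of 2^bagSizePower entries, with the last part taking whatever remains. A small worked example of that arithmetic (the numbers are made up, and getNumberOfFilesRequired is assumed to behave like a ceiling division over numWritten):

int bagSizePower = 14;
int bagSize = 1 << bagSizePower;          // 16,384 entries per value file
long numWritten = 50_000;                 // objects written so far
// Ceiling division: 50,000 / 16,384 rounds up to 4 value files,
// the last of which holds the remaining 848 entries.
long filesRequired = (numWritten + bagSize - 1) / bagSize;   // 4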
Use of io.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.
Class GenericIndexedWriter, method writeHeaderLong:
private void writeHeaderLong(FileSmoosher smoosher, RandomAccessFile headerFile, int bagSizePower, byte[] buffer)
    throws IOException
{
  ByteBuffer helperBuffer = ByteBuffer.allocate(Ints.BYTES).order(ByteOrder.nativeOrder());
  try (CountingOutputStream finalHeaderOut = new CountingOutputStream(ioPeon.makeOutputStream(makeFilename("header_final")))) {
    int numberOfElementsPerValueFile = 1 << bagSizePower;
    long currentNumBytes = 0;
    long relativeRefBytes = 0;
    long relativeNumBytes;
    headerFile.seek(0);
    // The following loop converts the long header offsets into int header offsets,
    // rebasing each one at the start of its value file.
    for (int pos = 0; pos < numWritten; pos++) {
      // Update the reference offset when a new value file starts.
      if ((pos & (numberOfElementsPerValueFile - 1)) == 0) {
        relativeRefBytes = currentNumBytes;
      }
      currentNumBytes = Long.reverseBytes(headerFile.readLong());
      relativeNumBytes = currentNumBytes - relativeRefBytes;
      writeIntValueToOutputStream(helperBuffer, Ints.checkedCast(relativeNumBytes), finalHeaderOut);
    }
    long numBytesToPutInFile = finalHeaderOut.getCount();
    // Close explicitly so the header bytes are flushed before the file is re-read below.
    finalHeaderOut.close();
    try (InputStream is = new FileInputStream(ioPeon.getFile(makeFilename("header_final")))) {
      try (SmooshedWriter smooshChannel = smoosher.addWithSmooshedWriter(generateHeaderFileName(filenameBase), numBytesToPutInFile)) {
        writeBytesIntoSmooshedChannel(numBytesToPutInFile, buffer, smooshChannel, is);
      }
    }
  }
}
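Storing each header offset relative to the start of its value file is what lets the version-two header use ints even when the concatenated values exceed the 2 GB int range. A standalone illustration of that rebasing, mirroring the loop above with made-up offsets and two elements per value file:

// End offsets of five serialized values, as longs, with two values per file (a power of two).
long[] absoluteEndOffsets = {100L, 250L, 400L, 520L, 700L};
int elementsPerValueFile = 2;
long referenceOffset = 0;
int[] relativeEndOffsets = new int[absoluteEndOffsets.length];
for (int pos = 0; pos < absoluteEndOffsets.length; pos++) {
  if ((pos & (elementsPerValueFile - 1)) == 0 && pos > 0) {
    // A new value file starts here; rebase at the end offset of the previous file.
    referenceOffset = absoluteEndOffsets[pos - 1];
  }
  relativeEndOffsets[pos] = Ints.checkedCast(absoluteEndOffsets[pos] - referenceOffset);
}
// relativeEndOffsets == {100, 250, 150, 270, 180}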
Use of io.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.
Class CompressedVSizeIndexedV3WriterTest, method checkV2SerializedSizeAndData:
private void checkV2SerializedSizeAndData(int offsetChunkFactor, int valueChunkFactor) throws Exception
{
  File tmpDirectory = Files.createTempDirectory(
      String.format("CompressedVSizeIndexedV3WriterTest_%d_%d", offsetChunkFactor, valueChunkFactor)
  ).toFile();
  FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
  final IOPeon ioPeon = new TmpFileIOPeon();
  int maxValue = vals.size() > 0 ? getMaxValue(vals) : 0;
  try {
    CompressedIntsIndexedWriter offsetWriter = new CompressedIntsIndexedWriter(
        offsetChunkFactor,
        compressionStrategy,
        new GenericIndexedWriter<>(
            ioPeon,
            "offset",
            CompressedIntBufferObjectStrategy.getBufferForOrder(byteOrder, compressionStrategy, offsetChunkFactor),
            Longs.BYTES * 250000
        )
    );
    GenericIndexedWriter genericIndexed = new GenericIndexedWriter<>(
        ioPeon,
        "value",
        CompressedByteBufferObjectStrategy.getBufferForOrder(
            byteOrder,
            compressionStrategy,
            valueChunkFactor * VSizeIndexedInts.getNumBytesForMax(maxValue)
                + CompressedVSizeIntsIndexedSupplier.bufferPadding(VSizeIndexedInts.getNumBytesForMax(maxValue))
        ),
        Longs.BYTES * 250000
    );
    CompressedVSizeIntsIndexedWriter valueWriter = new CompressedVSizeIntsIndexedWriter(
        ioPeon, "value", maxValue, valueChunkFactor, byteOrder, compressionStrategy, genericIndexed
    );
    CompressedVSizeIndexedV3Writer writer = new CompressedVSizeIndexedV3Writer(offsetWriter, valueWriter);
    writer.open();
    for (int[] val : vals) {
      writer.add(val);
    }
    writer.close();
    final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
    writer.writeToChannel(channel, smoosher);
    channel.close();
    smoosher.close();
    SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
    CompressedVSizeIndexedV3Supplier supplierFromByteBuffer =
        CompressedVSizeIndexedV3Supplier.fromByteBuffer(mapper.mapFile("test"), byteOrder, mapper);
    IndexedMultivalue<IndexedInts> indexedMultivalue = supplierFromByteBuffer.get();
    assertEquals(indexedMultivalue.size(), vals.size());
    for (int i = 0; i < vals.size(); ++i) {
      IndexedInts subVals = indexedMultivalue.get(i);
      assertEquals(subVals.size(), vals.get(i).length);
      for (int j = 0; j < subVals.size(); ++j) {
        assertEquals(subVals.get(j), vals.get(i)[j]);
      }
    }
    CloseQuietly.close(indexedMultivalue);
    mapper.close();
  } finally {
    ioPeon.close();
  }
}
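The value writer's chunk buffer in this test is sized as valueChunkFactor times the per-value byte width, plus a small padding from bufferPadding. A worked example of that sizing, assuming getNumBytesForMax returns the smallest byte width (1 to 4) that can represent maxValue; the exact padding value is left unspecified:

int maxValue = 70_000;            // larger than 0xFFFF, so each value needs 3 bytes
int valueChunkFactor = 4_096;     // values per compressed chunk
int bytesPerValue = VSizeIndexedInts.getNumBytesForMax(maxValue);                    // 3
int chunkBytes = valueChunkFactor * bytesPerValue
    + CompressedVSizeIntsIndexedSupplier.bufferPadding(bytesPerValue);               // 12,288 + padding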