use of org.apache.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.
In the class V3CompressedVSizeColumnarMultiIntsSerializerTest, the method generateV2SerializedSizeAndData:
private void generateV2SerializedSizeAndData(long numRows, int maxValue, int maxValuesPerRow, int offsetChunkFactor, int valueChunkFactor) throws Exception {
  File tmpDirectory = FileUtils.createTempDir(StringUtils.format("CompressedVSizeIndexedV3WriterTest_%d_%d", offsetChunkFactor, offsetChunkFactor));
  FileSmoosher smoosher = new FileSmoosher(tmpDirectory);

  try (SegmentWriteOutMedium segmentWriteOutMedium = TmpFileSegmentWriteOutMediumFactory.instance().makeSegmentWriteOutMedium(temporaryFolder.newFolder())) {
    // The V3 serializer wraps an offset serializer and a value serializer.
    CompressedColumnarIntsSerializer offsetWriter = new CompressedColumnarIntsSerializer(
        TEST_COLUMN_NAME, segmentWriteOutMedium, offsetChunkFactor, byteOrder, compressionStrategy,
        GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "offset", compressionStrategy, Long.BYTES * 250000)
    );
    GenericIndexedWriter genericIndexed = GenericIndexedWriter.ofCompressedByteBuffers(segmentWriteOutMedium, "value", compressionStrategy, Long.BYTES * 250000);
    CompressedVSizeColumnarIntsSerializer valueWriter = new CompressedVSizeColumnarIntsSerializer(
        TEST_COLUMN_NAME, segmentWriteOutMedium, maxValue, valueChunkFactor, byteOrder, compressionStrategy, genericIndexed
    );
    V3CompressedVSizeColumnarMultiIntsSerializer writer = new V3CompressedVSizeColumnarMultiIntsSerializer(TEST_COLUMN_NAME, offsetWriter, valueWriter);
    writer.open();
    for (long l = 0L; l < numRows; l++) {
      writer.addValues(new ArrayBasedIndexedInts(generateRow(rand, maxValue, maxValuesPerRow)));
    }

    // Write the serialized column into the smoosh file under the name "test".
    final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
    writer.writeTo(channel, smoosher);
    channel.close();
    smoosher.close();

    // Map the smooshed directory back and verify that every row round-trips.
    SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
    V3CompressedVSizeColumnarMultiIntsSupplier supplierFromByteBuffer = V3CompressedVSizeColumnarMultiIntsSupplier.fromByteBuffer(mapper.mapFile("test"), byteOrder, mapper);
    ColumnarMultiInts columnarMultiInts = supplierFromByteBuffer.get();
    Assert.assertEquals(columnarMultiInts.size(), numRows);
    Random verifier = new Random(0);
    for (int i = 0; i < numRows; ++i) {
      IndexedInts subVals = columnarMultiInts.get(i);
      int[] expected = generateRow(verifier, maxValue, maxValuesPerRow);
      Assert.assertEquals(subVals.size(), expected.length);
      for (int j = 0, size = subVals.size(); j < size; ++j) {
        Assert.assertEquals(subVals.get(j), expected[j]);
      }
    }
    CloseableUtils.closeAll(columnarMultiInts, mapper);
  }
}
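As a reference for the pattern this test exercises, here is a minimal, hedged sketch of the smoosh write/read round trip on its own, using only classes that appear above (FileSmoosher, SmooshedWriter, Smoosh, SmooshedFileMapper). The method name, entry name, and payload are illustrative and not part of the test.

private void smooshRoundTripSketch() throws IOException {
  File dir = FileUtils.createTempDir("smoosh-sketch");          // illustrative temp directory
  byte[] payload = StringUtils.toUtf8("example payload");       // illustrative bytes

  // Write: reserve exactly payload.length bytes under a name, then stream them in.
  FileSmoosher smoosher = new FileSmoosher(dir);
  try (SmooshedWriter out = smoosher.addWithSmooshedWriter("example", payload.length)) {
    out.write(ByteBuffer.wrap(payload));                        // SmooshedWriter is a WritableByteChannel
  }
  smoosher.close();                                             // finalizes the smooshed files and their metadata

  // Read: map the directory and look the entry up by the same name.
  SmooshedFileMapper mapper = Smoosh.map(dir);
  ByteBuffer mapped = mapper.mapFile("example");
  // ... decode mapped ...
  mapper.close();
}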
use of org.apache.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.
In the class LargeColumnSupportedComplexColumnSerializerTest, the method testSanity:
@Test
public void testSanity() throws IOException {
  HyperUniquesSerdeForTest serde = new HyperUniquesSerdeForTest(Hashing.murmur3_128());
  int[] cases = {1000, 5000, 10000, 20000};
  int[] columnSizes = {Integer.MAX_VALUE, Integer.MAX_VALUE / 2, Integer.MAX_VALUE / 4, 5000 * Long.BYTES, 2500 * Long.BYTES};

  for (int columnSize : columnSizes) {
    for (int aCase : cases) {
      File tmpFile = temporaryFolder.newFolder();
      HyperLogLogCollector baseCollector = HyperLogLogCollector.makeLatestCollector();

      try (SegmentWriteOutMedium segmentWriteOutMedium = new OffHeapMemorySegmentWriteOutMedium();
           FileSmoosher v9Smoosher = new FileSmoosher(tmpFile)) {
        LargeColumnSupportedComplexColumnSerializer serializer =
            LargeColumnSupportedComplexColumnSerializer.createWithColumnSize(segmentWriteOutMedium, "test", serde.getObjectStrategy(), columnSize);
        serializer.open();

        // Serialize aCase collectors, folding each one into baseCollector for the later comparison.
        for (int i = 0; i < aCase; i++) {
          HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
          byte[] hashBytes = fn.hashLong(i).asBytes();
          collector.add(hashBytes);
          baseCollector.fold(collector);
          serializer.serialize(new ObjectColumnSelector() {
            @Nullable
            @Override
            public Object getObject() {
              return collector;
            }

            @Override
            public Class classOfObject() {
              return HyperLogLogCollector.class;
            }

            @Override
            public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
              // doesn't matter in tests
            }
          });
        }

        // Reserve exactly getSerializedSize() bytes in the smoosh file and write the column into it.
        try (final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter("test", serializer.getSerializedSize())) {
          serializer.writeTo(channel, v9Smoosher);
        }
      }

      // Map the smooshed directory, rebuild the complex column, and check that the folded
      // cardinality matches the collector accumulated during serialization.
      SmooshedFileMapper mapper = Smoosh.map(tmpFile);
      final ColumnBuilder builder = new ColumnBuilder().setType(ValueType.COMPLEX).setHasMultipleValues(false).setFileMapper(mapper);
      serde.deserializeColumn(mapper.mapFile("test"), builder, null);
      ColumnHolder columnHolder = builder.build();
      ComplexColumn complexColumn = (ComplexColumn) columnHolder.getColumn();
      HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
      for (int i = 0; i < aCase; i++) {
        collector.fold((HyperLogLogCollector) complexColumn.getRowValue(i));
      }
      Assert.assertEquals(baseCollector.estimateCardinality(), collector.estimateCardinality(), 0.0);
    }
  }
}
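Both tests hand their serializer to the smoosher in the same way: reserve exactly getSerializedSize() bytes under a name, then let writeTo fill that window. A hedged sketch of that contract as a small helper, assuming the serializer implements Druid's Serializer interface; the helper name is ours, not part of the Druid API.

private static void writeToSmoosh(FileSmoosher smoosher, String name, Serializer serializer) throws IOException {
  // The reserved size and the bytes produced by writeTo must match exactly.
  try (SmooshedWriter channel = smoosher.addWithSmooshedWriter(name, serializer.getSerializedSize())) {
    serializer.writeTo(channel, smoosher);
  }
}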
use of org.apache.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.
In the class IndexMergerV9, the method makeIndexBinary:
private void makeIndexBinary(
    final FileSmoosher v9Smoosher,
    final List<IndexableAdapter> adapters,
    final File outDir,
    final List<String> mergedDimensions,
    final List<String> mergedMetrics,
    final ProgressIndicator progress,
    final IndexSpec indexSpec,
    final List<DimensionMergerV9> mergers
) throws IOException {
  final String section = "make index.drd";
  progress.startSection(section);
  long startTime = System.currentTimeMillis();

  // Collect the final column and dimension names, skipping dimensions whose merger can be skipped.
  final Set<String> finalDimensions = new LinkedHashSet<>();
  final Set<String> finalColumns = new LinkedHashSet<>(mergedMetrics);
  for (int i = 0; i < mergedDimensions.size(); ++i) {
    if (mergers.get(i).canSkip()) {
      continue;
    }
    finalColumns.add(mergedDimensions.get(i));
    finalDimensions.add(mergedDimensions.get(i));
  }

  GenericIndexed<String> cols = GenericIndexed.fromIterable(finalColumns, GenericIndexed.STRING_STRATEGY);
  GenericIndexed<String> dims = GenericIndexed.fromIterable(finalDimensions, GenericIndexed.STRING_STRATEGY);
  final String bitmapSerdeFactoryType = mapper.writeValueAsString(indexSpec.getBitmapSerdeFactory());
  // The extra 16 bytes cover the two longs holding the data interval's start and end millis.
  final long numBytes = cols.getSerializedSize() + dims.getSerializedSize() + 16 + SERIALIZER_UTILS.getSerializedStringByteSize(bitmapSerdeFactoryType);

  final SmooshedWriter writer = v9Smoosher.addWithSmooshedWriter("index.drd", numBytes);
  cols.writeTo(writer, v9Smoosher);
  dims.writeTo(writer, v9Smoosher);

  DateTime minTime = DateTimes.MAX;
  DateTime maxTime = DateTimes.MIN;
  for (IndexableAdapter index : adapters) {
    minTime = JodaUtils.minDateTime(minTime, index.getDataInterval().getStart());
    maxTime = JodaUtils.maxDateTime(maxTime, index.getDataInterval().getEnd());
  }
  final Interval dataInterval = new Interval(minTime, maxTime);

  SERIALIZER_UTILS.writeLong(writer, dataInterval.getStartMillis());
  SERIALIZER_UTILS.writeLong(writer, dataInterval.getEndMillis());
  SERIALIZER_UTILS.writeString(writer, bitmapSerdeFactoryType);
  writer.close();

  IndexIO.checkFileSize(new File(outDir, "index.drd"));
  log.debug("Completed index.drd in %,d millis.", System.currentTimeMillis() - startTime);
  progress.stopSection(section);
}
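For context, the index.drd entry is consumed on the read side in the same order it is written: the column list, the dimension list, two longs for the data interval, and the bitmap serde factory string. A simplified, hedged sketch of that reader (modeled on the V9 loading path, not the actual loader; the method name and the smooshedFiles parameter are illustrative):

private Interval readIndexDrdSketch(SmooshedFileMapper smooshedFiles) throws IOException {
  ByteBuffer indexBuffer = smooshedFiles.mapFile("index.drd");
  GenericIndexed<String> cols = GenericIndexed.read(indexBuffer, GenericIndexed.STRING_STRATEGY, smooshedFiles);
  GenericIndexed<String> dims = GenericIndexed.read(indexBuffer, GenericIndexed.STRING_STRATEGY, smooshedFiles);
  // Two longs written by SERIALIZER_UTILS.writeLong above: interval start, then end.
  Interval dataInterval = Intervals.utc(indexBuffer.getLong(), indexBuffer.getLong());
  String bitmapSerdeFactoryType = SERIALIZER_UTILS.readString(indexBuffer);
  return dataInterval;
}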
use of org.apache.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.
In the class IndexMergerV9, the method makeColumn:
private void makeColumn(final FileSmoosher v9Smoosher, final String columnName, final ColumnDescriptor serdeficator) throws IOException {
  // A column entry consists of the JSON ColumnDescriptor followed immediately by the serialized column data.
  ZeroCopyByteArrayOutputStream specBytes = new ZeroCopyByteArrayOutputStream();
  SERIALIZER_UTILS.writeString(specBytes, mapper.writeValueAsString(serdeficator));
  try (SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter(columnName, specBytes.size() + serdeficator.getSerializedSize())) {
    specBytes.writeTo(channel);
    serdeficator.writeTo(channel, v9Smoosher);
  }
}
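The corresponding read path peels the JSON ColumnDescriptor off the front of the entry before handing the rest of the buffer to the column parts. A hedged sketch (not the actual Druid loading code; the method name, smooshedFiles, and jsonMapper parameters are illustrative, with jsonMapper standing in for the Jackson ObjectMapper used to write the descriptor):

private ColumnDescriptor readColumnDescriptorSketch(SmooshedFileMapper smooshedFiles, String columnName, ObjectMapper jsonMapper) throws IOException {
  ByteBuffer columnBuffer = smooshedFiles.mapFile(columnName);
  String descriptorJson = SERIALIZER_UTILS.readString(columnBuffer);   // advances past the descriptor
  // columnBuffer is now positioned at the start of the serialized column data described by the descriptor.
  return jsonMapper.readValue(descriptorJson, ColumnDescriptor.class);
}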
use of org.apache.druid.java.util.common.io.smoosh.SmooshedWriter in project druid by druid-io.
In the class GenericIndexedWriter, the method writeHeaderLong:
private void writeHeaderLong(FileSmoosher smoosher, int bagSizePower) throws IOException {
  ByteBuffer helperBuffer = ByteBuffer.allocate(Integer.BYTES).order(ByteOrder.nativeOrder());
  int numberOfElementsPerValueFile = 1 << bagSizePower;
  long currentNumBytes = 0;
  long relativeRefBytes = 0;
  long relativeNumBytes;
  try (SmooshedWriter smooshChannel = smoosher.addWithSmooshedWriter(generateHeaderFileName(filenameBase), ((long) numWritten) * Integer.BYTES)) {
    // The following block converts long header indexes into int header indexes.
    for (int pos = 0; pos < numWritten; pos++) {
      // At the start of each value file, rebase the reference so that the stored offsets
      // are relative to that file and fit into an int.
      if ((pos & (numberOfElementsPerValueFile - 1)) == 0) {
        relativeRefBytes = currentNumBytes;
      }
      currentNumBytes = headerOutLong.getLong(pos);
      relativeNumBytes = currentNumBytes - relativeRefBytes;
      helperBuffer.putInt(0, checkedCastNonnegativeLongToInt(relativeNumBytes));
      helperBuffer.clear();
      smooshChannel.write(helperBuffer);
    }
  }
}
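The rebasing above is easy to misread because relativeRefBytes is captured before currentNumBytes is updated, so each value file's header entries are measured from the end offset of the previous file's last value. A standalone illustration of the arithmetic with example numbers (not Druid code):

long[] absoluteEnds = {100, 250, 400, 512, 700, 950};   // cumulative end offsets of six values
int elementsPerFile = 4;                                 // i.e. 1 << bagSizePower; must be a power of two
long current = 0;
long base = 0;
for (int pos = 0; pos < absoluteEnds.length; pos++) {
  if ((pos & (elementsPerFile - 1)) == 0) {
    base = current;   // end offset of the previous value file; 0 for the first file
  }
  current = absoluteEnds[pos];
  long relative = current - base;
  System.out.printf("pos=%d absolute=%d relative=%d%n", pos, current, relative);
}
// Output: relative = 100, 250, 400, 512 for the first file, then 188, 438 for the second.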