Use of io.druid.java.util.common.io.smoosh.SmooshedFileMapper in project druid by druid-io.
In class LargeColumnSupportedComplexColumnSerializerTest, method testSanity:
@Test
public void testSanity() throws IOException
{
  HyperUniquesSerdeForTest serde = new HyperUniquesSerdeForTest(Hashing.murmur3_128());
  int[] cases = {1000, 5000, 10000, 20000};
  int[] columnSizes = {
      Integer.MAX_VALUE,
      Integer.MAX_VALUE / 2,
      Integer.MAX_VALUE / 4,
      5000 * Longs.BYTES,
      2500 * Longs.BYTES
  };
  for (int columnSize : columnSizes) {
    for (int aCase : cases) {
      File tmpFile = FileUtils.getTempDirectory();
      HyperLogLogCollector baseCollector = HyperLogLogCollector.makeLatestCollector();
      try (IOPeon peon = new TmpFileIOPeon();
           FileSmoosher v9Smoosher = new FileSmoosher(tmpFile)) {
        LargeColumnSupportedComplexColumnSerializer serializer =
            LargeColumnSupportedComplexColumnSerializer.createWithColumnSize(
                peon, "test", serde.getObjectStrategy(), columnSize
            );
        serializer.open();
        for (int i = 0; i < aCase; i++) {
          HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
          // fn is a HashFunction field on the test class (presumably Hashing.murmur3_128(), matching the serde above).
          byte[] hashBytes = fn.hashLong(i).asBytes();
          collector.add(hashBytes);
          baseCollector.fold(collector);
          serializer.serialize(collector);
        }
        serializer.close();
        // Smoosh the serialized column into the directory under the name "test".
        try (final SmooshedWriter channel = v9Smoosher.addWithSmooshedWriter("test", serializer.getSerializedSize())) {
          serializer.writeToChannel(channel, v9Smoosher);
        }
      }
      // Re-open the smooshed directory and verify the column round-trips.
      SmooshedFileMapper mapper = Smoosh.map(tmpFile);
      final ColumnBuilder builder = new ColumnBuilder()
          .setType(ValueType.COMPLEX)
          .setHasMultipleValues(false)
          .setFileMapper(mapper);
      serde.deserializeColumn(mapper.mapFile("test"), builder);
      Column column = builder.build();
      ComplexColumn complexColumn = column.getComplexColumn();
      HyperLogLogCollector collector = HyperLogLogCollector.makeLatestCollector();
      for (int i = 0; i < aCase; i++) {
        collector.fold((HyperLogLogCollector) complexColumn.getRowValue(i));
      }
      Assert.assertEquals(baseCollector.estimateCardinality(), collector.estimateCardinality(), 0.0);
    }
  }
}
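Distilled from the test above, the smoosh round trip itself is small. The following is a minimal, self-contained sketch using only the calls that appear in these examples (FileSmoosher, addWithSmooshedWriter, Smoosh.map, mapFile); the entry name and payload are illustrative, and it assumes a SmooshedWriter can be written to as an ordinary WritableByteChannel:

import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;

import io.druid.java.util.common.io.smoosh.FileSmoosher;
import io.druid.java.util.common.io.smoosh.Smoosh;
import io.druid.java.util.common.io.smoosh.SmooshedFileMapper;
import io.druid.java.util.common.io.smoosh.SmooshedWriter;

public class SmooshRoundTripSketch
{
  public static void main(String[] args) throws IOException
  {
    File dir = Files.createTempDirectory("smoosh-sketch").toFile();
    byte[] payload = "hello smoosh".getBytes(StandardCharsets.UTF_8);

    // Write one named entry; addWithSmooshedWriter needs the exact size up front.
    try (FileSmoosher smoosher = new FileSmoosher(dir)) {
      try (SmooshedWriter channel = smoosher.addWithSmooshedWriter("example", payload.length)) {
        channel.write(ByteBuffer.wrap(payload));
      }
    }

    // Map the finished directory and read the entry back by name.
    SmooshedFileMapper mapper = Smoosh.map(dir);
    try {
      ByteBuffer mapped = mapper.mapFile("example");
      byte[] roundTripped = new byte[mapped.remaining()];
      mapped.get(roundTripped);
      System.out.println(new String(roundTripped, StandardCharsets.UTF_8));
    }
    finally {
      mapper.close();
    }
  }
}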
Use of io.druid.java.util.common.io.smoosh.SmooshedFileMapper in project druid by druid-io.
In class CompressedVSizeIndexedV3WriterTest, method checkV2SerializedSizeAndData:
private void checkV2SerializedSizeAndData(int offsetChunkFactor, int valueChunkFactor) throws Exception
{
  File tmpDirectory = Files.createTempDirectory(
      String.format("CompressedVSizeIndexedV3WriterTest_%d_%d", offsetChunkFactor, valueChunkFactor)
  ).toFile();
  FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
  final IOPeon ioPeon = new TmpFileIOPeon();
  int maxValue = vals.size() > 0 ? getMaxValue(vals) : 0;
  try {
    CompressedIntsIndexedWriter offsetWriter = new CompressedIntsIndexedWriter(
        offsetChunkFactor,
        compressionStrategy,
        new GenericIndexedWriter<>(
            ioPeon,
            "offset",
            CompressedIntBufferObjectStrategy.getBufferForOrder(byteOrder, compressionStrategy, offsetChunkFactor),
            Longs.BYTES * 250000
        )
    );
    GenericIndexedWriter genericIndexed = new GenericIndexedWriter<>(
        ioPeon,
        "value",
        CompressedByteBufferObjectStrategy.getBufferForOrder(
            byteOrder,
            compressionStrategy,
            valueChunkFactor * VSizeIndexedInts.getNumBytesForMax(maxValue)
                + CompressedVSizeIntsIndexedSupplier.bufferPadding(VSizeIndexedInts.getNumBytesForMax(maxValue))
        ),
        Longs.BYTES * 250000
    );
    CompressedVSizeIntsIndexedWriter valueWriter = new CompressedVSizeIntsIndexedWriter(
        ioPeon, "value", maxValue, valueChunkFactor, byteOrder, compressionStrategy, genericIndexed
    );
    CompressedVSizeIndexedV3Writer writer = new CompressedVSizeIndexedV3Writer(offsetWriter, valueWriter);
    writer.open();
    for (int[] val : vals) {
      writer.add(val);
    }
    writer.close();
    // Smoosh the serialized writer output into the directory under the name "test".
    final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
    writer.writeToChannel(channel, smoosher);
    channel.close();
    smoosher.close();
    // Map the smooshed directory back and verify the values survived the round trip.
    SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
    CompressedVSizeIndexedV3Supplier supplierFromByteBuffer =
        CompressedVSizeIndexedV3Supplier.fromByteBuffer(mapper.mapFile("test"), byteOrder, mapper);
    IndexedMultivalue<IndexedInts> indexedMultivalue = supplierFromByteBuffer.get();
    assertEquals(indexedMultivalue.size(), vals.size());
    for (int i = 0; i < vals.size(); ++i) {
      IndexedInts subVals = indexedMultivalue.get(i);
      assertEquals(subVals.size(), vals.get(i).length);
      for (int j = 0; j < subVals.size(); ++j) {
        assertEquals(subVals.get(j), vals.get(i)[j]);
      }
    }
    CloseQuietly.close(indexedMultivalue);
    mapper.close();
  } finally {
    ioPeon.close();
  }
}
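Note that writer.getSerializedSize() is only called after writer.close(), because addWithSmooshedWriter needs the exact entry size up front. The explicit channel.close() and smoosher.close() calls above are not exception-safe: if writeToChannel throws, the channel is never closed. Since the first example uses both SmooshedWriter and FileSmoosher in try-with-resources, the close sequence could be written that way instead; a sketch reusing the writer, smoosher and tmpDirectory variables from the test above:

    try (SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize())) {
      writer.writeToChannel(channel, smoosher);
    }
    smoosher.close();
    SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);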
Use of io.druid.java.util.common.io.smoosh.SmooshedFileMapper in project druid by druid-io.
In class CompressedVSizeIntsIndexedWriterTest, method checkV2SerializedSizeAndData:
private void checkV2SerializedSizeAndData(int chunkSize) throws Exception
{
  File tmpDirectory = FileUtils.getTempDirectory();
  FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
  int maxValue = vals.length > 0 ? Ints.max(vals) : 0;
  GenericIndexedWriter genericIndexed = new GenericIndexedWriter<>(
      ioPeon,
      "test",
      CompressedByteBufferObjectStrategy.getBufferForOrder(
          byteOrder,
          compressionStrategy,
          chunkSize * VSizeIndexedInts.getNumBytesForMax(maxValue)
              + CompressedVSizeIntsIndexedSupplier.bufferPadding(VSizeIndexedInts.getNumBytesForMax(maxValue))
      ),
      Longs.BYTES * 10000
  );
  CompressedVSizeIntsIndexedWriter writer = new CompressedVSizeIntsIndexedWriter(
      ioPeon, "test", maxValue, chunkSize, byteOrder, compressionStrategy, genericIndexed
  );
  writer.open();
  for (int val : vals) {
    writer.add(val);
  }
  writer.close();
  // Smoosh the serialized values into the directory, then map them back for verification.
  final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
  writer.writeToChannel(channel, smoosher);
  channel.close();
  smoosher.close();
  SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
  CompressedVSizeIntsIndexedSupplier supplierFromByteBuffer =
      CompressedVSizeIntsIndexedSupplier.fromByteBuffer(mapper.mapFile("test"), byteOrder, mapper);
  IndexedInts indexedInts = supplierFromByteBuffer.get();
  for (int i = 0; i < vals.length; ++i) {
    assertEquals(vals[i], indexedInts.get(i));
  }
  CloseQuietly.close(indexedInts);
  mapper.close();
}
Use of io.druid.java.util.common.io.smoosh.SmooshedFileMapper in project druid by druid-io.
In class CompressedIntsIndexedWriterTest, method checkV2SerializedSizeAndData:
private void checkV2SerializedSizeAndData(int chunkFactor) throws Exception
{
  File tmpDirectory = Files.createTempDirectory(
      String.format("CompressedIntsIndexedWriterTest_%d", chunkFactor)
  ).toFile();
  FileSmoosher smoosher = new FileSmoosher(tmpDirectory);
  final IOPeon ioPeon = new TmpFileIOPeon();
  try {
    CompressedIntsIndexedWriter writer = new CompressedIntsIndexedWriter(
        chunkFactor,
        compressionStrategy,
        new GenericIndexedWriter<>(
            ioPeon,
            "test",
            CompressedIntBufferObjectStrategy.getBufferForOrder(byteOrder, compressionStrategy, chunkFactor),
            Longs.BYTES * 10000
        )
    );
    writer.open();
    for (int val : vals) {
      writer.add(val);
    }
    writer.close();
    final SmooshedWriter channel = smoosher.addWithSmooshedWriter("test", writer.getSerializedSize());
    writer.writeToChannel(channel, smoosher);
    channel.close();
    smoosher.close();
    SmooshedFileMapper mapper = Smoosh.map(tmpDirectory);
    // read from ByteBuffer and check values
    CompressedIntsIndexedSupplier supplierFromByteBuffer =
        CompressedIntsIndexedSupplier.fromByteBuffer(mapper.mapFile("test"), byteOrder, mapper);
    IndexedInts indexedInts = supplierFromByteBuffer.get();
    assertEquals(vals.length, indexedInts.size());
    for (int i = 0; i < vals.length; ++i) {
      assertEquals(vals[i], indexedInts.get(i));
    }
    CloseQuietly.close(indexedInts);
    mapper.close();
  } finally {
    ioPeon.close();
  }
}
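The three writer tests above share the same shape: build a writer, add values, close it, smoosh its serialized form under a name, then map the directory back and hand the named entry to a fromByteBuffer factory. Below is a sketch of that shared round trip using only the smoosh calls that appear in these tests; the WriteBody interface and the smooshRoundTrip helper are hypothetical conveniences, not part of Druid:

// Hypothetical functional interface matching the writeToChannel(channel, smoosher)
// shape used by the writers above.
interface WriteBody
{
  void writeTo(SmooshedWriter channel, FileSmoosher smoosher) throws IOException;
}

// Writes one named smoosh entry of a known size, then re-maps the directory.
// The caller reads the entry with mapFile(name) and is responsible for closing the mapper.
static SmooshedFileMapper smooshRoundTrip(File dir, String name, long serializedSize, WriteBody body)
    throws IOException
{
  try (FileSmoosher smoosher = new FileSmoosher(dir)) {
    try (SmooshedWriter channel = smoosher.addWithSmooshedWriter(name, serializedSize)) {
      body.writeTo(channel, smoosher);
    }
  }
  return Smoosh.map(dir);
}

A test could then call, for example, smooshRoundTrip(tmpDirectory, "test", writer.getSerializedSize(), (channel, smoosher) -> writer.writeToChannel(channel, smoosher)) and read the entry back with mapFile("test"), assuming the writer's writeToChannel accepts the channel type used here.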
Use of io.druid.java.util.common.io.smoosh.SmooshedFileMapper in project druid by druid-io.
In class IndexMergerTest, method testPersistNullColumnSkipping:
@Test
public void testPersistNullColumnSkipping() throws Exception
{
  // check that column d2 is skipped because it only has null values
  IncrementalIndex index1 = IncrementalIndexTest.createIndex(
      new AggregatorFactory[]{new LongSumAggregatorFactory("A", "A")}
  );
  index1.add(new MapBasedInputRow(
      1L,
      Lists.newArrayList("d1", "d2"),
      ImmutableMap.<String, Object>of("d1", "a", "d2", "", "A", 1)
  ));
  index1.add(new MapBasedInputRow(
      1L,
      Lists.newArrayList("d1", "d2"),
      ImmutableMap.<String, Object>of("d1", "b", "d2", "", "A", 1)
  ));
  final File tempDir = temporaryFolder.newFolder();
  QueryableIndex index = closer.closeLater(INDEX_IO.loadIndex(INDEX_MERGER.persist(index1, tempDir, indexSpec)));
  // Only the metric "A" and dimension "d1" should be reported as columns; d2 is dropped.
  List<String> expectedColumnNames = Arrays.asList("A", "d1");
  List<String> actualColumnNames = Lists.newArrayList(index.getColumnNames());
  Collections.sort(expectedColumnNames);
  Collections.sort(actualColumnNames);
  Assert.assertEquals(expectedColumnNames, actualColumnNames);
  // Inspect the smooshed segment directory directly: no file for d2 should have been written.
  SmooshedFileMapper sfm = closer.closeLater(SmooshedFileMapper.load(tempDir));
  List<String> expectedFilenames = Arrays.asList("A", "__time", "d1", "index.drd", "metadata.drd");
  List<String> actualFilenames = new ArrayList<>(sfm.getInternalFilenames());
  Collections.sort(expectedFilenames);
  Collections.sort(actualFilenames);
  Assert.assertEquals(expectedFilenames, actualFilenames);
}
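The same kind of inspection works outside a test: SmooshedFileMapper.load plus getInternalFilenames lists every column and metadata file packed into a persisted segment directory. A minimal sketch, assuming a directory path (illustrative) that already contains a smooshed segment:

import java.io.File;
import java.io.IOException;

import io.druid.java.util.common.io.smoosh.SmooshedFileMapper;

public class ListSmooshedFiles
{
  public static void main(String[] args) throws IOException
  {
    // Illustrative path; point this at a real persisted segment directory.
    File segmentDir = new File("/tmp/druid-segment");
    SmooshedFileMapper sfm = SmooshedFileMapper.load(segmentDir);
    try {
      for (String internalFile : sfm.getInternalFilenames()) {
        System.out.println(internalFile);
      }
    }
    finally {
      sfm.close();
    }
  }
}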