Search in sources :

Example 1 with SliceSelectiveStreamReader

Use of com.facebook.presto.orc.reader.SliceSelectiveStreamReader in the project presto by prestodb.

From the class TestOrcSelectiveStreamReaders, method testEmptyStrings.

/**
 * Tests SliceDirectSelectiveStreamReader for the case where all elements to read are empty strings. The output Block should be a valid VariableWidthBlock with an
 * empty Slice. This simulates a problem seen in production. The state of SliceDirectSelectiveStreamReader needed to reproduce the problem is:
 * - dataStream: null
 * - presentStream: null
 * - lengthStream: not null
 * - filter: null
 * - outputRequired: true
 * - offsets array: non zeros
 * The test issues two reads: the first one reads a non-empty string and populates non-zero offsets; the second one reads the empty string with the above conditions met.
 * The scenario is repeated for every combination of supported file format and compression kind.
 *
 * @throws Exception if writing the temporary ORC file or reading it back fails
 */
@Test
public void testEmptyStrings() throws Exception {
    Type type = VARCHAR;
    List<Type> types = ImmutableList.of(type);
    List<List<?>> values = ImmutableList.of(ImmutableList.of("a", ""));
    for (OrcTester.Format format : formats) {
        // Skip only the formats that cannot store the tested types; the remaining formats must still be exercised.
        if (!types.stream().allMatch(format::supportsType)) {
            continue;
        }
        for (CompressionKind compression : compressions) {
            // Close the temp file after each combination so temporary files do not accumulate across iterations.
            try (TempFile tempFile = new TempFile()) {
                writeOrcColumnsPresto(tempFile.getFile(), format, compression, Optional.empty(), types, values, new OrcWriterStats());
                // NOTE(review): the predicate is always built with DWRF regardless of the format being iterated - confirm this is intentional.
                OrcPredicate orcPredicate = createOrcPredicate(types, values, DWRF, false);
                Map<Integer, Type> includedColumns = IntStream.range(0, types.size()).boxed().collect(toImmutableMap(Function.identity(), types::get));
                List<Integer> outputColumns = IntStream.range(0, types.size()).boxed().collect(toImmutableList());
                OrcAggregatedMemoryContext systemMemoryUsage = new TestingHiveOrcAggregatedMemoryContext();
                try (OrcSelectiveRecordReader recordReader = createCustomOrcSelectiveRecordReader(tempFile.getFile(), format.getOrcEncoding(), orcPredicate, types, 1, ImmutableMap.of(), ImmutableList.of(), ImmutableMap.of(), OrcTester.OrcReaderSettings.builder().build().getRequiredSubfields(), ImmutableMap.of(), ImmutableMap.of(), includedColumns, outputColumns, false, systemMemoryUsage, false)) {
                    assertEquals(recordReader.getReaderPosition(), 0);
                    assertEquals(recordReader.getFilePosition(), 0);
                    SelectiveStreamReader streamReader = recordReader.getStreamReaders()[0];
                    // Read the first non-empty element. Do not call streamReader.getBlock() to preserve the offsets array in SliceDirectSelectiveStreamReader.
                    int batchSize = min(recordReader.prepareNextBatch(), 1);
                    int[] positions = IntStream.range(0, batchSize).toArray();
                    streamReader.read(0, positions, batchSize);
                    recordReader.batchRead(batchSize);
                    // Read the second element: an empty string. Set the dataStream in SliceDirectSelectiveStreamReader to null to simulate the conditions causing the problem.
                    ((SliceSelectiveStreamReader) streamReader).resetDataStream();
                    batchSize = min(recordReader.prepareNextBatch(), 1);
                    positions = IntStream.range(0, batchSize).toArray();
                    streamReader.read(0, positions, batchSize);
                    recordReader.batchRead(batchSize);
                    // Only the second read is materialized, so the block is expected to hold just the empty string.
                    Block block = streamReader.getBlock(positions, batchSize);
                    List<?> expectedValues = ImmutableList.of("");
                    assertBlockEquals(type, block, expectedValues, 0);
                    assertEquals(recordReader.getReaderPosition(), 1);
                    assertEquals(recordReader.getFilePosition(), 1);
                }
            }
        }
    }
}
Also used : IntStream(java.util.stream.IntStream) OrcTester.createCustomOrcSelectiveRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcSelectiveRecordReader) VARCHAR(com.facebook.presto.common.type.VarcharType.VARCHAR) SliceSelectiveStreamReader(com.facebook.presto.orc.reader.SliceSelectiveStreamReader) ORC_11(com.facebook.presto.orc.OrcTester.Format.ORC_11) Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) ORC_12(com.facebook.presto.orc.OrcTester.Format.ORC_12) Function(java.util.function.Function) OrcTester.assertBlockEquals(com.facebook.presto.orc.OrcTester.assertBlockEquals) ZLIB(com.facebook.presto.orc.metadata.CompressionKind.ZLIB) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) OrcTester.writeOrcColumnsPresto(com.facebook.presto.orc.OrcTester.writeOrcColumnsPresto) Type(com.facebook.presto.common.type.Type) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) NONE(com.facebook.presto.orc.metadata.CompressionKind.NONE) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) Set(java.util.Set) SNAPPY(com.facebook.presto.orc.metadata.CompressionKind.SNAPPY) Math.min(java.lang.Math.min) DWRF(com.facebook.presto.orc.OrcTester.Format.DWRF) List(java.util.List) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) SelectiveStreamReader(com.facebook.presto.orc.reader.SelectiveStreamReader) CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) LZ4(com.facebook.presto.orc.metadata.CompressionKind.LZ4) TestingOrcPredicate.createOrcPredicate(com.facebook.presto.orc.TestingOrcPredicate.createOrcPredicate) Optional(java.util.Optional) Block(com.facebook.presto.common.block.Block) ZSTD(com.facebook.presto.orc.metadata.CompressionKind.ZSTD) OrcTester.createCustomOrcSelectiveRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcSelectiveRecordReader) 
CompressionKind(com.facebook.presto.orc.metadata.CompressionKind) Type(com.facebook.presto.common.type.Type) SliceSelectiveStreamReader(com.facebook.presto.orc.reader.SliceSelectiveStreamReader) SelectiveStreamReader(com.facebook.presto.orc.reader.SelectiveStreamReader) Block(com.facebook.presto.common.block.Block) ImmutableList(com.google.common.collect.ImmutableList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) SliceSelectiveStreamReader(com.facebook.presto.orc.reader.SliceSelectiveStreamReader) TestingOrcPredicate.createOrcPredicate(com.facebook.presto.orc.TestingOrcPredicate.createOrcPredicate) Test(org.testng.annotations.Test)

Aggregations

Block (com.facebook.presto.common.block.Block)1 Type (com.facebook.presto.common.type.Type)1 VARCHAR (com.facebook.presto.common.type.VarcharType.VARCHAR)1 DWRF (com.facebook.presto.orc.OrcTester.Format.DWRF)1 ORC_11 (com.facebook.presto.orc.OrcTester.Format.ORC_11)1 ORC_12 (com.facebook.presto.orc.OrcTester.Format.ORC_12)1 OrcTester.assertBlockEquals (com.facebook.presto.orc.OrcTester.assertBlockEquals)1 OrcTester.createCustomOrcSelectiveRecordReader (com.facebook.presto.orc.OrcTester.createCustomOrcSelectiveRecordReader)1 OrcTester.writeOrcColumnsPresto (com.facebook.presto.orc.OrcTester.writeOrcColumnsPresto)1 TestingOrcPredicate.createOrcPredicate (com.facebook.presto.orc.TestingOrcPredicate.createOrcPredicate)1 CompressionKind (com.facebook.presto.orc.metadata.CompressionKind)1 LZ4 (com.facebook.presto.orc.metadata.CompressionKind.LZ4)1 NONE (com.facebook.presto.orc.metadata.CompressionKind.NONE)1 SNAPPY (com.facebook.presto.orc.metadata.CompressionKind.SNAPPY)1 ZLIB (com.facebook.presto.orc.metadata.CompressionKind.ZLIB)1 ZSTD (com.facebook.presto.orc.metadata.CompressionKind.ZSTD)1 SelectiveStreamReader (com.facebook.presto.orc.reader.SelectiveStreamReader)1 SliceSelectiveStreamReader (com.facebook.presto.orc.reader.SliceSelectiveStreamReader)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)1