Search in sources :

Example 11 with Page

use of com.facebook.presto.common.Page in project presto by prestodb.

the class TestOrcFileRewriter method testRewriterDropThenAddDifferentColumns.

/**
 * Rewrites one ORC file through a sequence of schema changes, adding and dropping
 * different columns at each step, and checks the row count reported by the rewriter
 * after every rewrite. The final rewrite also deletes rows, and its output is read
 * back through the storage manager to verify the surviving row's values.
 */
@Test
public void testRewriterDropThenAddDifferentColumns() throws Exception {
    FunctionAndTypeManager functionAndTypeManager = createTestFunctionAndTypeManager();
    DBI dbi = new DBI("jdbc:h2:mem:test" + System.nanoTime() + "_" + ThreadLocalRandom.current().nextInt());
    dbi.registerMapper(new TableColumn.Mapper(functionAndTypeManager));
    // NOTE(review): the handle is never used directly; presumably it keeps the in-memory H2 database alive — confirm
    Handle dummyHandle = dbi.open();
    File dataDir = Files.createTempDir();
    try {
        StorageManager storageManager = createOrcStorageManager(dbi, dataDir);
        List<Long> columnIds = ImmutableList.of(3L, 7L);
        List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(20));
        File file = new File(temporary, randomUUID().toString());
        try (FileWriter writer = createFileWriter(columnIds, columnTypes, file, false)) {
            List<Page> pages = rowPagesBuilder(columnTypes).row(1L, "1").row(2L, "2").row(3L, "3").row(4L, "4").build();
            writer.appendPages(pages);
        }
        // Add a column
        File newFile1 = new File(temporary, randomUUID().toString());
        FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);
        OrcFileInfo info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(3L, 7L, 10L), ImmutableList.of(BIGINT, createVarcharType(20), DOUBLE)), path(file), path(newFile1), new BitSet(5));
        assertEquals(info.getRowCount(), 4);
        // Adding a column is expected to leave the file contents byte-for-byte unchanged
        assertEquals(readAllBytes(file.toPath()), readAllBytes(newFile1.toPath()));
        // Drop a column
        File newFile2 = new File(temporary, randomUUID().toString());
        info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(7L, 10L), ImmutableList.of(createVarcharType(20), DOUBLE)), path(newFile1), path(newFile2), new BitSet(5));
        assertEquals(info.getRowCount(), 4);
        // Optimized writer will keep the only column
        OrcReader orcReader = new OrcReader(fileOrcDataSource(newFile2), ORC, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), new RaptorOrcAggregatedMemoryContext(), OrcTestingUtil.createDefaultTestConfig(), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
        // Actually assert the column names; the result of a bare equals() call would be silently discarded
        assertEquals(orcReader.getColumnNames(), ImmutableList.of("7"));
        // Add a column with the different ID with different type
        File newFile3 = new File(temporary, randomUUID().toString());
        info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(7L, 10L, 13L), ImmutableList.of(createVarcharType(20), DOUBLE, createVarcharType(5))), path(newFile2), path(newFile3), new BitSet(5));
        assertEquals(info.getRowCount(), 4);
        assertEquals(readAllBytes(newFile2.toPath()), readAllBytes(newFile3.toPath()));
        // Get prepared for the final file; make sure it is accessible from storage manager
        UUID uuid = randomUUID();
        File newFile4 = getFileSystemPath(new File(dataDir, "data/storage"), uuid);
        // Optimized ORC writer does not create the file itself
        newFile4.getParentFile().mkdirs();
        newFile4.createNewFile();
        // Drop a column and add a column; also delete 3 rows
        BitSet rowsToDelete = new BitSet(5);
        rowsToDelete.set(0);
        rowsToDelete.set(1);
        rowsToDelete.set(3);
        info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(7L, 13L, 18L), ImmutableList.of(createVarcharType(20), createVarcharType(5), INTEGER)), path(newFile3), path(newFile4), rowsToDelete);
        assertEquals(info.getRowCount(), 1);
        // Close the page source once the assertions on its page are done
        try (ConnectorPageSource source = storageManager.getPageSource(DEFAULT_RAPTOR_CONTEXT, DEFAULT_HIVE_FILE_CONTEXT, uuid, Optional.empty(), false, OptionalInt.empty(), ImmutableList.of(13L, 7L, 18L), ImmutableList.of(createVarcharType(5), createVarcharType(20), INTEGER), TupleDomain.all(), READER_ATTRIBUTES)) {
            Page page = null;
            // Stop polling once the source reports it is finished so a regression cannot hang the test
            while (page == null && !source.isFinished()) {
                page = source.getNextPage();
            }
            assertTrue(page != null, "page source finished without producing a page");
            assertEquals(page.getPositionCount(), 1);
            // Column 13L
            Block column0 = page.getBlock(0);
            assertTrue(column0.isNull(0));
            // Column 7L
            Block column1 = page.getBlock(1);
            assertEquals(createVarcharType(20).getSlice(column1, 0), utf8Slice("3"));
            // Column 18L
            Block column2 = page.getBlock(2);
            assertTrue(column2.isNull(0));
        }
        // Remove all the columns
        File newFile5 = new File(temporary, randomUUID().toString());
        info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(13L, 18L), ImmutableList.of(createVarcharType(5), INTEGER)), path(newFile4), path(newFile5), new BitSet(5));
        // Optimized writer will drop the file
        assertEquals(info.getRowCount(), 0);
        assertFalse(newFile5.exists());
    } finally {
        // Release the handle and the temp directory even when an assertion above fails
        dummyHandle.close();
        deleteRecursively(dataDir.toPath(), ALLOW_INSECURE);
    }
}
Also used : RuntimeStats(com.facebook.presto.common.RuntimeStats) TestOrcStorageManager.createOrcStorageManager(com.facebook.presto.raptor.storage.TestOrcStorageManager.createOrcStorageManager) StorageOrcFileTailSource(com.facebook.presto.orc.cache.StorageOrcFileTailSource) DBI(org.skife.jdbi.v2.DBI) Page(com.facebook.presto.common.Page) RaptorOrcAggregatedMemoryContext(com.facebook.presto.raptor.RaptorOrcAggregatedMemoryContext) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) FunctionAndTypeManager(com.facebook.presto.metadata.FunctionAndTypeManager) FunctionAndTypeManager.createTestFunctionAndTypeManager(com.facebook.presto.metadata.FunctionAndTypeManager.createTestFunctionAndTypeManager) FileSystem(org.apache.hadoop.fs.FileSystem) LocalOrcDataEnvironment(com.facebook.presto.raptor.filesystem.LocalOrcDataEnvironment) UUID(java.util.UUID) UUID.randomUUID(java.util.UUID.randomUUID) BitSet(java.util.BitSet) TableColumn(com.facebook.presto.raptor.metadata.TableColumn) Handle(org.skife.jdbi.v2.Handle) DecimalType(com.facebook.presto.common.type.DecimalType) VarcharType.createVarcharType(com.facebook.presto.common.type.VarcharType.createVarcharType) ArrayType(com.facebook.presto.common.type.ArrayType) Type(com.facebook.presto.common.type.Type) OrcReader(com.facebook.presto.orc.OrcReader) Block(com.facebook.presto.common.block.Block) StorageStripeMetadataSource(com.facebook.presto.orc.StorageStripeMetadataSource) File(java.io.File) Test(org.testng.annotations.Test)

Example 12 with Page

use of com.facebook.presto.common.Page in project presto by prestodb.

the class OrcTester method writeOrcColumnsPresto.

/**
 * Writes the given column values to {@code outputFile} as a single page and then
 * validates the written file against the writer.
 *
 * @param outputFile destination file
 * @param format supplies the ORC encoding passed to the writer
 * @param compression compression kind passed to the writer
 * @param dwrfWriterEncryption optional encryption settings passed to the writer
 * @param types one type per column
 * @param values one list of values per column; {@code values.get(i)} is written with {@code types.get(i)}
 * @param stats writer statistics sink passed to the writer
 * @throws Exception if writing, closing, or validation fails
 */
public static void writeOrcColumnsPresto(File outputFile, Format format, CompressionKind compression, Optional<DwrfWriterEncryption> dwrfWriterEncryption, List<Type> types, List<List<?>> values, WriterStats stats) throws Exception {
    OrcWriter writer = createOrcWriter(outputFile, format.orcEncoding, compression, dwrfWriterEncryption, types, OrcWriterOptions.builder().build(), stats);
    try {
        Block[] blocks = new Block[types.size()];
        for (int i = 0; i < types.size(); i++) {
            Type type = types.get(i);
            List<?> columnValues = values.get(i);
            // Size the builder by this column's row count; values.size() is the number of columns
            BlockBuilder blockBuilder = type.createBlockBuilder(null, columnValues.size());
            for (Object value : columnValues) {
                writeValue(type, blockBuilder, value);
            }
            blocks[i] = blockBuilder.build();
        }
        writer.write(new Page(blocks));
    } finally {
        // Close the writer even if building or writing the page fails
        writer.close();
    }
    // Validation runs after close, matching the original call order
    writer.validate(new FileOrcDataSource(outputFile, new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), true));
}
Also used : DecimalType(com.facebook.presto.common.type.DecimalType) ArrayType(com.facebook.presto.common.type.ArrayType) CharType(com.facebook.presto.common.type.CharType) RowType(com.facebook.presto.common.type.RowType) VarcharType(com.facebook.presto.common.type.VarcharType) VarbinaryType(com.facebook.presto.common.type.VarbinaryType) MapType(com.facebook.presto.common.type.MapType) Type(com.facebook.presto.common.type.Type) DataSize(io.airlift.units.DataSize) Block(com.facebook.presto.common.block.Block) OrcLazyObject(com.facebook.hive.orc.lazy.OrcLazyObject) Page(com.facebook.presto.common.Page) BlockBuilder(com.facebook.presto.common.block.BlockBuilder)

Example 13 with Page

use of com.facebook.presto.common.Page in project presto by prestodb.

the class TestSelectiveOrcReader method testHiddenConstantColumns.

/**
 * Reads a single-column BIGINT file while also requesting a hidden column (negative
 * index) backed by a constant value, and checks that the returned page carries the
 * constant in the first block and the filtered BIGINT value in the second.
 */
@Test
public void testHiddenConstantColumns() throws Exception {
    Type type = BIGINT;
    List<Type> types = ImmutableList.of(type);
    List<List<?>> values = ImmutableList.of(ImmutableList.of(1L, 2L));
    // Hidden columns like partition columns use negative indices (-13).
    int hiddenColumnIndex = -13;
    Map<Integer, Type> includedColumns = ImmutableMap.of(hiddenColumnIndex, VARCHAR, 0, BIGINT);
    List<Integer> outputColumns = ImmutableList.of(hiddenColumnIndex, 0);
    Slice constantSlice = Slices.utf8Slice("partition_value");
    Map<Integer, Object> constantValues = ImmutableMap.of(hiddenColumnIndex, constantSlice);
    OrcAggregatedMemoryContext systemMemoryUsage = new TestingHiveOrcAggregatedMemoryContext();
    // NOTE(review): presumably matches only the value 1 and rejects nulls — consistent with the single-row assertions below
    TupleDomainFilter filter = BigintRange.of(1, 1, false);
    Map<Subfield, TupleDomainFilter> subFieldFilter = toSubfieldFilter(filter);
    OrcReaderSettings readerSettings = OrcTester.OrcReaderSettings.builder().setColumnFilters(ImmutableMap.of(0, subFieldFilter)).build();
    // Scope the temp file so it is released even when an assertion fails
    try (TempFile tempFile = new TempFile()) {
        writeOrcColumnsPresto(tempFile.getFile(), DWRF, ZSTD, Optional.empty(), types, values, new OrcWriterStats());
        try (OrcSelectiveRecordReader recordReader = createCustomOrcSelectiveRecordReader(tempFile.getFile(), DWRF.getOrcEncoding(), OrcPredicate.TRUE, types, 1, readerSettings.getColumnFilters(), readerSettings.getFilterFunctions(), readerSettings.getFilterFunctionInputMapping(), readerSettings.getRequiredSubfields(), constantValues, ImmutableMap.of(), includedColumns, outputColumns, false, systemMemoryUsage, false)) {
            Page page = recordReader.getNextPage();
            // Fail with a clear assertion instead of a NullPointerException if no page comes back
            assertNotNull(page);
            assertEquals(page.getPositionCount(), 1);
            // Output column 0 is the hidden constant column
            Block partitionValueBlock = page.getBlock(0);
            int length = partitionValueBlock.getSliceLength(0);
            Slice varcharSlice = partitionValueBlock.getSlice(0, 0, length);
            assertEquals(varcharSlice, constantSlice);
            // Output column 1 is the BIGINT column read from the file
            Block bigintBlock = page.getBlock(1);
            assertEquals(bigintBlock.getLong(0), 1);
            assertNull(recordReader.getNextPage());
        }
    }
}
Also used : OrcTester.createCustomOrcSelectiveRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcSelectiveRecordReader) OrcReaderSettings(com.facebook.presto.orc.OrcTester.OrcReaderSettings) Page(com.facebook.presto.common.Page) TupleDomainFilter(com.facebook.presto.common.predicate.TupleDomainFilter) BigInteger(java.math.BigInteger) CharType.createCharType(com.facebook.presto.common.type.CharType.createCharType) OrcTester.mapType(com.facebook.presto.orc.OrcTester.mapType) DecimalType(com.facebook.presto.common.type.DecimalType) CharType(com.facebook.presto.common.type.CharType) Type(com.facebook.presto.common.type.Type) OrcTester.arrayType(com.facebook.presto.orc.OrcTester.arrayType) OrcTester.rowType(com.facebook.presto.orc.OrcTester.rowType) Slice(io.airlift.slice.Slice) Block(com.facebook.presto.common.block.Block) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) List(java.util.List) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) Subfield(com.facebook.presto.common.Subfield) Test(org.testng.annotations.Test)

Example 14 with Page

use of com.facebook.presto.common.Page in project presto by prestodb.

the class TestSelectiveOrcReader method testAdaptiveBatchSizes.

/**
 * Writes a VARCHAR column whose first MAX_BATCH_SIZE rows are long strings and whose
 * remaining rows are empty, then checks that the reader's first batch has
 * MAX_BATCH_SIZE positions and that later batches shrink to the size derived from
 * MAX_BLOCK_SIZE and the long-string row width.
 */
@Test
public void testAdaptiveBatchSizes() throws Exception {
    Type type = VARCHAR;
    List<Type> types = ImmutableList.of(type);
    int rowCount = 10000;
    int longStringLength = 5000;
    // Fixed seed so the generated data, and any failure, is reproducible
    Random random = new Random(0);
    List<String> values = new ArrayList<>(rowCount);
    for (int i = 0; i < rowCount; ++i) {
        if (i < MAX_BATCH_SIZE) {
            // Each appended digit is one character, so the string is exactly longStringLength long
            StringBuilder builder = new StringBuilder(longStringLength);
            for (int j = 0; j < longStringLength; ++j) {
                builder.append(random.nextInt(10));
            }
            values.add(builder.toString());
        } else {
            values.add("");
        }
    }
    // Scope the temp file so it is released even when an assertion fails
    try (TempFile tempFile = new TempFile()) {
        writeOrcColumnsPresto(tempFile.getFile(), DWRF, NONE, Optional.empty(), types, ImmutableList.of(values), new OrcWriterStats());
        try (OrcSelectiveRecordReader recordReader = createCustomOrcSelectiveRecordReader(tempFile, OrcEncoding.DWRF, OrcPredicate.TRUE, type, MAX_BATCH_SIZE, false, false)) {
            assertEquals(recordReader.getFileRowCount(), rowCount);
            assertEquals(recordReader.getReaderRowCount(), rowCount);
            assertEquals(recordReader.getFilePosition(), 0);
            assertEquals(recordReader.getReaderPosition(), 0);
            // Size of the first batch should equal to the initial batch size (set to MAX_BATCH_SIZE)
            Page page = recordReader.getNextPage();
            assertNotNull(page);
            page = page.getLoadedPage();
            assertEquals(page.getPositionCount(), MAX_BATCH_SIZE);
            // Later batches should be adjusted based on maxCombinedBytesPerRow collected during the first batch read
            while (true) {
                page = recordReader.getNextPage();
                assertNotNull(page);
                page = page.getLoadedPage();
                if (recordReader.getReadPositions() < rowCount) {
                    assertEquals(page.getPositionCount(), MAX_BLOCK_SIZE.toBytes() / (longStringLength + Integer.BYTES + Byte.BYTES));
                } else {
                    // The batch that reaches the end of the file is not size-checked
                    break;
                }
            }
        }
    }
}
Also used : OrcTester.createCustomOrcSelectiveRecordReader(com.facebook.presto.orc.OrcTester.createCustomOrcSelectiveRecordReader) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) ArrayList(java.util.ArrayList) Page(com.facebook.presto.common.Page) CharType.createCharType(com.facebook.presto.common.type.CharType.createCharType) OrcTester.mapType(com.facebook.presto.orc.OrcTester.mapType) DecimalType(com.facebook.presto.common.type.DecimalType) CharType(com.facebook.presto.common.type.CharType) Type(com.facebook.presto.common.type.Type) OrcTester.arrayType(com.facebook.presto.orc.OrcTester.arrayType) OrcTester.rowType(com.facebook.presto.orc.OrcTester.rowType) Random(java.util.Random) Test(org.testng.annotations.Test)

Example 15 with Page

use of com.facebook.presto.common.Page in project presto by prestodb.

the class TestOrcWriter method testStreamOrder.

/**
 * For every write-validation mode, writes five VARCHAR columns with the given stream
 * layout and then walks each stripe footer, asserting that no index stream appears
 * after a data stream and handing every non-index stream to a fresh consumer.
 *
 * @param encoding ORC encoding used for writing and for reading the stripes back
 * @param kind compression kind passed to the writer
 * @param level compression level passed to the writer options
 * @param streamLayoutFactory stream layout under test
 * @param streamConsumerFactory supplies one consumer per stripe that receives the non-index streams in file order
 * @throws IOException if writing or reading the file fails
 */
private void testStreamOrder(OrcEncoding encoding, CompressionKind kind, OptionalInt level, StreamLayoutFactory streamLayoutFactory, Supplier<Consumer<Stream>> streamConsumerFactory) throws IOException {
    OrcWriterOptions orcWriterOptions = OrcWriterOptions.builder().withFlushPolicy(DefaultOrcWriterFlushPolicy.builder().withStripeMinSize(new DataSize(0, MEGABYTE)).withStripeMaxSize(new DataSize(32, MEGABYTE)).withStripeMaxRowCount(ORC_STRIPE_SIZE).build()).withRowGroupMaxRowCount(ORC_ROW_GROUP_SIZE).withDictionaryMaxMemory(new DataSize(32, MEGABYTE)).withCompressionLevel(level).withStreamLayoutFactory(streamLayoutFactory).build();
    for (OrcWriteValidationMode validationMode : OrcWriteValidationMode.values()) {
        // write down some data with unsorted streams
        String[] data = new String[] { "a", "bbbbb", "ccc", "dd", "eeee" };
        Block[] blocks = new Block[data.length];
        int entries = 65536;
        BlockBuilder blockBuilder = VARCHAR.createBlockBuilder(null, entries);
        for (int i = 0; i < data.length; i++) {
            // The literals above are plain ASCII, so the default charset does not affect the bytes
            byte[] bytes = data[i].getBytes();
            for (int j = 0; j < entries; j++) {
                // force to write different data
                bytes[0] = (byte) ((bytes[0] + 1) % 128);
                blockBuilder.writeBytes(Slices.wrappedBuffer(bytes, 0, bytes.length), 0, bytes.length);
                blockBuilder.closeEntry();
            }
            blocks[i] = blockBuilder.build();
            blockBuilder = blockBuilder.newBlockBuilderLike(null);
        }
        // Scope the temp file per validation mode so it is released even when an assertion fails
        try (TempFile tempFile = new TempFile()) {
            OrcWriter writer = new OrcWriter(new OutputStreamDataSink(new FileOutputStream(tempFile.getFile())), ImmutableList.of("test1", "test2", "test3", "test4", "test5"), ImmutableList.of(VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR), encoding, kind, Optional.empty(), NO_ENCRYPTION, orcWriterOptions, ImmutableMap.of(), HIVE_STORAGE_TIME_ZONE, true, validationMode, new OrcWriterStats());
            try {
                writer.write(new Page(blocks));
            } finally {
                // Close the writer even if the write fails
                writer.close();
            }
            for (StripeFooter stripeFooter : OrcTester.getStripes(tempFile.getFile(), encoding)) {
                Consumer<Stream> streamConsumer = streamConsumerFactory.get();
                boolean dataStreamStarted = false;
                for (Stream stream : stripeFooter.getStreams()) {
                    if (isIndexStream(stream)) {
                        // Index streams must all come before the first data stream
                        assertFalse(dataStreamStarted);
                        continue;
                    }
                    dataStreamStarted = true;
                    streamConsumer.accept(stream);
                }
            }
        }
    }
}
Also used : Page(com.facebook.presto.common.Page) StripeFooter(com.facebook.presto.orc.metadata.StripeFooter) DataSize(io.airlift.units.DataSize) FileOutputStream(java.io.FileOutputStream) OrcWriteValidationMode(com.facebook.presto.orc.OrcWriteValidation.OrcWriteValidationMode) Block(com.facebook.presto.common.block.Block) StripeReader.isIndexStream(com.facebook.presto.orc.StripeReader.isIndexStream) FileOutputStream(java.io.FileOutputStream) Stream(com.facebook.presto.orc.metadata.Stream) OutputStreamDataSink(com.facebook.presto.common.io.OutputStreamDataSink) BlockBuilder(com.facebook.presto.common.block.BlockBuilder)

Aggregations

Page (com.facebook.presto.common.Page)545 Test (org.testng.annotations.Test)273 Block (com.facebook.presto.common.block.Block)146 Type (com.facebook.presto.common.type.Type)129 MaterializedResult (com.facebook.presto.testing.MaterializedResult)102 PlanNodeId (com.facebook.presto.spi.plan.PlanNodeId)89 ImmutableList (com.google.common.collect.ImmutableList)73 DataSize (io.airlift.units.DataSize)69 RowPagesBuilder (com.facebook.presto.RowPagesBuilder)65 BlockBuilder (com.facebook.presto.common.block.BlockBuilder)52 ArrayList (java.util.ArrayList)50 List (java.util.List)48 Optional (java.util.Optional)44 RunLengthEncodedBlock (com.facebook.presto.common.block.RunLengthEncodedBlock)43 OperatorAssertion.toMaterializedResult (com.facebook.presto.operator.OperatorAssertion.toMaterializedResult)38 PrestoException (com.facebook.presto.spi.PrestoException)38 TestingTaskContext (com.facebook.presto.testing.TestingTaskContext)36 ArrayType (com.facebook.presto.common.type.ArrayType)35 IOException (java.io.IOException)31 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)29