Use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.
The class OrcTestingUtil, method createReader:
public static OrcBatchRecordReader createReader(OrcDataSource dataSource, List<Long> columnIds, List<Type> types)
        throws IOException
{
    OrcReader orcReader = new OrcReader(dataSource, ORC, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), new RaptorOrcAggregatedMemoryContext(), createDefaultTestConfig(), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
    List<String> columnNames = orcReader.getColumnNames();
    assertEquals(columnNames.size(), columnIds.size());
    Map<Integer, Type> includedColumns = new HashMap<>();
    int ordinal = 0;
    for (long columnId : columnIds) {
        assertEquals(columnNames.get(ordinal), String.valueOf(columnId));
        includedColumns.put(ordinal, types.get(ordinal));
        ordinal++;
    }
    return createRecordReader(orcReader, includedColumns);
}
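As a usage sketch (not from the source), the helper above might be driven like this; the FileOrcDataSource constructor arguments mirror those used in TestReadBloomFilter further down, and the single BIGINT column with ID 3 is a hypothetical fixture:

// Hypothetical fixture: an ORC file with one BIGINT column whose Raptor column ID is 3
OrcDataSource dataSource = new FileOrcDataSource(file, new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), true);
try (OrcBatchRecordReader reader = OrcTestingUtil.createReader(dataSource, ImmutableList.of(3L), ImmutableList.of(BIGINT))) {
    for (int batchSize = toIntExact(reader.nextBatch()); batchSize >= 0; batchSize = toIntExact(reader.nextBatch())) {
        Block block = reader.readBlock(0); // ordinal 0 is the only included column
        assertEquals(block.getPositionCount(), batchSize);
    }
}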
Use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.
The class TestOrcFileRewriter, method testRewriterDropThenAddDifferentColumns:
/**
 * The following test adds and drops different columns.
 */
@Test
public void testRewriterDropThenAddDifferentColumns()
        throws Exception
{
    FunctionAndTypeManager functionAndTypeManager = createTestFunctionAndTypeManager();
    DBI dbi = new DBI("jdbc:h2:mem:test" + System.nanoTime() + "_" + ThreadLocalRandom.current().nextInt());
    dbi.registerMapper(new TableColumn.Mapper(functionAndTypeManager));
    Handle dummyHandle = dbi.open();
    File dataDir = Files.createTempDir();
    StorageManager storageManager = createOrcStorageManager(dbi, dataDir);
    List<Long> columnIds = ImmutableList.of(3L, 7L);
    List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(20));
    File file = new File(temporary, randomUUID().toString());
    try (FileWriter writer = createFileWriter(columnIds, columnTypes, file, false)) {
        List<Page> pages = rowPagesBuilder(columnTypes).row(1L, "1").row(2L, "2").row(3L, "3").row(4L, "4").build();
        writer.appendPages(pages);
    }

    // Add a column
    File newFile1 = new File(temporary, randomUUID().toString());
    FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);
    OrcFileInfo info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(3L, 7L, 10L), ImmutableList.of(BIGINT, createVarcharType(20), DOUBLE)), path(file), path(newFile1), new BitSet(5));
    assertEquals(info.getRowCount(), 4);
    assertEquals(readAllBytes(file.toPath()), readAllBytes(newFile1.toPath()));

    // Drop a column
    File newFile2 = new File(temporary, randomUUID().toString());
    info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(7L, 10L), ImmutableList.of(createVarcharType(20), DOUBLE)), path(newFile1), path(newFile2), new BitSet(5));
    assertEquals(info.getRowCount(), 4);

    // Optimized writer will keep the only column
    OrcReader orcReader = new OrcReader(fileOrcDataSource(newFile2), ORC, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), new RaptorOrcAggregatedMemoryContext(), OrcTestingUtil.createDefaultTestConfig(), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
    // the original line discarded the result of equals(); assert the column names instead
    assertEquals(orcReader.getColumnNames(), ImmutableList.of("7"));
    // Add a column with a different ID and a different type
    File newFile3 = new File(temporary, randomUUID().toString());
    info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(7L, 10L, 13L), ImmutableList.of(createVarcharType(20), DOUBLE, createVarcharType(5))), path(newFile2), path(newFile3), new BitSet(5));
    assertEquals(info.getRowCount(), 4);
    assertEquals(readAllBytes(newFile2.toPath()), readAllBytes(newFile3.toPath()));

    // Prepare the final file; make sure it is accessible from the storage manager
    UUID uuid = randomUUID();
    File newFile4 = getFileSystemPath(new File(dataDir, "data/storage"), uuid);
    // Optimized ORC writer does not create the file itself
    newFile4.getParentFile().mkdirs();
    newFile4.createNewFile();

    // Drop a column and add a column; also delete 3 rows
    BitSet rowsToDelete = new BitSet(5);
    rowsToDelete.set(0);
    rowsToDelete.set(1);
    rowsToDelete.set(3);
    info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(7L, 13L, 18L), ImmutableList.of(createVarcharType(20), createVarcharType(5), INTEGER)), path(newFile3), path(newFile4), rowsToDelete);
    assertEquals(info.getRowCount(), 1);

    ConnectorPageSource source = storageManager.getPageSource(DEFAULT_RAPTOR_CONTEXT, DEFAULT_HIVE_FILE_CONTEXT, uuid, Optional.empty(), false, OptionalInt.empty(), ImmutableList.of(13L, 7L, 18L), ImmutableList.of(createVarcharType(5), createVarcharType(20), INTEGER), TupleDomain.all(), READER_ATTRIBUTES);
    Page page = null;
    while (page == null) {
        page = source.getNextPage();
    }
    assertEquals(page.getPositionCount(), 1);

    // Column 13L
    Block column0 = page.getBlock(0);
    assertTrue(column0.isNull(0));

    // Column 7L
    Block column1 = page.getBlock(1);
    assertEquals(createVarcharType(20).getSlice(column1, 0), utf8Slice("3"));
    // Column 18L
    Block column2 = page.getBlock(2);
    assertTrue(column2.isNull(0));
    // Remove all the columns
    File newFile5 = new File(temporary, randomUUID().toString());
    info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(13L, 18L), ImmutableList.of(createVarcharType(5), INTEGER)), path(newFile4), path(newFile5), new BitSet(5));

    // Optimized writer will drop the file
    assertEquals(info.getRowCount(), 0);
    assertFalse(newFile5.exists());

    dummyHandle.close();
    deleteRecursively(dataDir.toPath(), ALLOW_INSECURE);
}
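A possible follow-up assertion, sketched here rather than taken from the test: after the rewrite into newFile4, the surviving file could be read back with the createReader helper from OrcTestingUtil above. The column IDs/types follow the rewrite call, and fileOrcDataSource is the same helper the test already uses; the expected batch size of 1 assumes exactly one row survived the delete.

// Sketch only: read newFile4 back and confirm that exactly one row survived
try (OrcBatchRecordReader reader = OrcTestingUtil.createReader(fileOrcDataSource(newFile4), ImmutableList.of(7L, 13L, 18L), ImmutableList.of(createVarcharType(20), createVarcharType(5), INTEGER))) {
    assertEquals(reader.nextBatch(), 1);
}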
Use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.
The class OrcTester, method assertFileContentsPresto:
private static void assertFileContentsPresto(List<Type> types, TempFile tempFile, List<List<?>> expectedValues, boolean skipFirstBatch, boolean skipStripe, OrcEncoding orcEncoding, Format format, boolean isHiveWriter, boolean useSelectiveOrcReader, List<OrcReaderSettings> settings, Map<Integer, Slice> intermediateEncryptionKeys)
        throws IOException
{
    OrcPredicate orcPredicate = createOrcPredicate(types, expectedValues, format, isHiveWriter);
    Map<Integer, Type> includedColumns = IntStream.range(0, types.size()).boxed().collect(toImmutableMap(Function.identity(), types::get));
    List<Integer> outputColumns = IntStream.range(0, types.size()).boxed().collect(toImmutableList());

    if (useSelectiveOrcReader) {
        assertFileContentsPresto(types, tempFile.getFile(), expectedValues, orcEncoding, orcPredicate, Optional.empty(), ImmutableList.of(), ImmutableMap.of(), ImmutableMap.of(), intermediateEncryptionKeys, includedColumns, outputColumns);
        for (OrcReaderSettings entry : settings) {
            assertTrue(entry.getFilterFunctions().isEmpty(), "Filter functions are not supported yet");
            assertTrue(entry.getFilterFunctionInputMapping().isEmpty(), "Filter functions are not supported yet");
            Map<Integer, Map<Subfield, TupleDomainFilter>> columnFilters = entry.getColumnFilters();
            List<List<?>> prunedAndFilteredRows = pruneValues(types, filterRows(types, expectedValues, columnFilters), entry.getRequiredSubfields());
            Optional<TupleDomainFilterOrderChecker> orderChecker = Optional.empty();
            List<Integer> expectedFilterOrder = entry.getExpectedFilterOrder();
            if (!expectedFilterOrder.isEmpty()) {
                orderChecker = Optional.of(new TupleDomainFilterOrderChecker(expectedFilterOrder));
            }
            Optional<Map<Integer, Map<Subfield, TupleDomainFilter>>> transformedFilters = Optional.of(orderChecker.map(checker -> addOrderTracking(columnFilters, checker)).orElse(columnFilters));
            assertFileContentsPresto(types, tempFile.getFile(), prunedAndFilteredRows, orcEncoding, orcPredicate, transformedFilters, entry.getFilterFunctions(), entry.getFilterFunctionInputMapping(), entry.getRequiredSubfields());
            orderChecker.ifPresent(TupleDomainFilterOrderChecker::assertOrder);
        }
        return;
    }

    try (OrcBatchRecordReader recordReader = createCustomOrcRecordReader(tempFile, orcEncoding, orcPredicate, types, MAX_BATCH_SIZE, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), false, intermediateEncryptionKeys, false)) {
        assertEquals(recordReader.getReaderPosition(), 0);
        assertEquals(recordReader.getFilePosition(), 0);
        boolean isFirst = true;
        int rowsProcessed = 0;
        for (int batchSize = toIntExact(recordReader.nextBatch()); batchSize >= 0; batchSize = toIntExact(recordReader.nextBatch())) {
            if (skipStripe && rowsProcessed < 10000) {
                // skip recordReader.readBlock
            }
            else if (skipFirstBatch && isFirst) {
                // skip recordReader.readBlock
                isFirst = false;
            }
            else {
                for (int i = 0; i < types.size(); i++) {
                    Type type = types.get(i);
                    Block block = recordReader.readBlock(i);
                    assertEquals(block.getPositionCount(), batchSize);
                    checkNullValues(type, block);
                    assertBlockEquals(type, block, expectedValues.get(i), rowsProcessed);
                }
            }
            assertEquals(recordReader.getReaderPosition(), rowsProcessed);
            assertEquals(recordReader.getFilePosition(), rowsProcessed);
            rowsProcessed += batchSize;
        }
        assertEquals(rowsProcessed, expectedValues.get(0).size());
        assertEquals(recordReader.getReaderPosition(), rowsProcessed);
        assertEquals(recordReader.getFilePosition(), rowsProcessed);
    }
}
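For orientation, a hypothetical call site for the batch-reader path above; the Format value, the tempFile, and the expected values are illustrative assumptions, not taken from OrcTester:

// Sketch: verify a file holding a single BIGINT column against its expected values
List<Type> types = ImmutableList.of(BIGINT);
List<List<?>> expectedValues = ImmutableList.of(ImmutableList.of(1L, 2L, 3L));
assertFileContentsPresto(types, tempFile, expectedValues, false, false, OrcEncoding.ORC, Format.ORC_12, false, false, ImmutableList.of(), ImmutableMap.of());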
Use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.
The class TestReadBloomFilter, method createCustomOrcRecordReader:
private static <T> OrcBatchRecordReader createCustomOrcRecordReader(TempFile tempFile, Type type, Optional<T> filterValue, boolean bloomFilterEnabled)
        throws IOException
{
    OrcPredicate predicate = filterValue.map(value -> makeOrcPredicate(type, value, bloomFilterEnabled)).map(OrcPredicate.class::cast).orElse(TRUE);
    OrcDataSource orcDataSource = new FileOrcDataSource(tempFile.getFile(), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), true);
    OrcReader orcReader = new OrcReader(orcDataSource, OrcEncoding.ORC, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), NOOP_ORC_AGGREGATED_MEMORY_CONTEXT, OrcReaderTestingUtils.createDefaultTestConfig(), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
    assertEquals(orcReader.getColumnNames(), ImmutableList.of("test"));
    assertEquals(orcReader.getFooter().getRowsInRowGroup(), 10_000);
    return orcReader.createBatchRecordReader(ImmutableMap.of(0, type), predicate, HIVE_STORAGE_TIME_ZONE, new TestingHiveOrcAggregatedMemoryContext(), MAX_BATCH_SIZE);
}
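A sketch of how the helper above might be exercised (the probe value is an assumption; the test class's real assertions may differ): with bloom filters enabled, a point predicate on a value absent from the file lets the reader skip every row group, so no batch is returned.

// Sketch: value 424242 is assumed absent from the test file
try (OrcBatchRecordReader reader = createCustomOrcRecordReader(tempFile, BIGINT, Optional.of(424242L), true)) {
    assertEquals(reader.nextBatch(), -1);
}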
Use of com.facebook.presto.orc.cache.StorageOrcFileTailSource in project presto by prestodb.
The class TestStorageOrcFileTailSource, method testReadDwrfStripeCacheIfEnabledButAbsent:
@Test
public void testReadDwrfStripeCacheIfEnabledButAbsent()
        throws IOException
{
    FileOutputStream out = new FileOutputStream(file.getFile());
    // write the footer and post script
    DwrfProto.Footer.Builder footer = DwrfProto.Footer.newBuilder();
    DwrfProto.PostScript.Builder postScript = DwrfProto.PostScript.newBuilder().setCompression(NONE);
    writeTail(footer, postScript, out);
    out.close();

    // read the file tail with the "read dwrf stripe cache" feature enabled
    StorageOrcFileTailSource src = new StorageOrcFileTailSource(FOOTER_READ_SIZE_IN_BYTES, true);
    OrcDataSource orcDataSource = createFileOrcDataSource();
    OrcFileTail orcFileTail = src.getOrcFileTail(orcDataSource, metadataReader, Optional.empty(), false);
    assertEquals(orcFileTail.getMetadataSize(), 0);
    DwrfProto.Footer actualFooter = readFooter(orcFileTail);
    assertEquals(actualFooter, footer.build());

    // the feature is enabled, but the file doesn't have the stripe cache
    assertFalse(orcFileTail.getDwrfStripeCacheData().isPresent());
}
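For contrast, a sketch of the disabled case (same file and helpers assumed; the boolean constructor argument toggles the dwrf stripe cache read):

// Sketch: with the feature disabled, the stripe cache must likewise be absent
StorageOrcFileTailSource disabledSrc = new StorageOrcFileTailSource(FOOTER_READ_SIZE_IN_BYTES, false);
OrcFileTail tail = disabledSrc.getOrcFileTail(createFileOrcDataSource(), metadataReader, Optional.empty(), false);
assertFalse(tail.getDwrfStripeCacheData().isPresent());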