
Example 1 with BlockMissingException

Use of org.apache.hadoop.hdfs.BlockMissingException in project hadoop by apache.

From the class TestDataNodeHotSwapVolumes, method testRemoveOneVolume:

@Test(timeout = 60000)
public void testRemoveOneVolume() throws ReconfigurationException, InterruptedException, TimeoutException, IOException {
    startDFSCluster(1, 1);
    final short replFactor = 1;
    Path testFile = new Path("/test");
    createFile(testFile, 10, replFactor);
    DataNode dn = cluster.getDataNodes().get(0);
    Collection<String> oldDirs = getDataDirs(dn);
    // Keep the first volume.
    String newDirs = oldDirs.iterator().next();
    assertThat("DN did not update its own config", dn.reconfigurePropertyImpl(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY, newDirs), is(dn.getConf().get(DFS_DATANODE_DATA_DIR_KEY)));
    assertFileLocksReleased(new ArrayList<String>(oldDirs).subList(1, oldDirs.size()));
    dn.scheduleAllBlockReport(0);
    try {
        DFSTestUtil.readFile(cluster.getFileSystem(), testFile);
        fail("Expect to throw BlockMissingException.");
    } catch (BlockMissingException e) {
        GenericTestUtils.assertExceptionContains("Could not obtain block", e);
    }
    Path newFile = new Path("/newFile");
    createFile(newFile, 6);
    String bpid = cluster.getNamesystem().getBlockPoolId();
    List<Map<DatanodeStorage, BlockListAsLongs>> blockReports = cluster.getAllBlockReports(bpid);
    assertEquals((int) replFactor, blockReports.size());
    BlockListAsLongs blocksForVolume1 = blockReports.get(0).values().iterator().next();
    // The first volume holds half of testFile's blocks plus all of newFile's blocks.
    assertEquals(10 / 2 + 6, blocksForVolume1.getNumberOfBlocks());
}
Also used : Path(org.apache.hadoop.fs.Path) ArrayList(java.util.ArrayList) BlockListAsLongs(org.apache.hadoop.hdfs.protocol.BlockListAsLongs) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) CoreMatchers.containsString(org.hamcrest.CoreMatchers.containsString) Matchers.anyString(org.mockito.Matchers.anyString) Map(java.util.Map) Test(org.junit.Test)
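
For readers who want to reproduce the same failure mode outside of a MiniDFSCluster test, the sketch below shows the general client-side pattern: open an HDFS file, read it, and catch BlockMissingException (a subclass of IOException thrown when no replica of a block can be obtained, with a "Could not obtain block" message as asserted above). The path and configuration here are illustrative assumptions, not part of the example.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.BlockMissingException;

public class ReadWithBlockMissingHandling {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Assumes fs.defaultFS points at an HDFS namenode; "/test" is a hypothetical path.
        try (FileSystem fs = FileSystem.get(conf);
                FSDataInputStream in = fs.open(new Path("/test"))) {
            byte[] buffer = new byte[4096];
            while (in.read(buffer) > 0) {
                // consume the data
            }
        } catch (BlockMissingException e) {
            // Every replica of some block was unavailable; surface a clear error instead of retrying forever.
            System.err.println("Missing block while reading: " + e.getMessage());
        }
    }
}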

Example 2 with BlockMissingException

Use of org.apache.hadoop.hdfs.BlockMissingException in project presto by prestodb.

From the class IcebergPageSourceProvider, method createParquetPageSource:

private static ConnectorPageSource createParquetPageSource(HdfsEnvironment hdfsEnvironment, String user, Configuration configuration, Path path, long start, long length, SchemaTableName tableName, List<IcebergColumnHandle> regularColumns, boolean useParquetColumnNames, DataSize maxReadBlockSize, boolean batchReaderEnabled, boolean verificationEnabled, TupleDomain<IcebergColumnHandle> effectivePredicate, FileFormatDataSourceStats fileFormatDataSourceStats, boolean columnIndexFilterEnabled) {
    AggregatedMemoryContext systemMemoryContext = newSimpleAggregatedMemoryContext();
    ParquetDataSource dataSource = null;
    try {
        ExtendedFileSystem fileSystem = hdfsEnvironment.getFileSystem(user, path, configuration);
        FileStatus fileStatus = fileSystem.getFileStatus(path);
        long fileSize = fileStatus.getLen();
        long modificationTime = fileStatus.getModificationTime();
        HiveFileContext hiveFileContext = new HiveFileContext(true, NO_CACHE_CONSTRAINTS, Optional.empty(), Optional.of(fileSize), modificationTime, false);
        FSDataInputStream inputStream = fileSystem.openFile(path, hiveFileContext);
        dataSource = buildHdfsParquetDataSource(inputStream, path, fileFormatDataSourceStats);
        ParquetMetadata parquetMetadata = MetadataReader.readFooter(dataSource, fileSize).getParquetMetadata();
        FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
        MessageType fileSchema = fileMetaData.getSchema();
        // Mapping from Iceberg field ID to Parquet fields.
        Map<Integer, org.apache.parquet.schema.Type> parquetIdToField = fileSchema.getFields().stream().filter(field -> field.getId() != null).collect(toImmutableMap(field -> field.getId().intValue(), Function.identity()));
        List<org.apache.parquet.schema.Type> parquetFields = regularColumns.stream().map(column -> {
            if (parquetIdToField.isEmpty()) {
                // This is a migrated table
                return getParquetTypeByName(column.getName(), fileSchema);
            }
            return parquetIdToField.get(column.getId());
        }).collect(toList());
        // TODO: support subfield pushdown
        MessageType requestedSchema = new MessageType(fileSchema.getName(), parquetFields.stream().filter(Objects::nonNull).collect(toImmutableList()));
        Map<List<String>, RichColumnDescriptor> descriptorsByPath = getDescriptors(fileSchema, requestedSchema);
        TupleDomain<ColumnDescriptor> parquetTupleDomain = getParquetTupleDomain(descriptorsByPath, effectivePredicate);
        Predicate parquetPredicate = buildPredicate(requestedSchema, parquetTupleDomain, descriptorsByPath);
        final ParquetDataSource finalDataSource = dataSource;
        List<BlockMetaData> blocks = new ArrayList<>();
        List<ColumnIndexStore> blockIndexStores = new ArrayList<>();
        for (BlockMetaData block : parquetMetadata.getBlocks()) {
            long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
            Optional<ColumnIndexStore> columnIndexStore = ColumnIndexFilterUtils.getColumnIndexStore(parquetPredicate, finalDataSource, block, descriptorsByPath, columnIndexFilterEnabled);
            if ((firstDataPage >= start) && (firstDataPage < (start + length)) && predicateMatches(parquetPredicate, block, dataSource, descriptorsByPath, parquetTupleDomain, columnIndexStore, columnIndexFilterEnabled)) {
                blocks.add(block);
                blockIndexStores.add(columnIndexStore.orElse(null));
            }
        }
        MessageColumnIO messageColumnIO = getColumnIO(fileSchema, requestedSchema);
        ParquetReader parquetReader = new ParquetReader(messageColumnIO, blocks, dataSource, systemMemoryContext, maxReadBlockSize, batchReaderEnabled, verificationEnabled, parquetPredicate, blockIndexStores, columnIndexFilterEnabled);
        ImmutableList.Builder<String> namesBuilder = ImmutableList.builder();
        ImmutableList.Builder<Type> prestoTypes = ImmutableList.builder();
        ImmutableList.Builder<Optional<Field>> internalFields = ImmutableList.builder();
        for (int columnIndex = 0; columnIndex < regularColumns.size(); columnIndex++) {
            IcebergColumnHandle column = regularColumns.get(columnIndex);
            namesBuilder.add(column.getName());
            org.apache.parquet.schema.Type parquetField = parquetFields.get(columnIndex);
            Type prestoType = column.getType();
            prestoTypes.add(prestoType);
            if (parquetField == null) {
                internalFields.add(Optional.empty());
            } else {
                internalFields.add(constructField(column.getType(), messageColumnIO.getChild(parquetField.getName())));
            }
        }
        return new ParquetPageSource(parquetReader, prestoTypes.build(), internalFields.build(), namesBuilder.build(), new RuntimeStats());
    } catch (Exception e) {
        try {
            if (dataSource != null) {
                dataSource.close();
            }
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = format("Error opening Iceberg split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e instanceof ParquetCorruptionException) {
            throw new PrestoException(ICEBERG_BAD_DATA, message, e);
        }
        if (e instanceof BlockMissingException) {
            throw new PrestoException(ICEBERG_MISSING_DATA, message, e);
        }
        throw new PrestoException(ICEBERG_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) HiveSessionProperties.isUseParquetColumnNames(com.facebook.presto.hive.HiveSessionProperties.isUseParquetColumnNames) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) FileStatus(org.apache.hadoop.fs.FileStatus) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) OrcDataSource(com.facebook.presto.orc.OrcDataSource) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) ENGLISH(java.util.Locale.ENGLISH) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ParquetDataSource(com.facebook.presto.parquet.ParquetDataSource) ORC_ICEBERG_ID_KEY(com.facebook.presto.iceberg.TypeConverter.ORC_ICEBERG_ID_KEY) IcebergSessionProperties.getOrcLazyReadSmallRanges(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcLazyReadSmallRanges) ExtendedFileSystem(com.facebook.presto.hive.filesystem.ExtendedFileSystem) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HiveFileContext(com.facebook.presto.hive.HiveFileContext) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ICEBERG_BAD_DATA(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA) ParquetPageSource(com.facebook.presto.hive.parquet.ParquetPageSource) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) StandardTypes(com.facebook.presto.common.type.StandardTypes) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) DwrfKeyProvider(com.facebook.presto.orc.DwrfKeyProvider) TypeConverter.toHiveType(com.facebook.presto.iceberg.TypeConverter.toHiveType) OrcReaderOptions(com.facebook.presto.orc.OrcReaderOptions) IcebergSessionProperties.getOrcMaxReadBlockSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxReadBlockSize) ArrayList(java.util.ArrayList) IcebergSessionProperties.getOrcTinyStripeThreshold(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcTinyStripeThreshold) ROOT_COLUMN_ID(com.facebook.presto.iceberg.IcebergOrcColumn.ROOT_COLUMN_ID) ICEBERG_MISSING_DATA(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_MISSING_DATA) DwrfEncryptionProvider(com.facebook.presto.orc.DwrfEncryptionProvider) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) IOException(java.io.IOException) UTC(org.joda.time.DateTimeZone.UTC) FileFormat(org.apache.iceberg.FileFormat) Domain(com.facebook.presto.common.predicate.Domain) ParquetReader(com.facebook.presto.parquet.reader.ParquetReader) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) HiveSessionProperties.getParquetMaxReadBlockSize(com.facebook.presto.hive.HiveSessionProperties.getParquetMaxReadBlockSize) ColumnHandle(com.facebook.presto.spi.ColumnHandle) 
IcebergSessionProperties.isOrcZstdJniDecompressionEnabled(com.facebook.presto.iceberg.IcebergSessionProperties.isOrcZstdJniDecompressionEnabled) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) OrcReader(com.facebook.presto.orc.OrcReader) ColumnIOConverter.constructField(org.apache.parquet.io.ColumnIOConverter.constructField) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) HdfsOrcDataSource(com.facebook.presto.hive.orc.HdfsOrcDataSource) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) NO_CACHE_CONSTRAINTS(com.facebook.presto.hive.CacheQuota.NO_CACHE_CONSTRAINTS) IcebergSessionProperties.getOrcMaxBufferSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxBufferSize) OrcBatchPageSource(com.facebook.presto.hive.orc.OrcBatchPageSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) SplitContext(com.facebook.presto.spi.SplitContext) ParquetTypeUtils.getDescriptors(com.facebook.presto.parquet.ParquetTypeUtils.getDescriptors) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) RuntimeStats(com.facebook.presto.common.RuntimeStats) HdfsContext(com.facebook.presto.hive.HdfsContext) ProjectionBasedDwrfKeyProvider(com.facebook.presto.hive.orc.ProjectionBasedDwrfKeyProvider) HiveSessionProperties.isParquetBatchReadsEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReadsEnabled) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableMap(com.google.common.collect.ImmutableMap) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) OrcPredicate(com.facebook.presto.orc.OrcPredicate) HiveDwrfEncryptionProvider(com.facebook.presto.hive.HiveDwrfEncryptionProvider) String.format(java.lang.String.format) IcebergSessionProperties.isOrcBloomFiltersEnabled(com.facebook.presto.iceberg.IcebergSessionProperties.isOrcBloomFiltersEnabled) ColumnIndexFilterUtils(com.facebook.presto.parquet.reader.ColumnIndexFilterUtils) Objects(java.util.Objects) MessageType(org.apache.parquet.schema.MessageType) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveSessionProperties.isParquetBatchReaderVerificationEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReaderVerificationEnabled) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) IntStream(java.util.stream.IntStream) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) PredicateUtils.predicateMatches(com.facebook.presto.parquet.predicate.PredicateUtils.predicateMatches) PrestoException(com.facebook.presto.spi.PrestoException) Function(java.util.function.Function) Inject(javax.inject.Inject) ParquetTypeUtils.getParquetTypeByName(com.facebook.presto.parquet.ParquetTypeUtils.getParquetTypeByName) ImmutableList(com.google.common.collect.ImmutableList) ICEBERG_CANNOT_OPEN_SPLIT(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_CANNOT_OPEN_SPLIT) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) 
Predicate(com.facebook.presto.parquet.predicate.Predicate) OrcType(com.facebook.presto.orc.metadata.OrcType) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) PredicateUtils.buildPredicate(com.facebook.presto.parquet.predicate.PredicateUtils.buildPredicate) Type(com.facebook.presto.common.type.Type) IcebergSessionProperties.getOrcMaxMergeDistance(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) OrcEncoding(com.facebook.presto.orc.OrcEncoding) ParquetTypeUtils.getColumnIO(com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) AggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext) Field(com.facebook.presto.parquet.Field) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) IcebergSessionProperties.getOrcStreamBufferSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcStreamBufferSize)
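
The part of this example that involves BlockMissingException is the catch block: the half-opened data source is closed quietly, any PrestoException is rethrown as-is, and everything else is wrapped with an Iceberg error code, with BlockMissingException specifically mapped to ICEBERG_MISSING_DATA. The sketch below restates that cleanup-and-translate pattern in isolation; the RuntimeException subclass and message prefixes are placeholders standing in for PrestoException and the Iceberg error codes, not Presto API.

import java.io.Closeable;
import java.io.IOException;
import org.apache.hadoop.hdfs.BlockMissingException;

final class SplitOpenErrors {
    private SplitOpenErrors() {}

    // Close the half-opened resource, then turn the failure into an unchecked exception whose
    // category depends on the cause, mirroring the catch block in createParquetPageSource above.
    static RuntimeException cleanUpAndTranslate(Exception e, Closeable resource, String splitDescription) {
        if (resource != null) {
            try {
                resource.close();
            } catch (IOException ignored) {
                // Suppress the secondary failure; the original exception is what matters.
            }
        }
        if (e instanceof RuntimeException) {
            return (RuntimeException) e;
        }
        String message = "Error opening split " + splitDescription + ": " + e.getMessage();
        if (e instanceof BlockMissingException) {
            return new IllegalStateException("MISSING_DATA: " + message, e);  // stands in for ICEBERG_MISSING_DATA
        }
        return new IllegalStateException("CANNOT_OPEN_SPLIT: " + message, e); // stands in for ICEBERG_CANNOT_OPEN_SPLIT
    }
}

A caller inside a catch block would rethrow the result, e.g. throw SplitOpenErrors.cleanUpAndTranslate(e, resource, description).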

Example 3 with BlockMissingException

Use of org.apache.hadoop.hdfs.BlockMissingException in project hadoop by apache.

From the class TestListCorruptFileBlocks, method testListCorruptFilesCorruptedBlock:

/** check if nn.getCorruptFiles() returns a file that has corrupted blocks */
@Test(timeout = 300000)
public void testListCorruptFilesCorruptedBlock() throws Exception {
    MiniDFSCluster cluster = null;
    Random random = new Random();
    try {
        Configuration conf = new HdfsConfiguration();
        // datanode scans directories
        conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1);
        // datanode sends block reports
        conf.setInt(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 3 * 1000);
        // Set short retry timeouts so this test runs faster
        conf.setInt(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY, 10);
        cluster = new MiniDFSCluster.Builder(conf).build();
        FileSystem fs = cluster.getFileSystem();
        // create two files with one block each
        DFSTestUtil util = new DFSTestUtil.Builder().setName("testCorruptFilesCorruptedBlock").setNumFiles(2).setMaxLevels(1).setMaxSize(512).build();
        util.createFiles(fs, "/srcdat10");
        // fetch bad file list from namenode. There should be none.
        final NameNode namenode = cluster.getNameNode();
        Collection<FSNamesystem.CorruptFileBlockInfo> badFiles = namenode.getNamesystem().listCorruptFileBlocks("/", null);
        assertTrue("Namenode has " + badFiles.size() + " corrupt files. Expecting None.", badFiles.size() == 0);
        // Now deliberately corrupt one block
        String bpid = cluster.getNamesystem().getBlockPoolId();
        File storageDir = cluster.getInstanceStorageDir(0, 1);
        File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid);
        assertTrue("data directory does not exist", data_dir.exists());
        List<File> metaFiles = MiniDFSCluster.getAllBlockMetadataFiles(data_dir);
        assertTrue("Data directory does not contain any blocks or there was an " + "IO error", metaFiles != null && !metaFiles.isEmpty());
        File metaFile = metaFiles.get(0);
        RandomAccessFile file = new RandomAccessFile(metaFile, "rw");
        FileChannel channel = file.getChannel();
        long position = channel.size() - 2;
        int length = 2;
        byte[] buffer = new byte[length];
        random.nextBytes(buffer);
        channel.write(ByteBuffer.wrap(buffer), position);
        file.close();
        LOG.info("Deliberately corrupting file " + metaFile.getName() + " at offset " + position + " length " + length);
        // read all files to trigger detection of corrupted replica
        try {
            util.checkFiles(fs, "/srcdat10");
        } catch (BlockMissingException e) {
            System.out.println("Received BlockMissingException as expected.");
        } catch (IOException e) {
            assertTrue("Corrupted replicas not handled properly. Expecting BlockMissingException " + " but received IOException " + e, false);
        }
        // fetch bad file list from namenode. There should be one file.
        badFiles = namenode.getNamesystem().listCorruptFileBlocks("/", null);
        LOG.info("Namenode has bad files. " + badFiles.size());
        assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting 1.", badFiles.size() == 1);
        util.cleanup(fs, "/srcdat10");
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used : DFSTestUtil(org.apache.hadoop.hdfs.DFSTestUtil) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) FileChannel(java.nio.channels.FileChannel) IOException(java.io.IOException) Random(java.util.Random) RandomAccessFile(java.io.RandomAccessFile) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) File(java.io.File) Test(org.junit.Test)
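
The corruption step in the middle of this test is easy to miss among the cluster setup: it overwrites the last two bytes of a block's metadata file so the stored checksum no longer matches the data. A standalone sketch of just that step is below; the file path is a hypothetical placeholder for a real blk_*.meta file (in the test it comes from MiniDFSCluster.getAllBlockMetadataFiles).

import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Random;

public class CorruptMetaFileTail {
    public static void main(String[] args) throws Exception {
        byte[] junk = new byte[2];
        new Random().nextBytes(junk);
        // Hypothetical path; point this at an actual block metadata file to reproduce the test's step.
        try (RandomAccessFile file = new RandomAccessFile("/tmp/blk_1073741825_1001.meta", "rw")) {
            FileChannel channel = file.getChannel();
            long position = channel.size() - junk.length;
            // Overwrite the trailing bytes in place so the recorded checksum becomes invalid.
            channel.write(ByteBuffer.wrap(junk), position);
        }
    }
}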

Example 4 with BlockMissingException

Use of org.apache.hadoop.hdfs.BlockMissingException in project hadoop by apache.

From the class TestListCorruptFileBlocks, method testListCorruptFileBlocksInSafeMode:

/**
   * Check that listCorruptFileBlocks works while the namenode is still in safemode.
   */
@Test(timeout = 300000)
public void testListCorruptFileBlocksInSafeMode() throws Exception {
    MiniDFSCluster cluster = null;
    Random random = new Random();
    try {
        Configuration conf = new HdfsConfiguration();
        // datanode scans directories
        conf.setInt(DFSConfigKeys.DFS_DATANODE_DIRECTORYSCAN_INTERVAL_KEY, 1);
        // datanode sends block reports
        conf.setInt(DFSConfigKeys.DFS_BLOCKREPORT_INTERVAL_MSEC_KEY, 3 * 1000);
        // never leave safemode automatically
        conf.setFloat(DFSConfigKeys.DFS_NAMENODE_SAFEMODE_THRESHOLD_PCT_KEY, 1.5f);
        // start populating repl queues immediately 
        conf.setFloat(DFSConfigKeys.DFS_NAMENODE_REPL_QUEUE_THRESHOLD_PCT_KEY, 0f);
        // Set short retry timeouts so this test runs faster
        conf.setInt(HdfsClientConfigKeys.Retry.WINDOW_BASE_KEY, 10);
        cluster = new MiniDFSCluster.Builder(conf).waitSafeMode(false).build();
        cluster.getNameNodeRpc().setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE, false);
        FileSystem fs = cluster.getFileSystem();
        // create two files with one block each
        DFSTestUtil util = new DFSTestUtil.Builder().setName("testListCorruptFileBlocksInSafeMode").setNumFiles(2).setMaxLevels(1).setMaxSize(512).build();
        util.createFiles(fs, "/srcdat10");
        // fetch bad file list from namenode. There should be none.
        Collection<FSNamesystem.CorruptFileBlockInfo> badFiles = cluster.getNameNode().getNamesystem().listCorruptFileBlocks("/", null);
        assertTrue("Namenode has " + badFiles.size() + " corrupt files. Expecting None.", badFiles.size() == 0);
        // Now deliberately corrupt one block
        File storageDir = cluster.getInstanceStorageDir(0, 0);
        File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, cluster.getNamesystem().getBlockPoolId());
        assertTrue("data directory does not exist", data_dir.exists());
        List<File> metaFiles = MiniDFSCluster.getAllBlockMetadataFiles(data_dir);
        assertTrue("Data directory does not contain any blocks or there was an " + "IO error", metaFiles != null && !metaFiles.isEmpty());
        File metaFile = metaFiles.get(0);
        RandomAccessFile file = new RandomAccessFile(metaFile, "rw");
        FileChannel channel = file.getChannel();
        long position = channel.size() - 2;
        int length = 2;
        byte[] buffer = new byte[length];
        random.nextBytes(buffer);
        channel.write(ByteBuffer.wrap(buffer), position);
        file.close();
        LOG.info("Deliberately corrupting file " + metaFile.getName() + " at offset " + position + " length " + length);
        // read all files to trigger detection of corrupted replica
        try {
            util.checkFiles(fs, "/srcdat10");
        } catch (BlockMissingException e) {
            System.out.println("Received BlockMissingException as expected.");
        } catch (IOException e) {
            assertTrue("Corrupted replicas not handled properly. " + "Expecting BlockMissingException " + " but received IOException " + e, false);
        }
        // fetch bad file list from namenode. There should be one file.
        badFiles = cluster.getNameNode().getNamesystem().listCorruptFileBlocks("/", null);
        LOG.info("Namenode has bad files. " + badFiles.size());
        assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting 1.", badFiles.size() == 1);
        // restart namenode
        cluster.restartNameNode(0);
        fs = cluster.getFileSystem();
        // wait until replication queues have been initialized
        while (!cluster.getNameNode().namesystem.getBlockManager().isPopulatingReplQueues()) {
            try {
                LOG.info("waiting for replication queues");
                Thread.sleep(1000);
            } catch (InterruptedException ignore) {
            }
        }
        // read all files to trigger detection of corrupted replica
        try {
            util.checkFiles(fs, "/srcdat10");
        } catch (BlockMissingException e) {
            System.out.println("Received BlockMissingException as expected.");
        } catch (IOException e) {
            assertTrue("Corrupted replicas not handled properly. " + "Expecting BlockMissingException " + " but received IOException " + e, false);
        }
        // fetch bad file list from namenode. There should be one file.
        badFiles = cluster.getNameNode().getNamesystem().listCorruptFileBlocks("/", null);
        LOG.info("Namenode has bad files. " + badFiles.size());
        assertTrue("Namenode has " + badFiles.size() + " bad files. Expecting 1.", badFiles.size() == 1);
        // check that we are still in safe mode
        assertTrue("Namenode is not in safe mode", cluster.getNameNode().isInSafeMode());
        // now leave safe mode so that we can clean up
        cluster.getNameNodeRpc().setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE, false);
        util.cleanup(fs, "/srcdat10");
    } catch (Exception e) {
        LOG.error(StringUtils.stringifyException(e));
        throw e;
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}
Also used : DFSTestUtil(org.apache.hadoop.hdfs.DFSTestUtil) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) Configuration(org.apache.hadoop.conf.Configuration) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) FileChannel(java.nio.channels.FileChannel) IOException(java.io.IOException) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) Random(java.util.Random) RandomAccessFile(java.io.RandomAccessFile) FileSystem(org.apache.hadoop.fs.FileSystem) DistributedFileSystem(org.apache.hadoop.hdfs.DistributedFileSystem) File(java.io.File) Test(org.junit.Test)
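
This variant keeps the namenode in safemode (a safemode threshold of 1.5 can never be reached, so it never leaves automatically) and forces it out through the namenode RPC. Outside of MiniDFSCluster, the equivalent client-side check uses DistributedFileSystem.setSafeMode; below is a minimal sketch, assuming the default filesystem is HDFS.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;

public class SafeModeCheck {
    public static void main(String[] args) throws Exception {
        // Assumes fs.defaultFS is an hdfs:// URI so the cast below is valid.
        try (DistributedFileSystem dfs = (DistributedFileSystem) FileSystem.get(new Configuration())) {
            // SAFEMODE_GET only queries the current state; it does not change it.
            boolean inSafeMode = dfs.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_GET);
            if (inSafeMode) {
                // Leave safemode so that deletes and other mutations can proceed.
                dfs.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_LEAVE);
            }
        }
    }
}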

Example 5 with BlockMissingException

Use of org.apache.hadoop.hdfs.BlockMissingException in project presto by prestodb.

From the class IcebergPageSourceProvider, method createBatchOrcPageSource:

private static ConnectorPageSource createBatchOrcPageSource(HdfsEnvironment hdfsEnvironment, String user, Configuration configuration, Path path, long start, long length, boolean isCacheable, List<IcebergColumnHandle> regularColumns, TypeManager typeManager, TupleDomain<IcebergColumnHandle> effectivePredicate, OrcReaderOptions options, OrcEncoding orcEncoding, DataSize maxBufferSize, DataSize streamBufferSize, boolean lazyReadSmallRanges, boolean orcBloomFiltersEnabled, int domainCompactionThreshold, OrcFileTailSource orcFileTailSource, StripeMetadataSourceFactory stripeMetadataSourceFactory, FileFormatDataSourceStats stats, Optional<EncryptionInformation> encryptionInformation, DwrfEncryptionProvider dwrfEncryptionProvider) {
    OrcDataSource orcDataSource = null;
    try {
        ExtendedFileSystem fileSystem = hdfsEnvironment.getFileSystem(user, path, configuration);
        FileStatus fileStatus = fileSystem.getFileStatus(path);
        long fileSize = fileStatus.getLen();
        long modificationTime = fileStatus.getModificationTime();
        HiveFileContext hiveFileContext = new HiveFileContext(true, NO_CACHE_CONSTRAINTS, Optional.empty(), Optional.of(fileSize), modificationTime, false);
        FSDataInputStream inputStream = hdfsEnvironment.doAs(user, () -> fileSystem.openFile(path, hiveFileContext));
        orcDataSource = new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), fileSize, options.getMaxMergeDistance(), maxBufferSize, streamBufferSize, lazyReadSmallRanges, inputStream, stats);
        // Todo: pass real columns to ProjectionBasedDwrfKeyProvider instead of ImmutableList.of()
        DwrfKeyProvider dwrfKeyProvider = new ProjectionBasedDwrfKeyProvider(encryptionInformation, ImmutableList.of(), true, path);
        RuntimeStats runtimeStats = new RuntimeStats();
        OrcReader reader = new OrcReader(orcDataSource, orcEncoding, orcFileTailSource, stripeMetadataSourceFactory, new HiveOrcAggregatedMemoryContext(), options, isCacheable, dwrfEncryptionProvider, dwrfKeyProvider, runtimeStats);
        List<HiveColumnHandle> physicalColumnHandles = new ArrayList<>(regularColumns.size());
        ImmutableMap.Builder<Integer, Type> includedColumns = ImmutableMap.builder();
        ImmutableList.Builder<TupleDomainOrcPredicate.ColumnReference<HiveColumnHandle>> columnReferences = ImmutableList.builder();
        List<IcebergOrcColumn> fileOrcColumns = getFileOrcColumns(reader);
        Map<Integer, IcebergOrcColumn> fileOrcColumnByIcebergId = fileOrcColumns.stream().filter(orcColumn -> orcColumn.getAttributes().containsKey(ORC_ICEBERG_ID_KEY)).collect(toImmutableMap(orcColumn -> Integer.parseInt(orcColumn.getAttributes().get(ORC_ICEBERG_ID_KEY)), orcColumn -> IcebergOrcColumn.copy(orcColumn).setIcebergColumnId(Optional.of(Integer.parseInt(orcColumn.getAttributes().get(ORC_ICEBERG_ID_KEY))))));
        Map<String, IcebergOrcColumn> fileOrcColumnsByName = uniqueIndex(fileOrcColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
        int nextMissingColumnIndex = fileOrcColumnsByName.size();
        for (IcebergColumnHandle column : regularColumns) {
            IcebergOrcColumn icebergOrcColumn;
            boolean isExcludeColumn = false;
            if (fileOrcColumnByIcebergId.isEmpty()) {
                icebergOrcColumn = fileOrcColumnsByName.get(column.getName());
            } else {
                icebergOrcColumn = fileOrcColumnByIcebergId.get(column.getId());
                if (icebergOrcColumn == null) {
                    // Cannot get orc column from 'fileOrcColumnByIcebergId', which means SchemaEvolution may have happened, so we get orc column by column name.
                    icebergOrcColumn = fileOrcColumnsByName.get(column.getName());
                    if (icebergOrcColumn != null) {
                        isExcludeColumn = true;
                    }
                }
            }
            if (icebergOrcColumn != null) {
                // Todo: using orc file column name
                HiveColumnHandle columnHandle = new HiveColumnHandle(column.getName(), toHiveType(column.getType()), column.getType().getTypeSignature(), icebergOrcColumn.getOrcColumnId(), icebergOrcColumn.getColumnType(), Optional.empty(), Optional.empty());
                physicalColumnHandles.add(columnHandle);
                // Skip SchemaEvolution column
                if (!isExcludeColumn) {
                    includedColumns.put(columnHandle.getHiveColumnIndex(), typeManager.getType(columnHandle.getTypeSignature()));
                    columnReferences.add(new TupleDomainOrcPredicate.ColumnReference<>(columnHandle, columnHandle.getHiveColumnIndex(), typeManager.getType(columnHandle.getTypeSignature())));
                }
            } else {
                physicalColumnHandles.add(new HiveColumnHandle(column.getName(), toHiveType(column.getType()), column.getType().getTypeSignature(), nextMissingColumnIndex++, REGULAR, Optional.empty(), Optional.empty()));
            }
        }
        TupleDomain<HiveColumnHandle> hiveColumnHandleTupleDomain = effectivePredicate.transform(column -> {
            IcebergOrcColumn icebergOrcColumn;
            if (fileOrcColumnByIcebergId.isEmpty()) {
                icebergOrcColumn = fileOrcColumnsByName.get(column.getName());
            } else {
                icebergOrcColumn = fileOrcColumnByIcebergId.get(column.getId());
                if (icebergOrcColumn == null) {
                    // Cannot get orc column from 'fileOrcColumnByIcebergId', which means SchemaEvolution may have happened, so we get orc column by column name.
                    icebergOrcColumn = fileOrcColumnsByName.get(column.getName());
                }
            }
            // Note: the HiveColumnHandle.hiveColumnIndex starts from '0' while the IcebergColumnHandle.id starts from '1'
            return new HiveColumnHandle(column.getName(), toHiveType(column.getType()), column.getType().getTypeSignature(), icebergOrcColumn != null ? icebergOrcColumn.getOrcColumnId() : column.getId() - 1, icebergOrcColumn != null ? icebergOrcColumn.getColumnType() : REGULAR, Optional.empty(), Optional.empty());
        });
        OrcPredicate predicate = new TupleDomainOrcPredicate<>(hiveColumnHandleTupleDomain, columnReferences.build(), orcBloomFiltersEnabled, Optional.of(domainCompactionThreshold));
        OrcAggregatedMemoryContext systemMemoryUsage = new HiveOrcAggregatedMemoryContext();
        OrcBatchRecordReader recordReader = reader.createBatchRecordReader(includedColumns.build(), predicate, start, length, UTC, systemMemoryUsage, INITIAL_BATCH_SIZE);
        return new OrcBatchPageSource(recordReader, orcDataSource, physicalColumnHandles, typeManager, systemMemoryUsage, stats, runtimeStats);
    } catch (Exception e) {
        if (orcDataSource != null) {
            try {
                orcDataSource.close();
            } catch (IOException ignored) {
            }
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = format("Error opening Iceberg split %s (offset=%s, length=%s): %s", path, start, length, e.getMessage());
        if (e instanceof BlockMissingException) {
            throw new PrestoException(ICEBERG_MISSING_DATA, message, e);
        }
        throw new PrestoException(ICEBERG_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : RichColumnDescriptor(com.facebook.presto.parquet.RichColumnDescriptor) HiveSessionProperties.isUseParquetColumnNames(com.facebook.presto.hive.HiveSessionProperties.isUseParquetColumnNames) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) FileStatus(org.apache.hadoop.fs.FileStatus) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) ConnectorTransactionHandle(com.facebook.presto.spi.connector.ConnectorTransactionHandle) ParquetCorruptionException(com.facebook.presto.parquet.ParquetCorruptionException) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) OrcDataSource(com.facebook.presto.orc.OrcDataSource) FileFormatDataSourceStats(com.facebook.presto.hive.FileFormatDataSourceStats) ConnectorPageSourceProvider(com.facebook.presto.spi.connector.ConnectorPageSourceProvider) ENGLISH(java.util.Locale.ENGLISH) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ParquetDataSource(com.facebook.presto.parquet.ParquetDataSource) ORC_ICEBERG_ID_KEY(com.facebook.presto.iceberg.TypeConverter.ORC_ICEBERG_ID_KEY) IcebergSessionProperties.getOrcLazyReadSmallRanges(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcLazyReadSmallRanges) ExtendedFileSystem(com.facebook.presto.hive.filesystem.ExtendedFileSystem) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) HiveFileContext(com.facebook.presto.hive.HiveFileContext) ColumnIndexStore(org.apache.parquet.internal.filter2.columnindex.ColumnIndexStore) ConnectorSession(com.facebook.presto.spi.ConnectorSession) ORC(com.facebook.presto.orc.OrcEncoding.ORC) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ColumnDescriptor(org.apache.parquet.column.ColumnDescriptor) BlockMetaData(org.apache.parquet.hadoop.metadata.BlockMetaData) ICEBERG_BAD_DATA(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA) ParquetPageSource(com.facebook.presto.hive.parquet.ParquetPageSource) HdfsParquetDataSource.buildHdfsParquetDataSource(com.facebook.presto.hive.parquet.HdfsParquetDataSource.buildHdfsParquetDataSource) MetadataReader(com.facebook.presto.parquet.cache.MetadataReader) StandardTypes(com.facebook.presto.common.type.StandardTypes) REGULAR(com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR) DwrfKeyProvider(com.facebook.presto.orc.DwrfKeyProvider) TypeConverter.toHiveType(com.facebook.presto.iceberg.TypeConverter.toHiveType) OrcReaderOptions(com.facebook.presto.orc.OrcReaderOptions) IcebergSessionProperties.getOrcMaxReadBlockSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxReadBlockSize) ArrayList(java.util.ArrayList) IcebergSessionProperties.getOrcTinyStripeThreshold(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcTinyStripeThreshold) ROOT_COLUMN_ID(com.facebook.presto.iceberg.IcebergOrcColumn.ROOT_COLUMN_ID) ICEBERG_MISSING_DATA(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_MISSING_DATA) DwrfEncryptionProvider(com.facebook.presto.orc.DwrfEncryptionProvider) OrcDataSourceId(com.facebook.presto.orc.OrcDataSourceId) IOException(java.io.IOException) UTC(org.joda.time.DateTimeZone.UTC) FileFormat(org.apache.iceberg.FileFormat) Domain(com.facebook.presto.common.predicate.Domain) ParquetReader(com.facebook.presto.parquet.reader.ParquetReader) ConnectorSplit(com.facebook.presto.spi.ConnectorSplit) HiveSessionProperties.getParquetMaxReadBlockSize(com.facebook.presto.hive.HiveSessionProperties.getParquetMaxReadBlockSize) ColumnHandle(com.facebook.presto.spi.ColumnHandle) 
IcebergSessionProperties.isOrcZstdJniDecompressionEnabled(com.facebook.presto.iceberg.IcebergSessionProperties.isOrcZstdJniDecompressionEnabled) FileMetaData(org.apache.parquet.hadoop.metadata.FileMetaData) ParquetMetadata(org.apache.parquet.hadoop.metadata.ParquetMetadata) OrcReader(com.facebook.presto.orc.OrcReader) ColumnIOConverter.constructField(org.apache.parquet.io.ColumnIOConverter.constructField) HdfsEnvironment(com.facebook.presto.hive.HdfsEnvironment) HdfsOrcDataSource(com.facebook.presto.hive.orc.HdfsOrcDataSource) TupleDomainOrcPredicate(com.facebook.presto.orc.TupleDomainOrcPredicate) NO_CACHE_CONSTRAINTS(com.facebook.presto.hive.CacheQuota.NO_CACHE_CONSTRAINTS) IcebergSessionProperties.getOrcMaxBufferSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxBufferSize) OrcBatchPageSource(com.facebook.presto.hive.orc.OrcBatchPageSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) SplitContext(com.facebook.presto.spi.SplitContext) ParquetTypeUtils.getDescriptors(com.facebook.presto.parquet.ParquetTypeUtils.getDescriptors) Path(org.apache.hadoop.fs.Path) EncryptionInformation(com.facebook.presto.hive.EncryptionInformation) RuntimeStats(com.facebook.presto.common.RuntimeStats) HdfsContext(com.facebook.presto.hive.HdfsContext) ProjectionBasedDwrfKeyProvider(com.facebook.presto.hive.orc.ProjectionBasedDwrfKeyProvider) HiveSessionProperties.isParquetBatchReadsEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReadsEnabled) HiveClientConfig(com.facebook.presto.hive.HiveClientConfig) StripeMetadataSourceFactory(com.facebook.presto.orc.StripeMetadataSourceFactory) ImmutableMap(com.google.common.collect.ImmutableMap) INITIAL_BATCH_SIZE(com.facebook.presto.orc.OrcReader.INITIAL_BATCH_SIZE) OrcPredicate(com.facebook.presto.orc.OrcPredicate) HiveDwrfEncryptionProvider(com.facebook.presto.hive.HiveDwrfEncryptionProvider) String.format(java.lang.String.format) IcebergSessionProperties.isOrcBloomFiltersEnabled(com.facebook.presto.iceberg.IcebergSessionProperties.isOrcBloomFiltersEnabled) ColumnIndexFilterUtils(com.facebook.presto.parquet.reader.ColumnIndexFilterUtils) Objects(java.util.Objects) MessageType(org.apache.parquet.schema.MessageType) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveSessionProperties.isParquetBatchReaderVerificationEnabled(com.facebook.presto.hive.HiveSessionProperties.isParquetBatchReaderVerificationEnabled) NOT_SUPPORTED(com.facebook.presto.spi.StandardErrorCode.NOT_SUPPORTED) HiveOrcAggregatedMemoryContext(com.facebook.presto.hive.HiveOrcAggregatedMemoryContext) Optional(java.util.Optional) HiveColumnHandle(com.facebook.presto.hive.HiveColumnHandle) OrcBatchRecordReader(com.facebook.presto.orc.OrcBatchRecordReader) MessageColumnIO(org.apache.parquet.io.MessageColumnIO) IntStream(java.util.stream.IntStream) ConnectorTableLayoutHandle(com.facebook.presto.spi.ConnectorTableLayoutHandle) PredicateUtils.predicateMatches(com.facebook.presto.parquet.predicate.PredicateUtils.predicateMatches) PrestoException(com.facebook.presto.spi.PrestoException) Function(java.util.function.Function) Inject(javax.inject.Inject) ParquetTypeUtils.getParquetTypeByName(com.facebook.presto.parquet.ParquetTypeUtils.getParquetTypeByName) ImmutableList(com.google.common.collect.ImmutableList) ICEBERG_CANNOT_OPEN_SPLIT(com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_CANNOT_OPEN_SPLIT) TypeManager(com.facebook.presto.common.type.TypeManager) Objects.requireNonNull(java.util.Objects.requireNonNull) 
Predicate(com.facebook.presto.parquet.predicate.Predicate) OrcType(com.facebook.presto.orc.metadata.OrcType) OrcFileTailSource(com.facebook.presto.orc.cache.OrcFileTailSource) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) PredicateUtils.buildPredicate(com.facebook.presto.parquet.predicate.PredicateUtils.buildPredicate) Type(com.facebook.presto.common.type.Type) IcebergSessionProperties.getOrcMaxMergeDistance(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcMaxMergeDistance) OrcAggregatedMemoryContext(com.facebook.presto.orc.OrcAggregatedMemoryContext) OrcEncoding(com.facebook.presto.orc.OrcEncoding) ParquetTypeUtils.getColumnIO(com.facebook.presto.parquet.ParquetTypeUtils.getColumnIO) TupleDomain(com.facebook.presto.common.predicate.TupleDomain) AggregatedMemoryContext(com.facebook.presto.memory.context.AggregatedMemoryContext) Field(com.facebook.presto.parquet.Field) Collectors.toList(java.util.stream.Collectors.toList) ConnectorPageSource(com.facebook.presto.spi.ConnectorPageSource) IcebergSessionProperties.getOrcStreamBufferSize(com.facebook.presto.iceberg.IcebergSessionProperties.getOrcStreamBufferSize)
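
Examples 2 and 5 are the two format-specific halves of the same provider; the public page-source path chooses between them based on the Iceberg file format (the FileFormat and NOT_SUPPORTED imports above hint at this). The sketch below only illustrates that dispatch shape with simplified placeholders, not the actual Presto method signatures or parameters.

import org.apache.iceberg.FileFormat;

final class PageSourceDispatch {
    private PageSourceDispatch() {}

    // Placeholder dispatch: the real provider would call createBatchOrcPageSource or
    // createParquetPageSource here with the full set of session and file parameters.
    static String chooseReader(FileFormat format) {
        switch (format) {
            case ORC:
                return "createBatchOrcPageSource";  // Example 5
            case PARQUET:
                return "createParquetPageSource";   // Example 2
            default:
                throw new IllegalArgumentException("File format not supported for Iceberg split: " + format);
        }
    }
}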

Aggregations

BlockMissingException (org.apache.hadoop.hdfs.BlockMissingException)5 IOException (java.io.IOException)3 Configuration (org.apache.hadoop.conf.Configuration)3 RuntimeStats (com.facebook.presto.common.RuntimeStats)2 Domain (com.facebook.presto.common.predicate.Domain)2 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)2 StandardTypes (com.facebook.presto.common.type.StandardTypes)2 Type (com.facebook.presto.common.type.Type)2 TypeManager (com.facebook.presto.common.type.TypeManager)2 NO_CACHE_CONSTRAINTS (com.facebook.presto.hive.CacheQuota.NO_CACHE_CONSTRAINTS)2 EncryptionInformation (com.facebook.presto.hive.EncryptionInformation)2 FileFormatDataSourceStats (com.facebook.presto.hive.FileFormatDataSourceStats)2 HdfsContext (com.facebook.presto.hive.HdfsContext)2 HdfsEnvironment (com.facebook.presto.hive.HdfsEnvironment)2 HiveClientConfig (com.facebook.presto.hive.HiveClientConfig)2 HiveColumnHandle (com.facebook.presto.hive.HiveColumnHandle)2 REGULAR (com.facebook.presto.hive.HiveColumnHandle.ColumnType.REGULAR)2 HiveDwrfEncryptionProvider (com.facebook.presto.hive.HiveDwrfEncryptionProvider)2 HiveFileContext (com.facebook.presto.hive.HiveFileContext)2 HiveOrcAggregatedMemoryContext (com.facebook.presto.hive.HiveOrcAggregatedMemoryContext)2