
Example 6 with OrcDataSourceId

Use of io.prestosql.orc.OrcDataSourceId in project hetu-core by openlookeng.

From class TestDecimalStream, method testSkipToEdgeOfChunkLong:

@Test
public void testSkipToEdgeOfChunkLong() throws IOException {
    OrcChunkLoader loader = new TestingChunkLoader(new OrcDataSourceId("skip to edge of chunk long"), ImmutableList.of(encodeValues(ImmutableList.of(BigInteger.valueOf(Long.MAX_VALUE))), encodeValues(ImmutableList.of(BigInteger.valueOf(Long.MAX_VALUE)))));
    DecimalInputStream stream = new DecimalInputStream(loader);
    stream.skip(1);
    assertEquals(nextLongDecimalValue(stream), BigInteger.valueOf(Long.MAX_VALUE));
}
Also used : OrcDataSourceId(io.prestosql.orc.OrcDataSourceId) Test(org.testng.annotations.Test)

Example 7 with OrcDataSourceId

Use of io.prestosql.orc.OrcDataSourceId in project hetu-core by openlookeng.

From class TestDecimalStream, method testReadToEdgeOfChunkLong:

@Test
public void testReadToEdgeOfChunkLong() throws IOException {
    OrcChunkLoader loader = new TestingChunkLoader(new OrcDataSourceId("skip to edge of chunk long"), ImmutableList.of(encodeValues(ImmutableList.of(BigInteger.valueOf(Long.MAX_VALUE))), encodeValues(ImmutableList.of(BigInteger.valueOf(Long.MAX_VALUE)))));
    DecimalInputStream stream = new DecimalInputStream(loader);
    assertEquals(nextLongDecimalValue(stream), BigInteger.valueOf(Long.MAX_VALUE));
    assertEquals(nextLongDecimalValue(stream), BigInteger.valueOf(Long.MAX_VALUE));
}
Also used : OrcDataSourceId(io.prestosql.orc.OrcDataSourceId) Test(org.testng.annotations.Test)
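
Examples 6 and 7 differ only in whether the first value is skipped or read before the stream reaches the chunk boundary. A minimal sketch generalizing the setup, assuming the encodeValues and TestingChunkLoader helpers from TestDecimalStream shown above (the helper name streamOverChunks is hypothetical, and encodeValues is assumed to return an io.airlift.slice.Slice):

private static DecimalInputStream streamOverChunks(BigInteger... valuesPerChunk) throws IOException {
    // Encode each value into its own chunk, so consecutive reads are forced
    // to cross a chunk boundary inside DecimalInputStream.
    ImmutableList.Builder<Slice> chunks = ImmutableList.builder();
    for (BigInteger value : valuesPerChunk) {
        chunks.add(encodeValues(ImmutableList.of(value)));
    }
    return new DecimalInputStream(new TestingChunkLoader(new OrcDataSourceId("multi chunk"), chunks.build()));
}

With this helper, both tests reduce to streamOverChunks(BigInteger.valueOf(Long.MAX_VALUE), BigInteger.valueOf(Long.MAX_VALUE)) followed by either the skip or the two reads.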

Example 8 with OrcDataSourceId

Use of io.prestosql.orc.OrcDataSourceId in project hetu-core by openlookeng.

From class HiveWriterFactory, method mergeSubFiles:

public void mergeSubFiles(List<HiveWriter> writers) throws IOException {
    if (writers.isEmpty()) {
        return;
    }
    FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), new Path(writers.get(0).getFilePath()), conf);
    List<Type> types = dataColumns.stream().map(column -> column.getHiveType().getType(typeManager)).collect(toList());
    for (HiveWriter writer : writers) {
        String filePath = writer.getFilePath();
        Path path = new Path(filePath);
        logContainingFolderInfo(fileSystem, path, "Merging snapshot files to result file: %s", path);
        // The snapshotSuffixes list records the "resumeCount" for each suffix.
        // It doesn't have an entry for the current set of files yet, so one is added first.
        // The resumeCount helps distinguish files created by different runs.
        snapshotSuffixes.add(resumeCount);
        for (int i = 0; i < snapshotSuffixes.size(); i++) {
            long resume = snapshotSuffixes.get(i);
            Path file = new Path(toSnapshotSubFile(filePath, resume, i));
            if (fileSystem.exists(file)) {
                // TODO-cp-I2BZ0A: assuming all files are of ORC type.
                // Using same parameters as used by SortingFileWriter
                FileStatus fileStatus = fileSystem.getFileStatus(file);
                try (TempFileReader reader = new TempFileReader(types, new HdfsOrcDataSource(
                        new OrcDataSourceId(file.toString()),
                        fileStatus.getLen(),
                        new DataSize(1, MEGABYTE),
                        new DataSize(8, MEGABYTE),
                        new DataSize(8, MEGABYTE),
                        false,
                        fileSystem.open(file),
                        new FileFormatDataSourceStats(),
                        fileStatus.getModificationTime()))) {
                    while (reader.hasNext()) {
                        writer.append(reader.next());
                    }
                }
            // DO NOT delete the sub files, in case we need to resume. Delete them when the query finishes.
            }
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) FileSystem(org.apache.hadoop.fs.FileSystem) FileStatus(org.apache.hadoop.fs.FileStatus) OrcDataSourceId(io.prestosql.orc.OrcDataSourceId) HdfsOrcDataSource(io.prestosql.plugin.hive.orc.HdfsOrcDataSource) DataSize(io.airlift.units.DataSize) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) TempFileReader(io.prestosql.plugin.hive.util.TempFileReader) FileFormatDataSourceStats(io.prestosql.plugin.hive.FileFormatDataSourceStats) Type(io.prestosql.spi.type.Type) IOException(java.io.IOException)
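
This example and the next one construct an HdfsOrcDataSource in exactly the same way. A minimal sketch factoring out that construction, with every parameter copied from the snippets above (only the helper name openTempOrcDataSource is hypothetical); the 1 MB / 8 MB sizes are the same ones SortingFileWriter uses for its temporary files:

private static OrcDataSource openTempOrcDataSource(FileSystem fileSystem, Path file) throws IOException {
    FileStatus fileStatus = fileSystem.getFileStatus(file);
    return new HdfsOrcDataSource(
            new OrcDataSourceId(file.toString()),  // the data source id is simply the file path
            fileStatus.getLen(),
            new DataSize(1, MEGABYTE),             // maxMergeDistance
            new DataSize(8, MEGABYTE),             // maxBufferSize
            new DataSize(8, MEGABYTE),             // streamBufferSize
            false,                                 // lazyReadSmallRanges
            fileSystem.open(file),
            new FileFormatDataSourceStats(),
            fileStatus.getModificationTime());     // lets caches detect rewritten files
}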

Example 9 with OrcDataSourceId

Use of io.prestosql.orc.OrcDataSourceId in project hetu-core by openlookeng.

From class SortingFileWriter, method mergeFiles:

private void mergeFiles(Iterable<TempFile> files, Consumer<Page> consumer) {
    try (Closer closer = Closer.create()) {
        Collection<Iterator<Page>> iterators = new ArrayList<>();
        for (TempFile tempFile : files) {
            Path file = tempFile.getPath();
            FileStatus fileStatus = fileSystem.getFileStatus(file);
            OrcDataSource dataSource = new HdfsOrcDataSource(
                    new OrcDataSourceId(file.toString()),
                    fileStatus.getLen(),
                    new DataSize(1, MEGABYTE),
                    new DataSize(8, MEGABYTE),
                    new DataSize(8, MEGABYTE),
                    false,
                    fileSystem.open(file),
                    new FileFormatDataSourceStats(),
                    fileStatus.getModificationTime());
            TempFileReader reader = new TempFileReader(types, dataSource);
            // Closing the reader also closes the data source
            closer.register(reader);
            iterators.add(reader);
        }
        new MergingPageIterator(iterators, types, sortFields, sortOrders).forEachRemaining(consumer);
        for (TempFile tempFile : files) {
            Path file = tempFile.getPath();
            fileSystem.delete(file, false);
            if (fileSystem.exists(file)) {
                throw new IOException("Failed to delete temporary file: " + file);
            }
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
Also used : Closer(com.google.common.io.Closer) Path(org.apache.hadoop.fs.Path) FileStatus(org.apache.hadoop.fs.FileStatus) OrcDataSource(io.prestosql.orc.OrcDataSource) HdfsOrcDataSource(io.prestosql.plugin.hive.orc.HdfsOrcDataSource) OrcDataSourceId(io.prestosql.orc.OrcDataSourceId) DataSize(io.airlift.units.DataSize) ArrayList(java.util.ArrayList) Iterator(java.util.Iterator) MergingPageIterator(io.prestosql.plugin.hive.util.MergingPageIterator) TempFileReader(io.prestosql.plugin.hive.util.TempFileReader) IOException(java.io.IOException) UncheckedIOException(java.io.UncheckedIOException)
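
The merge itself is delegated to MergingPageIterator, which performs a k-way merge over page streams that are already sorted. A hypothetical usage sketch (sortedPagesA and sortedPagesB are illustrative pre-sorted page lists; the constructor argument order is taken from the snippet above):

// Merge two pre-sorted streams of single-BIGINT-column pages into one sorted stream.
List<Type> types = ImmutableList.of(BIGINT);
List<Integer> sortFields = ImmutableList.of(0);  // sort key: column 0
List<SortOrder> sortOrders = ImmutableList.of(SortOrder.ASC_NULLS_FIRST);
Iterator<Page> merged = new MergingPageIterator(
        ImmutableList.of(sortedPagesA.iterator(), sortedPagesB.iterator()),
        types, sortFields, sortOrders);
merged.forEachRemaining(consumer);  // consumer as in mergeFiles above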

Example 10 with OrcDataSourceId

Use of io.prestosql.orc.OrcDataSourceId in project hetu-core by openlookeng.

From class OrcPageSourceFactory, method createOrcPageSource:

public static OrcPageSource createOrcPageSource(
        HdfsEnvironment hdfsEnvironment, String sessionUser, Configuration configuration,
        Path path, long start, long length, long fileSize,
        List<HiveColumnHandle> columns, boolean useOrcColumnNames, boolean isFullAcid,
        TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone legacyFileTimeZone, TypeManager typeManager,
        DataSize maxMergeDistance, DataSize maxBufferSize, DataSize streamBufferSize,
        DataSize tinyStripeThreshold, DataSize maxReadBlockSize, boolean lazyReadSmallRanges,
        boolean orcBloomFiltersEnabled, FileFormatDataSourceStats stats,
        Optional<DynamicFilterSupplier> dynamicFilters, Optional<DeleteDeltaLocations> deleteDeltaLocations,
        Optional<Long> startRowOffsetOfFile, Optional<List<IndexMetadata>> indexes, SplitMetadata splitMetadata,
        OrcCacheStore orcCacheStore, OrcCacheProperties orcCacheProperties, int domainCompactionThreshold,
        boolean pageMetadataEnabled, long dataSourceLastModifiedTime) {
    for (HiveColumnHandle column : columns) {
        checkArgument(column.getColumnType() == HiveColumnHandle.ColumnType.REGULAR || column.getHiveColumnIndex() == HiveColumnHandle.ROW_ID__COLUMN_INDEX, "column type must be regular: %s", column);
    }
    checkArgument(!effectivePredicate.isNone());
    OrcDataSource orcDataSource;
    try {
        // Always create a lazy Stream. HDFS stream opened only when required.
        FSDataInputStream inputStream = new FSDataInputStream(new LazyFSInputStream(() -> {
            FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
            return hdfsEnvironment.doAs(sessionUser, () -> fileSystem.open(path));
        }));
        orcDataSource = new HdfsOrcDataSource(
                new OrcDataSourceId(path.toString()),
                fileSize, maxMergeDistance, maxBufferSize, streamBufferSize,
                lazyReadSmallRanges, inputStream, stats, dataSourceLastModifiedTime);
    } catch (Exception e) {
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    AggregatedMemoryContext systemMemoryUsage = newSimpleAggregatedMemoryContext();
    try {
        OrcDataSource readerLocalDataSource = OrcReader.wrapWithCacheIfTiny(orcDataSource, tinyStripeThreshold);
        OrcFileTail fileTail;
        if (orcCacheProperties.isFileTailCacheEnabled()) {
            try {
                OrcDataSourceIdWithTimeStamp orcDataSourceIdWithTimeStamp = new OrcDataSourceIdWithTimeStamp(readerLocalDataSource.getId(), readerLocalDataSource.getLastModifiedTime());
                fileTail = orcCacheStore.getFileTailCache().get(new OrcFileTailCacheKey(orcDataSourceIdWithTimeStamp), () -> OrcPageSourceFactory.createFileTail(orcDataSource));
            } catch (UncheckedExecutionException | ExecutionException executionException) {
                handleCacheLoadException(executionException);
                log.debug(executionException.getCause(), "Error while caching the Orc file tail. Falling back to default flow");
                fileTail = OrcPageSourceFactory.createFileTail(orcDataSource);
            }
        } else {
            fileTail = OrcPageSourceFactory.createFileTail(orcDataSource);
        }
        OrcReader reader = new OrcReader(readerLocalDataSource, fileTail, maxMergeDistance, tinyStripeThreshold, maxReadBlockSize);
        List<OrcColumn> fileColumns = reader.getRootColumn().getNestedColumns();
        List<OrcColumn> fileReadColumns = isFullAcid ? new ArrayList<>(columns.size() + 5) : new ArrayList<>(columns.size());
        List<Type> fileReadTypes = isFullAcid ? new ArrayList<>(columns.size() + 5) : new ArrayList<>(columns.size());
        ImmutableList<String> acidColumnNames = null;
        List<ColumnAdaptation> columnAdaptations = new ArrayList<>(columns.size());
        // Only Hive ACID file names contain "bucket" (e.g. bucket_00000)
        boolean fileNameContainsBucket = path.getName().contains("bucket");
        if (isFullAcid && fileNameContainsBucket) {
            // Skip the acid schema check in case of non-ACID files
            acidColumnNames = ImmutableList.<String>builder().add(ACID_COLUMN_ORIGINAL_TRANSACTION, ACID_COLUMN_BUCKET, ACID_COLUMN_ROW_ID, ACID_COLUMN_CURRENT_TRANSACTION, ACID_COLUMN_OPERATION).build();
            verifyAcidSchema(reader, path);
            Map<String, OrcColumn> acidColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
            if (AcidUtils.isDeleteDelta(path.getParent())) {
                // Avoid reading column data from delete_delta files.
                // Call will come here in case of Minor VACUUM, where all delete_delta files are merged together.
                fileColumns = ImmutableList.of();
            } else {
                fileColumns = ensureColumnNameConsistency(acidColumnsByName.get(ACID_COLUMN_ROW_STRUCT).getNestedColumns(), columns);
            }
            fileReadColumns.add(acidColumnsByName.get(ACID_COLUMN_ORIGINAL_TRANSACTION.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadColumns.add(acidColumnsByName.get(ACID_COLUMN_BUCKET.toLowerCase(ENGLISH)));
            fileReadTypes.add(INTEGER);
            fileReadColumns.add(acidColumnsByName.get(ACID_COLUMN_ROW_ID.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadColumns.add(acidColumnsByName.get(ACID_COLUMN_CURRENT_TRANSACTION.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadColumns.add(acidColumnsByName.get(ACID_COLUMN_OPERATION.toLowerCase(ENGLISH)));
            fileReadTypes.add(INTEGER);
        }
        Map<String, OrcColumn> fileColumnsByName = ImmutableMap.of();
        if (useOrcColumnNames || isFullAcid) {
            verifyFileHasColumnNames(fileColumns, path);
            // Convert column names read from ORC files to lower case to be consistent with those stored in Hive Metastore
            fileColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
        }
        TupleDomainOrcPredicateBuilder predicateBuilder = TupleDomainOrcPredicate.builder().setBloomFiltersEnabled(orcBloomFiltersEnabled);
        Map<HiveColumnHandle, Domain> effectivePredicateDomains = effectivePredicate.getDomains().orElseThrow(() -> new IllegalArgumentException("Effective predicate is none"));
        for (HiveColumnHandle column : columns) {
            OrcColumn orcColumn = null;
            if (useOrcColumnNames || isFullAcid) {
                orcColumn = fileColumnsByName.get(column.getName());
            } else if (column.getHiveColumnIndex() >= 0 && column.getHiveColumnIndex() < fileColumns.size()) {
                orcColumn = fileColumns.get(column.getHiveColumnIndex());
            }
            Type readType = typeManager.getType(column.getTypeSignature());
            if (orcColumn != null) {
                int sourceIndex = fileReadColumns.size();
                columnAdaptations.add(ColumnAdaptation.sourceColumn(sourceIndex));
                fileReadColumns.add(orcColumn);
                fileReadTypes.add(readType);
                Domain domain = effectivePredicateDomains.get(column);
                if (domain != null) {
                    predicateBuilder.addColumn(orcColumn.getColumnId(), domain);
                }
            } else if (isFullAcid && readType instanceof RowType && column.getName().equalsIgnoreCase(HiveColumnHandle.UPDATE_ROW_ID_COLUMN_NAME)) {
                HiveType hiveType = column.getHiveType();
                StructTypeInfo structTypeInfo = (StructTypeInfo) hiveType.getTypeInfo();
                ArrayList<String> fieldNames = structTypeInfo.getAllStructFieldNames();
                List<ColumnAdaptation> adaptations = fieldNames.stream()
                        .map(acidColumnNames::indexOf)
                        .map(c -> ColumnAdaptation.sourceColumn(c, false))
                        .collect(Collectors.toList());
                columnAdaptations.add(ColumnAdaptation.structColumn(structTypeInfo, adaptations));
            } else {
                columnAdaptations.add(ColumnAdaptation.nullColumn(readType));
            }
        }
        Map<String, Domain> domains = effectivePredicate.getDomains().get().entrySet().stream().collect(toMap(e -> e.getKey().getName(), Map.Entry::getValue));
        OrcRecordReader recordReader = reader.createRecordReader(
                fileReadColumns, fileReadTypes, predicateBuilder.build(),
                start, length, legacyFileTimeZone, systemMemoryUsage, INITIAL_BATCH_SIZE,
                exception -> handleException(orcDataSource.getId(), exception),
                indexes, splitMetadata, domains, orcCacheStore, orcCacheProperties, pageMetadataEnabled);
        OrcDeletedRows deletedRows = new OrcDeletedRows(
                path.getName(),
                deleteDeltaLocations,
                new OrcDeleteDeltaPageSourceFactory(sessionUser, configuration, hdfsEnvironment,
                        maxMergeDistance, maxBufferSize, streamBufferSize, maxReadBlockSize,
                        tinyStripeThreshold, lazyReadSmallRanges, orcBloomFiltersEnabled, stats),
                sessionUser, configuration, hdfsEnvironment, startRowOffsetOfFile);
        boolean eagerload = false;
        if (indexes.isPresent()) {
            eagerload = indexes.get().stream().anyMatch(indexMetadata -> EAGER_LOAD_INDEX_ID.contains(indexMetadata.getIndex().getId()));
        }
        return new OrcPageSource(recordReader, columnAdaptations, orcDataSource, deletedRows, eagerload, systemMemoryUsage, stats);
    } catch (Exception e) {
        try {
            orcDataSource.close();
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof BlockMissingException) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : OrcReader(io.prestosql.orc.OrcReader) OrcDataSource(io.prestosql.orc.OrcDataSource) OrcDataSourceId(io.prestosql.orc.OrcDataSourceId) OrcDataSourceIdWithTimeStamp(io.prestosql.orc.OrcDataSourceIdWithTimeStamp) OrcFileTail(io.prestosql.orc.OrcFileTail) OrcFileTailCacheKey(io.prestosql.orc.OrcFileTailCacheKey) OrcCacheStore(io.prestosql.orc.OrcCacheStore) OrcCacheProperties(io.prestosql.orc.OrcCacheProperties) OrcColumn(io.prestosql.orc.OrcColumn) OrcRecordReader(io.prestosql.orc.OrcRecordReader) TupleDomainOrcPredicateBuilder(io.prestosql.orc.TupleDomainOrcPredicate.TupleDomainOrcPredicateBuilder) HdfsOrcDataSource(io.prestosql.plugin.hive.orc.HdfsOrcDataSource) ColumnAdaptation(io.prestosql.plugin.hive.orc.OrcPageSource.ColumnAdaptation) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) HiveType(io.prestosql.plugin.hive.HiveType) Domain(io.prestosql.spi.predicate.Domain) TupleDomain(io.prestosql.spi.predicate.TupleDomain) Type(io.prestosql.spi.type.Type) RowType(io.prestosql.spi.type.RowType) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) FileSystem(org.apache.hadoop.fs.FileSystem) Path(org.apache.hadoop.fs.Path) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) PrestoException(io.prestosql.spi.PrestoException) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) ExecutionException(java.util.concurrent.ExecutionException) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) IOException(java.io.IOException)
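
The detail worth noting in this example is the file-tail cache lookup: the cache key pairs the OrcDataSourceId with the file's last-modified time, so a rewritten file never hits a stale entry. A minimal sketch of just that step, extracted from the method above (the surrounding helper loadFileTail is hypothetical; createFileTail and handleCacheLoadException are the ones referenced in the snippet, and createFileTail is assumed to be accessible):

private static OrcFileTail loadFileTail(OrcCacheStore orcCacheStore, OrcDataSource orcDataSource) throws IOException {
    // Key on (data source id, last-modified time) so stale entries are never served.
    OrcDataSourceIdWithTimeStamp key = new OrcDataSourceIdWithTimeStamp(
            orcDataSource.getId(), orcDataSource.getLastModifiedTime());
    try {
        return orcCacheStore.getFileTailCache()
                .get(new OrcFileTailCacheKey(key), () -> OrcPageSourceFactory.createFileTail(orcDataSource));
    }
    catch (UncheckedExecutionException | ExecutionException e) {
        handleCacheLoadException(e);
        // Fall back to reading the tail directly from the data source.
        return OrcPageSourceFactory.createFileTail(orcDataSource);
    }
}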

Aggregations

OrcDataSourceId (io.prestosql.orc.OrcDataSourceId): 13 usages
IOException (java.io.IOException): 8 usages
Path (org.apache.hadoop.fs.Path): 8 usages
ImmutableList (com.google.common.collect.ImmutableList): 6 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 6 usages
DataSize (io.airlift.units.DataSize): 6 usages
OrcDataSource (io.prestosql.orc.OrcDataSource): 6 usages
HdfsOrcDataSource (io.prestosql.plugin.hive.orc.HdfsOrcDataSource): 6 usages
PrestoException (io.prestosql.spi.PrestoException): 6 usages
ConnectorSession (io.prestosql.spi.connector.ConnectorSession): 6 usages
Type (io.prestosql.spi.type.Type): 6 usages
TypeManager (io.prestosql.spi.type.TypeManager): 6 usages
List (java.util.List): 6 usages
ENGLISH (java.util.Locale.ENGLISH): 6 usages
Objects.requireNonNull (java.util.Objects.requireNonNull): 6 usages
Optional (java.util.Optional): 6 usages
Properties (java.util.Properties): 6 usages
FileStatus (org.apache.hadoop.fs.FileStatus): 6 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 6 usages
AcidUtils (org.apache.hadoop.hive.ql.io.AcidUtils): 6 usages