Search in sources :

Example 56 with BIGINT

use of io.prestosql.spi.type.BigintType.BIGINT in project hetu-core by openlookeng.

the class TestPushAggregationThroughOuterJoin method testDoesNotFireWhenNotDistinct.

/**
 * Checks that {@link PushAggregationThroughOuterJoin} does not fire for two plans
 * that aggregate {@code avg(COL2)} grouped by {@code COL1} on top of a LEFT join
 * on {@code COL1 = COL2}.
 */
@Test
public void testDoesNotFireWhenNotDistinct() {
    // Case 1: the left (outer) input is a plain values node producing COL1.
    tester().assertThat(new PushAggregationThroughOuterJoin())
            .on(p -> p.aggregation(ab -> ab
                    .source(p.join(
                            JoinNode.Type.LEFT,
                            p.values(
                                    ImmutableList.of(p.symbol("COL1")),
                                    ImmutableList.of(constantExpressions(BIGINT, 10L), constantExpressions(BIGINT, 11L))),
                            p.values(new Symbol("COL2")),
                            ImmutableList.of(new JoinNode.EquiJoinClause(new Symbol("COL1"), new Symbol("COL2"))),
                            ImmutableList.of(new Symbol("COL1"), new Symbol("COL2")),
                            Optional.empty(),
                            Optional.empty(),
                            Optional.empty()))
                    .addAggregation(new Symbol("AVG"), PlanBuilder.expression("avg(COL2)"), ImmutableList.of(DOUBLE))
                    .singleGroupingSet(new Symbol("COL1"))))
            .doesNotFire();

    // Case 2: the left input is a projection of COL1 over an aggregation grouped on (COL1, unused).
    // https://github.com/prestodb/presto/issues/10592
    tester().assertThat(new PushAggregationThroughOuterJoin())
            .on(p -> p.aggregation(ab -> ab
                    .source(p.join(
                            JoinNode.Type.LEFT,
                            p.project(
                                    Assignments.builder().put(p.symbol("COL1", BIGINT), p.variable("COL1")).build(),
                                    p.aggregation(inner -> inner
                                            .singleGroupingSet(p.symbol("COL1"), p.symbol("unused"))
                                            .source(p.values(
                                                    ImmutableList.of(p.symbol("COL1"), p.symbol("unused")),
                                                    ImmutableList.of(constantExpressions(BIGINT, 10L, 1L), constantExpressions(BIGINT, 10L, 2L)))))),
                            p.values(p.symbol("COL2")),
                            ImmutableList.of(new JoinNode.EquiJoinClause(p.symbol("COL1"), p.symbol("COL2"))),
                            ImmutableList.of(p.symbol("COL1"), p.symbol("COL2")),
                            Optional.empty(),
                            Optional.empty(),
                            Optional.empty()))
                    .addAggregation(p.symbol("AVG", DOUBLE), PlanBuilder.expression("avg(COL2)"), ImmutableList.of(DOUBLE))
                    .singleGroupingSet(p.symbol("COL1"))))
            .doesNotFire();
}
Also used : Symbol(io.prestosql.spi.plan.Symbol) BaseRuleTest(io.prestosql.sql.planner.iterative.rule.test.BaseRuleTest) PlanMatchPattern.project(io.prestosql.sql.planner.assertions.PlanMatchPattern.project) PlanBuilder.constantExpressions(io.prestosql.sql.planner.iterative.rule.test.PlanBuilder.constantExpressions) PlanMatchPattern.join(io.prestosql.sql.planner.assertions.PlanMatchPattern.join) ImmutableMap(com.google.common.collect.ImmutableMap) Assignments(io.prestosql.spi.plan.Assignments) PlanMatchPattern.equiJoinClause(io.prestosql.sql.planner.assertions.PlanMatchPattern.equiJoinClause) Test(org.testng.annotations.Test) PlanMatchPattern.singleGroupingSet(io.prestosql.sql.planner.assertions.PlanMatchPattern.singleGroupingSet) PlanMatchPattern.globalAggregation(io.prestosql.sql.planner.assertions.PlanMatchPattern.globalAggregation) PlanMatchPattern.values(io.prestosql.sql.planner.assertions.PlanMatchPattern.values) ImmutableList(com.google.common.collect.ImmutableList) SINGLE(io.prestosql.spi.plan.AggregationNode.Step.SINGLE) PlanMatchPattern.functionCall(io.prestosql.sql.planner.assertions.PlanMatchPattern.functionCall) Optional(java.util.Optional) DOUBLE(io.prestosql.spi.type.DoubleType.DOUBLE) PlanBuilder(io.prestosql.sql.planner.iterative.rule.test.PlanBuilder) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) PlanMatchPattern.aggregation(io.prestosql.sql.planner.assertions.PlanMatchPattern.aggregation) PlanMatchPattern.expression(io.prestosql.sql.planner.assertions.PlanMatchPattern.expression) JoinNode(io.prestosql.spi.plan.JoinNode) Symbol(io.prestosql.spi.plan.Symbol) BaseRuleTest(io.prestosql.sql.planner.iterative.rule.test.BaseRuleTest) Test(org.testng.annotations.Test)

Example 57 with BIGINT

use of io.prestosql.spi.type.BigintType.BIGINT in project hetu-core by openlookeng.

the class OrcPageSourceFactory method createOrcPageSource.

/**
 * Builds an {@link OrcPageSource} over the ORC file at {@code path} for the split
 * described by {@code start} and {@code length}.
 *
 * <p>The method wraps the file in a lazily opened HDFS data source, obtains the file tail
 * (through the file-tail cache when {@code orcCacheProperties} enables it), maps every
 * requested {@link HiveColumnHandle} to a column adaptation (a file column, a struct built
 * from the ACID columns, or a null column), and creates the record reader and the
 * deleted-rows tracker handed to the page source.
 *
 * @param columns requested columns; each must be {@code REGULAR} or have the row-id column index
 * @param useOrcColumnNames when true (or when {@code isFullAcid}), columns are matched by
 *        lower-cased name instead of by Hive column index
 * @param isFullAcid when true and the file name contains "bucket", the five ACID columns are
 *        read ahead of the requested columns
 * @param effectivePredicate must not be "none"; its domains are pushed into the ORC predicate
 * @param indexes optional index metadata; eager loading is enabled when any index id is in
 *        {@code EAGER_LOAD_INDEX_ID}
 * @param dynamicFilters not referenced by this method body
 * @return the page source; it takes over {@code orcDataSource}
 * @throws IllegalArgumentException if a column has an unsupported column type or the predicate is "none"
 * @throws PrestoException with {@code HIVE_MISSING_DATA} on {@link BlockMissingException},
 *         otherwise {@code HIVE_CANNOT_OPEN_SPLIT} for any failure while opening or preparing the reader
 */
public static OrcPageSource createOrcPageSource(HdfsEnvironment hdfsEnvironment, String sessionUser, Configuration configuration, Path path, long start, long length, long fileSize, List<HiveColumnHandle> columns, boolean useOrcColumnNames, boolean isFullAcid, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone legacyFileTimeZone, TypeManager typeManager, DataSize maxMergeDistance, DataSize maxBufferSize, DataSize streamBufferSize, DataSize tinyStripeThreshold, DataSize maxReadBlockSize, boolean lazyReadSmallRanges, boolean orcBloomFiltersEnabled, FileFormatDataSourceStats stats, Optional<DynamicFilterSupplier> dynamicFilters, Optional<DeleteDeltaLocations> deleteDeltaLocations, Optional<Long> startRowOffsetOfFile, Optional<List<IndexMetadata>> indexes, SplitMetadata splitMetadata, OrcCacheStore orcCacheStore, OrcCacheProperties orcCacheProperties, int domainCompactionThreshold, boolean pageMetadataEnabled, long dataSourceLastModifiedTime) {
    for (HiveColumnHandle column : columns) {
        checkArgument(column.getColumnType() == HiveColumnHandle.ColumnType.REGULAR || column.getHiveColumnIndex() == HiveColumnHandle.ROW_ID__COLUMN_INDEX, "column type must be regular: %s", column);
    }
    checkArgument(!effectivePredicate.isNone());
    OrcDataSource orcDataSource;
    try {
        // Always create a lazy Stream. HDFS stream opened only when required.
        FSDataInputStream inputStream = new FSDataInputStream(new LazyFSInputStream(() -> {
            FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
            return hdfsEnvironment.doAs(sessionUser, () -> fileSystem.open(path));
        }));
        orcDataSource = new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), fileSize, maxMergeDistance, maxBufferSize, streamBufferSize, lazyReadSmallRanges, inputStream, stats, dataSourceLastModifiedTime);
    } catch (Exception e) {
        // "Filesystem closed" and missing files are reported without the split details in the message.
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    AggregatedMemoryContext systemMemoryUsage = newSimpleAggregatedMemoryContext();
    try {
        OrcDataSource readerLocalDataSource = OrcReader.wrapWithCacheIfTiny(orcDataSource, tinyStripeThreshold);
        OrcFileTail fileTail;
        if (orcCacheProperties.isFileTailCacheEnabled()) {
            try {
                // The cache key combines the data source id with its last-modified time.
                OrcDataSourceIdWithTimeStamp orcDataSourceIdWithTimeStamp = new OrcDataSourceIdWithTimeStamp(readerLocalDataSource.getId(), readerLocalDataSource.getLastModifiedTime());
                fileTail = orcCacheStore.getFileTailCache().get(new OrcFileTailCacheKey(orcDataSourceIdWithTimeStamp), () -> OrcPageSourceFactory.createFileTail(orcDataSource));
            } catch (UncheckedExecutionException | ExecutionException executionException) {
                handleCacheLoadException(executionException);
                log.debug(executionException.getCause(), "Error while caching the Orc file tail. Falling back to default flow");
                fileTail = OrcPageSourceFactory.createFileTail(orcDataSource);
            }
        } else {
            fileTail = OrcPageSourceFactory.createFileTail(orcDataSource);
        }
        OrcReader reader = new OrcReader(readerLocalDataSource, fileTail, maxMergeDistance, tinyStripeThreshold, maxReadBlockSize);
        List<OrcColumn> fileColumns = reader.getRootColumn().getNestedColumns();
        // Reserve room for the five ACID columns that are prepended for full-ACID reads.
        List<OrcColumn> fileReadColumns = isFullAcid ? new ArrayList<>(columns.size() + 5) : new ArrayList<>(columns.size());
        List<Type> fileReadTypes = isFullAcid ? new ArrayList<>(columns.size() + 5) : new ArrayList<>(columns.size());
        ImmutableList<String> acidColumnNames = null;
        List<ColumnAdaptation> columnAdaptations = new ArrayList<>(columns.size());
        // Only Hive ACID files will begin with bucket_
        // NOTE(review): the check below is contains("bucket"), not a "bucket_" prefix test — confirm which is intended.
        boolean fileNameContainsBucket = path.getName().contains("bucket");
        if (isFullAcid && fileNameContainsBucket) {
            // Skip the acid schema check in case of non-ACID files
            acidColumnNames = ImmutableList.<String>builder().add(ACID_COLUMN_ORIGINAL_TRANSACTION, ACID_COLUMN_BUCKET, ACID_COLUMN_ROW_ID, ACID_COLUMN_CURRENT_TRANSACTION, ACID_COLUMN_OPERATION).build();
            verifyAcidSchema(reader, path);
            Map<String, OrcColumn> acidColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
            if (AcidUtils.isDeleteDelta(path.getParent())) {
                // Avoid reading column data from delete_delta files.
                // Call will come here in case of Minor VACUUM where all delete_delta files are merge together.
                fileColumns = ImmutableList.of();
            } else {
                fileColumns = ensureColumnNameConsistency(acidColumnsByName.get(ACID_COLUMN_ROW_STRUCT).getNestedColumns(), columns);
            }
            // The order of these additions must match acidColumnNames: the struct adaptation
            // below uses acidColumnNames.indexOf(...) as the source column index.
            fileReadColumns.add(acidColumnsByName.get(ACID_COLUMN_ORIGINAL_TRANSACTION.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadColumns.add(acidColumnsByName.get(ACID_COLUMN_BUCKET.toLowerCase(ENGLISH)));
            fileReadTypes.add(INTEGER);
            fileReadColumns.add(acidColumnsByName.get(ACID_COLUMN_ROW_ID.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadColumns.add(acidColumnsByName.get(ACID_COLUMN_CURRENT_TRANSACTION.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadColumns.add(acidColumnsByName.get(ACID_COLUMN_OPERATION.toLowerCase(ENGLISH)));
            fileReadTypes.add(INTEGER);
        }
        Map<String, OrcColumn> fileColumnsByName = ImmutableMap.of();
        if (useOrcColumnNames || isFullAcid) {
            verifyFileHasColumnNames(fileColumns, path);
            // Convert column names read from ORC files to lower case to be consistent with those stored in Hive Metastore
            fileColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
        }
        TupleDomainOrcPredicateBuilder predicateBuilder = TupleDomainOrcPredicate.builder().setBloomFiltersEnabled(orcBloomFiltersEnabled);
        Map<HiveColumnHandle, Domain> effectivePredicateDomains = effectivePredicate.getDomains().orElseThrow(() -> new IllegalArgumentException("Effective predicate is none"));
        for (HiveColumnHandle column : columns) {
            // Resolve the file column either by name or by Hive column index.
            OrcColumn orcColumn = null;
            if (useOrcColumnNames || isFullAcid) {
                orcColumn = fileColumnsByName.get(column.getName());
            } else if (column.getHiveColumnIndex() >= 0 && column.getHiveColumnIndex() < fileColumns.size()) {
                orcColumn = fileColumns.get(column.getHiveColumnIndex());
            }
            Type readType = typeManager.getType(column.getTypeSignature());
            if (orcColumn != null) {
                // The column exists in the file: read it and push down its domain, if any.
                int sourceIndex = fileReadColumns.size();
                columnAdaptations.add(ColumnAdaptation.sourceColumn(sourceIndex));
                fileReadColumns.add(orcColumn);
                fileReadTypes.add(readType);
                Domain domain = effectivePredicateDomains.get(column);
                if (domain != null) {
                    predicateBuilder.addColumn(orcColumn.getColumnId(), domain);
                }
            } else if (isFullAcid && readType instanceof RowType && column.getName().equalsIgnoreCase(HiveColumnHandle.UPDATE_ROW_ID_COLUMN_NAME)) {
                // The update row-id column is assembled as a struct over the ACID columns read above.
                // NOTE(review): acidColumnNames is still null when isFullAcid is set but the file name
                // does not contain "bucket"; the method reference below would then throw
                // NullPointerException (reported as HIVE_CANNOT_OPEN_SPLIT) — confirm this path is unreachable.
                HiveType hiveType = column.getHiveType();
                StructTypeInfo structTypeInfo = (StructTypeInfo) hiveType.getTypeInfo();
                List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
                List<ColumnAdaptation> adaptations = fieldNames.stream().map(acidColumnNames::indexOf).map(c -> ColumnAdaptation.sourceColumn(c, false)).collect(Collectors.toList());
                columnAdaptations.add(ColumnAdaptation.structColumn(structTypeInfo, adaptations));
            } else {
                // The column is absent from the file: produce nulls of the expected type.
                columnAdaptations.add(ColumnAdaptation.nullColumn(readType));
            }
        }
        // Re-key the already unwrapped predicate domains by column name for the record reader.
        Map<String, Domain> domains = effectivePredicateDomains.entrySet().stream().collect(toMap(e -> e.getKey().getName(), Map.Entry::getValue));
        OrcRecordReader recordReader = reader.createRecordReader(fileReadColumns, fileReadTypes, predicateBuilder.build(), start, length, legacyFileTimeZone, systemMemoryUsage, INITIAL_BATCH_SIZE, exception -> handleException(orcDataSource.getId(), exception), indexes, splitMetadata, domains, orcCacheStore, orcCacheProperties, pageMetadataEnabled);
        OrcDeletedRows deletedRows = new OrcDeletedRows(path.getName(), deleteDeltaLocations, new OrcDeleteDeltaPageSourceFactory(sessionUser, configuration, hdfsEnvironment, maxMergeDistance, maxBufferSize, streamBufferSize, maxReadBlockSize, tinyStripeThreshold, lazyReadSmallRanges, orcBloomFiltersEnabled, stats), sessionUser, configuration, hdfsEnvironment, startRowOffsetOfFile);
        // Eager loading is requested when at least one supplied index id is in EAGER_LOAD_INDEX_ID.
        boolean eagerload = indexes.map(indexList -> indexList.stream().anyMatch(indexMetadata -> EAGER_LOAD_INDEX_ID.contains(indexMetadata.getIndex().getId()))).orElse(false);
        return new OrcPageSource(recordReader, columnAdaptations, orcDataSource, deletedRows, eagerload, systemMemoryUsage, stats);
    } catch (Exception e) {
        // Best-effort cleanup: a failure to close must not mask the original error.
        try {
            orcDataSource.close();
        } catch (IOException ignored) {
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof BlockMissingException) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : OrcReader(io.prestosql.orc.OrcReader) DateTimeZone(org.joda.time.DateTimeZone) LONG(io.prestosql.orc.metadata.OrcType.OrcTypeKind.LONG) TupleDomainOrcPredicate(io.prestosql.orc.TupleDomainOrcPredicate) FileSystem(org.apache.hadoop.fs.FileSystem) HiveSessionProperties.isOrcStripeFooterCacheEnabled(io.prestosql.plugin.hive.HiveSessionProperties.isOrcStripeFooterCacheEnabled) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) HiveSessionProperties.isOrcRowDataCacheEnabled(io.prestosql.plugin.hive.HiveSessionProperties.isOrcRowDataCacheEnabled) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) RowType(io.prestosql.spi.type.RowType) HiveSessionProperties.getOrcStreamBufferSize(io.prestosql.plugin.hive.HiveSessionProperties.getOrcStreamBufferSize) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) ENGLISH(java.util.Locale.ENGLISH) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) ColumnAdaptation(io.prestosql.plugin.hive.orc.OrcPageSource.ColumnAdaptation) FileFormatDataSourceStats(io.prestosql.plugin.hive.FileFormatDataSourceStats) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) HIVE_BAD_DATA(io.prestosql.plugin.hive.HiveErrorCode.HIVE_BAD_DATA) OrcCacheProperties(io.prestosql.orc.OrcCacheProperties) Domain(io.prestosql.spi.predicate.Domain) SplitMetadata(io.prestosql.spi.heuristicindex.SplitMetadata) HiveSessionProperties.getOrcMaxBufferSize(io.prestosql.plugin.hive.HiveSessionProperties.getOrcMaxBufferSize) TupleDomainOrcPredicateBuilder(io.prestosql.orc.TupleDomainOrcPredicate.TupleDomainOrcPredicateBuilder) ArrayList(java.util.ArrayList) INITIAL_BATCH_SIZE(io.prestosql.orc.OrcReader.INITIAL_BATCH_SIZE) 
HdfsEnvironment(io.prestosql.plugin.hive.HdfsEnvironment) DynamicFilterSupplier(io.prestosql.spi.dynamicfilter.DynamicFilterSupplier) HiveSessionProperties.getOrcMaxReadBlockSize(io.prestosql.plugin.hive.HiveSessionProperties.getOrcMaxReadBlockSize) HiveSessionProperties.isOrcFileTailCacheEnabled(io.prestosql.plugin.hive.HiveSessionProperties.isOrcFileTailCacheEnabled) HivePageSourceFactory(io.prestosql.plugin.hive.HivePageSourceFactory) Properties(java.util.Properties) DeleteDeltaLocations(io.prestosql.plugin.hive.DeleteDeltaLocations) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) HiveSessionProperties.isOrcBloomFiltersEnabled(io.prestosql.plugin.hive.HiveSessionProperties.isOrcBloomFiltersEnabled) OrcPageSource.handleException(io.prestosql.plugin.hive.orc.OrcPageSource.handleException) STRUCT(io.prestosql.orc.metadata.OrcType.OrcTypeKind.STRUCT) HiveSessionProperties.getOrcLazyReadSmallRanges(io.prestosql.plugin.hive.HiveSessionProperties.getOrcLazyReadSmallRanges) OrcRecordReader(io.prestosql.orc.OrcRecordReader) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Seekable(org.apache.hadoop.fs.Seekable) HiveSessionProperties.getOrcTinyStripeThreshold(io.prestosql.plugin.hive.HiveSessionProperties.getOrcTinyStripeThreshold) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Collectors.toMap(java.util.stream.Collectors.toMap) HiveConfig(io.prestosql.plugin.hive.HiveConfig) OrcDataSourceIdWithTimeStamp(io.prestosql.orc.OrcDataSourceIdWithTimeStamp) Path(org.apache.hadoop.fs.Path) Type(io.prestosql.spi.type.Type) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) PrestoException(io.prestosql.spi.PrestoException) OrcFileTail(io.prestosql.orc.OrcFileTail) OrcTypeKind(io.prestosql.orc.metadata.OrcType.OrcTypeKind) 
ImmutableMap(com.google.common.collect.ImmutableMap) INT(io.prestosql.orc.metadata.OrcType.OrcTypeKind.INT) AcidUtils.isFullAcidTable(org.apache.hadoop.hive.ql.io.AcidUtils.isFullAcidTable) HIVE_FILE_MISSING_COLUMN_NAMES(io.prestosql.plugin.hive.HiveErrorCode.HIVE_FILE_MISSING_COLUMN_NAMES) HiveSessionProperties.isOrcBloomFiltersCacheEnabled(io.prestosql.plugin.hive.HiveSessionProperties.isOrcBloomFiltersCacheEnabled) OrcDataSource(io.prestosql.orc.OrcDataSource) HiveType(io.prestosql.plugin.hive.HiveType) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) OrcColumn(io.prestosql.orc.OrcColumn) OrcFileTailCacheKey(io.prestosql.orc.OrcFileTailCacheKey) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveSessionProperties.getOrcMaxMergeDistance(io.prestosql.plugin.hive.HiveSessionProperties.getOrcMaxMergeDistance) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) HIVE_CANNOT_OPEN_SPLIT(io.prestosql.plugin.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) Logger(io.airlift.log.Logger) FixedPageSource(io.prestosql.spi.connector.FixedPageSource) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) HashMap(java.util.HashMap) INTEGER(io.prestosql.spi.type.IntegerType.INTEGER) Inject(javax.inject.Inject) HIVE_MISSING_DATA(io.prestosql.plugin.hive.HiveErrorCode.HIVE_MISSING_DATA) ImmutableList(com.google.common.collect.ImmutableList) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) Objects.requireNonNull(java.util.Objects.requireNonNull) PositionedReadable(org.apache.hadoop.fs.PositionedReadable) 
HiveSessionProperties.isOrcRowIndexCacheEnabled(io.prestosql.plugin.hive.HiveSessionProperties.isOrcRowIndexCacheEnabled) HiveUtil(io.prestosql.plugin.hive.HiveUtil) TupleDomain(io.prestosql.spi.predicate.TupleDomain) Maps(com.google.common.collect.Maps) OrcCacheStore(io.prestosql.orc.OrcCacheStore) OrcDataSourceId(io.prestosql.orc.OrcDataSourceId) OrcReader.handleCacheLoadException(io.prestosql.orc.OrcReader.handleCacheLoadException) InputStream(java.io.InputStream) TupleDomainOrcPredicateBuilder(io.prestosql.orc.TupleDomainOrcPredicate.TupleDomainOrcPredicateBuilder) FileNotFoundException(java.io.FileNotFoundException) ArrayList(java.util.ArrayList) OrcFileTail(io.prestosql.orc.OrcFileTail) FileSystem(org.apache.hadoop.fs.FileSystem) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) OrcColumn(io.prestosql.orc.OrcColumn) OrcReader(io.prestosql.orc.OrcReader) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) OrcFileTailCacheKey(io.prestosql.orc.OrcFileTailCacheKey) Domain(io.prestosql.spi.predicate.Domain) TupleDomain(io.prestosql.spi.predicate.TupleDomain) HiveType(io.prestosql.plugin.hive.HiveType) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Collectors.toMap(java.util.stream.Collectors.toMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) ColumnAdaptation(io.prestosql.plugin.hive.orc.OrcPageSource.ColumnAdaptation) ImmutableList(com.google.common.collect.ImmutableList) RowType(io.prestosql.spi.type.RowType) PrestoException(io.prestosql.spi.PrestoException) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) ExecutionException(java.util.concurrent.ExecutionException) 
UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) OrcDataSource(io.prestosql.orc.OrcDataSource) OrcDataSourceId(io.prestosql.orc.OrcDataSourceId) OrcDataSourceIdWithTimeStamp(io.prestosql.orc.OrcDataSourceIdWithTimeStamp) IOException(java.io.IOException) OrcRecordReader(io.prestosql.orc.OrcRecordReader) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) OrcPageSource.handleException(io.prestosql.plugin.hive.orc.OrcPageSource.handleException) PrestoException(io.prestosql.spi.PrestoException) FileNotFoundException(java.io.FileNotFoundException) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) OrcReader.handleCacheLoadException(io.prestosql.orc.OrcReader.handleCacheLoadException) RowType(io.prestosql.spi.type.RowType) Type(io.prestosql.spi.type.Type) HiveType(io.prestosql.plugin.hive.HiveType) TupleDomainOrcPredicateBuilder(io.prestosql.orc.TupleDomainOrcPredicate.TupleDomainOrcPredicateBuilder)

Example 58 with BIGINT

use of io.prestosql.spi.type.BigintType.BIGINT in project hetu-core by openlookeng.

the class TestBTreeIndex method testLessThan.

/**
 * Round-trips a {@link BTreeIndex} through a file and checks a LESS_THAN lookup.
 *
 * <p>Keys 100..199 are inserted with values "value0".."value99". After serializing and
 * deserializing, looking up {@code dummyCol < 120} must yield exactly the twenty values
 * "value0".."value19", in the order produced by sorting those strings.
 *
 * @throws IOException if writing, reading, or closing the index file fails
 * @throws IndexLookUpException if the lookup itself fails
 */
@Test
public void testLessThan() throws IOException, IndexLookUpException {
    BTreeIndex index = new BTreeIndex();
    for (int i = 0; i < 100; i++) {
        List<Pair> pairs = new ArrayList<>();
        Long key = Long.valueOf(100 + i);
        String value = "value" + i;
        pairs.add(new Pair(key, value));
        Pair pair = new Pair("dummyCol", pairs);
        index.addKeyValues(Collections.singletonList(pair));
    }
    File file = getFile();
    // Close the streams deterministically so the file is fully written before it is read back
    // and no file handles leak from the test.
    try (FileOutputStream out = new FileOutputStream(file)) {
        index.serialize(out);
    }
    BTreeIndex readIndex = new BTreeIndex();
    try (FileInputStream in = new FileInputStream(file)) {
        readIndex.deserialize(in);
    }
    RowExpression comparisonExpression = simplePredicate(OperatorType.LESS_THAN, "dummyCol", BIGINT, 120L);
    Iterator<String> result = readIndex.lookUp(comparisonExpression);
    assertNotNull(result, "Result shouldn't be null");
    assertTrue(result.hasNext());
    // Expected values are compared in sorted string order ("value0", "value1", "value10", ...).
    Object[] arr = IntStream.iterate(0, n -> n + 1).limit(20).mapToObj(i -> "value" + i).toArray();
    Arrays.sort(arr);
    for (int i = 0; i < 20; i++) {
        assertEquals(arr[i], result.next());
    }
    assertFalse(result.hasNext());
    index.close();
    // The deserialized index is a separate instance and needs its own close.
    readIndex.close();
}
Also used : IntStream(java.util.stream.IntStream) Arrays(java.util.Arrays) ConstantExpression(io.prestosql.spi.relation.ConstantExpression) Assert.assertEquals(org.testng.Assert.assertEquals) Test(org.testng.annotations.Test) ArrayList(java.util.ArrayList) VARCHAR(io.prestosql.spi.type.VarcharType.VARCHAR) OperatorType(io.prestosql.spi.function.OperatorType) BOOLEAN(io.prestosql.spi.type.BooleanType.BOOLEAN) SpecialForm(io.prestosql.spi.relation.SpecialForm) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) Assert.assertFalse(org.testng.Assert.assertFalse) Iterator(java.util.Iterator) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) VariableReferenceExpression(io.prestosql.spi.relation.VariableReferenceExpression) FileInputStream(java.io.FileInputStream) UUID(java.util.UUID) Assert.assertNotNull(org.testng.Assert.assertNotNull) Pair(io.prestosql.spi.heuristicindex.Pair) File(java.io.File) List(java.util.List) HeuristicIndexTestUtils.simplePredicate(io.hetu.core.HeuristicIndexTestUtils.simplePredicate) RowExpression(io.prestosql.spi.relation.RowExpression) Assert.assertTrue(org.testng.Assert.assertTrue) IndexLookUpException(io.prestosql.spi.heuristicindex.IndexLookUpException) Index(io.prestosql.spi.heuristicindex.Index) Collections(java.util.Collections) ArrayList(java.util.ArrayList) RowExpression(io.prestosql.spi.relation.RowExpression) FileInputStream(java.io.FileInputStream) FileOutputStream(java.io.FileOutputStream) File(java.io.File) Pair(io.prestosql.spi.heuristicindex.Pair) Test(org.testng.annotations.Test)

Example 59 with BIGINT

use of io.prestosql.spi.type.BigintType.BIGINT in project hetu-core by openlookeng.

the class OrcTester method preprocessWriteValueHive.

/**
 * Converts a test value into the Java representation used when writing it through Hive,
 * according to the given Presto {@code type}.
 *
 * <p>Nulls, booleans and varchar values pass through unchanged; numeric types are narrowed
 * or widened via {@link Number}; char, varbinary, date, timestamp and decimal values are
 * converted to their Hive or raw counterparts; arrays, maps and rows are converted recursively.
 *
 * @param type the Presto type describing {@code value}
 * @param value the value to convert, may be {@code null}
 * @return the converted value, or {@code null} when {@code value} is {@code null}
 * @throws IllegalArgumentException if {@code type} is not one of the handled types
 */
private static Object preprocessWriteValueHive(Type type, Object value) {
    if (value == null) {
        return null;
    }

    // Values that need no conversion.
    if (type.equals(BOOLEAN) || type instanceof VarcharType) {
        return value;
    }

    // Fixed-width numerics: coerce to the exact boxed width for the type.
    if (type.equals(TINYINT)) {
        Number number = (Number) value;
        return number.byteValue();
    }
    if (type.equals(SMALLINT)) {
        Number number = (Number) value;
        return number.shortValue();
    }
    if (type.equals(INTEGER)) {
        Number number = (Number) value;
        return number.intValue();
    }
    if (type.equals(BIGINT)) {
        Number number = (Number) value;
        return number.longValue();
    }
    if (type.equals(REAL)) {
        Number number = (Number) value;
        return number.floatValue();
    }
    if (type.equals(DOUBLE)) {
        Number number = (Number) value;
        return number.doubleValue();
    }

    // Scalar types with a dedicated target representation.
    if (type instanceof CharType) {
        int charLength = ((CharType) type).getLength();
        return new HiveChar((String) value, charLength);
    }
    if (type.equals(VARBINARY)) {
        SqlVarbinary binary = (SqlVarbinary) value;
        return binary.getBytes();
    }
    if (type.equals(DATE)) {
        SqlDate sqlDate = (SqlDate) value;
        return Date.ofEpochDay(sqlDate.getDays());
    }
    if (type.equals(TIMESTAMP)) {
        SqlTimestamp sqlTimestamp = (SqlTimestamp) value;
        return Timestamp.ofEpochMilli(sqlTimestamp.getMillis());
    }
    if (type instanceof DecimalType) {
        SqlDecimal sqlDecimal = (SqlDecimal) value;
        return HiveDecimal.create(sqlDecimal.toBigDecimal());
    }

    // Container types are identified by the base name of the type signature.
    String baseName = type.getTypeSignature().getBase();
    if (baseName.equals(StandardTypes.ARRAY)) {
        Type itemType = type.getTypeParameters().get(0);
        List<?> items = (List<?>) value;
        return items.stream()
                .map(item -> preprocessWriteValueHive(itemType, item))
                .collect(toList());
    }
    if (baseName.equals(StandardTypes.MAP)) {
        Type keyType = type.getTypeParameters().get(0);
        Type valueType = type.getTypeParameters().get(1);
        Map<Object, Object> converted = new HashMap<>();
        ((Map<?, ?>) value).forEach((k, v) ->
                converted.put(preprocessWriteValueHive(keyType, k), preprocessWriteValueHive(valueType, v)));
        return converted;
    }
    if (baseName.equals(StandardTypes.ROW)) {
        List<?> rowValues = (List<?>) value;
        List<Type> rowTypes = type.getTypeParameters();
        List<Object> convertedRow = new ArrayList<>();
        int fieldCount = rowValues.size();
        for (int position = 0; position < fieldCount; position++) {
            Type fieldType = rowTypes.get(position);
            convertedRow.add(preprocessWriteValueHive(fieldType, rowValues.get(position)));
        }
        return convertedRow;
    }

    throw new IllegalArgumentException("unsupported type: " + type);
}
Also used : OrcUtil(org.apache.hadoop.hive.ql.io.orc.OrcUtil) DateTimeZone(org.joda.time.DateTimeZone) LZ4(io.prestosql.orc.metadata.CompressionKind.LZ4) PrimitiveObjectInspectorFactory.javaByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteObjectInspector) Text(org.apache.hadoop.io.Text) PrimitiveObjectInspectorFactory.javaLongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaLongObjectInspector) Writable(org.apache.hadoop.io.Writable) PrimitiveObjectInspectorFactory.javaTimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaTimestampObjectInspector) Date(org.apache.hadoop.hive.common.type.Date) PrimitiveObjectInspectorFactory.javaDateObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDateObjectInspector) ZSTD(io.prestosql.orc.metadata.CompressionKind.ZSTD) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) OrcStruct(org.apache.hadoop.hive.ql.io.orc.OrcStruct) BigDecimal(java.math.BigDecimal) Arrays.asList(java.util.Arrays.asList) Slices(io.airlift.slice.Slices) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) RowFieldName(io.prestosql.spi.type.RowFieldName) BigInteger(java.math.BigInteger) Slices.utf8Slice(io.airlift.slice.Slices.utf8Slice) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Assert.assertFalse(org.testng.Assert.assertFalse) IntWritable(org.apache.hadoop.io.IntWritable) PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector) PrimitiveObjectInspectorFactory.javaFloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaFloatObjectInspector) 
PrimitiveObjectInspectorFactory.javaDoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaDoubleObjectInspector) Set(java.util.Set) VarbinaryType(io.prestosql.spi.type.VarbinaryType) READ_ALL_COLUMNS(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_ALL_COLUMNS) TIMESTAMP(io.prestosql.spi.type.TimestampType.TIMESTAMP) ReaderOptions(org.apache.hadoop.hive.ql.io.orc.OrcFile.ReaderOptions) Metadata(io.prestosql.metadata.Metadata) StandardCharsets(java.nio.charset.StandardCharsets) NONE(io.prestosql.orc.metadata.CompressionKind.NONE) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) BooleanWritable(org.apache.hadoop.io.BooleanWritable) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) FileAssert.fail(org.testng.FileAssert.fail) CompressionKind(io.prestosql.orc.metadata.CompressionKind) PrimitiveObjectInspectorFactory.javaIntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaIntObjectInspector) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) JavaHiveCharObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.JavaHiveCharObjectInspector) Iterables(com.google.common.collect.Iterables) Slice(io.airlift.slice.Slice) StandardTypes(io.prestosql.spi.type.StandardTypes) MEGABYTE(io.airlift.units.DataSize.Unit.MEGABYTE) CharType(io.prestosql.spi.type.CharType) DataSize.succinctBytes(io.airlift.units.DataSize.succinctBytes) TypeSignatureParameter(io.prestosql.spi.type.TypeSignatureParameter) HiveCharWritable(org.apache.hadoop.hive.serde2.io.HiveCharWritable) Decimals.rescale(io.prestosql.spi.type.Decimals.rescale) ArrayList(java.util.ArrayList) Lists(com.google.common.collect.Lists) VARCHAR(io.prestosql.spi.type.VarcharType.VARCHAR) SNAPPY(io.prestosql.orc.metadata.CompressionKind.SNAPPY) 
PrimitiveObjectInspectorFactory.javaShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaShortObjectInspector) BOTH(io.prestosql.orc.OrcWriteValidation.OrcWriteValidationMode.BOTH) DOUBLE(io.prestosql.spi.type.DoubleType.DOUBLE) Properties(java.util.Properties) AbstractIterator(com.google.common.collect.AbstractIterator) FileOutputStream(java.io.FileOutputStream) IOException(java.io.IOException) ObjectInspectorFactory.getStandardStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector) DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) File(java.io.File) SettableStructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector) VARBINARY(io.prestosql.spi.type.VarbinaryType.VARBINARY) DateTimeTestingUtils.sqlTimestampOf(io.prestosql.testing.DateTimeTestingUtils.sqlTimestampOf) Chars.truncateToLengthAndTrimSpaces(io.prestosql.spi.type.Chars.truncateToLengthAndTrimSpaces) FloatWritable(org.apache.hadoop.io.FloatWritable) VarcharType(io.prestosql.spi.type.VarcharType) RecordWriter(org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter) OrcFile(org.apache.hadoop.hive.ql.io.orc.OrcFile) DecimalType(io.prestosql.spi.type.DecimalType) LongWritable(org.apache.hadoop.io.LongWritable) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) ORC_12(io.prestosql.orc.OrcTester.Format.ORC_12) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) ORC_11(io.prestosql.orc.OrcTester.Format.ORC_11) OrcConf(org.apache.orc.OrcConf) RecordReader(org.apache.hadoop.hive.ql.io.orc.RecordReader) Path(org.apache.hadoop.fs.Path) BOOLEAN(io.prestosql.spi.type.BooleanType.BOOLEAN) Reader(org.apache.hadoop.hive.ql.io.orc.Reader) Type(io.prestosql.spi.type.Type) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) 
ImmutableSet(com.google.common.collect.ImmutableSet) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) ImmutableMap(com.google.common.collect.ImmutableMap) BlockBuilder(io.prestosql.spi.block.BlockBuilder) MetadataManager.createTestMetadataManager(io.prestosql.metadata.MetadataManager.createTestMetadataManager) TINYINT(io.prestosql.spi.type.TinyintType.TINYINT) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) DataSize(io.airlift.units.DataSize) List(java.util.List) ZLIB(io.prestosql.orc.metadata.CompressionKind.ZLIB) Entry(java.util.Map.Entry) Varchars.truncateToLength(io.prestosql.spi.type.Varchars.truncateToLength) Optional(java.util.Optional) READ_COLUMN_IDS_CONF_STR(org.apache.hadoop.hive.serde2.ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR) OrcOutputFormat(org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) IntStream(java.util.stream.IntStream) TypeInfoFactory.getCharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory.getCharTypeInfo) SqlTimestamp(io.prestosql.spi.type.SqlTimestamp) Assert.assertNull(org.testng.Assert.assertNull) Logger(io.airlift.log.Logger) SESSION(io.prestosql.testing.TestingConnectorSession.SESSION) PrimitiveObjectInspectorFactory.javaBooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaBooleanObjectInspector) SqlDate(io.prestosql.spi.type.SqlDate) Assert.assertEquals(org.testng.Assert.assertEquals) Decimals(io.prestosql.spi.type.Decimals) HashMap(java.util.HashMap) INTEGER(io.prestosql.spi.type.IntegerType.INTEGER) SqlDecimal(io.prestosql.spi.type.SqlDecimal) DoubleWritable(org.apache.hadoop.io.DoubleWritable) Function(java.util.function.Function) 
PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector) SqlVarbinary(io.prestosql.spi.type.SqlVarbinary) HashSet(java.util.HashSet) NamedTypeSignature(io.prestosql.spi.type.NamedTypeSignature) MAX_BATCH_SIZE(io.prestosql.orc.OrcReader.MAX_BATCH_SIZE) ImmutableList(com.google.common.collect.ImmutableList) ByteWritable(org.apache.hadoop.io.ByteWritable) BytesWritable(org.apache.hadoop.io.BytesWritable) DATE(io.prestosql.spi.type.DateType.DATE) REAL(io.prestosql.spi.type.RealType.REAL) ObjectInspectorFactory.getStandardMapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardMapObjectInspector) Block(io.prestosql.spi.block.Block) Iterator(java.util.Iterator) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) Iterators.advance(com.google.common.collect.Iterators.advance) Page(io.prestosql.spi.Page) ObjectInspectorFactory.getStandardListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardListObjectInspector) JobConf(org.apache.hadoop.mapred.JobConf) Collectors.toList(java.util.stream.Collectors.toList) SMALLINT(io.prestosql.spi.type.SmallintType.SMALLINT) Serializer(org.apache.hadoop.hive.serde2.Serializer) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) TestingOrcPredicate.createOrcPredicate(io.prestosql.orc.TestingOrcPredicate.createOrcPredicate) Assert.assertTrue(org.testng.Assert.assertTrue) PrimitiveObjectInspectorFactory.javaStringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory.javaStringObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) VarcharType(io.prestosql.spi.type.VarcharType) HashMap(java.util.HashMap) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) SqlVarbinary(io.prestosql.spi.type.SqlVarbinary) 
Lists.newArrayList(com.google.common.collect.Lists.newArrayList) ArrayList(java.util.ArrayList) VarbinaryType(io.prestosql.spi.type.VarbinaryType) CharType(io.prestosql.spi.type.CharType) VarcharType(io.prestosql.spi.type.VarcharType) DecimalType(io.prestosql.spi.type.DecimalType) Type(io.prestosql.spi.type.Type) DecimalType(io.prestosql.spi.type.DecimalType) Arrays.asList(java.util.Arrays.asList) Lists.newArrayList(com.google.common.collect.Lists.newArrayList) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) CharType(io.prestosql.spi.type.CharType) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Example 60 with BIGINT

use of io.prestosql.spi.type.BigintType.BIGINT in project boostkit-bigdata by kunpengcompute.

The method createOrcPageSource of the class OrcPageSourceFactory.

/**
 * Opens an ORC file lazily and builds an {@link OrcPageSource} that reads the requested columns.
 *
 * <p>The method proceeds in these steps:
 * <ol>
 *   <li>validate {@code columns} and {@code effectivePredicate};</li>
 *   <li>wrap the file in an {@code HdfsOrcDataSource} backed by a lazily opened stream;</li>
 *   <li>obtain the file tail, through the file-tail cache when
 *       {@code orcCacheProperties.isFileTailCacheEnabled()} is true;</li>
 *   <li>for full-ACID files whose name contains {@code "bucket"}, verify the ACID schema and
 *       prepend the five ACID metadata columns to the read list;</li>
 *   <li>map every requested column to a file column (by lower-cased name or by Hive column index),
 *       to a struct built from the ACID columns, or to a null column;</li>
 *   <li>create the record reader, the deleted-rows helper and the page source.</li>
 * </ol>
 *
 * @param hdfsEnvironment used to obtain the file system and to open the file as {@code sessionUser}
 * @param sessionUser user on whose behalf the file is opened
 * @param configuration Hadoop configuration used to resolve the file system
 * @param path location of the ORC file
 * @param start split start, passed to the record reader and included in error messages
 * @param length split length, passed to the record reader and included in error messages
 * @param fileSize size handed to the data source
 * @param columns columns to read; each must be a regular column or the row-id column
 * @param useOrcColumnNames when true (or when {@code isFullAcid} is true) columns are matched by
 *        lower-cased name instead of by Hive column index
 * @param isFullAcid whether the file should be treated as a full-ACID file
 * @param effectivePredicate predicate whose domains are pushed into the ORC predicate for columns
 *        found in the file; must not be "none"
 * @param legacyFileTimeZone passed to the record reader
 * @param typeManager resolves each column's type signature to a read type
 * @param maxMergeDistance passed to the data source, the reader and the delete-delta factory
 * @param maxBufferSize passed to the data source and the delete-delta factory
 * @param streamBufferSize passed to the data source and the delete-delta factory
 * @param tinyStripeThreshold passed to the tiny-file cache wrapper, the reader and the delete-delta factory
 * @param maxReadBlockSize passed to the reader and the delete-delta factory
 * @param lazyReadSmallRanges passed to the data source and the delete-delta factory
 * @param orcBloomFiltersEnabled passed to the predicate builder and the delete-delta factory
 * @param stats passed to the data source, the delete-delta factory and the page source
 * @param dynamicFilters not referenced by this method
 * @param deleteDeltaLocations passed to {@code OrcDeletedRows}
 * @param startRowOffsetOfFile passed to {@code OrcDeletedRows}
 * @param indexes passed to the record reader; also decides whether the page source loads eagerly
 * @param splitMetadata passed to the record reader
 * @param orcCacheStore provides the file-tail cache and is passed to the record reader
 * @param orcCacheProperties controls file-tail caching and is passed to the record reader
 * @param domainCompactionThreshold not referenced by this method
 * @param pageMetadataEnabled passed to the record reader
 * @param dataSourceLastModifiedTime passed to the data source
 * @return a page source over the requested columns
 * @throws PrestoException with {@code HIVE_CANNOT_OPEN_SPLIT} or {@code HIVE_MISSING_DATA} when the
 *         file cannot be opened or read; a {@code PrestoException} raised while building the
 *         reader is rethrown unchanged
 */
public static OrcPageSource createOrcPageSource(HdfsEnvironment hdfsEnvironment, String sessionUser, Configuration configuration, Path path, long start, long length, long fileSize, List<HiveColumnHandle> columns, boolean useOrcColumnNames, boolean isFullAcid, TupleDomain<HiveColumnHandle> effectivePredicate, DateTimeZone legacyFileTimeZone, TypeManager typeManager, DataSize maxMergeDistance, DataSize maxBufferSize, DataSize streamBufferSize, DataSize tinyStripeThreshold, DataSize maxReadBlockSize, boolean lazyReadSmallRanges, boolean orcBloomFiltersEnabled, FileFormatDataSourceStats stats, Optional<DynamicFilterSupplier> dynamicFilters, Optional<DeleteDeltaLocations> deleteDeltaLocations, Optional<Long> startRowOffsetOfFile, Optional<List<IndexMetadata>> indexes, SplitMetadata splitMetadata, OrcCacheStore orcCacheStore, OrcCacheProperties orcCacheProperties, int domainCompactionThreshold, boolean pageMetadataEnabled, long dataSourceLastModifiedTime) {
    for (HiveColumnHandle column : columns) {
        checkArgument(column.getColumnType() == HiveColumnHandle.ColumnType.REGULAR || column.getHiveColumnIndex() == HiveColumnHandle.ROW_ID__COLUMN_INDEX, "column type must be regular: %s", column);
    }
    checkArgument(!effectivePredicate.isNone());
    OrcDataSource orcDataSource;
    try {
        // Always create a lazy Stream. HDFS stream opened only when required.
        FSDataInputStream inputStream = new FSDataInputStream(new LazyFSInputStream(() -> {
            FileSystem fileSystem = hdfsEnvironment.getFileSystem(sessionUser, path, configuration);
            return hdfsEnvironment.doAs(sessionUser, () -> fileSystem.open(path));
        }));
        orcDataSource = new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), fileSize, maxMergeDistance, maxBufferSize, streamBufferSize, lazyReadSmallRanges, inputStream, stats, dataSourceLastModifiedTime);
    } catch (Exception e) {
        // A closed file system or a missing file is reported without the split-specific message.
        if (nullToEmpty(e.getMessage()).trim().equals("Filesystem closed") || e instanceof FileNotFoundException) {
            throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, splitError(e, path, start, length), e);
    }
    AggregatedMemoryContext systemMemoryUsage = newSimpleAggregatedMemoryContext();
    try {
        OrcDataSource readerLocalDataSource = OrcReader.wrapWithCacheIfTiny(orcDataSource, tinyStripeThreshold);
        OrcFileTail fileTail;
        if (orcCacheProperties.isFileTailCacheEnabled()) {
            try {
                // The cache key combines the data source id with its last-modified time.
                OrcDataSourceIdWithTimeStamp orcDataSourceIdWithTimeStamp = new OrcDataSourceIdWithTimeStamp(readerLocalDataSource.getId(), readerLocalDataSource.getLastModifiedTime());
                fileTail = orcCacheStore.getFileTailCache().get(new OrcFileTailCacheKey(orcDataSourceIdWithTimeStamp), () -> OrcPageSourceFactory.createFileTail(orcDataSource));
            } catch (UncheckedExecutionException | ExecutionException executionException) {
                handleCacheLoadException(executionException);
                log.debug(executionException.getCause(), "Error while caching the Orc file tail. Falling back to default flow");
                fileTail = OrcPageSourceFactory.createFileTail(orcDataSource);
            }
        } else {
            fileTail = OrcPageSourceFactory.createFileTail(orcDataSource);
        }
        OrcReader reader = new OrcReader(readerLocalDataSource, fileTail, maxMergeDistance, tinyStripeThreshold, maxReadBlockSize);
        List<OrcColumn> fileColumns = reader.getRootColumn().getNestedColumns();
        // The "+ 5" leaves room for the five ACID metadata columns added below.
        List<OrcColumn> fileReadColumns = isFullAcid ? new ArrayList<>(columns.size() + 5) : new ArrayList<>(columns.size());
        List<Type> fileReadTypes = isFullAcid ? new ArrayList<>(columns.size() + 5) : new ArrayList<>(columns.size());
        ImmutableList<String> acidColumnNames = null;
        List<ColumnAdaptation> columnAdaptations = new ArrayList<>(columns.size());
        // Only Hive ACID files will begin with bucket_
        // NOTE(review): the check below is contains("bucket"), not a prefix match on "bucket_".
        boolean fileNameContainsBucket = path.getName().contains("bucket");
        if (isFullAcid && fileNameContainsBucket) {
            // Skip the acid schema check in case of non-ACID files
            acidColumnNames = ImmutableList.<String>builder().add(ACID_COLUMN_ORIGINAL_TRANSACTION, ACID_COLUMN_BUCKET, ACID_COLUMN_ROW_ID, ACID_COLUMN_CURRENT_TRANSACTION, ACID_COLUMN_OPERATION).build();
            verifyAcidSchema(reader, path);
            Map<String, OrcColumn> acidColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
            if (AcidUtils.isDeleteDelta(path.getParent())) {
                // Avoid reading column data from delete_delta files.
                // Call will come here in case of Minor VACUUM where all delete_delta files are merge together.
                fileColumns = ImmutableList.of();
            } else {
                fileColumns = ensureColumnNameConsistency(acidColumnsByName.get(ACID_COLUMN_ROW_STRUCT).getNestedColumns(), columns);
            }
            // The ACID metadata columns occupy the first five read positions, in the same order as acidColumnNames.
            fileReadColumns.add(acidColumnsByName.get(ACID_COLUMN_ORIGINAL_TRANSACTION.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadColumns.add(acidColumnsByName.get(ACID_COLUMN_BUCKET.toLowerCase(ENGLISH)));
            fileReadTypes.add(INTEGER);
            fileReadColumns.add(acidColumnsByName.get(ACID_COLUMN_ROW_ID.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadColumns.add(acidColumnsByName.get(ACID_COLUMN_CURRENT_TRANSACTION.toLowerCase(ENGLISH)));
            fileReadTypes.add(BIGINT);
            fileReadColumns.add(acidColumnsByName.get(ACID_COLUMN_OPERATION.toLowerCase(ENGLISH)));
            fileReadTypes.add(INTEGER);
        }
        Map<String, OrcColumn> fileColumnsByName = ImmutableMap.of();
        if (useOrcColumnNames || isFullAcid) {
            verifyFileHasColumnNames(fileColumns, path);
            // Convert column names read from ORC files to lower case to be consistent with those stored in Hive Metastore
            fileColumnsByName = uniqueIndex(fileColumns, orcColumn -> orcColumn.getColumnName().toLowerCase(ENGLISH));
        }
        TupleDomainOrcPredicateBuilder predicateBuilder = TupleDomainOrcPredicate.builder().setBloomFiltersEnabled(orcBloomFiltersEnabled);
        Map<HiveColumnHandle, Domain> effectivePredicateDomains = effectivePredicate.getDomains().orElseThrow(() -> new IllegalArgumentException("Effective predicate is none"));
        for (HiveColumnHandle column : columns) {
            // Resolve the file column by name when names are trusted, otherwise by Hive column index.
            OrcColumn orcColumn = null;
            if (useOrcColumnNames || isFullAcid) {
                orcColumn = fileColumnsByName.get(column.getName());
            } else if (column.getHiveColumnIndex() >= 0 && column.getHiveColumnIndex() < fileColumns.size()) {
                orcColumn = fileColumns.get(column.getHiveColumnIndex());
            }
            Type readType = typeManager.getType(column.getTypeSignature());
            if (orcColumn != null) {
                // The adaptation points at the position this column is about to take in the read list.
                int sourceIndex = fileReadColumns.size();
                columnAdaptations.add(ColumnAdaptation.sourceColumn(sourceIndex));
                fileReadColumns.add(orcColumn);
                fileReadTypes.add(readType);
                Domain domain = effectivePredicateDomains.get(column);
                if (domain != null) {
                    predicateBuilder.addColumn(orcColumn.getColumnId(), domain);
                }
            } else if (isFullAcid && readType instanceof RowType && column.getName().equalsIgnoreCase(HiveColumnHandle.UPDATE_ROW_ID_COLUMN_NAME)) {
                // The update row id is a struct assembled from the ACID metadata columns read above.
                // NOTE(review): acidColumnNames is only assigned when the file name contains "bucket";
                // if this branch is reached otherwise, the method reference below throws a
                // NullPointerException that the outer catch wraps — confirm that case cannot occur.
                HiveType hiveType = column.getHiveType();
                StructTypeInfo structTypeInfo = (StructTypeInfo) hiveType.getTypeInfo();
                List<String> fieldNames = structTypeInfo.getAllStructFieldNames();
                List<ColumnAdaptation> adaptations = fieldNames.stream().map(acidColumnNames::indexOf).map(c -> ColumnAdaptation.sourceColumn(c, false)).collect(Collectors.toList());
                columnAdaptations.add(ColumnAdaptation.structColumn(structTypeInfo, adaptations));
            } else {
                // The column is absent from the file: produce nulls of the requested type.
                columnAdaptations.add(ColumnAdaptation.nullColumn(readType));
            }
        }
        // Re-key the already validated predicate domains by column name for the record reader.
        Map<String, Domain> domains = effectivePredicateDomains.entrySet().stream().collect(toMap(e -> e.getKey().getName(), Map.Entry::getValue));
        OrcRecordReader recordReader = reader.createRecordReader(fileReadColumns, fileReadTypes, predicateBuilder.build(), start, length, legacyFileTimeZone, systemMemoryUsage, INITIAL_BATCH_SIZE, exception -> handleException(orcDataSource.getId(), exception), indexes, splitMetadata, domains, orcCacheStore, orcCacheProperties, pageMetadataEnabled);
        OrcDeletedRows deletedRows = new OrcDeletedRows(path.getName(), deleteDeltaLocations, new OrcDeleteDeltaPageSourceFactory(sessionUser, configuration, hdfsEnvironment, maxMergeDistance, maxBufferSize, streamBufferSize, maxReadBlockSize, tinyStripeThreshold, lazyReadSmallRanges, orcBloomFiltersEnabled, stats), sessionUser, configuration, hdfsEnvironment, startRowOffsetOfFile);
        // Load eagerly when any supplied index has an id listed in EAGER_LOAD_INDEX_ID.
        boolean eagerload = indexes.isPresent() && indexes.get().stream().anyMatch(indexMetadata -> EAGER_LOAD_INDEX_ID.contains(indexMetadata.getIndex().getId()));
        return new OrcPageSource(recordReader, columnAdaptations, orcDataSource, deletedRows, eagerload, systemMemoryUsage, stats);
    } catch (Exception e) {
        try {
            orcDataSource.close();
        } catch (IOException ignored) {
            // Best-effort close: the original failure is reported below instead.
        }
        if (e instanceof PrestoException) {
            throw (PrestoException) e;
        }
        String message = splitError(e, path, start, length);
        if (e instanceof BlockMissingException) {
            throw new PrestoException(HIVE_MISSING_DATA, message, e);
        }
        throw new PrestoException(HIVE_CANNOT_OPEN_SPLIT, message, e);
    }
}
Also used : OrcReader(io.prestosql.orc.OrcReader) DateTimeZone(org.joda.time.DateTimeZone) LONG(io.prestosql.orc.metadata.OrcType.OrcTypeKind.LONG) TupleDomainOrcPredicate(io.prestosql.orc.TupleDomainOrcPredicate) FileSystem(org.apache.hadoop.fs.FileSystem) HiveSessionProperties.isOrcStripeFooterCacheEnabled(io.prestosql.plugin.hive.HiveSessionProperties.isOrcStripeFooterCacheEnabled) Maps.uniqueIndex(com.google.common.collect.Maps.uniqueIndex) HiveSessionProperties.isOrcRowDataCacheEnabled(io.prestosql.plugin.hive.HiveSessionProperties.isOrcRowDataCacheEnabled) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) RowType(io.prestosql.spi.type.RowType) HiveSessionProperties.getOrcStreamBufferSize(io.prestosql.plugin.hive.HiveSessionProperties.getOrcStreamBufferSize) Predicate(com.huawei.boostkit.omnidata.model.Predicate) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) ENGLISH(java.util.Locale.ENGLISH) FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) DataReaderFactory(com.huawei.boostkit.omnidata.reader.DataReaderFactory) ColumnAdaptation(io.prestosql.plugin.hive.orc.OrcPageSource.ColumnAdaptation) PageSourceUtil.buildPushdownContext(io.prestosql.plugin.hive.util.PageSourceUtil.buildPushdownContext) FileFormatDataSourceStats(io.prestosql.plugin.hive.FileFormatDataSourceStats) DataSource(com.huawei.boostkit.omnidata.model.datasource.DataSource) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) HiveOffloadExpression(io.prestosql.plugin.hive.HiveOffloadExpression) ConnectorPageSource(io.prestosql.spi.connector.ConnectorPageSource) HIVE_BAD_DATA(io.prestosql.plugin.hive.HiveErrorCode.HIVE_BAD_DATA) OrcCacheProperties(io.prestosql.orc.OrcCacheProperties) Domain(io.prestosql.spi.predicate.Domain) 
SplitMetadata(io.prestosql.spi.heuristicindex.SplitMetadata) HiveSessionProperties.getOrcMaxBufferSize(io.prestosql.plugin.hive.HiveSessionProperties.getOrcMaxBufferSize) TupleDomainOrcPredicateBuilder(io.prestosql.orc.TupleDomainOrcPredicate.TupleDomainOrcPredicateBuilder) ArrayList(java.util.ArrayList) INITIAL_BATCH_SIZE(io.prestosql.orc.OrcReader.INITIAL_BATCH_SIZE) HdfsEnvironment(io.prestosql.plugin.hive.HdfsEnvironment) DynamicFilterSupplier(io.prestosql.spi.dynamicfilter.DynamicFilterSupplier) HiveSessionProperties.getOrcMaxReadBlockSize(io.prestosql.plugin.hive.HiveSessionProperties.getOrcMaxReadBlockSize) HiveSessionProperties.isOrcFileTailCacheEnabled(io.prestosql.plugin.hive.HiveSessionProperties.isOrcFileTailCacheEnabled) HivePageSourceFactory(io.prestosql.plugin.hive.HivePageSourceFactory) Properties(java.util.Properties) DeleteDeltaLocations(io.prestosql.plugin.hive.DeleteDeltaLocations) TypeManager(io.prestosql.spi.type.TypeManager) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) HiveSessionProperties.isOrcBloomFiltersEnabled(io.prestosql.plugin.hive.HiveSessionProperties.isOrcBloomFiltersEnabled) OrcPageSource.handleException(io.prestosql.plugin.hive.orc.OrcPageSource.handleException) STRUCT(io.prestosql.orc.metadata.OrcType.OrcTypeKind.STRUCT) HiveSessionProperties.getOrcLazyReadSmallRanges(io.prestosql.plugin.hive.HiveSessionProperties.getOrcLazyReadSmallRanges) OrcRecordReader(io.prestosql.orc.OrcRecordReader) HivePartitionKey(io.prestosql.plugin.hive.HivePartitionKey) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) DataReader(com.huawei.boostkit.omnidata.reader.DataReader) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Seekable(org.apache.hadoop.fs.Seekable) HiveSessionProperties.getOrcTinyStripeThreshold(io.prestosql.plugin.hive.HiveSessionProperties.getOrcTinyStripeThreshold) Preconditions.checkArgument(com.google.common.base.Preconditions.checkArgument) 
ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Collectors.toMap(java.util.stream.Collectors.toMap) HiveConfig(io.prestosql.plugin.hive.HiveConfig) OrcDataSourceIdWithTimeStamp(io.prestosql.orc.OrcDataSourceIdWithTimeStamp) Path(org.apache.hadoop.fs.Path) OMNIDATA_CLIENT_TARGET_LIST(com.huawei.boostkit.omnidata.transfer.OmniDataProperty.OMNIDATA_CLIENT_TARGET_LIST) Type(io.prestosql.spi.type.Type) BIGINT(io.prestosql.spi.type.BigintType.BIGINT) PrestoException(io.prestosql.spi.PrestoException) OrcFileTail(io.prestosql.orc.OrcFileTail) OrcTypeKind(io.prestosql.orc.metadata.OrcType.OrcTypeKind) ImmutableMap(com.google.common.collect.ImmutableMap) INT(io.prestosql.orc.metadata.OrcType.OrcTypeKind.INT) AcidUtils.isFullAcidTable(org.apache.hadoop.hive.ql.io.AcidUtils.isFullAcidTable) HIVE_FILE_MISSING_COLUMN_NAMES(io.prestosql.plugin.hive.HiveErrorCode.HIVE_FILE_MISSING_COLUMN_NAMES) HiveSessionProperties.isOrcBloomFiltersCacheEnabled(io.prestosql.plugin.hive.HiveSessionProperties.isOrcBloomFiltersCacheEnabled) HivePushDownPageSource(io.prestosql.plugin.hive.HivePushDownPageSource) OrcDataSource(io.prestosql.orc.OrcDataSource) HiveType(io.prestosql.plugin.hive.HiveType) Collectors(java.util.stream.Collectors) FileNotFoundException(java.io.FileNotFoundException) String.format(java.lang.String.format) OrcColumn(io.prestosql.orc.OrcColumn) OrcFileTailCacheKey(io.prestosql.orc.OrcFileTailCacheKey) DataSize(io.airlift.units.DataSize) List(java.util.List) HiveSessionProperties.getOrcMaxMergeDistance(io.prestosql.plugin.hive.HiveSessionProperties.getOrcMaxMergeDistance) Function.identity(java.util.function.Function.identity) Optional(java.util.Optional) Pattern(java.util.regex.Pattern) IndexMetadata(io.prestosql.spi.heuristicindex.IndexMetadata) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) 
HIVE_CANNOT_OPEN_SPLIT(io.prestosql.plugin.hive.HiveErrorCode.HIVE_CANNOT_OPEN_SPLIT) Logger(io.airlift.log.Logger) FixedPageSource(io.prestosql.spi.connector.FixedPageSource) Strings.nullToEmpty(com.google.common.base.Strings.nullToEmpty) HiveSessionProperties(io.prestosql.plugin.hive.HiveSessionProperties) HashMap(java.util.HashMap) INTEGER(io.prestosql.spi.type.IntegerType.INTEGER) OptionalInt(java.util.OptionalInt) TaskSource(com.huawei.boostkit.omnidata.model.TaskSource) Inject(javax.inject.Inject) HIVE_MISSING_DATA(io.prestosql.plugin.hive.HiveErrorCode.HIVE_MISSING_DATA) ImmutableList(com.google.common.collect.ImmutableList) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) Objects.requireNonNull(java.util.Objects.requireNonNull) PositionedReadable(org.apache.hadoop.fs.PositionedReadable) HiveSessionProperties.isOrcRowIndexCacheEnabled(io.prestosql.plugin.hive.HiveSessionProperties.isOrcRowIndexCacheEnabled) HiveUtil(io.prestosql.plugin.hive.HiveUtil) TupleDomain(io.prestosql.spi.predicate.TupleDomain) Page(io.prestosql.spi.Page) Maps(com.google.common.collect.Maps) OrcCacheStore(io.prestosql.orc.OrcCacheStore) OpenLooKengDeserializer(com.huawei.boostkit.omnidata.decode.impl.OpenLooKengDeserializer) OrcDataSourceId(io.prestosql.orc.OrcDataSourceId) OrcReader.handleCacheLoadException(io.prestosql.orc.OrcReader.handleCacheLoadException) InputStream(java.io.InputStream) TupleDomainOrcPredicateBuilder(io.prestosql.orc.TupleDomainOrcPredicate.TupleDomainOrcPredicateBuilder) FileNotFoundException(java.io.FileNotFoundException) ArrayList(java.util.ArrayList) OrcFileTail(io.prestosql.orc.OrcFileTail) FileSystem(org.apache.hadoop.fs.FileSystem) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) OrcColumn(io.prestosql.orc.OrcColumn) OrcReader(io.prestosql.orc.OrcReader) 
FSDataInputStream(org.apache.hadoop.fs.FSDataInputStream) OrcFileTailCacheKey(io.prestosql.orc.OrcFileTailCacheKey) Domain(io.prestosql.spi.predicate.Domain) TupleDomain(io.prestosql.spi.predicate.TupleDomain) HiveType(io.prestosql.plugin.hive.HiveType) Map(java.util.Map) ImmutableMap.toImmutableMap(com.google.common.collect.ImmutableMap.toImmutableMap) Collectors.toMap(java.util.stream.Collectors.toMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) ColumnAdaptation(io.prestosql.plugin.hive.orc.OrcPageSource.ColumnAdaptation) ImmutableList(com.google.common.collect.ImmutableList) RowType(io.prestosql.spi.type.RowType) PrestoException(io.prestosql.spi.PrestoException) StructTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) ExecutionException(java.util.concurrent.ExecutionException) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) OrcDataSource(io.prestosql.orc.OrcDataSource) OrcDataSourceId(io.prestosql.orc.OrcDataSourceId) OrcDataSourceIdWithTimeStamp(io.prestosql.orc.OrcDataSourceIdWithTimeStamp) IOException(java.io.IOException) OrcRecordReader(io.prestosql.orc.OrcRecordReader) AggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext) AggregatedMemoryContext.newSimpleAggregatedMemoryContext(io.prestosql.memory.context.AggregatedMemoryContext.newSimpleAggregatedMemoryContext) BlockMissingException(org.apache.hadoop.hdfs.BlockMissingException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) OrcPageSource.handleException(io.prestosql.plugin.hive.orc.OrcPageSource.handleException) PrestoException(io.prestosql.spi.PrestoException) FileNotFoundException(java.io.FileNotFoundException) UncheckedExecutionException(com.google.common.util.concurrent.UncheckedExecutionException) 
OrcReader.handleCacheLoadException(io.prestosql.orc.OrcReader.handleCacheLoadException) RowType(io.prestosql.spi.type.RowType) Type(io.prestosql.spi.type.Type) HiveType(io.prestosql.plugin.hive.HiveType) TupleDomainOrcPredicateBuilder(io.prestosql.orc.TupleDomainOrcPredicate.TupleDomainOrcPredicateBuilder)

Aggregations

BIGINT (io.prestosql.spi.type.BigintType.BIGINT): 62; ImmutableList (com.google.common.collect.ImmutableList): 54; Optional (java.util.Optional): 50; Test (org.testng.annotations.Test): 49; ImmutableMap (com.google.common.collect.ImmutableMap): 45; Symbol (io.prestosql.spi.plan.Symbol): 36; List (java.util.List): 31; Type (io.prestosql.spi.type.Type): 24; PlanNodeId (io.prestosql.spi.plan.PlanNodeId): 21; PlanMatchPattern.values (io.prestosql.sql.planner.assertions.PlanMatchPattern.values): 21; ArrayList (java.util.ArrayList): 20; Assignments (io.prestosql.spi.plan.Assignments): 19; VARCHAR (io.prestosql.spi.type.VarcharType.VARCHAR): 19; Assert.assertTrue (org.testng.Assert.assertTrue): 19; Assert.assertEquals (org.testng.Assert.assertEquals): 18; Metadata (io.prestosql.metadata.Metadata): 16; DOUBLE (io.prestosql.spi.type.DoubleType.DOUBLE): 16; BeforeClass (org.testng.annotations.BeforeClass): 16; Assert.assertFalse (org.testng.Assert.assertFalse): 15; MetadataManager.createTestMetadataManager (io.prestosql.metadata.MetadataManager.createTestMetadataManager): 14