Search in sources :

Example 36 with Subfield

use of com.facebook.presto.common.Subfield in project presto by prestodb.

the class ParquetDereferencePushDown method createNestedColumn.

/**
 * Converts a chain of DEREFERENCE special forms into a {@link Subfield} rooted at a base column.
 *
 * <p>The expression is walked from the outermost dereference inwards. At each step the index
 * argument is run through {@code expressionOptimizer} and must come back as a numeric constant;
 * the field name at that index in the base row type is recorded as a nested path element. The
 * walk ends at a {@link VariableReferenceExpression}, whose name is looked up in
 * {@code baseColumnHandles} to obtain the column name of the resulting subfield.
 *
 * @param baseColumnHandles column handles keyed by variable name
 * @param rowExpression the expression to convert; must be a DEREFERENCE special form
 * @param expressionOptimizer optimizer used to evaluate each dereference index
 * @param session session passed through to the optimizer
 * @return the subfield whose path lists the dereferenced field names from outermost row to leaf
 * @throws IllegalArgumentException if {@code rowExpression} is not a DEREFERENCE, or if any step
 *         of the chain is not a dereference with a numeric constant index naming a named field
 *         (a missing column handle is rejected through {@code checkArgument})
 */
private Subfield createNestedColumn(Map<String, ColumnHandle> baseColumnHandles, RowExpression rowExpression, ExpressionOptimizer expressionOptimizer, ConnectorSession session) {
    boolean startsWithDereference = rowExpression instanceof SpecialFormExpression
            && ((SpecialFormExpression) rowExpression).getForm() == DEREFERENCE;
    if (!startsWithDereference) {
        throw new IllegalArgumentException("expecting SpecialFormExpression(DEREFERENCE), but got: " + rowExpression);
    }

    // Field names are collected leaf-first while walking inwards and reversed at the end.
    List<Subfield.PathElement> path = new ArrayList<>();
    RowExpression cursor = rowExpression;
    while (!(cursor instanceof VariableReferenceExpression)) {
        // Stays null unless this step is a dereference with a resolvable constant index.
        RowExpression parent = null;
        if (cursor instanceof SpecialFormExpression && ((SpecialFormExpression) cursor).getForm() == DEREFERENCE) {
            SpecialFormExpression dereference = (SpecialFormExpression) cursor;
            RowExpression base = dereference.getArguments().get(0);
            RowType rowType = (RowType) base.getType();
            RowExpression optimizedIndex = expressionOptimizer.optimize(dereference.getArguments().get(1), ExpressionOptimizer.Level.OPTIMIZED, session);
            Object indexValue = null;
            if (optimizedIndex instanceof ConstantExpression) {
                indexValue = ((ConstantExpression) optimizedIndex).getValue();
            }
            if (indexValue instanceof Number) {
                Optional<String> fieldName = rowType.getFields().get(((Number) indexValue).intValue()).getName();
                if (fieldName.isPresent()) {
                    path.add(new Subfield.NestedField(fieldName.get()));
                    parent = base;
                }
            }
        }
        if (parent == null) {
            throw new IllegalArgumentException("expecting SpecialFormExpression(DEREFERENCE) with constants for indices, but got: " + cursor);
        }
        cursor = parent;
    }

    // Reached the base variable: put the path in root-to-leaf order and resolve the column.
    Collections.reverse(path);
    String name = ((VariableReferenceExpression) cursor).getName();
    ColumnHandle handle = baseColumnHandles.get(name);
    checkArgument(handle != null, "Missing Column handle: " + name);
    return new Subfield(getColumnName(handle), unmodifiableList(path));
}
Also used : ColumnHandle(com.facebook.presto.spi.ColumnHandle) ConstantExpression(com.facebook.presto.spi.relation.ConstantExpression) ArrayList(java.util.ArrayList) RowExpression(com.facebook.presto.spi.relation.RowExpression) RowType(com.facebook.presto.common.type.RowType) VariableReferenceExpression(com.facebook.presto.spi.relation.VariableReferenceExpression) SpecialFormExpression(com.facebook.presto.spi.relation.SpecialFormExpression) ParquetTypeUtils.pushdownColumnNameForSubfield(com.facebook.presto.parquet.ParquetTypeUtils.pushdownColumnNameForSubfield) Subfield(com.facebook.presto.common.Subfield)

Example 37 with Subfield

use of com.facebook.presto.common.Subfield in project presto by prestodb.

the class TestHiveSplitManager method assertRedundantColumnDomains.

/**
 * Builds a single-partition Hive setup, requests splits for a predicate on {@code columnHandle},
 * and asserts the redundant column domains reported by the returned splits.
 *
 * @param predicateRange the range used as the query predicate on {@code columnHandle}
 * @param partitionStatistics statistics attached to the single test partition
 * @param expectedRedundantColumnDomains expected redundant column domains, one entry per split
 * @param columnHandle the column the predicate applies to
 * @throws Exception if obtaining the split batch fails
 */
private void assertRedundantColumnDomains(Range predicateRange, PartitionStatistics partitionStatistics, List<Set<ColumnHandle>> expectedRedundantColumnDomains, HiveColumnHandle columnHandle) throws Exception {
    // Query predicate: a single-range, non-nullable domain on the given column.
    Domain predicateDomain = Domain.create(SortedRangeSet.copyOf(predicateRange.getType(), ImmutableList.of(predicateRange)), false);
    TupleDomain<ColumnHandle> queryTupleDomain = TupleDomain.fromColumnDomains(
            Optional.of(ImmutableList.of(new ColumnDomain<>(columnHandle, predicateDomain))));

    // The one partition of the test table, carrying the supplied statistics.
    Storage storage = new Storage(fromHiveStorageFormat(ORC), "location", Optional.empty(), true, ImmutableMap.of(), ImmutableMap.of());
    Partition partition = new Partition(
            "test_db",
            "test_table",
            ImmutableList.of(PARTITION_VALUE),
            storage,
            COLUMNS,
            ImmutableMap.of(),
            Optional.empty(),
            false,
            true,
            0);
    PartitionWithStatistics partitionWithStatistics = new PartitionWithStatistics(partition, PARTITION_NAME, partitionStatistics);

    // Connector plumbing, with the partition-statistics based optimization switched on.
    HiveClientConfig hiveClientConfig = new HiveClientConfig().setPartitionStatisticsBasedOptimizationEnabled(true);
    HdfsConfigurationInitializer hdfsInitializer = new HdfsConfigurationInitializer(hiveClientConfig, new MetastoreClientConfig());
    HiveHdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(hdfsInitializer, ImmutableSet.of());
    HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, new MetastoreClientConfig(), new NoHdfsAuthentication());
    HiveMetadataFactory metadataFactory = new HiveMetadataFactory(
            new TestingExtendedHiveMetastore(TEST_TABLE, partitionWithStatistics),
            hdfsEnvironment,
            new HivePartitionManager(FUNCTION_AND_TYPE_MANAGER, hiveClientConfig),
            DateTimeZone.forOffsetHours(1),
            true,
            false,
            false,
            false,
            true,
            true,
            hiveClientConfig.getMaxPartitionBatchSize(),
            hiveClientConfig.getMaxPartitionsPerScan(),
            false,
            FUNCTION_AND_TYPE_MANAGER,
            new HiveLocationService(hdfsEnvironment),
            FUNCTION_RESOLUTION,
            ROW_EXPRESSION_SERVICE,
            FILTER_STATS_CALCULATOR_SERVICE,
            new TableParameterCodec(),
            HiveTestUtils.PARTITION_UPDATE_CODEC,
            HiveTestUtils.PARTITION_UPDATE_SMILE_CODEC,
            executor,
            new HiveTypeTranslator(),
            new HiveStagingFileCommitter(hdfsEnvironment, executor),
            new HiveZeroRowFileCreator(hdfsEnvironment, new OutputStreamDataSinkFactory(), executor),
            TEST_SERVER_VERSION,
            new HivePartitionObjectBuilder(),
            new HiveEncryptionInformationProvider(ImmutableList.of()),
            new HivePartitionStats(),
            new HiveFileRenamer(),
            HiveColumnConverterProvider.DEFAULT_COLUMN_CONVERTER_PROVIDER);
    HiveSplitManager splitManager = new HiveSplitManager(
            new TestingHiveTransactionManager(metadataFactory),
            new NamenodeStats(),
            hdfsEnvironment,
            new TestingDirectoryLister(),
            directExecutor(),
            new HiveCoercionPolicy(FUNCTION_AND_TYPE_MANAGER),
            new CounterStat(),
            100,
            hiveClientConfig.getMaxOutstandingSplitsSize(),
            hiveClientConfig.getMinPartitionBatchSize(),
            hiveClientConfig.getMaxPartitionBatchSize(),
            hiveClientConfig.getSplitLoaderConcurrency(),
            false,
            new ConfigBasedCacheQuotaRequirementProvider(new CacheConfig()),
            new HiveEncryptionInformationProvider(ImmutableList.of()));

    // Table layout: one "ds" partition column and the single partition value.
    HiveColumnHandle partitionColumn = new HiveColumnHandle("ds", HIVE_STRING, parseTypeSignature(VARCHAR), MAX_PARTITION_KEY_COLUMN_INDEX, PARTITION_KEY, Optional.empty(), Optional.empty());
    HivePartition hivePartition = new HivePartition(
            new SchemaTableName("test_schema", "test_table"),
            PARTITION_NAME,
            ImmutableMap.of(partitionColumn, NullableValue.of(createUnboundedVarcharType(), utf8Slice(PARTITION_VALUE))));
    List<HivePartition> partitions = ImmutableList.of(hivePartition);

    // Re-key the query predicate by top-level subfield (column name with an empty path).
    TupleDomain<HiveColumnHandle> hiveColumnDomain = queryTupleDomain.transform(HiveColumnHandle.class::cast);
    TupleDomain<Subfield> domainPredicate = hiveColumnDomain.transform(column -> new Subfield(column.getName(), ImmutableList.of()));

    HiveTransactionHandle transactionHandle = new HiveTransactionHandle();
    HiveSessionProperties sessionProperties = new HiveSessionProperties(hiveClientConfig, new OrcFileWriterConfig(), new ParquetFileWriterConfig(), new CacheConfig());
    TestingConnectorSession session = new TestingConnectorSession(sessionProperties.getSessionProperties());
    HiveTableLayoutHandle layoutHandle = new HiveTableLayoutHandle(
            new SchemaTableName("test_schema", "test_table"),
            "test_path",
            ImmutableList.of(partitionColumn),
            COLUMNS,
            ImmutableMap.of(),
            partitions,
            domainPredicate,
            TRUE_CONSTANT,
            ImmutableMap.of(partitionColumn.getName(), partitionColumn, columnHandle.getName(), columnHandle),
            queryTupleDomain,
            Optional.empty(),
            Optional.empty(),
            false,
            "layout",
            Optional.empty(),
            false);
    ConnectorSplitSource splitSource = splitManager.getSplits(transactionHandle, session, layoutHandle, SPLIT_SCHEDULING_CONTEXT);

    // Fetch one batch of up to 100 splits and compare each split's redundant column domains.
    List<Set<ColumnHandle>> actualRedundantColumnDomains = splitSource.getNextBatch(NOT_PARTITIONED, 100)
            .get()
            .getSplits()
            .stream()
            .map(HiveSplit.class::cast)
            .map(HiveSplit::getRedundantColumnDomains)
            .collect(toImmutableList());
    assertEquals(actualRedundantColumnDomains, expectedRedundantColumnDomains);
}
Also used : CounterStat(com.facebook.airlift.stats.CounterStat) Subfield(com.facebook.presto.common.Subfield) ColumnHandle(com.facebook.presto.spi.ColumnHandle) TestingConnectorSession(com.facebook.presto.testing.TestingConnectorSession) Storage(com.facebook.presto.hive.metastore.Storage) OutputStreamDataSinkFactory(com.facebook.presto.hive.datasink.OutputStreamDataSinkFactory) Set(java.util.Set) SortedRangeSet(com.facebook.presto.common.predicate.SortedRangeSet) ImmutableSet(com.google.common.collect.ImmutableSet) NoHdfsAuthentication(com.facebook.presto.hive.authentication.NoHdfsAuthentication) CacheConfig(com.facebook.presto.cache.CacheConfig) Partition(com.facebook.presto.hive.metastore.Partition) ConnectorSplitSource(com.facebook.presto.spi.ConnectorSplitSource) SchemaTableName(com.facebook.presto.spi.SchemaTableName) PartitionWithStatistics(com.facebook.presto.hive.metastore.PartitionWithStatistics)

Example 38 with Subfield

use of com.facebook.presto.common.Subfield in project presto by prestodb.

the class TestSubfieldExtractor method assertToRowExpression.

/**
 * Asserts that a subfield survives a round trip through {@code subfieldExtractor}: it is turned
 * into a row expression and extracted back, and the result must be present and equal to the
 * subfield built from {@code subfieldPath}.
 *
 * @param subfieldPath textual subfield path passed to the {@link Subfield} constructor
 * @param type type handed to {@code toRowExpression} together with the subfield
 */
private void assertToRowExpression(String subfieldPath, Type type) {
    Subfield expected = new Subfield(subfieldPath);
    Optional<Subfield> roundTripped = subfieldExtractor.extract(
            subfieldExtractor.toRowExpression(expected, type));
    // Check presence first so a failed extraction is reported as such, not as a get() failure.
    assertTrue(roundTripped.isPresent());
    assertEquals(roundTripped.get(), expected);
}
Also used : Subfield(com.facebook.presto.common.Subfield)

Example 39 with Subfield

use of com.facebook.presto.common.Subfield in project presto by prestodb.

the class AbstractOrcRecordReader method getIncludedOrcColumns.

/**
 * Collects the ORC column indexes to include for the requested columns.
 *
 * <p>Each entry of {@code includedColumns} is mapped through the root type (the first entry of
 * {@code types}) to a field type index, which is passed to {@code includeOrcColumnsRecursive}
 * along with that column's required subfields.
 *
 * @param types ORC types; the first entry is used as the root
 * @param includedColumns columns to include, as accepted by the root's {@code getFieldTypeIndex}
 * @param requiredSubfields subfields per included column; a column with no entry (or a null
 *        entry) is treated as having an empty subfield list
 * @return the included column indexes, in insertion order
 */
private static Set<Integer> getIncludedOrcColumns(List<OrcType> types, Set<Integer> includedColumns, Map<Integer, List<Subfield>> requiredSubfields) {
    OrcType root = types.get(0);
    Set<Integer> result = new LinkedHashSet<>();
    for (int column : includedColumns) {
        List<Subfield> columnSubfields = requiredSubfields.get(column);
        if (columnSubfields == null) {
            columnSubfields = ImmutableList.of();
        }
        int typeIndex = root.getFieldTypeIndex(column);
        includeOrcColumnsRecursive(types, result, typeIndex, columnSubfields);
    }
    return result;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) OrcType(com.facebook.presto.orc.metadata.OrcType) Subfield(com.facebook.presto.common.Subfield)

Example 40 with Subfield

use of com.facebook.presto.common.Subfield in project presto by prestodb.

the class AbstractOrcRecordReader method getRequiredFields.

/**
 * Groups required subfields by the lower-cased name of their first path element.
 *
 * <p>Every subfield adds a key for its first path element, which must be a
 * {@link Subfield.NestedField}. When the path has further elements, the remainder is stored under
 * that key as a new {@link Subfield} whose root name is the literal {@code "c"}. A subfield with
 * a single-element path therefore yields a key with no additional entry.
 *
 * @param requiredSubfields subfields to group; each must have a non-empty path
 * @return empty if {@code requiredSubfields} is empty, otherwise an immutable map from
 *         lower-cased field name to the remaining sub-paths requested under that field
 */
private static Optional<Map<String, List<Subfield>>> getRequiredFields(List<Subfield> requiredSubfields) {
    if (requiredSubfields.isEmpty()) {
        return Optional.empty();
    }

    Map<String, List<Subfield>> nestedByField = new HashMap<>();
    for (Subfield required : requiredSubfields) {
        List<Subfield.PathElement> elements = required.getPath();
        Subfield.NestedField head = (Subfield.NestedField) elements.get(0);
        String key = head.getName().toLowerCase(Locale.ENGLISH);

        // Register the key even when nothing below it is requested.
        List<Subfield> nested = nestedByField.computeIfAbsent(key, ignored -> new ArrayList<>());
        int depth = elements.size();
        if (depth > 1) {
            nested.add(new Subfield("c", elements.subList(1, depth)));
        }
    }
    return Optional.of(ImmutableMap.copyOf(nestedByField));
}
Also used : HashMap(java.util.HashMap) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Subfield(com.facebook.presto.common.Subfield)

Aggregations

Subfield (com.facebook.presto.common.Subfield)54 ImmutableMap (com.google.common.collect.ImmutableMap)27 Map (java.util.Map)27 ImmutableList (com.google.common.collect.ImmutableList)25 List (java.util.List)24 TupleDomainFilter (com.facebook.presto.common.predicate.TupleDomainFilter)22 Type (com.facebook.presto.common.type.Type)21 ArrayList (java.util.ArrayList)21 Optional (java.util.Optional)20 Test (org.testng.annotations.Test)19 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)18 ImmutableMap.toImmutableMap (com.google.common.collect.ImmutableMap.toImmutableMap)18 Collectors.toList (java.util.stream.Collectors.toList)12 TupleDomain (com.facebook.presto.common.predicate.TupleDomain)11 ColumnHandle (com.facebook.presto.spi.ColumnHandle)11 String.format (java.lang.String.format)11 Set (java.util.Set)11 Domain (com.facebook.presto.common.predicate.Domain)10 CharType (com.facebook.presto.common.type.CharType)10 DecimalType (com.facebook.presto.common.type.DecimalType)10