Search in sources :

Example 1 with PartialAndFinalAggregationType

Use of io.prestosql.spi.PartialAndFinalAggregationType in the hetu-core project by openLooKeng.

From the class HiveMetadata, the method validateAndGetSortAggregationType.

/**
 * Decides which sort-based aggregation strategy, if any, can be used for a group-by on this table.
 *
 * <p>The decision is based on the table's {@code PARTITIONED_BY}, {@code SORTED_BY},
 * {@code BUCKETED_BY} and {@code BUCKET_COUNT} properties:
 * <ul>
 *   <li>the leading group keys must equal the partition columns, in order;</li>
 *   <li>if the group keys are exactly the partition columns, the result is flagged as
 *       "partial as sort, final as hash" aggregation;</li>
 *   <li>otherwise the group keys that follow the partition columns must equal the leading sorted
 *       columns, in order, and (unless the table has no buckets, or a single bucket whose bucket
 *       columns match) must equal the bucket columns as well; in that case the result is flagged
 *       as sort aggregation.</li>
 * </ul>
 *
 * @param session the connector session used to load the table metadata
 * @param tableHandle handle of the table; cast to {@code HiveTableHandle}
 * @param groupKeyNames group-by column names, expected to list partition columns first
 * @return a result with one of its flags set when the corresponding strategy is applicable;
 *         a freshly constructed (unflagged) result otherwise
 */
@Override
public PartialAndFinalAggregationType validateAndGetSortAggregationType(ConnectorSession session, ConnectorTableHandle tableHandle, List<String> groupKeyNames) {
    // Returned untouched on every rejection path; flags are only set on success.
    PartialAndFinalAggregationType partialAndFinalAggregationType = new PartialAndFinalAggregationType();
    ConnectorTableMetadata connectorTableMetadata = getTableMetadata(session, ((HiveTableHandle) tableHandle).getSchemaTableName());

    List<SortingColumn> sortingColumn = (List<SortingColumn>) connectorTableMetadata.getProperties().get(HiveTableProperties.SORTED_BY_PROPERTY);
    boolean isSortingColumnsNotPresent = (sortingColumn == null) || sortingColumn.isEmpty();

    List<String> partitionedBy = new ArrayList<>();
    List<String> partitionedByTemp = (List<String>) connectorTableMetadata.getProperties().get(HiveTableProperties.PARTITIONED_BY_PROPERTY);
    if ((partitionedByTemp != null) && !partitionedByTemp.isEmpty()) {
        partitionedBy.addAll(partitionedByTemp);
        // Without sorted columns only an exact "group by all partition columns" can qualify.
        if (isSortingColumnsNotPresent && (partitionedByTemp.size() != groupKeyNames.size())) {
            return partialAndFinalAggregationType;
        }
    } else if (isSortingColumnsNotPresent) {
        // Neither partitioned nor sorted: nothing to exploit.
        return partialAndFinalAggregationType;
    }

    int bucketCount = 0;
    List<String> bucketedColumns = new ArrayList<>();
    if (!isSortingColumnsNotPresent) {
        // Null-check the property values themselves before using them; a missing bucket
        // property is treated as "not bucketed" (bucketCount stays 0).
        List<String> bucketedByTemp = (List<String>) connectorTableMetadata.getProperties().get(HiveTableProperties.BUCKETED_BY_PROPERTY);
        if (bucketedByTemp != null) {
            bucketedColumns.addAll(bucketedByTemp);
            Object bucketCountProperty = connectorTableMetadata.getProperties().get(HiveTableProperties.BUCKET_COUNT_PROPERTY);
            if (bucketCountProperty != null) {
                bucketCount = (int) bucketCountProperty;
            }
        }
    }

    List<String> sortedColumnNames = new ArrayList<>();
    if (!isSortingColumnsNotPresent) {
        for (SortingColumn column : sortingColumn) {
            sortedColumnNames.add(column.getColumnName());
        }
    }

    int partSize = partitionedBy.size();

    // Group keys must cover every partition column and must not need more columns than
    // partition columns plus sorted columns can provide.
    if ((partSize + sortedColumnNames.size() < groupKeyNames.size()) || (partSize > groupKeyNames.size())) {
        log.debug("number of partition columns " + partSize + " and sorted columns " + sortedColumnNames.size() + " do not cover group column size " + groupKeyNames.size());
        return partialAndFinalAggregationType;
    }

    // Bucket columns need no per-key comparison later when:
    //  - the table has no buckets (bucketCount == 0), or
    //  - there is one bucket and its bucket column(s) match the group keys that follow the
    //    partition columns.
    // The first non-partition group key sits at index partSize; guard the access because the
    // group keys may consist of partition columns only.
    boolean singleOrZeroBucketedColumn = (bucketCount == 0)
            || ((bucketCount == 1)
            && (bucketedColumns.size() == 1)
            && (groupKeyNames.size() > partSize)
            && groupKeyNames.get(partSize).equals(bucketedColumns.get(0)));
    if ((bucketCount == 1) && (bucketedColumns.size() > 1)) {
        int minSize = Math.min(groupKeyNames.size() - partSize, bucketedColumns.size());
        for (int keyIdx = 0; keyIdx < minSize; keyIdx++) {
            if (!groupKeyNames.get(keyIdx + partSize).equals(bucketedColumns.get(keyIdx))) {
                return partialAndFinalAggregationType;
            }
        }
        singleOrZeroBucketedColumn = true;
    }

    // The leading group keys must be exactly the partition columns, in order.
    for (int keyIdx = 0; keyIdx < partSize; keyIdx++) {
        if (!groupKeyNames.get(keyIdx).equals(partitionedBy.get(keyIdx))) {
            return partialAndFinalAggregationType;
        }
    }

    if (groupKeyNames.size() == partSize) {
        partialAndFinalAggregationType.setPartialAsSortAndFinalAsHashAggregation(true);
        return partialAndFinalAggregationType;
    }

    if (singleOrZeroBucketedColumn || (groupKeyNames.size() == (bucketedColumns.size() + partSize))) {
        int remainingKeys = groupKeyNames.size() - partSize;
        for (int offset = 0; offset < remainingKeys; offset++) {
            // Both comparisons look at the group key *after* the partition prefix, consistent
            // with the single-bucket loop above.
            String groupKey = groupKeyNames.get(partSize + offset);
            boolean sortedMismatch = !groupKey.equals(sortedColumnNames.get(offset));
            boolean bucketMismatch = !singleOrZeroBucketedColumn && !groupKey.equals(bucketedColumns.get(offset));
            if (sortedMismatch || bucketMismatch) {
                if (log.isDebugEnabled()) {
                    // Join the names explicitly: concatenating an array would only print its identity.
                    String message = "Not matching sortedColumnNames: " + String.join(" , ", sortedColumnNames)
                            + " group columns name: " + String.join(" , ", groupKeyNames);
                    if (!bucketedColumns.isEmpty()) {
                        message = message + " bucketedColumns :" + String.join(" , ", bucketedColumns);
                    }
                    log.debug(message);
                }
                return partialAndFinalAggregationType;
            }
        }
        partialAndFinalAggregationType.setSortAggregation(true);
        return partialAndFinalAggregationType;
    }
    return partialAndFinalAggregationType;
}
Also used : PartialAndFinalAggregationType(io.prestosql.spi.PartialAndFinalAggregationType) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) ArrayList(java.util.ArrayList) ImmutableList.toImmutableList(com.google.common.collect.ImmutableList.toImmutableList) ArrayList(java.util.ArrayList) Collections.emptyList(java.util.Collections.emptyList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Collectors.toList(java.util.stream.Collectors.toList) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) Constraint(io.prestosql.spi.connector.Constraint)

Aggregations

ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableList.toImmutableList (com.google.common.collect.ImmutableList.toImmutableList)1 SortingColumn (io.prestosql.plugin.hive.metastore.SortingColumn)1 PartialAndFinalAggregationType (io.prestosql.spi.PartialAndFinalAggregationType)1 ConnectorTableMetadata (io.prestosql.spi.connector.ConnectorTableMetadata)1 Constraint (io.prestosql.spi.connector.Constraint)1 ArrayList (java.util.ArrayList)1 Collections.emptyList (java.util.Collections.emptyList)1 List (java.util.List)1 Collectors.toList (java.util.stream.Collectors.toList)1