use of io.prestosql.spi.PartialAndFinalAggregationType in project hetu-core by openlookeng.
The method validateAndGetSortAggregationType of the class HiveMetadata.
/**
 * Checks whether the grouping keys of an aggregation line up with the table's
 * partitioned-by, bucketed-by and sorted-by properties, and reports the result
 * through the flags of a {@link PartialAndFinalAggregationType}.
 *
 * <p>The returned object is left in its default state whenever a check fails.
 * Otherwise:
 * <ul>
 * <li>if the group keys are exactly the partition columns (same count, same order),
 * {@code setPartialAsSortAndFinalAsHashAggregation(true)} is applied;</li>
 * <li>if the group keys are the partition columns followed by a prefix of the sorted-by
 * columns, and the bucketing checks pass, {@code setSortAggregation(true)} is applied.</li>
 * </ul>
 *
 * @param session connector session used to load the table metadata
 * @param tableHandle handle of the table; cast to {@code HiveTableHandle}
 * @param groupKeyNames grouping column names, in the order they are compared against the
 *        partition columns first and the sorted-by columns afterwards
 * @return the aggregation type flags described above; never {@code null}
 */
@Override
public PartialAndFinalAggregationType validateAndGetSortAggregationType(ConnectorSession session, ConnectorTableHandle tableHandle, List<String> groupKeyNames) {
    PartialAndFinalAggregationType partialAndFinalAggregationType = new PartialAndFinalAggregationType();
    ConnectorTableMetadata connectorTableMetadata = getTableMetadata(session, ((HiveTableHandle) tableHandle).getSchemaTableName());

    List<SortingColumn> sortingColumn = (List<SortingColumn>) connectorTableMetadata.getProperties().get(HiveTableProperties.SORTED_BY_PROPERTY);
    boolean isSortingColumnsNotPresent = (sortingColumn == null) || sortingColumn.isEmpty();

    List<String> partitionedBy = new ArrayList<>();
    List<String> partitionedByTemp = (List<String>) connectorTableMetadata.getProperties().get(HiveTableProperties.PARTITIONED_BY_PROPERTY);
    if ((partitionedByTemp != null) && !partitionedByTemp.isEmpty()) {
        partitionedBy.addAll(partitionedByTemp);
        // Without sorted-by columns only an exact match on the partition columns can qualify.
        if (isSortingColumnsNotPresent && (partitionedByTemp.size() != groupKeyNames.size())) {
            return partialAndFinalAggregationType;
        }
    }
    else if (isSortingColumnsNotPresent) {
        // Neither partitioned nor sorted: nothing to exploit.
        return partialAndFinalAggregationType;
    }

    int bucketCount = 0;
    List<String> bucketedColumns = new ArrayList<>();
    if (!isSortingColumnsNotPresent) {
        // Guard the property values themselves: a missing bucketed-by or bucket-count entry
        // is treated as "not bucketed" (empty list, bucket count 0) instead of throwing.
        List<String> bucketedByTemp = (List<String>) connectorTableMetadata.getProperties().get(HiveTableProperties.BUCKETED_BY_PROPERTY);
        if (bucketedByTemp != null) {
            bucketedColumns.addAll(bucketedByTemp);
            Object bucketCountProperty = connectorTableMetadata.getProperties().get(HiveTableProperties.BUCKET_COUNT_PROPERTY);
            if (bucketCountProperty != null) {
                bucketCount = (int) bucketCountProperty;
            }
        }
    }

    List<String> sortedColumnNames = new ArrayList<>();
    if (!isSortingColumnsNotPresent) {
        sortedColumnNames.addAll(sortingColumn.stream().map(SortingColumn::getColumnName).collect(Collectors.toList()));
    }

    // The group keys must cover every partition column and may extend only into the sorted-by columns.
    if ((partitionedBy.size() + sortedColumnNames.size() < groupKeyNames.size()) || (partitionedBy.size() > groupKeyNames.size())) {
        log.debug("number of partition columns " + partitionedBy.size() + " and sorted columns " + sortedColumnNames.size()
                + " do not fit group key size " + groupKeyNames.size());
        return partialAndFinalAggregationType;
    }

    // Bucketing is acceptable when:
    //  - the bucket count is 0 (no buckets to compare), or
    //  - the bucket count is 1 and the bucketed-by columns match the group keys, or
    //  - (checked further below) the bucketed-by columns equal the non-partition group keys.
    // NOTE(review): for a partitioned table this index points at the LAST partition column
    // (size - 1), not at the first group key after the partition columns; the multi-column
    // branch below offsets by partitionedBy.size() instead. Kept as in the original - confirm intent.
    int partitionedByCount = partitionedBy.isEmpty() ? 0 : partitionedBy.size() - 1;
    boolean singleOrZeroBucketedColumn = (bucketCount == 0)
            || ((bucketCount == 1) && (bucketedColumns.size() == 1) && groupKeyNames.get(partitionedByCount).equals(bucketedColumns.get(0)));

    if ((bucketCount == 1) && (bucketedColumns.size() > 1)) {
        // Single bucket with several bucketed-by columns: the group keys that follow the
        // partition columns must match the bucketed-by columns position by position.
        int partSize = partitionedBy.size();
        int minSize = Math.min(groupKeyNames.size() - partSize, bucketedColumns.size());
        for (int keyIdx = 0; keyIdx < minSize; keyIdx++) {
            if (!groupKeyNames.get(keyIdx + partSize).equals(bucketedColumns.get(keyIdx))) {
                return partialAndFinalAggregationType;
            }
        }
        singleOrZeroBucketedColumn = true;
    }

    // The leading group keys must be the partition columns, in the same order.
    for (int numOfComparedKeys = 0; numOfComparedKeys < partitionedBy.size(); numOfComparedKeys++) {
        if (!groupKeyNames.get(numOfComparedKeys).equals(partitionedBy.get(numOfComparedKeys))) {
            return partialAndFinalAggregationType;
        }
    }

    if (groupKeyNames.size() == partitionedBy.size()) {
        // Grouping on exactly the partition columns.
        partialAndFinalAggregationType.setPartialAsSortAndFinalAsHashAggregation(true);
        return partialAndFinalAggregationType;
    }

    if (singleOrZeroBucketedColumn || (groupKeyNames.size() == (bucketedColumns.size() + partitionedBy.size()))) {
        int numOfCmpKeysAfterPartitionedBy = partitionedBy.size();
        for (int numOfComparedKeys = 0; numOfComparedKeys < groupKeyNames.size() - partitionedBy.size(); numOfComparedKeys++, numOfCmpKeysAfterPartitionedBy++) {
            // NOTE(review): the bucketed-by comparison indexes groupKeyNames WITHOUT the partition
            // offset, unlike the sorted-by comparison on the next line and the single-bucket loop
            // above. Kept as in the original so planning behaviour is unchanged - confirm intent.
            boolean bucketedColumnsResult = !singleOrZeroBucketedColumn
                    && !groupKeyNames.get(numOfComparedKeys).equals(bucketedColumns.get(numOfComparedKeys));
            boolean sortedColumnMismatch = !groupKeyNames.get(numOfCmpKeysAfterPartitionedBy).equals(sortedColumnNames.get(numOfComparedKeys));
            if (sortedColumnMismatch || bucketedColumnsResult) {
                if (log.isDebugEnabled()) {
                    // Join the names into plain strings so the log shows column names rather
                    // than array identities, and emit exactly one message per mismatch.
                    String dbgSortedColumnNames = String.join(" , ", sortedColumnNames);
                    String dbgGroupKeyNames = String.join(" , ", groupKeyNames);
                    if (!bucketedColumns.isEmpty()) {
                        log.debug("Not matching sortedColumnNames: " + dbgSortedColumnNames + " group columns name: " + dbgGroupKeyNames
                                + " bucketedColumns :" + String.join(" , ", bucketedColumns));
                    }
                    else {
                        log.debug("Not matching sortedColumnNames: " + dbgSortedColumnNames + " group columns name: " + dbgGroupKeyNames);
                    }
                }
                return partialAndFinalAggregationType;
            }
        }
        partialAndFinalAggregationType.setSortAggregation(true);
        return partialAndFinalAggregationType;
    }
    return partialAndFinalAggregationType;
}
Aggregations