Search in sources :

Example 1 with LogicalTypeRoot

Use of org.apache.flink.table.types.logical.LogicalTypeRoot in the Apache Flink project: class HiveStatsUtil, method getColumnStatisticsData.

/**
 * Converts a Flink ColumnStats instance into a Hive ColumnStatisticsData instance according to
 * the Hive column type. Note we currently assume that, in Flink, the max and min of ColumnStats
 * will be the same type as the Flink column type. For example, for SHORT and LONG columns, the
 * max and min of their ColumnStats should be of type SHORT and LONG.
 *
 * @param colType the Flink data type of the column
 * @param colStat the Flink catalog column statistics to convert
 * @param hiveVersion the Hive version, used to load a {@link HiveShim} for date statistics
 * @return the equivalent Hive column statistics
 * @throws CatalogException if the column type / statistics combination is not supported
 */
private static ColumnStatisticsData getColumnStatisticsData(DataType colType, CatalogColumnStatisticsDataBase colStat, String hiveVersion) {
    LogicalTypeRoot type = colType.getLogicalType().getTypeRoot();
    switch (type) {
        case CHAR:
        case VARCHAR:
            if (colStat instanceof CatalogColumnStatisticsDataString) {
                CatalogColumnStatisticsDataString flinkStringStats = (CatalogColumnStatisticsDataString) colStat;
                StringColumnStatsData stringStats = new StringColumnStatsData();
                stringStats.clear();
                if (flinkStringStats.getMaxLength() != null) {
                    stringStats.setMaxColLen(flinkStringStats.getMaxLength());
                }
                if (flinkStringStats.getAvgLength() != null) {
                    stringStats.setAvgColLen(flinkStringStats.getAvgLength());
                }
                if (flinkStringStats.getNullCount() != null) {
                    stringStats.setNumNulls(flinkStringStats.getNullCount());
                }
                if (flinkStringStats.getNdv() != null) {
                    stringStats.setNumDVs(flinkStringStats.getNdv());
                }
                return ColumnStatisticsData.stringStats(stringStats);
            }
            break;
        case BOOLEAN:
            if (colStat instanceof CatalogColumnStatisticsDataBoolean) {
                CatalogColumnStatisticsDataBoolean flinkBoolStats = (CatalogColumnStatisticsDataBoolean) colStat;
                BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
                boolStats.clear();
                if (flinkBoolStats.getTrueCount() != null) {
                    boolStats.setNumTrues(flinkBoolStats.getTrueCount());
                }
                if (flinkBoolStats.getFalseCount() != null) {
                    boolStats.setNumFalses(flinkBoolStats.getFalseCount());
                }
                if (flinkBoolStats.getNullCount() != null) {
                    boolStats.setNumNulls(flinkBoolStats.getNullCount());
                }
                return ColumnStatisticsData.booleanStats(boolStats);
            }
            break;
        case TINYINT:
        case SMALLINT:
        case INTEGER:
        case BIGINT:
        case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
        case TIME_WITHOUT_TIME_ZONE:
        case TIMESTAMP_WITH_TIME_ZONE:
            // all integral and time-based types are mapped to Hive long statistics
            if (colStat instanceof CatalogColumnStatisticsDataLong) {
                CatalogColumnStatisticsDataLong flinkLongStats = (CatalogColumnStatisticsDataLong) colStat;
                LongColumnStatsData longStats = new LongColumnStatsData();
                longStats.clear();
                if (flinkLongStats.getMax() != null) {
                    longStats.setHighValue(flinkLongStats.getMax());
                }
                if (flinkLongStats.getMin() != null) {
                    longStats.setLowValue(flinkLongStats.getMin());
                }
                if (flinkLongStats.getNdv() != null) {
                    longStats.setNumDVs(flinkLongStats.getNdv());
                }
                if (flinkLongStats.getNullCount() != null) {
                    longStats.setNumNulls(flinkLongStats.getNullCount());
                }
                return ColumnStatisticsData.longStats(longStats);
            }
            break;
        case FLOAT:
        case DOUBLE:
            if (colStat instanceof CatalogColumnStatisticsDataDouble) {
                CatalogColumnStatisticsDataDouble flinkDoubleStats = (CatalogColumnStatisticsDataDouble) colStat;
                DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
                doubleStats.clear();
                if (flinkDoubleStats.getMax() != null) {
                    doubleStats.setHighValue(flinkDoubleStats.getMax());
                }
                if (flinkDoubleStats.getMin() != null) {
                    doubleStats.setLowValue(flinkDoubleStats.getMin());
                }
                if (flinkDoubleStats.getNullCount() != null) {
                    doubleStats.setNumNulls(flinkDoubleStats.getNullCount());
                }
                if (flinkDoubleStats.getNdv() != null) {
                    doubleStats.setNumDVs(flinkDoubleStats.getNdv());
                }
                return ColumnStatisticsData.doubleStats(doubleStats);
            }
            break;
        case DATE:
            // date statistics are version-dependent in Hive, so delegate to the shim
            if (colStat instanceof CatalogColumnStatisticsDataDate) {
                HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveVersion);
                return hiveShim.toHiveDateColStats((CatalogColumnStatisticsDataDate) colStat);
            }
            break;
        case VARBINARY:
        case BINARY:
            if (colStat instanceof CatalogColumnStatisticsDataBinary) {
                CatalogColumnStatisticsDataBinary flinkBinaryStats = (CatalogColumnStatisticsDataBinary) colStat;
                BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
                binaryStats.clear();
                if (flinkBinaryStats.getMaxLength() != null) {
                    binaryStats.setMaxColLen(flinkBinaryStats.getMaxLength());
                }
                if (flinkBinaryStats.getAvgLength() != null) {
                    binaryStats.setAvgColLen(flinkBinaryStats.getAvgLength());
                }
                if (flinkBinaryStats.getNullCount() != null) {
                    binaryStats.setNumNulls(flinkBinaryStats.getNullCount());
                }
                return ColumnStatisticsData.binaryStats(binaryStats);
            }
            break;
        case DECIMAL:
            if (colStat instanceof CatalogColumnStatisticsDataDouble) {
                CatalogColumnStatisticsDataDouble flinkDecimalStats = (CatalogColumnStatisticsDataDouble) colStat;
                DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
                if (flinkDecimalStats.getMax() != null) {
                    // in older versions we cannot create HiveDecimal from Double, so convert Double
                    // to BigDecimal first
                    decimalStats.setHighValue(toThriftDecimal(HiveDecimal.create(BigDecimal.valueOf(flinkDecimalStats.getMax()))));
                }
                if (flinkDecimalStats.getMin() != null) {
                    decimalStats.setLowValue(toThriftDecimal(HiveDecimal.create(BigDecimal.valueOf(flinkDecimalStats.getMin()))));
                }
                if (flinkDecimalStats.getNdv() != null) {
                    decimalStats.setNumDVs(flinkDecimalStats.getNdv());
                }
                if (flinkDecimalStats.getNullCount() != null) {
                    decimalStats.setNumNulls(flinkDecimalStats.getNullCount());
                }
                return ColumnStatisticsData.decimalStats(decimalStats);
            }
            break;
        default:
            // falls through to the exception below
            break;
    }
    throw new CatalogException(String.format("Flink does not support converting ColumnStats '%s' for Hive column type '%s' yet", colStat, colType));
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) CatalogColumnStatisticsDataDate(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate) CatalogException(org.apache.flink.table.catalog.exceptions.CatalogException) LogicalTypeRoot(org.apache.flink.table.types.logical.LogicalTypeRoot) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) CatalogColumnStatisticsDataBinary(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) CatalogColumnStatisticsDataLong(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong) CatalogColumnStatisticsDataDouble(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble) CatalogColumnStatisticsDataBoolean(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBoolean) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim)

Example 2 with LogicalTypeRoot

Use of org.apache.flink.table.types.logical.LogicalTypeRoot in the Apache Flink project: class LogicalTypeCasts, method supportsCasting.

// --------------------------------------------------------------------------------------------
/**
 * Returns whether {@code sourceType} can be cast to {@code targetType}.
 *
 * @param sourceType the type to cast from
 * @param targetType the type to cast to
 * @param allowExplicit if true, the (wider) explicit casting rules are consulted in addition
 *     to the implicit ones
 */
private static boolean supportsCasting(LogicalType sourceType, LogicalType targetType, boolean allowExplicit) {
    // dropping nullability (nullable source -> NOT NULL target) is only allowed for explicit
    // casts, where the caller may know the data contains no NULLs
    if (sourceType.isNullable() && !targetType.isNullable() && !allowExplicit) {
        return false;
    }
    // ignore nullability during compare
    if (sourceType.copy(true).equals(targetType.copy(true))) {
        return true;
    }
    final LogicalTypeRoot sourceRoot = sourceType.getTypeRoot();
    final LogicalTypeRoot targetRoot = targetType.getTypeRoot();
    if (sourceRoot == NULL) {
        // null can be cast to an arbitrary type
        return true;
    } else if (sourceRoot == DISTINCT_TYPE && targetRoot == DISTINCT_TYPE) {
        // two non-equal DISTINCT types (equality was already checked above):
        // casting between them is not possible
        return false;
    } else if (sourceRoot == DISTINCT_TYPE) {
        // unwrap the distinct source type and retry with its underlying source type
        return supportsCasting(((DistinctType) sourceType).getSourceType(), targetType, allowExplicit);
    } else if (targetRoot == DISTINCT_TYPE) {
        // unwrap the distinct target type and retry with its underlying source type
        return supportsCasting(sourceType, ((DistinctType) targetType).getSourceType(), allowExplicit);
    } else if (sourceType.is(INTERVAL) && targetType.is(EXACT_NUMERIC)) {
        // an interval can only be cast to an exact numeric if it consists of a single field
        return isSingleFieldInterval(sourceType);
    } else if (sourceType.is(EXACT_NUMERIC) && targetType.is(INTERVAL)) {
        // an exact numeric can only be cast to an interval that consists of a single field
        return isSingleFieldInterval(targetType);
    } else if ((sourceType.is(CONSTRUCTED) || sourceType.is(STRUCTURED_TYPE)) && (targetType.is(CONSTRUCTED) || targetType.is(STRUCTURED_TYPE))) {
        if (sourceType.is(CONSTRUCTED) || targetType.is(CONSTRUCTED)) {
            return supportsConstructedCasting(sourceType, targetType, allowExplicit);
        }
        return supportsStructuredCasting(sourceType, targetType, (s, t) -> supportsCasting(s, t, allowExplicit));
    } else if (sourceRoot == RAW && !targetType.is(BINARY_STRING) && !targetType.is(CHARACTER_STRING) || targetRoot == RAW) {
        // NOTE: && binds tighter than ||, so this reads as
        // (sourceRoot == RAW && target is not a binary/character string) || targetRoot == RAW;
        // the two raw types are not equal (from initial invariant), casting is not possible
        return false;
    } else if (sourceRoot == SYMBOL || targetRoot == SYMBOL) {
        // the two symbol types are not equal (from initial invariant), casting is not possible
        return false;
    }
    // fall back to the per-root casting rule tables
    if (implicitCastingRules.get(targetRoot).contains(sourceRoot)) {
        return true;
    }
    if (allowExplicit) {
        return explicitCastingRules.get(targetRoot).contains(sourceRoot);
    }
    return false;
}
Also used : DistinctType(org.apache.flink.table.types.logical.DistinctType) LogicalTypeRoot(org.apache.flink.table.types.logical.LogicalTypeRoot)

Example 3 with LogicalTypeRoot

Use of org.apache.flink.table.types.logical.LogicalTypeRoot in the Apache Flink project: class LogicalTypeCasts, method supportsStructuredCasting.

/**
 * Returns whether one structured type can be cast to another.
 *
 * <p>Only anonymous structured types (no object identifier) with the same implementation
 * class, identical attribute names, and pairwise castable children are considered castable.
 *
 * @param sourceType the structured type to cast from
 * @param targetType the structured type to cast to
 * @param childPredicate decides castability for each pair of corresponding children
 */
private static boolean supportsStructuredCasting(LogicalType sourceType, LogicalType targetType, BiFunction<LogicalType, LogicalType, Boolean> childPredicate) {
    final LogicalTypeRoot sourceRoot = sourceType.getTypeRoot();
    final LogicalTypeRoot targetRoot = targetType.getTypeRoot();
    if (sourceRoot != STRUCTURED_TYPE || targetRoot != STRUCTURED_TYPE) {
        return false;
    }
    final StructuredType sourceStructuredType = (StructuredType) sourceType;
    final StructuredType targetStructuredType = (StructuredType) targetType;
    // non-anonymous structured types must be fully equal
    if (sourceStructuredType.getObjectIdentifier().isPresent() || targetStructuredType.getObjectIdentifier().isPresent()) {
        return false;
    }
    // for anonymous structured types we are a bit more lenient, if they provide similar fields
    // e.g. this is necessary when structured types derived from type information and
    // structured types derived within Table API are slightly different
    final Class<?> sourceClass = sourceStructuredType.getImplementationClass().orElse(null);
    final Class<?> targetClass = targetStructuredType.getImplementationClass().orElse(null);
    if (sourceClass != targetClass) {
        return false;
    }
    final List<String> sourceNames = sourceStructuredType.getAttributes().stream().map(StructuredType.StructuredAttribute::getName).collect(Collectors.toList());
    // BUGFIX: was previously derived from sourceStructuredType, which made the name
    // comparison below trivially true and never rejected mismatched attribute names
    final List<String> targetNames = targetStructuredType.getAttributes().stream().map(StructuredType.StructuredAttribute::getName).collect(Collectors.toList());
    if (!sourceNames.equals(targetNames)) {
        return false;
    }
    // every corresponding pair of children must be castable
    final List<LogicalType> sourceChildren = sourceType.getChildren();
    final List<LogicalType> targetChildren = targetType.getChildren();
    for (int i = 0; i < sourceChildren.size(); i++) {
        if (!childPredicate.apply(sourceChildren.get(i), targetChildren.get(i))) {
            return false;
        }
    }
    return true;
}
Also used : LogicalType(org.apache.flink.table.types.logical.LogicalType) LogicalTypeRoot(org.apache.flink.table.types.logical.LogicalTypeRoot) StructuredType(org.apache.flink.table.types.logical.StructuredType)

Example 4 with LogicalTypeRoot

Use of org.apache.flink.table.types.logical.LogicalTypeRoot in the Apache Flink project: class LogicalTypeCasts, method supportsConstructedCasting.

/**
 * Returns whether a constructed type can be cast to another constructed type.
 *
 * <p>The roots must be identical, or one side must be ROW and the other STRUCTURED_TYPE;
 * additionally, both types must have the same number of children and every corresponding
 * pair of children must be castable.
 *
 * @param sourceType the constructed type to cast from
 * @param targetType the constructed type to cast to
 * @param allowExplicit whether explicit casting rules apply to the children
 */
private static boolean supportsConstructedCasting(LogicalType sourceType, LogicalType targetType, boolean allowExplicit) {
    final LogicalTypeRoot sourceRoot = sourceType.getTypeRoot();
    final LogicalTypeRoot targetRoot = targetType.getTypeRoot();
    // rows can be converted to structured types and vice versa
    final boolean sameRoot = sourceRoot == targetRoot;
    final boolean rowToStructured = sourceRoot == ROW && targetRoot == STRUCTURED_TYPE;
    final boolean structuredToRow = sourceRoot == STRUCTURED_TYPE && targetRoot == ROW;
    if (!sameRoot && !rowToStructured && !structuredToRow) {
        return false;
    }
    final List<LogicalType> sourceChildren = sourceType.getChildren();
    final List<LogicalType> targetChildren = targetType.getChildren();
    if (sourceChildren.size() != targetChildren.size()) {
        return false;
    }
    for (int pos = 0; pos < sourceChildren.size(); pos++) {
        if (!supportsCasting(sourceChildren.get(pos), targetChildren.get(pos), allowExplicit)) {
            return false;
        }
    }
    return true;
}
Also used : LogicalType(org.apache.flink.table.types.logical.LogicalType) LogicalTypeRoot(org.apache.flink.table.types.logical.LogicalTypeRoot)

Example 5 with LogicalTypeRoot

Use of org.apache.flink.table.types.logical.LogicalTypeRoot in the Apache Flink project: class LogicalTypeMerging, method createCommonExactNumericType.

/**
 * Finds a common exact numeric type for the two given exact numeric types.
 *
 * <p>If neither type is DECIMAL, the one with the higher precision wins. Otherwise a DECIMAL
 * is derived whose whole-digit count is the maximum of both sides' whole digits and whose
 * scale is the maximum of both scales, each capped so that the total precision does not
 * exceed {@link DecimalType#MAX_PRECISION}.
 *
 * @param resultType the accumulated result type so far
 * @param type the next type to merge in
 * @return the merged exact numeric type
 */
private static LogicalType createCommonExactNumericType(LogicalType resultType, LogicalType type) {
    // identical EXACT_NUMERIC types merge to themselves
    if (type.equals(resultType)) {
        return resultType;
    }
    final LogicalTypeRoot resultTypeRoot = resultType.getTypeRoot();
    final LogicalTypeRoot typeRoot = type.getTypeRoot();
    if (resultTypeRoot != DECIMAL && typeRoot != DECIMAL) {
        // no DECIMAL types involved; the type root order reflects precision order,
        // so simply keep whichever side has the larger precision
        return getPrecision(type) > getPrecision(resultType) ? type : resultType;
    }
    // determine DECIMAL with precision (p), scale (s) and number of whole digits (d):
    // d = max(p1 - s1, p2 - s2)
    // s <= max(s1, s2)
    // p = s + d
    final int maxPrecision = DecimalType.MAX_PRECISION;
    final int wholeDigits = Math.min(
            Math.max(getPrecision(resultType) - getScale(resultType), getPrecision(type) - getScale(type)),
            maxPrecision);
    // cap the scale so that wholeDigits + scale stays within the maximum precision
    final int scale = Math.min(Math.max(getScale(resultType), getScale(type)), maxPrecision - wholeDigits);
    return new DecimalType(wholeDigits + scale, scale);
}
Also used : DecimalType(org.apache.flink.table.types.logical.DecimalType) LogicalTypeRoot(org.apache.flink.table.types.logical.LogicalTypeRoot)

Aggregations

LogicalTypeRoot (org.apache.flink.table.types.logical.LogicalTypeRoot)12 LogicalType (org.apache.flink.table.types.logical.LogicalType)8 ArrayList (java.util.ArrayList)3 Nullable (javax.annotation.Nullable)3 DateType (org.apache.flink.table.types.logical.DateType)2 DayTimeIntervalType (org.apache.flink.table.types.logical.DayTimeIntervalType)2 DecimalType (org.apache.flink.table.types.logical.DecimalType)2 DoubleType (org.apache.flink.table.types.logical.DoubleType)2 LegacyTypeInformationType (org.apache.flink.table.types.logical.LegacyTypeInformationType)2 AbstractList (java.util.AbstractList)1 Arrays (java.util.Arrays)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Optional (java.util.Optional)1 Collectors (java.util.stream.Collectors)1 IntStream (java.util.stream.IntStream)1 Internal (org.apache.flink.annotation.Internal)1 CatalogException (org.apache.flink.table.catalog.exceptions.CatalogException)1