Search in sources:

Example 1 with CatalogColumnStatisticsDataBinary

Use of org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary in the Apache Flink project.

Class: HiveStatsUtil, method: getColumnStatisticsData.

/**
 * Convert Flink ColumnStats to Hive ColumnStatisticsData according to the Hive column type. Note
 * that we currently assume the max and min of a ColumnStats in Flink have the same type as the
 * Flink column type. For example, for SHORT and LONG columns, the max and min of their
 * ColumnStats should be of type SHORT and LONG respectively.
 */
private static ColumnStatisticsData getColumnStatisticsData(DataType colType, CatalogColumnStatisticsDataBase colStat, String hiveVersion) {
    LogicalTypeRoot type = colType.getLogicalType().getTypeRoot();
    if (type.equals(LogicalTypeRoot.CHAR) || type.equals(LogicalTypeRoot.VARCHAR)) {
        if (colStat instanceof CatalogColumnStatisticsDataString) {
            CatalogColumnStatisticsDataString stringColStat = (CatalogColumnStatisticsDataString) colStat;
            StringColumnStatsData hiveStringColumnStats = new StringColumnStatsData();
            hiveStringColumnStats.clear();
            if (null != stringColStat.getMaxLength()) {
                hiveStringColumnStats.setMaxColLen(stringColStat.getMaxLength());
            }
            if (null != stringColStat.getAvgLength()) {
                hiveStringColumnStats.setAvgColLen(stringColStat.getAvgLength());
            }
            if (null != stringColStat.getNullCount()) {
                hiveStringColumnStats.setNumNulls(stringColStat.getNullCount());
            }
            if (null != stringColStat.getNdv()) {
                hiveStringColumnStats.setNumDVs(stringColStat.getNdv());
            }
            return ColumnStatisticsData.stringStats(hiveStringColumnStats);
        }
    } else if (type.equals(LogicalTypeRoot.BOOLEAN)) {
        if (colStat instanceof CatalogColumnStatisticsDataBoolean) {
            CatalogColumnStatisticsDataBoolean booleanColStat = (CatalogColumnStatisticsDataBoolean) colStat;
            BooleanColumnStatsData hiveBoolStats = new BooleanColumnStatsData();
            hiveBoolStats.clear();
            if (null != booleanColStat.getTrueCount()) {
                hiveBoolStats.setNumTrues(booleanColStat.getTrueCount());
            }
            if (null != booleanColStat.getFalseCount()) {
                hiveBoolStats.setNumFalses(booleanColStat.getFalseCount());
            }
            if (null != booleanColStat.getNullCount()) {
                hiveBoolStats.setNumNulls(booleanColStat.getNullCount());
            }
            return ColumnStatisticsData.booleanStats(hiveBoolStats);
        }
    } else if (type.equals(LogicalTypeRoot.TINYINT)
            || type.equals(LogicalTypeRoot.SMALLINT)
            || type.equals(LogicalTypeRoot.INTEGER)
            || type.equals(LogicalTypeRoot.BIGINT)
            || type.equals(LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE)
            || type.equals(LogicalTypeRoot.TIME_WITHOUT_TIME_ZONE)
            || type.equals(LogicalTypeRoot.TIMESTAMP_WITH_TIME_ZONE)) {
        if (colStat instanceof CatalogColumnStatisticsDataLong) {
            CatalogColumnStatisticsDataLong longColStat = (CatalogColumnStatisticsDataLong) colStat;
            LongColumnStatsData hiveLongColStats = new LongColumnStatsData();
            hiveLongColStats.clear();
            if (null != longColStat.getMax()) {
                hiveLongColStats.setHighValue(longColStat.getMax());
            }
            if (null != longColStat.getMin()) {
                hiveLongColStats.setLowValue(longColStat.getMin());
            }
            if (null != longColStat.getNdv()) {
                hiveLongColStats.setNumDVs(longColStat.getNdv());
            }
            if (null != longColStat.getNullCount()) {
                hiveLongColStats.setNumNulls(longColStat.getNullCount());
            }
            return ColumnStatisticsData.longStats(hiveLongColStats);
        }
    } else if (type.equals(LogicalTypeRoot.FLOAT) || type.equals(LogicalTypeRoot.DOUBLE)) {
        if (colStat instanceof CatalogColumnStatisticsDataDouble) {
            CatalogColumnStatisticsDataDouble doubleColumnStatsData = (CatalogColumnStatisticsDataDouble) colStat;
            DoubleColumnStatsData hiveFloatStats = new DoubleColumnStatsData();
            hiveFloatStats.clear();
            if (null != doubleColumnStatsData.getMax()) {
                hiveFloatStats.setHighValue(doubleColumnStatsData.getMax());
            }
            if (null != doubleColumnStatsData.getMin()) {
                hiveFloatStats.setLowValue(doubleColumnStatsData.getMin());
            }
            if (null != doubleColumnStatsData.getNullCount()) {
                hiveFloatStats.setNumNulls(doubleColumnStatsData.getNullCount());
            }
            if (null != doubleColumnStatsData.getNdv()) {
                hiveFloatStats.setNumDVs(doubleColumnStatsData.getNdv());
            }
            return ColumnStatisticsData.doubleStats(hiveFloatStats);
        }
    } else if (type.equals(LogicalTypeRoot.DATE)) {
        if (colStat instanceof CatalogColumnStatisticsDataDate) {
            HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveVersion);
            return hiveShim.toHiveDateColStats((CatalogColumnStatisticsDataDate) colStat);
        }
    } else if (type.equals(LogicalTypeRoot.VARBINARY) || type.equals(LogicalTypeRoot.BINARY)) {
        if (colStat instanceof CatalogColumnStatisticsDataBinary) {
            CatalogColumnStatisticsDataBinary binaryColumnStatsData = (CatalogColumnStatisticsDataBinary) colStat;
            BinaryColumnStatsData hiveBinaryColumnStats = new BinaryColumnStatsData();
            hiveBinaryColumnStats.clear();
            if (null != binaryColumnStatsData.getMaxLength()) {
                hiveBinaryColumnStats.setMaxColLen(binaryColumnStatsData.getMaxLength());
            }
            if (null != binaryColumnStatsData.getAvgLength()) {
                hiveBinaryColumnStats.setAvgColLen(binaryColumnStatsData.getAvgLength());
            }
            if (null != binaryColumnStatsData.getNullCount()) {
                hiveBinaryColumnStats.setNumNulls(binaryColumnStatsData.getNullCount());
            }
            return ColumnStatisticsData.binaryStats(hiveBinaryColumnStats);
        }
    } else if (type.equals(LogicalTypeRoot.DECIMAL)) {
        if (colStat instanceof CatalogColumnStatisticsDataDouble) {
            CatalogColumnStatisticsDataDouble flinkStats = (CatalogColumnStatisticsDataDouble) colStat;
            DecimalColumnStatsData hiveStats = new DecimalColumnStatsData();
            if (flinkStats.getMax() != null) {
                // in older versions we cannot create HiveDecimal from Double, so convert Double
                // to BigDecimal first
                hiveStats.setHighValue(toThriftDecimal(HiveDecimal.create(BigDecimal.valueOf(flinkStats.getMax()))));
            }
            if (flinkStats.getMin() != null) {
                hiveStats.setLowValue(toThriftDecimal(HiveDecimal.create(BigDecimal.valueOf(flinkStats.getMin()))));
            }
            if (flinkStats.getNdv() != null) {
                hiveStats.setNumDVs(flinkStats.getNdv());
            }
            if (flinkStats.getNullCount() != null) {
                hiveStats.setNumNulls(flinkStats.getNullCount());
            }
            return ColumnStatisticsData.decimalStats(hiveStats);
        }
    }
    throw new CatalogException(
            String.format(
                    "Flink does not support converting ColumnStats '%s' for Hive column type '%s' yet",
                    colStat, colType));
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) CatalogColumnStatisticsDataDate(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate) CatalogException(org.apache.flink.table.catalog.exceptions.CatalogException) LogicalTypeRoot(org.apache.flink.table.types.logical.LogicalTypeRoot) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) CatalogColumnStatisticsDataBinary(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) CatalogColumnStatisticsDataLong(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong) CatalogColumnStatisticsDataDouble(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble) CatalogColumnStatisticsDataBoolean(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBoolean) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim)
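
For illustration, below is a minimal standalone sketch of the VARBINARY/BINARY branch above. Since getColumnStatisticsData is private to HiveStatsUtil, the helper convertBinaryStats (a hypothetical name, not part of the Flink API) simply mirrors the null-guarded field mapping, using only the constructors, getters and setters that already appear in the snippet.

import org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary;
import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;

public class BinaryStatsConversionSketch {

    // Hypothetical helper mirroring the VARBINARY/BINARY branch of
    // HiveStatsUtil#getColumnStatisticsData: copy only the fields that are present.
    static ColumnStatisticsData convertBinaryStats(CatalogColumnStatisticsDataBinary flinkStats) {
        BinaryColumnStatsData hiveStats = new BinaryColumnStatsData();
        hiveStats.clear();
        if (flinkStats.getMaxLength() != null) {
            hiveStats.setMaxColLen(flinkStats.getMaxLength());
        }
        if (flinkStats.getAvgLength() != null) {
            hiveStats.setAvgColLen(flinkStats.getAvgLength());
        }
        if (flinkStats.getNullCount() != null) {
            hiveStats.setNumNulls(flinkStats.getNullCount());
        }
        return ColumnStatisticsData.binaryStats(hiveStats);
    }

    public static void main(String[] args) {
        // maxLength=755, avgLength=43.5, nullCount=20 (sample values, same shape as the tests below).
        CatalogColumnStatisticsDataBinary flinkStats =
                new CatalogColumnStatisticsDataBinary(755L, 43.5D, 20L);
        ColumnStatisticsData hiveStats = convertBinaryStats(flinkStats);
        System.out.println(hiveStats.getBinaryStats().getMaxColLen()); // 755
    }
}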

Example 2 with CatalogColumnStatisticsDataBinary

Use of org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary in the Apache Flink project.

Class: HiveCatalogHiveMetadataTest, method: testAlterTableColumnStatistics.

@Test
public void testAlterTableColumnStatistics() throws Exception {
    String hiveVersion = ((HiveCatalog) catalog).getHiveVersion();
    boolean supportDateStats = hiveVersion.compareTo(HiveShimLoader.HIVE_VERSION_V1_2_0) >= 0;
    catalog.createDatabase(db1, createDb(), false);
    TableSchema.Builder builder = TableSchema.builder()
            .field("first", DataTypes.STRING())
            .field("second", DataTypes.INT())
            .field("third", DataTypes.BOOLEAN())
            .field("fourth", DataTypes.DOUBLE())
            .field("fifth", DataTypes.BIGINT())
            .field("sixth", DataTypes.BYTES())
            .field("seventh", DataTypes.DECIMAL(10, 3))
            .field("eighth", DataTypes.DECIMAL(30, 3));
    if (supportDateStats) {
        builder.field("ninth", DataTypes.DATE());
    }
    TableSchema tableSchema = builder.build();
    CatalogTable catalogTable = new CatalogTableImpl(tableSchema, getBatchTableProperties(), TEST_COMMENT);
    catalog.createTable(path1, catalogTable, false);
    Map<String, CatalogColumnStatisticsDataBase> columnStatisticsDataBaseMap = new HashMap<>();
    columnStatisticsDataBaseMap.put("first", new CatalogColumnStatisticsDataString(10L, 5.2, 3L, 100L));
    columnStatisticsDataBaseMap.put("second", new CatalogColumnStatisticsDataLong(0L, 1000L, 3L, 0L));
    columnStatisticsDataBaseMap.put("third", new CatalogColumnStatisticsDataBoolean(15L, 20L, 3L));
    columnStatisticsDataBaseMap.put("fourth", new CatalogColumnStatisticsDataDouble(15.02, 20.01, 3L, 10L));
    columnStatisticsDataBaseMap.put("fifth", new CatalogColumnStatisticsDataLong(0L, 20L, 3L, 2L));
    columnStatisticsDataBaseMap.put("sixth", new CatalogColumnStatisticsDataBinary(150L, 20D, 3L));
    columnStatisticsDataBaseMap.put("seventh", new CatalogColumnStatisticsDataDouble(1.23, 99.456, 100L, 0L));
    columnStatisticsDataBaseMap.put("eighth", new CatalogColumnStatisticsDataDouble(0.123, 123456.789, 5723L, 19L));
    if (supportDateStats) {
        columnStatisticsDataBaseMap.put("ninth", new CatalogColumnStatisticsDataDate(new Date(71L), new Date(17923L), 132L, 0L));
    }
    CatalogColumnStatistics catalogColumnStatistics = new CatalogColumnStatistics(columnStatisticsDataBaseMap);
    catalog.alterTableColumnStatistics(path1, catalogColumnStatistics, false);
    checkEquals(catalogColumnStatistics, catalog.getTableColumnStatistics(path1));
}
Also used : CatalogColumnStatisticsDataDate(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate) TableSchema(org.apache.flink.table.api.TableSchema) CatalogColumnStatisticsDataBase(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBase) HashMap(java.util.HashMap) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) CatalogTable(org.apache.flink.table.catalog.CatalogTable) CatalogColumnStatisticsDataBinary(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary) Date(org.apache.flink.table.catalog.stats.Date) CatalogColumnStatisticsDataDate(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate) CatalogColumnStatistics(org.apache.flink.table.catalog.stats.CatalogColumnStatistics) CatalogColumnStatisticsDataLong(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong) CatalogColumnStatisticsDataDouble(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble) CatalogTableImpl(org.apache.flink.table.catalog.CatalogTableImpl) CatalogColumnStatisticsDataBoolean(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBoolean) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) Test(org.junit.Test)
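
The same alter/read-back flow can be tried outside the Hive test harness against the in-memory catalog. The following is only a sketch under some assumptions not confirmed by the snippets above: that GenericInMemoryCatalog's default database is named "default" and that CatalogColumnStatistics exposes its map via getColumnStatisticsData(); the table name, column name and statistic values are made up for illustration.

import java.util.HashMap;
import java.util.Map;

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.api.TableSchema;
import org.apache.flink.table.catalog.CatalogTableImpl;
import org.apache.flink.table.catalog.GenericInMemoryCatalog;
import org.apache.flink.table.catalog.ObjectPath;
import org.apache.flink.table.catalog.stats.CatalogColumnStatistics;
import org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBase;
import org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary;

public class ColumnStatsRoundTripSketch {

    public static void main(String[] args) throws Exception {
        GenericInMemoryCatalog catalog = new GenericInMemoryCatalog("test_catalog");
        catalog.open();

        // A single-column table with a BYTES column, created in the assumed default database.
        TableSchema schema = TableSchema.builder().field("payload", DataTypes.BYTES()).build();
        ObjectPath path = new ObjectPath("default", "t1");
        catalog.createTable(path, new CatalogTableImpl(schema, new HashMap<>(), "test comment"), false);

        // Attach binary column statistics: maxLength=150, avgLength=20.0, nullCount=3.
        Map<String, CatalogColumnStatisticsDataBase> colStats = new HashMap<>();
        colStats.put("payload", new CatalogColumnStatisticsDataBinary(150L, 20D, 3L));
        catalog.alterTableColumnStatistics(path, new CatalogColumnStatistics(colStats), false);

        // Read the statistics back and inspect the binary column entry.
        CatalogColumnStatisticsDataBinary readBack =
                (CatalogColumnStatisticsDataBinary)
                        catalog.getTableColumnStatistics(path).getColumnStatisticsData().get("payload");
        System.out.println(readBack.getMaxLength() + " / " + readBack.getAvgLength());

        catalog.close();
    }
}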

Example 3 with CatalogColumnStatisticsDataBinary

Use of org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary in the Apache Flink project.

Class: HiveStatsUtil, method: createTableColumnStats.

/**
 * Create Flink ColumnStats from Hive ColumnStatisticsData.
 */
private static CatalogColumnStatisticsDataBase createTableColumnStats(DataType colType, ColumnStatisticsData stats, String hiveVersion) {
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveVersion);
    if (stats.isSetBinaryStats()) {
        BinaryColumnStatsData binaryStats = stats.getBinaryStats();
        return new CatalogColumnStatisticsDataBinary(binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null, binaryStats.isSetAvgColLen() ? binaryStats.getAvgColLen() : null, binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null);
    } else if (stats.isSetBooleanStats()) {
        BooleanColumnStatsData booleanStats = stats.getBooleanStats();
        return new CatalogColumnStatisticsDataBoolean(booleanStats.isSetNumTrues() ? booleanStats.getNumTrues() : null, booleanStats.isSetNumFalses() ? booleanStats.getNumFalses() : null, booleanStats.isSetNumNulls() ? booleanStats.getNumNulls() : null);
    } else if (hiveShim.isDateStats(stats)) {
        return hiveShim.toFlinkDateColStats(stats);
    } else if (stats.isSetDoubleStats()) {
        DoubleColumnStatsData doubleStats = stats.getDoubleStats();
        return new CatalogColumnStatisticsDataDouble(doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null, doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null, doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null);
    } else if (stats.isSetLongStats()) {
        LongColumnStatsData longColStats = stats.getLongStats();
        return new CatalogColumnStatisticsDataLong(longColStats.isSetLowValue() ? longColStats.getLowValue() : null, longColStats.isSetHighValue() ? longColStats.getHighValue() : null, longColStats.isSetNumDVs() ? longColStats.getNumDVs() : null, longColStats.isSetNumNulls() ? longColStats.getNumNulls() : null);
    } else if (stats.isSetStringStats()) {
        StringColumnStatsData stringStats = stats.getStringStats();
        return new CatalogColumnStatisticsDataString(stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null, stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null, stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null);
    } else if (stats.isSetDecimalStats()) {
        DecimalColumnStatsData decimalStats = stats.getDecimalStats();
        // for now, just return CatalogColumnStatisticsDataDouble for decimal columns
        Double max = null;
        if (decimalStats.isSetHighValue()) {
            max = toHiveDecimal(decimalStats.getHighValue()).doubleValue();
        }
        Double min = null;
        if (decimalStats.isSetLowValue()) {
            min = toHiveDecimal(decimalStats.getLowValue()).doubleValue();
        }
        Long ndv = decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null;
        Long nullCount = decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null;
        return new CatalogColumnStatisticsDataDouble(min, max, ndv, nullCount);
    } else {
        LOG.warn("Flink does not support converting ColumnStatisticsData '{}' for Hive column type '{}' yet.", stats, colType);
        return null;
    }
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) CatalogColumnStatisticsDataBinary(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) CatalogColumnStatisticsDataDouble(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) CatalogColumnStatisticsDataDouble(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble) CatalogColumnStatisticsDataLong(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong) CatalogColumnStatisticsDataBoolean(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBoolean) CatalogColumnStatisticsDataLong(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim)
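
The isSet guards above matter because the Thrift-generated metastore structs distinguish unset fields from zero values. Below is a minimal sketch of that behaviour for the binary case; since createTableColumnStats itself is private, the conversion step just repeats the logic of the branch above, and the class name is made up.

import org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary;
import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;

public class BinaryStatsIsSetSketch {

    public static void main(String[] args) {
        // Populate only maxColLen and numNulls; avgColLen is deliberately left unset.
        BinaryColumnStatsData hiveBinaryStats = new BinaryColumnStatsData();
        hiveBinaryStats.clear();
        hiveBinaryStats.setMaxColLen(150L);
        hiveBinaryStats.setNumNulls(3L);
        ColumnStatisticsData stats = ColumnStatisticsData.binaryStats(hiveBinaryStats);

        // Mirror the binary branch of createTableColumnStats: unset fields become null.
        BinaryColumnStatsData binaryStats = stats.getBinaryStats();
        CatalogColumnStatisticsDataBinary flinkStats =
                new CatalogColumnStatisticsDataBinary(
                        binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null,
                        binaryStats.isSetAvgColLen() ? binaryStats.getAvgColLen() : null,
                        binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null);

        System.out.println(flinkStats.getAvgLength()); // null, because avgColLen was never set
    }
}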

Example 4 with CatalogColumnStatisticsDataBinary

Use of org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary in the Apache Flink project.

Class: GenericInMemoryCatalogTest, method: createColumnStats.

private CatalogColumnStatistics createColumnStats() {
    CatalogColumnStatisticsDataBoolean booleanColStats = new CatalogColumnStatisticsDataBoolean(55L, 45L, 5L);
    CatalogColumnStatisticsDataLong longColStats = new CatalogColumnStatisticsDataLong(-123L, 763322L, 23L, 79L);
    CatalogColumnStatisticsDataString stringColStats = new CatalogColumnStatisticsDataString(152L, 43.5D, 20L, 0L);
    CatalogColumnStatisticsDataDate dateColStats = new CatalogColumnStatisticsDataDate(new Date(71L), new Date(17923L), 1321L, 0L);
    CatalogColumnStatisticsDataDouble doubleColStats = new CatalogColumnStatisticsDataDouble(-123.35D, 7633.22D, 23L, 79L);
    CatalogColumnStatisticsDataBinary binaryColStats = new CatalogColumnStatisticsDataBinary(755L, 43.5D, 20L);
    Map<String, CatalogColumnStatisticsDataBase> colStatsMap = new HashMap<>(6);
    colStatsMap.put("b1", booleanColStats);
    colStatsMap.put("l2", longColStats);
    colStatsMap.put("s3", stringColStats);
    colStatsMap.put("d4", dateColStats);
    colStatsMap.put("dd5", doubleColStats);
    colStatsMap.put("bb6", binaryColStats);
    return new CatalogColumnStatistics(colStatsMap);
}
Also used : CatalogColumnStatisticsDataDate(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate) CatalogColumnStatisticsDataLong(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong) CatalogColumnStatisticsDataDouble(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble) CatalogColumnStatisticsDataBase(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBase) HashMap(java.util.HashMap) CatalogColumnStatisticsDataBoolean(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBoolean) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) CatalogColumnStatisticsDataBinary(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) Date(org.apache.flink.table.catalog.stats.Date) CatalogColumnStatisticsDataDate(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate) CatalogColumnStatistics(org.apache.flink.table.catalog.stats.CatalogColumnStatistics)
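
Every field in these CatalogColumnStatisticsData* objects is nullable, which is why the Hive conversion in Example 1 null-checks each getter. A minimal sketch of that consumer-side pattern, using only the constructor and getters already shown on this page; the class name is made up for illustration.

import org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary;

public class NullableStatsFieldsSketch {

    public static void main(String[] args) {
        // Statistics fields are nullable: here avgLength is unknown, so it is passed as null.
        CatalogColumnStatisticsDataBinary binaryColStats =
                new CatalogColumnStatisticsDataBinary(755L, null, 20L);

        // Consumers should null-check each field before using it, as HiveStatsUtil does above.
        Long maxLength = binaryColStats.getMaxLength();
        Double avgLength = binaryColStats.getAvgLength();
        Long nullCount = binaryColStats.getNullCount();

        System.out.println("maxLength=" + maxLength
                + ", avgLength=" + (avgLength == null ? "unknown" : avgLength)
                + ", nullCount=" + nullCount);
    }
}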

Example 5 with CatalogColumnStatisticsDataBinary

Use of org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary in the Apache Flink project.

Class: CatalogTableStatisticsConverter, method: convertToColumnStats.

private static ColumnStats convertToColumnStats(CatalogColumnStatisticsDataBase columnStatisticsData) {
    Long ndv = null;
    Long nullCount = columnStatisticsData.getNullCount();
    Double avgLen = null;
    Integer maxLen = null;
    Comparable<?> max = null;
    Comparable<?> min = null;
    if (columnStatisticsData instanceof CatalogColumnStatisticsDataBoolean) {
        CatalogColumnStatisticsDataBoolean booleanData = (CatalogColumnStatisticsDataBoolean) columnStatisticsData;
        avgLen = 1.0;
        maxLen = 1;
        if (null == booleanData.getFalseCount() || null == booleanData.getTrueCount()) {
            ndv = 2L;
        } else if ((booleanData.getFalseCount() == 0 && booleanData.getTrueCount() > 0) || (booleanData.getFalseCount() > 0 && booleanData.getTrueCount() == 0)) {
            ndv = 1L;
        } else {
            ndv = 2L;
        }
    } else if (columnStatisticsData instanceof CatalogColumnStatisticsDataLong) {
        CatalogColumnStatisticsDataLong longData = (CatalogColumnStatisticsDataLong) columnStatisticsData;
        ndv = longData.getNdv();
        avgLen = 8.0;
        maxLen = 8;
        max = longData.getMax();
        min = longData.getMin();
    } else if (columnStatisticsData instanceof CatalogColumnStatisticsDataDouble) {
        CatalogColumnStatisticsDataDouble doubleData = (CatalogColumnStatisticsDataDouble) columnStatisticsData;
        ndv = doubleData.getNdv();
        avgLen = 8.0;
        maxLen = 8;
        max = doubleData.getMax();
        min = doubleData.getMin();
    } else if (columnStatisticsData instanceof CatalogColumnStatisticsDataString) {
        CatalogColumnStatisticsDataString strData = (CatalogColumnStatisticsDataString) columnStatisticsData;
        ndv = strData.getNdv();
        avgLen = strData.getAvgLength();
        maxLen = null == strData.getMaxLength() ? null : strData.getMaxLength().intValue();
    } else if (columnStatisticsData instanceof CatalogColumnStatisticsDataBinary) {
        CatalogColumnStatisticsDataBinary binaryData = (CatalogColumnStatisticsDataBinary) columnStatisticsData;
        avgLen = binaryData.getAvgLength();
        maxLen = null == binaryData.getMaxLength() ? null : binaryData.getMaxLength().intValue();
    } else if (columnStatisticsData instanceof CatalogColumnStatisticsDataDate) {
        CatalogColumnStatisticsDataDate dateData = (CatalogColumnStatisticsDataDate) columnStatisticsData;
        ndv = dateData.getNdv();
        if (dateData.getMax() != null) {
            max = Date.valueOf(DateTimeUtils.unixDateToString((int) dateData.getMax().getDaysSinceEpoch()));
        }
        if (dateData.getMin() != null) {
            min = Date.valueOf(DateTimeUtils.unixDateToString((int) dateData.getMin().getDaysSinceEpoch()));
        }
    } else {
        throw new TableException("Unsupported CatalogColumnStatisticsDataBase: " + columnStatisticsData.getClass().getCanonicalName());
    }
    return ColumnStats.Builder.builder().setNdv(ndv).setNullCount(nullCount).setAvgLen(avgLen).setMaxLen(maxLen).setMax(max).setMin(min).build();
}
Also used : TableException(org.apache.flink.table.api.TableException) CatalogColumnStatisticsDataDate(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate) CatalogColumnStatisticsDataLong(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong) CatalogColumnStatisticsDataDouble(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble) CatalogColumnStatisticsDataLong(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong) CatalogColumnStatisticsDataBoolean(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBoolean) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) CatalogColumnStatisticsDataBinary(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary) CatalogColumnStatisticsDataDouble(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble)
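
For a binary column, the branch above carries over only the null count, average length and maximum length; ndv, max and min stay null. A minimal sketch of the resulting planner-side ColumnStats, assuming the builder API shown in the method above and that ColumnStats lives in org.apache.flink.table.plan.stats; the class name is made up.

import org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary;
import org.apache.flink.table.plan.stats.ColumnStats;

public class BinaryColumnStatsSketch {

    public static void main(String[] args) {
        // maxLength=755, avgLength=43.5, nullCount=20, as in the in-memory catalog test above.
        CatalogColumnStatisticsDataBinary binaryData =
                new CatalogColumnStatisticsDataBinary(755L, 43.5D, 20L);

        // Same mapping as the binary branch of convertToColumnStats: no ndv, max or min;
        // only null count, average length and maximum length survive the conversion.
        ColumnStats columnStats =
                ColumnStats.Builder.builder()
                        .setNullCount(binaryData.getNullCount())
                        .setAvgLen(binaryData.getAvgLength())
                        .setMaxLen(binaryData.getMaxLength() == null
                                ? null
                                : binaryData.getMaxLength().intValue())
                        .build();

        System.out.println(columnStats);
    }
}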

Aggregations

CatalogColumnStatisticsDataBinary (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary): 5
CatalogColumnStatisticsDataBoolean (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBoolean): 5
CatalogColumnStatisticsDataDouble (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble): 5
CatalogColumnStatisticsDataLong (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong): 5
CatalogColumnStatisticsDataString (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString): 5
CatalogColumnStatisticsDataDate (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate): 4
HashMap (java.util.HashMap): 2
HiveShim (org.apache.flink.table.catalog.hive.client.HiveShim): 2
CatalogColumnStatistics (org.apache.flink.table.catalog.stats.CatalogColumnStatistics): 2
CatalogColumnStatisticsDataBase (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBase): 2
Date (org.apache.flink.table.catalog.stats.Date): 2
BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData): 2
BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData): 2
DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData): 2
DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData): 2
LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData): 2
StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData): 2
TableException (org.apache.flink.table.api.TableException): 1
TableSchema (org.apache.flink.table.api.TableSchema): 1
CatalogTable (org.apache.flink.table.catalog.CatalogTable): 1