use of org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary in project flink by apache.
the class HiveStatsUtil method getColumnStatisticsData.
/**
 * Translates Flink column statistics into the Hive {@code ColumnStatisticsData} variant that
 * corresponds to the logical type root of the Flink column.
 *
 * <p>The conversion only succeeds when the statistics object is of the Flink statistics class
 * expected for that type root (string stats for CHAR/VARCHAR, long stats for the integral and
 * the listed time/timestamp roots, double stats for FLOAT/DOUBLE and DECIMAL, and so on). Only
 * the fields that are non-null on the Flink side are set on the Hive side.
 *
 * <p>As in the rest of this conversion, the max and min carried by the Flink statistics are
 * assumed to have the same type as the Flink column.
 *
 * @param colType Flink data type of the column
 * @param colStat Flink statistics for the column
 * @param hiveVersion Hive version used to load the shim that converts DATE statistics
 * @return the Hive statistics holding the converted values
 * @throws CatalogException if the column type / statistics class combination is not handled
 */
private static ColumnStatisticsData getColumnStatisticsData(DataType colType, CatalogColumnStatisticsDataBase colStat, String hiveVersion) {
    final LogicalTypeRoot typeRoot = colType.getLogicalType().getTypeRoot();
    switch (typeRoot) {
        case CHAR:
        case VARCHAR: {
            if (colStat instanceof CatalogColumnStatisticsDataString) {
                final CatalogColumnStatisticsDataString flinkString = (CatalogColumnStatisticsDataString) colStat;
                final StringColumnStatsData hiveString = new StringColumnStatsData();
                hiveString.clear();
                final Long maxLength = flinkString.getMaxLength();
                if (maxLength != null) {
                    hiveString.setMaxColLen(maxLength);
                }
                final Double avgLength = flinkString.getAvgLength();
                if (avgLength != null) {
                    hiveString.setAvgColLen(avgLength);
                }
                final Long nulls = flinkString.getNullCount();
                if (nulls != null) {
                    hiveString.setNumNulls(nulls);
                }
                final Long distinct = flinkString.getNdv();
                if (distinct != null) {
                    hiveString.setNumDVs(distinct);
                }
                return ColumnStatisticsData.stringStats(hiveString);
            }
            break;
        }
        case BOOLEAN: {
            if (colStat instanceof CatalogColumnStatisticsDataBoolean) {
                final CatalogColumnStatisticsDataBoolean flinkBool = (CatalogColumnStatisticsDataBoolean) colStat;
                final BooleanColumnStatsData hiveBool = new BooleanColumnStatsData();
                hiveBool.clear();
                final Long trues = flinkBool.getTrueCount();
                if (trues != null) {
                    hiveBool.setNumTrues(trues);
                }
                final Long falses = flinkBool.getFalseCount();
                if (falses != null) {
                    hiveBool.setNumFalses(falses);
                }
                final Long nulls = flinkBool.getNullCount();
                if (nulls != null) {
                    hiveBool.setNumNulls(nulls);
                }
                return ColumnStatisticsData.booleanStats(hiveBool);
            }
            break;
        }
        case TINYINT:
        case SMALLINT:
        case INTEGER:
        case BIGINT:
        case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
        case TIME_WITHOUT_TIME_ZONE:
        case TIMESTAMP_WITH_TIME_ZONE: {
            if (colStat instanceof CatalogColumnStatisticsDataLong) {
                final CatalogColumnStatisticsDataLong flinkLong = (CatalogColumnStatisticsDataLong) colStat;
                final LongColumnStatsData hiveLong = new LongColumnStatsData();
                hiveLong.clear();
                final Long high = flinkLong.getMax();
                if (high != null) {
                    hiveLong.setHighValue(high);
                }
                final Long low = flinkLong.getMin();
                if (low != null) {
                    hiveLong.setLowValue(low);
                }
                final Long distinct = flinkLong.getNdv();
                if (distinct != null) {
                    hiveLong.setNumDVs(distinct);
                }
                final Long nulls = flinkLong.getNullCount();
                if (nulls != null) {
                    hiveLong.setNumNulls(nulls);
                }
                return ColumnStatisticsData.longStats(hiveLong);
            }
            break;
        }
        case FLOAT:
        case DOUBLE: {
            if (colStat instanceof CatalogColumnStatisticsDataDouble) {
                final CatalogColumnStatisticsDataDouble flinkDouble = (CatalogColumnStatisticsDataDouble) colStat;
                final DoubleColumnStatsData hiveDouble = new DoubleColumnStatsData();
                hiveDouble.clear();
                final Double high = flinkDouble.getMax();
                if (high != null) {
                    hiveDouble.setHighValue(high);
                }
                final Double low = flinkDouble.getMin();
                if (low != null) {
                    hiveDouble.setLowValue(low);
                }
                final Long nulls = flinkDouble.getNullCount();
                if (nulls != null) {
                    hiveDouble.setNumNulls(nulls);
                }
                final Long distinct = flinkDouble.getNdv();
                if (distinct != null) {
                    hiveDouble.setNumDVs(distinct);
                }
                return ColumnStatisticsData.doubleStats(hiveDouble);
            }
            break;
        }
        case DATE: {
            if (colStat instanceof CatalogColumnStatisticsDataDate) {
                // DATE statistics conversion is delegated to the shim loaded for the given Hive version.
                final HiveShim shim = HiveShimLoader.loadHiveShim(hiveVersion);
                return shim.toHiveDateColStats((CatalogColumnStatisticsDataDate) colStat);
            }
            break;
        }
        case VARBINARY:
        case BINARY: {
            if (colStat instanceof CatalogColumnStatisticsDataBinary) {
                final CatalogColumnStatisticsDataBinary flinkBinary = (CatalogColumnStatisticsDataBinary) colStat;
                final BinaryColumnStatsData hiveBinary = new BinaryColumnStatsData();
                hiveBinary.clear();
                final Long maxLength = flinkBinary.getMaxLength();
                if (maxLength != null) {
                    hiveBinary.setMaxColLen(maxLength);
                }
                final Double avgLength = flinkBinary.getAvgLength();
                if (avgLength != null) {
                    hiveBinary.setAvgColLen(avgLength);
                }
                final Long nulls = flinkBinary.getNullCount();
                if (nulls != null) {
                    hiveBinary.setNumNulls(nulls);
                }
                return ColumnStatisticsData.binaryStats(hiveBinary);
            }
            break;
        }
        case DECIMAL: {
            if (colStat instanceof CatalogColumnStatisticsDataDouble) {
                final CatalogColumnStatisticsDataDouble flinkDecimal = (CatalogColumnStatisticsDataDouble) colStat;
                final DecimalColumnStatsData hiveDecimal = new DecimalColumnStatsData();
                final Double high = flinkDecimal.getMax();
                if (high != null) {
                    // in older versions we cannot create HiveDecimal from Double, so convert Double
                    // to BigDecimal first
                    hiveDecimal.setHighValue(toThriftDecimal(HiveDecimal.create(BigDecimal.valueOf(high))));
                }
                final Double low = flinkDecimal.getMin();
                if (low != null) {
                    hiveDecimal.setLowValue(toThriftDecimal(HiveDecimal.create(BigDecimal.valueOf(low))));
                }
                final Long distinct = flinkDecimal.getNdv();
                if (distinct != null) {
                    hiveDecimal.setNumDVs(distinct);
                }
                final Long nulls = flinkDecimal.getNullCount();
                if (nulls != null) {
                    hiveDecimal.setNumNulls(nulls);
                }
                return ColumnStatisticsData.decimalStats(hiveDecimal);
            }
            break;
        }
        default:
            break;
    }
    // Reached for an unhandled type root, or when the statistics class does not match the type.
    throw new CatalogException(String.format("Flink does not support converting ColumnStats '%s' for Hive column " + "type '%s' yet", colStat, colType));
}
use of org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary in project flink by apache.
the class HiveCatalogHiveMetadataTest method testAlterTableColumnStatistics.
/**
 * Creates a table with one column per supported statistics flavour, alters its column
 * statistics, and checks that the statistics read back from the catalog equal what was written.
 */
@Test
public void testAlterTableColumnStatistics() throws Exception {
    final String version = ((HiveCatalog) catalog).getHiveVersion();
    // The DATE column and its statistics are only exercised when the version string compares
    // greater than or equal to HIVE_VERSION_V1_2_0.
    final boolean withDateColumn = version.compareTo(HiveShimLoader.HIVE_VERSION_V1_2_0) >= 0;

    catalog.createDatabase(db1, createDb(), false);

    // Schema and the statistics to be written, keyed by column name.
    final TableSchema.Builder schemaBuilder = TableSchema.builder()
            .field("first", DataTypes.STRING())
            .field("second", DataTypes.INT())
            .field("third", DataTypes.BOOLEAN())
            .field("fourth", DataTypes.DOUBLE())
            .field("fifth", DataTypes.BIGINT())
            .field("sixth", DataTypes.BYTES())
            .field("seventh", DataTypes.DECIMAL(10, 3))
            .field("eighth", DataTypes.DECIMAL(30, 3));
    final Map<String, CatalogColumnStatisticsDataBase> statsByColumn = new HashMap<>();
    statsByColumn.put("first", new CatalogColumnStatisticsDataString(10L, 5.2, 3L, 100L));
    statsByColumn.put("second", new CatalogColumnStatisticsDataLong(0L, 1000L, 3L, 0L));
    statsByColumn.put("third", new CatalogColumnStatisticsDataBoolean(15L, 20L, 3L));
    statsByColumn.put("fourth", new CatalogColumnStatisticsDataDouble(15.02, 20.01, 3L, 10L));
    statsByColumn.put("fifth", new CatalogColumnStatisticsDataLong(0L, 20L, 3L, 2L));
    statsByColumn.put("sixth", new CatalogColumnStatisticsDataBinary(150L, 20D, 3L));
    statsByColumn.put("seventh", new CatalogColumnStatisticsDataDouble(1.23, 99.456, 100L, 0L));
    statsByColumn.put("eighth", new CatalogColumnStatisticsDataDouble(0.123, 123456.789, 5723L, 19L));
    if (withDateColumn) {
        schemaBuilder.field("ninth", DataTypes.DATE());
        statsByColumn.put("ninth", new CatalogColumnStatisticsDataDate(new Date(71L), new Date(17923L), 132L, 0L));
    }

    catalog.createTable(path1, new CatalogTableImpl(schemaBuilder.build(), getBatchTableProperties(), TEST_COMMENT), false);

    // Write the statistics, then read them back and compare.
    final CatalogColumnStatistics expected = new CatalogColumnStatistics(statsByColumn);
    catalog.alterTableColumnStatistics(path1, expected, false);
    checkEquals(expected, catalog.getTableColumnStatistics(path1));
}
use of org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary in project flink by apache.
the class HiveStatsUtil method createTableColumnStats.
/**
 * Create Flink ColumnStats from Hive ColumnStatisticsData.
 *
 * <p>The Hive statistics variant that is set decides which Flink statistics class is produced.
 * Every individual field is copied only when it is set on the Hive side; unset fields become
 * {@code null} in the Flink statistics. Decimal statistics are currently represented as
 * {@link CatalogColumnStatisticsDataDouble}.
 *
 * @param colType Flink data type of the column; only used in the warning for unsupported stats
 * @param stats Hive column statistics to convert
 * @param hiveVersion Hive version used to load the shim that recognises and converts DATE stats
 * @return the converted Flink statistics, or {@code null} if the Hive statistics variant is not
 *     supported
 */
private static CatalogColumnStatisticsDataBase createTableColumnStats(DataType colType, ColumnStatisticsData stats, String hiveVersion) {
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveVersion);
    if (stats.isSetBinaryStats()) {
        BinaryColumnStatsData binaryStats = stats.getBinaryStats();
        return new CatalogColumnStatisticsDataBinary(
                binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null,
                binaryStats.isSetAvgColLen() ? binaryStats.getAvgColLen() : null,
                binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null);
    } else if (stats.isSetBooleanStats()) {
        BooleanColumnStatsData booleanStats = stats.getBooleanStats();
        return new CatalogColumnStatisticsDataBoolean(
                booleanStats.isSetNumTrues() ? booleanStats.getNumTrues() : null,
                booleanStats.isSetNumFalses() ? booleanStats.getNumFalses() : null,
                booleanStats.isSetNumNulls() ? booleanStats.getNumNulls() : null);
    } else if (hiveShim.isDateStats(stats)) {
        // Date statistics are recognised and converted by the version-specific shim.
        return hiveShim.toFlinkDateColStats(stats);
    } else if (stats.isSetDoubleStats()) {
        DoubleColumnStatsData doubleStats = stats.getDoubleStats();
        return new CatalogColumnStatisticsDataDouble(
                doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null,
                doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null,
                doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null,
                doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null);
    } else if (stats.isSetLongStats()) {
        LongColumnStatsData longColStats = stats.getLongStats();
        return new CatalogColumnStatisticsDataLong(
                longColStats.isSetLowValue() ? longColStats.getLowValue() : null,
                longColStats.isSetHighValue() ? longColStats.getHighValue() : null,
                longColStats.isSetNumDVs() ? longColStats.getNumDVs() : null,
                longColStats.isSetNumNulls() ? longColStats.getNumNulls() : null);
    } else if (stats.isSetStringStats()) {
        StringColumnStatsData stringStats = stats.getStringStats();
        return new CatalogColumnStatisticsDataString(
                stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null,
                stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null,
                stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null,
                // The null count must be guarded by its own "is set" flag, not by the NDV flag.
                stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null);
    } else if (stats.isSetDecimalStats()) {
        DecimalColumnStatsData decimalStats = stats.getDecimalStats();
        // for now, just return CatalogColumnStatisticsDataDouble for decimal columns
        Double max = null;
        if (decimalStats.isSetHighValue()) {
            max = toHiveDecimal(decimalStats.getHighValue()).doubleValue();
        }
        Double min = null;
        if (decimalStats.isSetLowValue()) {
            min = toHiveDecimal(decimalStats.getLowValue()).doubleValue();
        }
        Long ndv = decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null;
        Long nullCount = decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null;
        return new CatalogColumnStatisticsDataDouble(min, max, ndv, nullCount);
    } else {
        LOG.warn("Flink does not support converting ColumnStatisticsData '{}' for Hive column type '{}' yet.", stats, colType);
        return null;
    }
}
use of org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary in project flink by apache.
the class GenericInMemoryCatalogTest method createColumnStats.
private CatalogColumnStatistics createColumnStats() {
CatalogColumnStatisticsDataBoolean booleanColStats = new CatalogColumnStatisticsDataBoolean(55L, 45L, 5L);
CatalogColumnStatisticsDataLong longColStats = new CatalogColumnStatisticsDataLong(-123L, 763322L, 23L, 79L);
CatalogColumnStatisticsDataString stringColStats = new CatalogColumnStatisticsDataString(152L, 43.5D, 20L, 0L);
CatalogColumnStatisticsDataDate dateColStats = new CatalogColumnStatisticsDataDate(new Date(71L), new Date(17923L), 1321L, 0L);
CatalogColumnStatisticsDataDouble doubleColStats = new CatalogColumnStatisticsDataDouble(-123.35D, 7633.22D, 23L, 79L);
CatalogColumnStatisticsDataBinary binaryColStats = new CatalogColumnStatisticsDataBinary(755L, 43.5D, 20L);
Map<String, CatalogColumnStatisticsDataBase> colStatsMap = new HashMap<>(6);
colStatsMap.put("b1", booleanColStats);
colStatsMap.put("l2", longColStats);
colStatsMap.put("s3", stringColStats);
colStatsMap.put("d4", dateColStats);
colStatsMap.put("dd5", doubleColStats);
colStatsMap.put("bb6", binaryColStats);
return new CatalogColumnStatistics(colStatsMap);
}
use of org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary in project flink by apache.
the class CatalogTableStatisticsConverter method convertToColumnStats.
/**
 * Converts catalog-level column statistics into planner {@link ColumnStats}.
 *
 * <p>The null count is taken from the input for every flavour. The remaining values depend on
 * the concrete statistics class: boolean, long and double statistics use fixed lengths (1 for
 * boolean, 8 for long and double), string and binary statistics carry over their recorded
 * lengths, and date statistics convert their min/max day counts into {@code Date} values.
 * Values that a flavour does not provide stay {@code null}.
 *
 * @param columnStatisticsData catalog statistics of a single column
 * @return the equivalent {@link ColumnStats}
 * @throws TableException if the statistics class is not one of the handled flavours
 */
private static ColumnStats convertToColumnStats(CatalogColumnStatisticsDataBase columnStatisticsData) {
    // Read first so that the common field is captured before any flavour-specific handling.
    final Long nulls = columnStatisticsData.getNullCount();
    Long distinctValues = null;
    Double averageLength = null;
    Integer maximumLength = null;
    Comparable<?> upperBound = null;
    Comparable<?> lowerBound = null;

    if (columnStatisticsData instanceof CatalogColumnStatisticsDataBoolean) {
        final CatalogColumnStatisticsDataBoolean bool = (CatalogColumnStatisticsDataBoolean) columnStatisticsData;
        final Long falses = bool.getFalseCount();
        final Long trues = bool.getTrueCount();
        // Exactly one distinct value only when both counts are known and just one of them is
        // positive while the other is zero; every other case is reported as two.
        final boolean singleValued = falses != null
                && trues != null
                && ((falses == 0 && trues > 0) || (falses > 0 && trues == 0));
        distinctValues = singleValued ? 1L : 2L;
        averageLength = 1.0;
        maximumLength = 1;
    } else if (columnStatisticsData instanceof CatalogColumnStatisticsDataLong) {
        final CatalogColumnStatisticsDataLong longs = (CatalogColumnStatisticsDataLong) columnStatisticsData;
        distinctValues = longs.getNdv();
        averageLength = 8.0;
        maximumLength = 8;
        upperBound = longs.getMax();
        lowerBound = longs.getMin();
    } else if (columnStatisticsData instanceof CatalogColumnStatisticsDataDouble) {
        final CatalogColumnStatisticsDataDouble doubles = (CatalogColumnStatisticsDataDouble) columnStatisticsData;
        distinctValues = doubles.getNdv();
        averageLength = 8.0;
        maximumLength = 8;
        upperBound = doubles.getMax();
        lowerBound = doubles.getMin();
    } else if (columnStatisticsData instanceof CatalogColumnStatisticsDataString) {
        final CatalogColumnStatisticsDataString strings = (CatalogColumnStatisticsDataString) columnStatisticsData;
        distinctValues = strings.getNdv();
        averageLength = strings.getAvgLength();
        final Long longest = strings.getMaxLength();
        if (longest != null) {
            maximumLength = longest.intValue();
        }
    } else if (columnStatisticsData instanceof CatalogColumnStatisticsDataBinary) {
        final CatalogColumnStatisticsDataBinary binary = (CatalogColumnStatisticsDataBinary) columnStatisticsData;
        averageLength = binary.getAvgLength();
        final Long longest = binary.getMaxLength();
        if (longest != null) {
            maximumLength = longest.intValue();
        }
    } else if (columnStatisticsData instanceof CatalogColumnStatisticsDataDate) {
        final CatalogColumnStatisticsDataDate dates = (CatalogColumnStatisticsDataDate) columnStatisticsData;
        distinctValues = dates.getNdv();
        if (dates.getMax() != null) {
            final int maxDays = (int) dates.getMax().getDaysSinceEpoch();
            upperBound = Date.valueOf(DateTimeUtils.unixDateToString(maxDays));
        }
        if (dates.getMin() != null) {
            final int minDays = (int) dates.getMin().getDaysSinceEpoch();
            lowerBound = Date.valueOf(DateTimeUtils.unixDateToString(minDays));
        }
    } else {
        throw new TableException("Unsupported CatalogColumnStatisticsDataBase: " + columnStatisticsData.getClass().getCanonicalName());
    }

    return ColumnStats.Builder.builder()
            .setNdv(distinctValues)
            .setNullCount(nulls)
            .setAvgLen(averageLength)
            .setMaxLen(maximumLength)
            .setMax(upperBound)
            .setMin(lowerBound)
            .build();
}
Aggregations