Use of org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong in project flink by apache.
The class HiveStatsUtil, method createTableColumnStats.
/**
 * Create Flink ColumnStats from Hive ColumnStatisticsData.
 *
 * <p>Every individual statistic is copied only when the corresponding Hive {@code isSetXxx()}
 * flag is true; otherwise {@code null} is passed to the Flink statistics object.
 *
 * @param colType the column's data type, used only in the warning logged for unsupported stats
 * @param stats the Hive column statistics to convert
 * @param hiveVersion the Hive version used to load the matching {@link HiveShim}
 * @return the converted Flink column statistics, or {@code null} if none of the handled
 *     statistics kinds is set on {@code stats}
 */
private static CatalogColumnStatisticsDataBase createTableColumnStats(DataType colType, ColumnStatisticsData stats, String hiveVersion) {
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveVersion);
    if (stats.isSetBinaryStats()) {
        BinaryColumnStatsData binaryStats = stats.getBinaryStats();
        return new CatalogColumnStatisticsDataBinary(
                binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null,
                binaryStats.isSetAvgColLen() ? binaryStats.getAvgColLen() : null,
                binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null);
    } else if (stats.isSetBooleanStats()) {
        BooleanColumnStatsData booleanStats = stats.getBooleanStats();
        return new CatalogColumnStatisticsDataBoolean(
                booleanStats.isSetNumTrues() ? booleanStats.getNumTrues() : null,
                booleanStats.isSetNumFalses() ? booleanStats.getNumFalses() : null,
                booleanStats.isSetNumNulls() ? booleanStats.getNumNulls() : null);
    } else if (hiveShim.isDateStats(stats)) {
        // Date statistics handling is delegated to the version-specific shim.
        return hiveShim.toFlinkDateColStats(stats);
    } else if (stats.isSetDoubleStats()) {
        DoubleColumnStatsData doubleStats = stats.getDoubleStats();
        return new CatalogColumnStatisticsDataDouble(
                doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null,
                doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null,
                doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null,
                doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null);
    } else if (stats.isSetLongStats()) {
        LongColumnStatsData longColStats = stats.getLongStats();
        return new CatalogColumnStatisticsDataLong(
                longColStats.isSetLowValue() ? longColStats.getLowValue() : null,
                longColStats.isSetHighValue() ? longColStats.getHighValue() : null,
                longColStats.isSetNumDVs() ? longColStats.getNumDVs() : null,
                longColStats.isSetNumNulls() ? longColStats.getNumNulls() : null);
    } else if (stats.isSetStringStats()) {
        StringColumnStatsData stringStats = stats.getStringStats();
        return new CatalogColumnStatisticsDataString(
                stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null,
                stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null,
                stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null,
                // The null count must be guarded by its own flag, not by the NDV flag.
                stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null);
    } else if (stats.isSetDecimalStats()) {
        DecimalColumnStatsData decimalStats = stats.getDecimalStats();
        // for now, just return CatalogColumnStatisticsDataDouble for decimal columns
        Double max = null;
        if (decimalStats.isSetHighValue()) {
            max = toHiveDecimal(decimalStats.getHighValue()).doubleValue();
        }
        Double min = null;
        if (decimalStats.isSetLowValue()) {
            min = toHiveDecimal(decimalStats.getLowValue()).doubleValue();
        }
        Long ndv = decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null;
        Long nullCount = decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null;
        return new CatalogColumnStatisticsDataDouble(min, max, ndv, nullCount);
    } else {
        LOG.warn("Flink does not support converting ColumnStatisticsData '{}' for Hive column type '{}' yet.", stats, colType);
        return null;
    }
}
Use of org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong in project flink by apache.
The class GenericInMemoryCatalogTest, method createColumnStats.
/**
 * Builds a column statistics fixture with six entries, one for each of the boolean, long,
 * string, date, double and binary statistics classes.
 *
 * @return column statistics keyed by the column names "b1", "l2", "s3", "d4", "dd5" and "bb6"
 */
private CatalogColumnStatistics createColumnStats() {
    // Initial capacity mirrors the number of entries registered below.
    final Map<String, CatalogColumnStatisticsDataBase> statsByColumn = new HashMap<>(6);

    statsByColumn.put("b1", new CatalogColumnStatisticsDataBoolean(55L, 45L, 5L));
    statsByColumn.put("l2", new CatalogColumnStatisticsDataLong(-123L, 763322L, 23L, 79L));
    statsByColumn.put("s3", new CatalogColumnStatisticsDataString(152L, 43.5D, 20L, 0L));
    statsByColumn.put(
            "d4",
            new CatalogColumnStatisticsDataDate(new Date(71L), new Date(17923L), 1321L, 0L));
    statsByColumn.put("dd5", new CatalogColumnStatisticsDataDouble(-123.35D, 7633.22D, 23L, 79L));
    statsByColumn.put("bb6", new CatalogColumnStatisticsDataBinary(755L, 43.5D, 20L));

    return new CatalogColumnStatistics(statsByColumn);
}
Use of org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong in project flink by apache.
The class CatalogTableStatisticsConverter, method convertToColumnStats.
/**
 * Converts one catalog column statistics object into a {@link ColumnStats}.
 *
 * <p>The null count is always taken from the input. The remaining fields depend on the concrete
 * statistics class:
 * <ul>
 *   <li>boolean: lengths are fixed to 1; ndv is 1 when both counts are known and exactly one of
 *       them is zero while the other is positive, otherwise 2</li>
 *   <li>long / double: lengths are fixed to 8; ndv, max and min are copied</li>
 *   <li>string: ndv, average length and max length are copied</li>
 *   <li>binary: average length and max length are copied</li>
 *   <li>date: ndv is copied; max and min are converted from days-since-epoch to a date value</li>
 * </ul>
 *
 * @param columnStatisticsData the catalog statistics to convert
 * @return the resulting column statistics
 * @throws TableException if the concrete statistics class is not one of the handled ones
 */
private static ColumnStats convertToColumnStats(CatalogColumnStatisticsDataBase columnStatisticsData) {
    final Long nulls = columnStatisticsData.getNullCount();
    Long distinctValues = null;
    Double averageLength = null;
    Integer maximumLength = null;
    Comparable<?> upperBound = null;
    Comparable<?> lowerBound = null;

    if (columnStatisticsData instanceof CatalogColumnStatisticsDataBoolean) {
        final CatalogColumnStatisticsDataBoolean boolStats =
                (CatalogColumnStatisticsDataBoolean) columnStatisticsData;
        final Long falses = boolStats.getFalseCount();
        final Long trues = boolStats.getTrueCount();
        averageLength = 1.0;
        maximumLength = 1;
        // Only one distinct value when both counts are known and exactly one side is empty.
        final boolean singleValued =
                falses != null
                        && trues != null
                        && ((falses == 0 && trues > 0) || (falses > 0 && trues == 0));
        distinctValues = singleValued ? 1L : 2L;
    } else if (columnStatisticsData instanceof CatalogColumnStatisticsDataLong) {
        final CatalogColumnStatisticsDataLong longStats =
                (CatalogColumnStatisticsDataLong) columnStatisticsData;
        averageLength = 8.0;
        maximumLength = 8;
        distinctValues = longStats.getNdv();
        lowerBound = longStats.getMin();
        upperBound = longStats.getMax();
    } else if (columnStatisticsData instanceof CatalogColumnStatisticsDataDouble) {
        final CatalogColumnStatisticsDataDouble doubleStats =
                (CatalogColumnStatisticsDataDouble) columnStatisticsData;
        averageLength = 8.0;
        maximumLength = 8;
        distinctValues = doubleStats.getNdv();
        lowerBound = doubleStats.getMin();
        upperBound = doubleStats.getMax();
    } else if (columnStatisticsData instanceof CatalogColumnStatisticsDataString) {
        final CatalogColumnStatisticsDataString stringStats =
                (CatalogColumnStatisticsDataString) columnStatisticsData;
        distinctValues = stringStats.getNdv();
        averageLength = stringStats.getAvgLength();
        if (stringStats.getMaxLength() != null) {
            maximumLength = stringStats.getMaxLength().intValue();
        }
    } else if (columnStatisticsData instanceof CatalogColumnStatisticsDataBinary) {
        final CatalogColumnStatisticsDataBinary binaryStats =
                (CatalogColumnStatisticsDataBinary) columnStatisticsData;
        averageLength = binaryStats.getAvgLength();
        if (binaryStats.getMaxLength() != null) {
            maximumLength = binaryStats.getMaxLength().intValue();
        }
    } else if (columnStatisticsData instanceof CatalogColumnStatisticsDataDate) {
        final CatalogColumnStatisticsDataDate dateStats =
                (CatalogColumnStatisticsDataDate) columnStatisticsData;
        distinctValues = dateStats.getNdv();
        if (dateStats.getMax() != null) {
            final int maxDays = (int) dateStats.getMax().getDaysSinceEpoch();
            upperBound = Date.valueOf(DateTimeUtils.unixDateToString(maxDays));
        }
        if (dateStats.getMin() != null) {
            final int minDays = (int) dateStats.getMin().getDaysSinceEpoch();
            lowerBound = Date.valueOf(DateTimeUtils.unixDateToString(minDays));
        }
    } else {
        throw new TableException("Unsupported CatalogColumnStatisticsDataBase: " + columnStatisticsData.getClass().getCanonicalName());
    }

    return ColumnStats.Builder.builder()
            .setNdv(distinctValues)
            .setNullCount(nulls)
            .setAvgLen(averageLength)
            .setMaxLen(maximumLength)
            .setMax(upperBound)
            .setMin(lowerBound)
            .build();
}
Aggregations