Search in sources :

Example 1 with Date

Use of org.apache.flink.table.catalog.stats.Date in the Apache Flink project.

From the class HiveCatalogHiveMetadataTest, method testAlterTableColumnStatistics.

/**
 * Writes column statistics for a freshly created table through
 * {@code alterTableColumnStatistics} and checks, via {@code checkEquals}, that
 * {@code getTableColumnStatistics} hands the same statistics back.
 *
 * <p>The DATE column "ninth" and its statistics are only added when the Hive version reported
 * by the catalog compares greater than or equal to {@code HiveShimLoader.HIVE_VERSION_V1_2_0}.
 */
@Test
public void testAlterTableColumnStatistics() throws Exception {
    // NOTE: this is a plain String comparison of the version strings (lexicographic, not numeric).
    final boolean dateStatsSupported =
            ((HiveCatalog) catalog).getHiveVersion().compareTo(HiveShimLoader.HIVE_VERSION_V1_2_0) >= 0;

    catalog.createDatabase(db1, createDb(), false);

    // Schema: eight columns that are always present, plus an optional DATE column.
    TableSchema.Builder schemaBuilder = TableSchema.builder()
            .field("first", DataTypes.STRING())
            .field("second", DataTypes.INT())
            .field("third", DataTypes.BOOLEAN())
            .field("fourth", DataTypes.DOUBLE())
            .field("fifth", DataTypes.BIGINT())
            .field("sixth", DataTypes.BYTES())
            .field("seventh", DataTypes.DECIMAL(10, 3))
            .field("eighth", DataTypes.DECIMAL(30, 3));
    if (dateStatsSupported) {
        schemaBuilder.field("ninth", DataTypes.DATE());
    }
    catalog.createTable(
            path1,
            new CatalogTableImpl(schemaBuilder.build(), getBatchTableProperties(), TEST_COMMENT),
            false);

    // One statistics entry per column, keyed by the column name used in the schema above.
    Map<String, CatalogColumnStatisticsDataBase> statsByColumn = new HashMap<>();
    statsByColumn.put("first", new CatalogColumnStatisticsDataString(10L, 5.2, 3L, 100L));
    statsByColumn.put("second", new CatalogColumnStatisticsDataLong(0L, 1000L, 3L, 0L));
    statsByColumn.put("third", new CatalogColumnStatisticsDataBoolean(15L, 20L, 3L));
    statsByColumn.put("fourth", new CatalogColumnStatisticsDataDouble(15.02, 20.01, 3L, 10L));
    statsByColumn.put("fifth", new CatalogColumnStatisticsDataLong(0L, 20L, 3L, 2L));
    statsByColumn.put("sixth", new CatalogColumnStatisticsDataBinary(150L, 20D, 3L));
    // Both DECIMAL columns are described with double-typed statistics.
    statsByColumn.put("seventh", new CatalogColumnStatisticsDataDouble(1.23, 99.456, 100L, 0L));
    statsByColumn.put("eighth", new CatalogColumnStatisticsDataDouble(0.123, 123456.789, 5723L, 19L));
    if (dateStatsSupported) {
        statsByColumn.put(
                "ninth",
                new CatalogColumnStatisticsDataDate(new Date(71L), new Date(17923L), 132L, 0L));
    }

    // Round trip: store the statistics, read them back, compare.
    CatalogColumnStatistics expectedStats = new CatalogColumnStatistics(statsByColumn);
    catalog.alterTableColumnStatistics(path1, expectedStats, false);
    checkEquals(expectedStats, catalog.getTableColumnStatistics(path1));
}
Also used : CatalogColumnStatisticsDataDate(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate) TableSchema(org.apache.flink.table.api.TableSchema) CatalogColumnStatisticsDataBase(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBase) HashMap(java.util.HashMap) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) CatalogTable(org.apache.flink.table.catalog.CatalogTable) CatalogColumnStatisticsDataBinary(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary) Date(org.apache.flink.table.catalog.stats.Date) CatalogColumnStatisticsDataDate(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate) CatalogColumnStatistics(org.apache.flink.table.catalog.stats.CatalogColumnStatistics) CatalogColumnStatisticsDataLong(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong) CatalogColumnStatisticsDataDouble(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble) CatalogTableImpl(org.apache.flink.table.catalog.CatalogTableImpl) CatalogColumnStatisticsDataBoolean(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBoolean) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) Test(org.junit.Test)

Example 2 with Date

Use of org.apache.flink.table.catalog.stats.Date in the Apache Flink project.

From the class HiveShimV120, method toFlinkDateColStats.

/**
 * Converts Hive date column statistics into a Flink {@link CatalogColumnStatisticsDataDate}.
 *
 * <p>Every Hive accessor ({@code getDateStats}, the {@code isSet*} flags, the getters and
 * {@code getDaysSinceEpoch}) is looked up and invoked reflectively. A statistic whose
 * {@code isSet*} flag is {@code false} is passed to the Flink object as {@code null}.
 *
 * @param hiveDateColStats Hive statistics object whose {@code getDateStats} result is converted
 * @return Flink date statistics holding the low value, high value, number of distinct values
 *     and number of nulls, each {@code null} when not set on the Hive side
 * @throws CatalogException if a reflective method lookup or invocation fails
 */
@Override
public CatalogColumnStatisticsDataDate toFlinkDateColStats(ColumnStatisticsData hiveDateColStats) {
    try {
        Object dateStats = ColumnStatisticsData.class.getDeclaredMethod("getDateStats").invoke(hiveDateColStats);
        Class<?> dateStatsClz = dateStats.getClass();
        boolean isSetNumDv = (boolean) dateStatsClz.getMethod("isSetNumDVs").invoke(dateStats);
        boolean isSetNumNull = (boolean) dateStatsClz.getMethod("isSetNumNulls").invoke(dateStats);
        boolean isSetHighValue = (boolean) dateStatsClz.getMethod("isSetHighValue").invoke(dateStats);
        boolean isSetLowValue = (boolean) dateStatsClz.getMethod("isSetLowValue").invoke(dateStats);
        Long numDV = isSetNumDv ? (Long) dateStatsClz.getMethod("getNumDVs").invoke(dateStats) : null;
        Long numNull = isSetNumNull ? (Long) dateStatsClz.getMethod("getNumNulls").invoke(dateStats) : null;

        // Only touch a bound when Hive reports it as set: an unset bound may come back as
        // null, and calling getClass() on it would throw a NullPointerException. The
        // getDaysSinceEpoch method is resolved on each bound's own class rather than reusing
        // the one resolved from the other bound.
        Date highDateDays = null;
        if (isSetHighValue) {
            Object hmsHighDate = dateStatsClz.getMethod("getHighValue").invoke(dateStats);
            highDateDays = new Date((Long) hmsHighDate.getClass().getMethod("getDaysSinceEpoch").invoke(hmsHighDate));
        }
        Date lowDateDays = null;
        if (isSetLowValue) {
            Object hmsLowDate = dateStatsClz.getMethod("getLowValue").invoke(dateStats);
            lowDateDays = new Date((Long) hmsLowDate.getClass().getMethod("getDaysSinceEpoch").invoke(hmsLowDate));
        }
        return new CatalogColumnStatisticsDataDate(lowDateDays, highDateDays, numDV, numNull);
    } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException e) {
        throw new CatalogException("Failed to create Flink statistics for date column", e);
    }
}
Also used : CatalogColumnStatisticsDataDate(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate) CatalogException(org.apache.flink.table.catalog.exceptions.CatalogException) Method(java.lang.reflect.Method) Date(org.apache.flink.table.catalog.stats.Date) CatalogColumnStatisticsDataDate(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate) InvocationTargetException(java.lang.reflect.InvocationTargetException) ColumnStatisticsData(org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)

Example 3 with Date

Use of org.apache.flink.table.catalog.stats.Date in the Apache Flink project.

From the class CatalogStatisticsTest, method createColumnStats.

/**
 * Builds a fixed {@link CatalogColumnStatistics} fixture with one entry each for boolean,
 * long, string, date and double statistics, stored under the keys "b1", "l2", "s3", "d4"
 * and "dd5".
 *
 * @return the statistics object wrapping the five entries
 */
private CatalogColumnStatistics createColumnStats() {
    final Map<String, CatalogColumnStatisticsDataBase> statsByKey = new HashMap<>(6);
    statsByKey.put("b1", new CatalogColumnStatisticsDataBoolean(55L, 45L, 5L));
    statsByKey.put("l2", new CatalogColumnStatisticsDataLong(-123L, 763322L, 23L, 77L));
    statsByKey.put("s3", new CatalogColumnStatisticsDataString(152L, 43.5D, 20L, 0L));
    statsByKey.put(
            "d4",
            new CatalogColumnStatisticsDataDate(new Date(71L), new Date(17923L), 100L, 0L));
    statsByKey.put("dd5", new CatalogColumnStatisticsDataDouble(-123.35D, 7633.22D, 73L, 27L));
    return new CatalogColumnStatistics(statsByKey);
}
Also used : CatalogColumnStatisticsDataDate(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate) CatalogColumnStatisticsDataLong(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong) CatalogColumnStatisticsDataDouble(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble) CatalogColumnStatisticsDataBase(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBase) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) CatalogColumnStatisticsDataBoolean(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBoolean) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) Date(org.apache.flink.table.catalog.stats.Date) CatalogColumnStatisticsDataDate(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate) CatalogColumnStatistics(org.apache.flink.table.catalog.stats.CatalogColumnStatistics)

Example 4 with Date

Use of org.apache.flink.table.catalog.stats.Date in the Apache Flink project.

From the class GenericInMemoryCatalogTest, method createColumnStats.

/**
 * Builds a fixed {@link CatalogColumnStatistics} fixture with one entry each for boolean,
 * long, string, date, double and binary statistics, stored under the keys "b1", "l2", "s3",
 * "d4", "dd5" and "bb6".
 *
 * @return the statistics object wrapping the six entries
 */
private CatalogColumnStatistics createColumnStats() {
    final Map<String, CatalogColumnStatisticsDataBase> statsByKey = new HashMap<>(6);
    statsByKey.put("b1", new CatalogColumnStatisticsDataBoolean(55L, 45L, 5L));
    statsByKey.put("l2", new CatalogColumnStatisticsDataLong(-123L, 763322L, 23L, 79L));
    statsByKey.put("s3", new CatalogColumnStatisticsDataString(152L, 43.5D, 20L, 0L));
    statsByKey.put(
            "d4",
            new CatalogColumnStatisticsDataDate(new Date(71L), new Date(17923L), 1321L, 0L));
    statsByKey.put("dd5", new CatalogColumnStatisticsDataDouble(-123.35D, 7633.22D, 23L, 79L));
    statsByKey.put("bb6", new CatalogColumnStatisticsDataBinary(755L, 43.5D, 20L));
    return new CatalogColumnStatistics(statsByKey);
}
Also used : CatalogColumnStatisticsDataDate(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate) CatalogColumnStatisticsDataLong(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong) CatalogColumnStatisticsDataDouble(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble) CatalogColumnStatisticsDataBase(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBase) HashMap(java.util.HashMap) CatalogColumnStatisticsDataBoolean(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBoolean) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) CatalogColumnStatisticsDataBinary(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) Date(org.apache.flink.table.catalog.stats.Date) CatalogColumnStatisticsDataDate(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate) CatalogColumnStatistics(org.apache.flink.table.catalog.stats.CatalogColumnStatistics)

Aggregations

CatalogColumnStatisticsDataDate (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate)4 Date (org.apache.flink.table.catalog.stats.Date)4 HashMap (java.util.HashMap)3 CatalogColumnStatistics (org.apache.flink.table.catalog.stats.CatalogColumnStatistics)3 CatalogColumnStatisticsDataBase (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBase)3 CatalogColumnStatisticsDataBoolean (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBoolean)3 CatalogColumnStatisticsDataDouble (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble)3 CatalogColumnStatisticsDataLong (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong)3 CatalogColumnStatisticsDataString (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString)3 CatalogColumnStatisticsDataBinary (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary)2 InvocationTargetException (java.lang.reflect.InvocationTargetException)1 Method (java.lang.reflect.Method)1 LinkedHashMap (java.util.LinkedHashMap)1 TableSchema (org.apache.flink.table.api.TableSchema)1 CatalogTable (org.apache.flink.table.catalog.CatalogTable)1 CatalogTableImpl (org.apache.flink.table.catalog.CatalogTableImpl)1 CatalogException (org.apache.flink.table.catalog.exceptions.CatalogException)1 ColumnStatisticsData (org.apache.hadoop.hive.metastore.api.ColumnStatisticsData)1 Test (org.junit.Test)1