
Example 1 with CatalogException

Use of org.apache.flink.table.catalog.exceptions.CatalogException in project flink by apache.

The class HiveStatsUtil, method getColumnStatisticsData.

/**
 * Converts Flink ColumnStats to Hive ColumnStatisticsData according to the Hive column type.
 * Note that we currently assume the max and min of a ColumnStats have the same type as the
 * Flink column itself. For example, for SHORT and LONG columns, the max and min of their
 * ColumnStats should be of type SHORT and LONG respectively.
 */
private static ColumnStatisticsData getColumnStatisticsData(DataType colType, CatalogColumnStatisticsDataBase colStat, String hiveVersion) {
    LogicalTypeRoot type = colType.getLogicalType().getTypeRoot();
    if (type.equals(LogicalTypeRoot.CHAR) || type.equals(LogicalTypeRoot.VARCHAR)) {
        if (colStat instanceof CatalogColumnStatisticsDataString) {
            CatalogColumnStatisticsDataString stringColStat = (CatalogColumnStatisticsDataString) colStat;
            StringColumnStatsData hiveStringColumnStats = new StringColumnStatsData();
            hiveStringColumnStats.clear();
            if (null != stringColStat.getMaxLength()) {
                hiveStringColumnStats.setMaxColLen(stringColStat.getMaxLength());
            }
            if (null != stringColStat.getAvgLength()) {
                hiveStringColumnStats.setAvgColLen(stringColStat.getAvgLength());
            }
            if (null != stringColStat.getNullCount()) {
                hiveStringColumnStats.setNumNulls(stringColStat.getNullCount());
            }
            if (null != stringColStat.getNdv()) {
                hiveStringColumnStats.setNumDVs(stringColStat.getNdv());
            }
            return ColumnStatisticsData.stringStats(hiveStringColumnStats);
        }
    } else if (type.equals(LogicalTypeRoot.BOOLEAN)) {
        if (colStat instanceof CatalogColumnStatisticsDataBoolean) {
            CatalogColumnStatisticsDataBoolean booleanColStat = (CatalogColumnStatisticsDataBoolean) colStat;
            BooleanColumnStatsData hiveBoolStats = new BooleanColumnStatsData();
            hiveBoolStats.clear();
            if (null != booleanColStat.getTrueCount()) {
                hiveBoolStats.setNumTrues(booleanColStat.getTrueCount());
            }
            if (null != booleanColStat.getFalseCount()) {
                hiveBoolStats.setNumFalses(booleanColStat.getFalseCount());
            }
            if (null != booleanColStat.getNullCount()) {
                hiveBoolStats.setNumNulls(booleanColStat.getNullCount());
            }
            return ColumnStatisticsData.booleanStats(hiveBoolStats);
        }
    } else if (type.equals(LogicalTypeRoot.TINYINT) || type.equals(LogicalTypeRoot.SMALLINT) || type.equals(LogicalTypeRoot.INTEGER) || type.equals(LogicalTypeRoot.BIGINT) || type.equals(LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE) || type.equals(LogicalTypeRoot.TIME_WITHOUT_TIME_ZONE) || type.equals(LogicalTypeRoot.TIMESTAMP_WITH_TIME_ZONE)) {
        if (colStat instanceof CatalogColumnStatisticsDataLong) {
            CatalogColumnStatisticsDataLong longColStat = (CatalogColumnStatisticsDataLong) colStat;
            LongColumnStatsData hiveLongColStats = new LongColumnStatsData();
            hiveLongColStats.clear();
            if (null != longColStat.getMax()) {
                hiveLongColStats.setHighValue(longColStat.getMax());
            }
            if (null != longColStat.getMin()) {
                hiveLongColStats.setLowValue(longColStat.getMin());
            }
            if (null != longColStat.getNdv()) {
                hiveLongColStats.setNumDVs(longColStat.getNdv());
            }
            if (null != longColStat.getNullCount()) {
                hiveLongColStats.setNumNulls(longColStat.getNullCount());
            }
            return ColumnStatisticsData.longStats(hiveLongColStats);
        }
    } else if (type.equals(LogicalTypeRoot.FLOAT) || type.equals(LogicalTypeRoot.DOUBLE)) {
        if (colStat instanceof CatalogColumnStatisticsDataDouble) {
            CatalogColumnStatisticsDataDouble doubleColumnStatsData = (CatalogColumnStatisticsDataDouble) colStat;
            DoubleColumnStatsData hiveFloatStats = new DoubleColumnStatsData();
            hiveFloatStats.clear();
            if (null != doubleColumnStatsData.getMax()) {
                hiveFloatStats.setHighValue(doubleColumnStatsData.getMax());
            }
            if (null != doubleColumnStatsData.getMin()) {
                hiveFloatStats.setLowValue(doubleColumnStatsData.getMin());
            }
            if (null != doubleColumnStatsData.getNullCount()) {
                hiveFloatStats.setNumNulls(doubleColumnStatsData.getNullCount());
            }
            if (null != doubleColumnStatsData.getNdv()) {
                hiveFloatStats.setNumDVs(doubleColumnStatsData.getNdv());
            }
            return ColumnStatisticsData.doubleStats(hiveFloatStats);
        }
    } else if (type.equals(LogicalTypeRoot.DATE)) {
        if (colStat instanceof CatalogColumnStatisticsDataDate) {
            HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveVersion);
            return hiveShim.toHiveDateColStats((CatalogColumnStatisticsDataDate) colStat);
        }
    } else if (type.equals(LogicalTypeRoot.VARBINARY) || type.equals(LogicalTypeRoot.BINARY)) {
        if (colStat instanceof CatalogColumnStatisticsDataBinary) {
            CatalogColumnStatisticsDataBinary binaryColumnStatsData = (CatalogColumnStatisticsDataBinary) colStat;
            BinaryColumnStatsData hiveBinaryColumnStats = new BinaryColumnStatsData();
            hiveBinaryColumnStats.clear();
            if (null != binaryColumnStatsData.getMaxLength()) {
                hiveBinaryColumnStats.setMaxColLen(binaryColumnStatsData.getMaxLength());
            }
            if (null != binaryColumnStatsData.getAvgLength()) {
                hiveBinaryColumnStats.setAvgColLen(binaryColumnStatsData.getAvgLength());
            }
            if (null != binaryColumnStatsData.getNullCount()) {
                hiveBinaryColumnStats.setNumNulls(binaryColumnStatsData.getNullCount());
            }
            return ColumnStatisticsData.binaryStats(hiveBinaryColumnStats);
        }
    } else if (type.equals(LogicalTypeRoot.DECIMAL)) {
        if (colStat instanceof CatalogColumnStatisticsDataDouble) {
            CatalogColumnStatisticsDataDouble flinkStats = (CatalogColumnStatisticsDataDouble) colStat;
            DecimalColumnStatsData hiveStats = new DecimalColumnStatsData();
            if (flinkStats.getMax() != null) {
                // in older versions we cannot create HiveDecimal from Double, so convert Double
                // to BigDecimal first
                hiveStats.setHighValue(toThriftDecimal(HiveDecimal.create(BigDecimal.valueOf(flinkStats.getMax()))));
            }
            if (flinkStats.getMin() != null) {
                hiveStats.setLowValue(toThriftDecimal(HiveDecimal.create(BigDecimal.valueOf(flinkStats.getMin()))));
            }
            if (flinkStats.getNdv() != null) {
                hiveStats.setNumDVs(flinkStats.getNdv());
            }
            if (flinkStats.getNullCount() != null) {
                hiveStats.setNumNulls(flinkStats.getNullCount());
            }
            return ColumnStatisticsData.decimalStats(hiveStats);
        }
    }
    throw new CatalogException(String.format("Flink does not support converting ColumnStats '%s' for Hive column " + "type '%s' yet", colStat, colType));
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) CatalogColumnStatisticsDataDate(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate) CatalogException(org.apache.flink.table.catalog.exceptions.CatalogException) LogicalTypeRoot(org.apache.flink.table.types.logical.LogicalTypeRoot) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) CatalogColumnStatisticsDataBinary(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) CatalogColumnStatisticsDataLong(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong) CatalogColumnStatisticsDataDouble(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble) CatalogColumnStatisticsDataBoolean(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBoolean) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim)
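
To make the null-guard pattern concrete, here is a minimal standalone sketch of the LONG branch. The class name and the literal stats values are ours, and the CatalogColumnStatisticsDataLong constructor order (min, max, ndv, nullCount) is an assumption read off the getters above; the guards matter because the Hive stats objects are Thrift structs that track which optional fields have been set.

import org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;

public class LongStatsConversionSketch {
    public static void main(String[] args) {
        // Flink-side stats for a BIGINT column: min, max, ndv, nullCount (each may be null).
        CatalogColumnStatisticsDataLong flinkStats =
                new CatalogColumnStatisticsDataLong(1L, 1000L, 42L, 7L);

        LongColumnStatsData hiveStats = new LongColumnStatsData();
        hiveStats.clear();
        // Copy only the fields Flink knows; an unset field keeps its Thrift "isSet"
        // flag false, which is why every setter is guarded by a null check.
        if (flinkStats.getMin() != null) {
            hiveStats.setLowValue(flinkStats.getMin());
        }
        if (flinkStats.getMax() != null) {
            hiveStats.setHighValue(flinkStats.getMax());
        }
        if (flinkStats.getNdv() != null) {
            hiveStats.setNumDVs(flinkStats.getNdv());
        }
        if (flinkStats.getNullCount() != null) {
            hiveStats.setNumNulls(flinkStats.getNullCount());
        }

        // Wrap the stats in the Thrift union expected by the metastore API.
        ColumnStatisticsData union = ColumnStatisticsData.longStats(hiveStats);
        System.out.println(union);
    }
}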

Example 2 with CatalogException

Use of org.apache.flink.table.catalog.exceptions.CatalogException in project flink by apache.

The class HiveTestUtils, method createHiveConf.

public static HiveConf createHiveConf() {
    ClassLoader classLoader = HiveTestUtils.class.getClassLoader();
    try {
        TEMPORARY_FOLDER.create();
        String warehouseDir = TEMPORARY_FOLDER.newFolder().getAbsolutePath() + "/metastore_db";
        String warehouseUri = String.format(HIVE_WAREHOUSE_URI_FORMAT, warehouseDir);
        HiveConf hiveConf = new HiveConf();
        hiveConf.addResource(classLoader.getResource(HiveCatalog.HIVE_SITE_FILE));
        hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, TEMPORARY_FOLDER.newFolder("hive_warehouse").getAbsolutePath());
        hiveConf.setVar(HiveConf.ConfVars.METASTORECONNECTURLKEY, warehouseUri);
        return hiveConf;
    } catch (IOException e) {
        throw new CatalogException("Failed to create test HiveConf to HiveCatalog.", e);
    }
}
Also used : CatalogException(org.apache.flink.table.catalog.exceptions.CatalogException) HiveConf(org.apache.hadoop.hive.conf.HiveConf) IOException(java.io.IOException)
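
A caller-side sketch of this helper, assuming HiveTestUtils sits in org.apache.flink.table.catalog.hive as in Flink's connector test sources. Because the IOException is rethrown unchecked, callers only ever see CatalogException:

import org.apache.flink.table.catalog.exceptions.CatalogException;
import org.apache.flink.table.catalog.hive.HiveTestUtils;
import org.apache.hadoop.hive.conf.HiveConf;

public class HiveConfSmokeCheck {
    public static void main(String[] args) {
        try {
            HiveConf conf = HiveTestUtils.createHiveConf();
            // The helper pointed the warehouse and the embedded metastore DB
            // at freshly created temporary folders.
            System.out.println(conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE));
            System.out.println(conf.getVar(HiveConf.ConfVars.METASTORECONNECTURLKEY));
        } catch (CatalogException e) {
            // Temp-folder IO failures arrive wrapped, unchecked.
            System.err.println("Could not build a test HiveConf: " + e.getMessage());
        }
    }
}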

Example 3 with CatalogException

Use of org.apache.flink.table.catalog.exceptions.CatalogException in project flink by apache.

The class HiveTableSink, method consume.

private DataStreamSink<?> consume(ProviderContext providerContext, DataStream<RowData> dataStream, boolean isBounded, DataStructureConverter converter) {
    checkAcidTable(catalogTable.getOptions(), identifier.toObjectPath());
    try (HiveMetastoreClientWrapper client = HiveMetastoreClientFactory.create(HiveConfUtils.create(jobConf), hiveVersion)) {
        Table table = client.getTable(identifier.getDatabaseName(), identifier.getObjectName());
        StorageDescriptor sd = table.getSd();
        Class hiveOutputFormatClz = hiveShim.getHiveOutputFormatClass(Class.forName(sd.getOutputFormat()));
        boolean isCompressed = jobConf.getBoolean(HiveConf.ConfVars.COMPRESSRESULT.varname, false);
        HiveWriterFactory writerFactory = new HiveWriterFactory(jobConf, hiveOutputFormatClz, sd.getSerdeInfo(), tableSchema, getPartitionKeyArray(), HiveReflectionUtils.getTableMetadata(hiveShim, table), hiveShim, isCompressed);
        String extension = Utilities.getFileExtension(jobConf, isCompressed, (HiveOutputFormat<?, ?>) hiveOutputFormatClz.newInstance());
        OutputFileConfig.OutputFileConfigBuilder fileNamingBuilder = OutputFileConfig.builder().withPartPrefix("part-" + UUID.randomUUID().toString()).withPartSuffix(extension == null ? "" : extension);
        final int parallelism = Optional.ofNullable(configuredParallelism).orElse(dataStream.getParallelism());
        if (isBounded) {
            OutputFileConfig fileNaming = fileNamingBuilder.build();
            return createBatchSink(dataStream, converter, sd, writerFactory, fileNaming, parallelism);
        } else {
            if (overwrite) {
                throw new IllegalStateException("Streaming mode does not support overwrite.");
            }
            Properties tableProps = HiveReflectionUtils.getTableMetadata(hiveShim, table);
            return createStreamSink(providerContext, dataStream, sd, tableProps, writerFactory, fileNamingBuilder, parallelism);
        }
    } catch (TException e) {
        throw new CatalogException("Failed to query Hive metaStore", e);
    } catch (IOException e) {
        throw new FlinkRuntimeException("Failed to create staging dir", e);
    } catch (ClassNotFoundException e) {
        throw new FlinkHiveException("Failed to get output format class", e);
    } catch (IllegalAccessException | InstantiationException e) {
        throw new FlinkHiveException("Failed to instantiate output format instance", e);
    }
}
Also used : TException(org.apache.thrift.TException) CatalogTable(org.apache.flink.table.catalog.CatalogTable) Table(org.apache.hadoop.hive.metastore.api.Table) HiveTableUtil.checkAcidTable(org.apache.flink.table.catalog.hive.util.HiveTableUtil.checkAcidTable) StorageDescriptor(org.apache.hadoop.hive.metastore.api.StorageDescriptor) CatalogException(org.apache.flink.table.catalog.exceptions.CatalogException) UncheckedIOException(java.io.UncheckedIOException) IOException(java.io.IOException) Properties(java.util.Properties) OutputFileConfig(org.apache.flink.streaming.api.functions.sink.filesystem.OutputFileConfig) HiveMetastoreClientWrapper(org.apache.flink.table.catalog.hive.client.HiveMetastoreClientWrapper) FlinkRuntimeException(org.apache.flink.util.FlinkRuntimeException) HiveWriterFactory(org.apache.flink.connectors.hive.write.HiveWriterFactory)
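
The catch (TException) branch shows the translation pattern that recurs through these examples: a checked metastore failure becomes the unchecked CatalogException. A hypothetical helper (not Flink API) that isolates the pattern:

import java.util.concurrent.Callable;

import org.apache.flink.table.catalog.exceptions.CatalogException;

public final class MetastoreCalls {

    private MetastoreCalls() {}

    // Runs one metastore call and rethrows any checked failure as the
    // unchecked CatalogException, mirroring the catch (TException) branch above.
    public static <T> T call(String action, Callable<T> body) {
        try {
            return body.call();
        } catch (Exception e) {
            throw new CatalogException("Failed to " + action, e);
        }
    }

    // Usage (client, dbName, and tableName are illustrative):
    //   Table table = MetastoreCalls.call("query Hive metastore",
    //           () -> client.getTable(dbName, tableName));
}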

Example 4 with CatalogException

Use of org.apache.flink.table.catalog.exceptions.CatalogException in project flink by apache.

The class HiveCatalog, method getPartitionColumnStatistics.

@Override
public CatalogColumnStatistics getPartitionColumnStatistics(ObjectPath tablePath, CatalogPartitionSpec partitionSpec) throws PartitionNotExistException, CatalogException {
    try {
        Partition partition = getHivePartition(tablePath, partitionSpec);
        Table hiveTable = getHiveTable(tablePath);
        String partName = getEscapedPartitionName(tablePath, partitionSpec, hiveTable);
        List<String> partNames = new ArrayList<>();
        partNames.add(partName);
        Map<String, List<ColumnStatisticsObj>> partitionColumnStatistics = client.getPartitionColumnStatistics(partition.getDbName(), partition.getTableName(), partNames, getFieldNames(partition.getSd().getCols()));
        List<ColumnStatisticsObj> columnStatisticsObjs = partitionColumnStatistics.get(partName);
        if (columnStatisticsObjs != null && !columnStatisticsObjs.isEmpty()) {
            return new CatalogColumnStatistics(HiveStatsUtil.createCatalogColumnStats(columnStatisticsObjs, hiveVersion));
        } else {
            return CatalogColumnStatistics.UNKNOWN;
        }
    } catch (TableNotExistException | PartitionSpecInvalidException e) {
        throw new PartitionNotExistException(getName(), tablePath, partitionSpec);
    } catch (TException e) {
        throw new CatalogException(String.format("Failed to get table stats of table %s 's partition %s", tablePath.getFullName(), String.valueOf(partitionSpec)), e);
    }
}
Also used : TException(org.apache.thrift.TException) Partition(org.apache.hadoop.hive.metastore.api.Partition) CatalogPartition(org.apache.flink.table.catalog.CatalogPartition) CatalogTable(org.apache.flink.table.catalog.CatalogTable) SqlCreateHiveTable(org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveTable) Table(org.apache.hadoop.hive.metastore.api.Table) CatalogBaseTable(org.apache.flink.table.catalog.CatalogBaseTable) TableNotExistException(org.apache.flink.table.catalog.exceptions.TableNotExistException) ArrayList(java.util.ArrayList) CatalogException(org.apache.flink.table.catalog.exceptions.CatalogException) CatalogColumnStatistics(org.apache.flink.table.catalog.stats.CatalogColumnStatistics) ColumnStatisticsObj(org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj) ArrayList(java.util.ArrayList) List(java.util.List) PartitionNotExistException(org.apache.flink.table.catalog.exceptions.PartitionNotExistException) PartitionSpecInvalidException(org.apache.flink.table.catalog.exceptions.PartitionSpecInvalidException)
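
A caller-side sketch of this override; the Catalog instance, database, table, and partition key are illustrative. Note the asymmetry in the contract: a missing partition surfaces as the checked PartitionNotExistException, while Thrift-level failures propagate as the unchecked CatalogException.

import java.util.Collections;

import org.apache.flink.table.catalog.Catalog;
import org.apache.flink.table.catalog.CatalogPartitionSpec;
import org.apache.flink.table.catalog.ObjectPath;
import org.apache.flink.table.catalog.exceptions.PartitionNotExistException;
import org.apache.flink.table.catalog.stats.CatalogColumnStatistics;

public class PartitionStatsLookup {

    static CatalogColumnStatistics statsOrUnknown(Catalog catalog) {
        ObjectPath table = new ObjectPath("mydb", "orders");
        CatalogPartitionSpec spec =
                new CatalogPartitionSpec(Collections.singletonMap("dt", "2024-01-01"));
        try {
            return catalog.getPartitionColumnStatistics(table, spec);
        } catch (PartitionNotExistException e) {
            // A missing table or an invalid spec is folded into this checked exception.
            return CatalogColumnStatistics.UNKNOWN;
        }
        // CatalogException (e.g. a Thrift failure) is unchecked and simply propagates.
    }
}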

Example 5 with CatalogException

Use of org.apache.flink.table.catalog.exceptions.CatalogException in project flink by apache.

The class HiveCatalog, method getPartition.

@Override
public CatalogPartition getPartition(ObjectPath tablePath, CatalogPartitionSpec partitionSpec) throws PartitionNotExistException, CatalogException {
    checkNotNull(tablePath, "Table path cannot be null");
    checkNotNull(partitionSpec, "CatalogPartitionSpec cannot be null");
    try {
        Partition hivePartition = getHivePartition(tablePath, partitionSpec);
        Map<String, String> properties = hivePartition.getParameters();
        properties.put(SqlCreateHiveTable.TABLE_LOCATION_URI, hivePartition.getSd().getLocation());
        String comment = properties.remove(HiveCatalogConfig.COMMENT);
        return new CatalogPartitionImpl(properties, comment);
    } catch (NoSuchObjectException | MetaException | TableNotExistException | PartitionSpecInvalidException e) {
        throw new PartitionNotExistException(getName(), tablePath, partitionSpec, e);
    } catch (TException e) {
        throw new CatalogException(String.format("Failed to get partition %s of table %s", partitionSpec, tablePath), e);
    }
}
Also used : TException(org.apache.thrift.TException) Partition(org.apache.hadoop.hive.metastore.api.Partition) CatalogPartition(org.apache.flink.table.catalog.CatalogPartition) TableNotExistException(org.apache.flink.table.catalog.exceptions.TableNotExistException) CatalogException(org.apache.flink.table.catalog.exceptions.CatalogException) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException) PartitionNotExistException(org.apache.flink.table.catalog.exceptions.PartitionNotExistException) PartitionSpecInvalidException(org.apache.flink.table.catalog.exceptions.PartitionSpecInvalidException) CatalogPartitionImpl(org.apache.flink.table.catalog.CatalogPartitionImpl) MetaException(org.apache.hadoop.hive.metastore.api.MetaException)
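
A matching caller-side sketch for getPartition, with illustrative names. It reads back the storage location that the method above copied into the partition properties under SqlCreateHiveTable.TABLE_LOCATION_URI:

import java.util.Collections;

import org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveTable;
import org.apache.flink.table.catalog.Catalog;
import org.apache.flink.table.catalog.CatalogPartition;
import org.apache.flink.table.catalog.CatalogPartitionSpec;
import org.apache.flink.table.catalog.ObjectPath;
import org.apache.flink.table.catalog.exceptions.PartitionNotExistException;

public class PartitionLocationLookup {

    static String locationOf(Catalog catalog) throws PartitionNotExistException {
        CatalogPartitionSpec spec =
                new CatalogPartitionSpec(Collections.singletonMap("dt", "2024-01-01"));
        CatalogPartition partition =
                catalog.getPartition(new ObjectPath("mydb", "orders"), spec);
        // getPartition() stored the location in the partition's properties map.
        return partition.getProperties().get(SqlCreateHiveTable.TABLE_LOCATION_URI);
    }
}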

Aggregations

CatalogException (org.apache.flink.table.catalog.exceptions.CatalogException): 53 usages
TException (org.apache.thrift.TException): 28 usages
TableNotExistException (org.apache.flink.table.catalog.exceptions.TableNotExistException): 16 usages
Table (org.apache.hadoop.hive.metastore.api.Table): 15 usages
InvocationTargetException (java.lang.reflect.InvocationTargetException): 14 usages
CatalogTable (org.apache.flink.table.catalog.CatalogTable): 14 usages
Method (java.lang.reflect.Method): 13 usages
CatalogBaseTable (org.apache.flink.table.catalog.CatalogBaseTable): 13 usages
SqlCreateHiveTable (org.apache.flink.sql.parser.hive.ddl.SqlCreateHiveTable): 12 usages
PartitionNotExistException (org.apache.flink.table.catalog.exceptions.PartitionNotExistException): 9 usages
PartitionSpecInvalidException (org.apache.flink.table.catalog.exceptions.PartitionSpecInvalidException): 9 usages
MetaException (org.apache.hadoop.hive.metastore.api.MetaException): 9 usages
ArrayList (java.util.ArrayList): 8 usages
List (java.util.List): 8 usages
FlinkHiveException (org.apache.flink.connectors.hive.FlinkHiveException): 8 usages
DatabaseNotExistException (org.apache.flink.table.catalog.exceptions.DatabaseNotExistException): 8 usages
CatalogPartition (org.apache.flink.table.catalog.CatalogPartition): 7 usages
NoSuchObjectException (org.apache.hadoop.hive.metastore.api.NoSuchObjectException): 7 usages
PartitionAlreadyExistsException (org.apache.flink.table.catalog.exceptions.PartitionAlreadyExistsException): 6 usages
InvalidOperationException (org.apache.hadoop.hive.metastore.api.InvalidOperationException): 6 usages