use of org.apache.flink.table.catalog.exceptions.CatalogException in project flink by apache.
the class HiveStatsUtil method getColumnStatisticsData.
/**
* Convert Flink ColumnStats to Hive ColumnStatisticsData according to Hive column type. Note we
* currently assume that, in Flink, the max and min of ColumnStats will be same type as the
* Flink column type. For example, for SHORT and Long columns, the max and min of their
* ColumnStats should be of type SHORT and LONG.
*/
private static ColumnStatisticsData getColumnStatisticsData(DataType colType, CatalogColumnStatisticsDataBase colStat, String hiveVersion) {
LogicalTypeRoot type = colType.getLogicalType().getTypeRoot();
if (type.equals(LogicalTypeRoot.CHAR) || type.equals(LogicalTypeRoot.VARCHAR)) {
if (colStat instanceof CatalogColumnStatisticsDataString) {
CatalogColumnStatisticsDataString stringColStat = (CatalogColumnStatisticsDataString) colStat;
StringColumnStatsData hiveStringColumnStats = new StringColumnStatsData();
hiveStringColumnStats.clear();
if (null != stringColStat.getMaxLength()) {
hiveStringColumnStats.setMaxColLen(stringColStat.getMaxLength());
}
if (null != stringColStat.getAvgLength()) {
hiveStringColumnStats.setAvgColLen(stringColStat.getAvgLength());
}
if (null != stringColStat.getNullCount()) {
hiveStringColumnStats.setNumNulls(stringColStat.getNullCount());
}
if (null != stringColStat.getNdv()) {
hiveStringColumnStats.setNumDVs(stringColStat.getNdv());
}
return ColumnStatisticsData.stringStats(hiveStringColumnStats);
}
} else if (type.equals(LogicalTypeRoot.BOOLEAN)) {
if (colStat instanceof CatalogColumnStatisticsDataBoolean) {
CatalogColumnStatisticsDataBoolean booleanColStat = (CatalogColumnStatisticsDataBoolean) colStat;
BooleanColumnStatsData hiveBoolStats = new BooleanColumnStatsData();
hiveBoolStats.clear();
if (null != booleanColStat.getTrueCount()) {
hiveBoolStats.setNumTrues(booleanColStat.getTrueCount());
}
if (null != booleanColStat.getFalseCount()) {
hiveBoolStats.setNumFalses(booleanColStat.getFalseCount());
}
if (null != booleanColStat.getNullCount()) {
hiveBoolStats.setNumNulls(booleanColStat.getNullCount());
}
return ColumnStatisticsData.booleanStats(hiveBoolStats);
}
} else if (type.equals(LogicalTypeRoot.TINYINT) || type.equals(LogicalTypeRoot.SMALLINT) || type.equals(LogicalTypeRoot.INTEGER) || type.equals(LogicalTypeRoot.BIGINT) || type.equals(LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE) || type.equals(LogicalTypeRoot.TIME_WITHOUT_TIME_ZONE) || type.equals(LogicalTypeRoot.TIMESTAMP_WITH_TIME_ZONE)) {
if (colStat instanceof CatalogColumnStatisticsDataLong) {
CatalogColumnStatisticsDataLong longColStat = (CatalogColumnStatisticsDataLong) colStat;
LongColumnStatsData hiveLongColStats = new LongColumnStatsData();
hiveLongColStats.clear();
if (null != longColStat.getMax()) {
hiveLongColStats.setHighValue(longColStat.getMax());
}
if (null != longColStat.getMin()) {
hiveLongColStats.setLowValue(longColStat.getMin());
}
if (null != longColStat.getNdv()) {
hiveLongColStats.setNumDVs(longColStat.getNdv());
}
if (null != longColStat.getNullCount()) {
hiveLongColStats.setNumNulls(longColStat.getNullCount());
}
return ColumnStatisticsData.longStats(hiveLongColStats);
}
} else if (type.equals(LogicalTypeRoot.FLOAT) || type.equals(LogicalTypeRoot.DOUBLE)) {
if (colStat instanceof CatalogColumnStatisticsDataDouble) {
CatalogColumnStatisticsDataDouble doubleColumnStatsData = (CatalogColumnStatisticsDataDouble) colStat;
DoubleColumnStatsData hiveFloatStats = new DoubleColumnStatsData();
hiveFloatStats.clear();
if (null != doubleColumnStatsData.getMax()) {
hiveFloatStats.setHighValue(doubleColumnStatsData.getMax());
}
if (null != doubleColumnStatsData.getMin()) {
hiveFloatStats.setLowValue(doubleColumnStatsData.getMin());
}
if (null != doubleColumnStatsData.getNullCount()) {
hiveFloatStats.setNumNulls(doubleColumnStatsData.getNullCount());
}
if (null != doubleColumnStatsData.getNdv()) {
hiveFloatStats.setNumDVs(doubleColumnStatsData.getNdv());
}
return ColumnStatisticsData.doubleStats(hiveFloatStats);
}
} else if (type.equals(LogicalTypeRoot.DATE)) {
if (colStat instanceof CatalogColumnStatisticsDataDate) {
HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveVersion);
return hiveShim.toHiveDateColStats((CatalogColumnStatisticsDataDate) colStat);
}
} else if (type.equals(LogicalTypeRoot.VARBINARY) || type.equals(LogicalTypeRoot.BINARY)) {
if (colStat instanceof CatalogColumnStatisticsDataBinary) {
CatalogColumnStatisticsDataBinary binaryColumnStatsData = (CatalogColumnStatisticsDataBinary) colStat;
BinaryColumnStatsData hiveBinaryColumnStats = new BinaryColumnStatsData();
hiveBinaryColumnStats.clear();
if (null != binaryColumnStatsData.getMaxLength()) {
hiveBinaryColumnStats.setMaxColLen(binaryColumnStatsData.getMaxLength());
}
if (null != binaryColumnStatsData.getAvgLength()) {
hiveBinaryColumnStats.setAvgColLen(binaryColumnStatsData.getAvgLength());
}
if (null != binaryColumnStatsData.getNullCount()) {
hiveBinaryColumnStats.setNumNulls(binaryColumnStatsData.getNullCount());
}
return ColumnStatisticsData.binaryStats(hiveBinaryColumnStats);
}
} else if (type.equals(LogicalTypeRoot.DECIMAL)) {
if (colStat instanceof CatalogColumnStatisticsDataDouble) {
CatalogColumnStatisticsDataDouble flinkStats = (CatalogColumnStatisticsDataDouble) colStat;
DecimalColumnStatsData hiveStats = new DecimalColumnStatsData();
if (flinkStats.getMax() != null) {
// in older versions we cannot create HiveDecimal from Double, so convert Double
// to BigDecimal first
hiveStats.setHighValue(toThriftDecimal(HiveDecimal.create(BigDecimal.valueOf(flinkStats.getMax()))));
}
if (flinkStats.getMin() != null) {
hiveStats.setLowValue(toThriftDecimal(HiveDecimal.create(BigDecimal.valueOf(flinkStats.getMin()))));
}
if (flinkStats.getNdv() != null) {
hiveStats.setNumDVs(flinkStats.getNdv());
}
if (flinkStats.getNullCount() != null) {
hiveStats.setNumNulls(flinkStats.getNullCount());
}
return ColumnStatisticsData.decimalStats(hiveStats);
}
}
throw new CatalogException(String.format("Flink does not support converting ColumnStats '%s' for Hive column " + "type '%s' yet", colStat, colType));
}
use of org.apache.flink.table.catalog.exceptions.CatalogException in project flink by apache.
the class HiveTestUtils method createHiveConf.
public static HiveConf createHiveConf() {
ClassLoader classLoader = HiveTestUtils.class.getClassLoader();
try {
TEMPORARY_FOLDER.create();
String warehouseDir = TEMPORARY_FOLDER.newFolder().getAbsolutePath() + "/metastore_db";
String warehouseUri = String.format(HIVE_WAREHOUSE_URI_FORMAT, warehouseDir);
HiveConf hiveConf = new HiveConf();
hiveConf.addResource(classLoader.getResource(HiveCatalog.HIVE_SITE_FILE));
hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, TEMPORARY_FOLDER.newFolder("hive_warehouse").getAbsolutePath());
hiveConf.setVar(HiveConf.ConfVars.METASTORECONNECTURLKEY, warehouseUri);
return hiveConf;
} catch (IOException e) {
throw new CatalogException("Failed to create test HiveConf to HiveCatalog.", e);
}
}
use of org.apache.flink.table.catalog.exceptions.CatalogException in project flink by apache.
the class HiveTableSink method consume.
private DataStreamSink<?> consume(ProviderContext providerContext, DataStream<RowData> dataStream, boolean isBounded, DataStructureConverter converter) {
checkAcidTable(catalogTable.getOptions(), identifier.toObjectPath());
try (HiveMetastoreClientWrapper client = HiveMetastoreClientFactory.create(HiveConfUtils.create(jobConf), hiveVersion)) {
Table table = client.getTable(identifier.getDatabaseName(), identifier.getObjectName());
StorageDescriptor sd = table.getSd();
Class hiveOutputFormatClz = hiveShim.getHiveOutputFormatClass(Class.forName(sd.getOutputFormat()));
boolean isCompressed = jobConf.getBoolean(HiveConf.ConfVars.COMPRESSRESULT.varname, false);
HiveWriterFactory writerFactory = new HiveWriterFactory(jobConf, hiveOutputFormatClz, sd.getSerdeInfo(), tableSchema, getPartitionKeyArray(), HiveReflectionUtils.getTableMetadata(hiveShim, table), hiveShim, isCompressed);
String extension = Utilities.getFileExtension(jobConf, isCompressed, (HiveOutputFormat<?, ?>) hiveOutputFormatClz.newInstance());
OutputFileConfig.OutputFileConfigBuilder fileNamingBuilder = OutputFileConfig.builder().withPartPrefix("part-" + UUID.randomUUID().toString()).withPartSuffix(extension == null ? "" : extension);
final int parallelism = Optional.ofNullable(configuredParallelism).orElse(dataStream.getParallelism());
if (isBounded) {
OutputFileConfig fileNaming = fileNamingBuilder.build();
return createBatchSink(dataStream, converter, sd, writerFactory, fileNaming, parallelism);
} else {
if (overwrite) {
throw new IllegalStateException("Streaming mode not support overwrite.");
}
Properties tableProps = HiveReflectionUtils.getTableMetadata(hiveShim, table);
return createStreamSink(providerContext, dataStream, sd, tableProps, writerFactory, fileNamingBuilder, parallelism);
}
} catch (TException e) {
throw new CatalogException("Failed to query Hive metaStore", e);
} catch (IOException e) {
throw new FlinkRuntimeException("Failed to create staging dir", e);
} catch (ClassNotFoundException e) {
throw new FlinkHiveException("Failed to get output format class", e);
} catch (IllegalAccessException | InstantiationException e) {
throw new FlinkHiveException("Failed to instantiate output format instance", e);
}
}
use of org.apache.flink.table.catalog.exceptions.CatalogException in project flink by apache.
the class HiveCatalog method getPartitionColumnStatistics.
@Override
public CatalogColumnStatistics getPartitionColumnStatistics(ObjectPath tablePath, CatalogPartitionSpec partitionSpec) throws PartitionNotExistException, CatalogException {
try {
Partition partition = getHivePartition(tablePath, partitionSpec);
Table hiveTable = getHiveTable(tablePath);
String partName = getEscapedPartitionName(tablePath, partitionSpec, hiveTable);
List<String> partNames = new ArrayList<>();
partNames.add(partName);
Map<String, List<ColumnStatisticsObj>> partitionColumnStatistics = client.getPartitionColumnStatistics(partition.getDbName(), partition.getTableName(), partNames, getFieldNames(partition.getSd().getCols()));
List<ColumnStatisticsObj> columnStatisticsObjs = partitionColumnStatistics.get(partName);
if (columnStatisticsObjs != null && !columnStatisticsObjs.isEmpty()) {
return new CatalogColumnStatistics(HiveStatsUtil.createCatalogColumnStats(columnStatisticsObjs, hiveVersion));
} else {
return CatalogColumnStatistics.UNKNOWN;
}
} catch (TableNotExistException | PartitionSpecInvalidException e) {
throw new PartitionNotExistException(getName(), tablePath, partitionSpec);
} catch (TException e) {
throw new CatalogException(String.format("Failed to get table stats of table %s 's partition %s", tablePath.getFullName(), String.valueOf(partitionSpec)), e);
}
}
use of org.apache.flink.table.catalog.exceptions.CatalogException in project flink by apache.
the class HiveCatalog method getPartition.
@Override
public CatalogPartition getPartition(ObjectPath tablePath, CatalogPartitionSpec partitionSpec) throws PartitionNotExistException, CatalogException {
checkNotNull(tablePath, "Table path cannot be null");
checkNotNull(partitionSpec, "CatalogPartitionSpec cannot be null");
try {
Partition hivePartition = getHivePartition(tablePath, partitionSpec);
Map<String, String> properties = hivePartition.getParameters();
properties.put(SqlCreateHiveTable.TABLE_LOCATION_URI, hivePartition.getSd().getLocation());
String comment = properties.remove(HiveCatalogConfig.COMMENT);
return new CatalogPartitionImpl(properties, comment);
} catch (NoSuchObjectException | MetaException | TableNotExistException | PartitionSpecInvalidException e) {
throw new PartitionNotExistException(getName(), tablePath, partitionSpec, e);
} catch (TException e) {
throw new CatalogException(String.format("Failed to get partition %s of table %s", partitionSpec, tablePath), e);
}
}
Aggregations