Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.
The class HiveLookupTableSource, method getLookupFunction.
private TableFunction<RowData> getLookupFunction(int[] keys) {
    final String defaultPartitionName = JobConfUtils.getDefaultPartitionName(jobConf);
    PartitionFetcher.Context<HiveTablePartition> fetcherContext =
            new HiveTablePartitionFetcherContext(
                    tablePath,
                    hiveShim,
                    new JobConfWrapper(jobConf),
                    catalogTable.getPartitionKeys(),
                    getProducedTableSchema().getFieldDataTypes(),
                    getProducedTableSchema().getFieldNames(),
                    configuration,
                    defaultPartitionName);
    final PartitionFetcher<HiveTablePartition> partitionFetcher;
    // avoid lambda capture
    final ObjectPath tableFullPath = tablePath;
    if (catalogTable.getPartitionKeys().isEmpty()) {
        // non-partitioned table, the fetcher fetches the partition which represents the
        // given table
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            partValueList.add(
                    context.getPartition(new ArrayList<>())
                            .orElseThrow(() -> new IllegalArgumentException(String.format(
                                    "Fetch partition fail for hive table %s.", tableFullPath))));
            return partValueList;
        };
    } else if (isStreamingSource()) {
        // streaming-read partitioned table, the fetcher fetches the latest partition of
        // the given table
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            List<PartitionFetcher.Context.ComparablePartitionValue> comparablePartitionValues =
                    context.getComparablePartitionValueList();
            // fetch the latest partition of the partitioned table
            if (comparablePartitionValues.size() > 0) {
                // sort in descending order
                comparablePartitionValues.sort(
                        (o1, o2) -> o2.getComparator().compareTo(o1.getComparator()));
                PartitionFetcher.Context.ComparablePartitionValue maxPartition =
                        comparablePartitionValues.get(0);
                partValueList.add(
                        context.getPartition((List<String>) maxPartition.getPartitionValue())
                                .orElseThrow(() -> new IllegalArgumentException(String.format(
                                        "Fetch partition fail for hive table %s.",
                                        tableFullPath))));
            } else {
                throw new IllegalArgumentException(String.format(
                        "At least one partition is required when set '%s' to 'latest' in temporal join,"
                                + " but actual partition number is '%s' for hive table %s",
                        STREAMING_SOURCE_PARTITION_INCLUDE.key(),
                        comparablePartitionValues.size(),
                        tableFullPath));
            }
            return partValueList;
        };
    } else {
        // bounded-read partitioned table, the fetcher fetches all partitions of the given
        // filesystem table
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            List<PartitionFetcher.Context.ComparablePartitionValue> comparablePartitionValues =
                    context.getComparablePartitionValueList();
            for (PartitionFetcher.Context.ComparablePartitionValue comparablePartitionValue :
                    comparablePartitionValues) {
                partValueList.add(
                        context.getPartition(
                                        (List<String>) comparablePartitionValue.getPartitionValue())
                                .orElseThrow(() -> new IllegalArgumentException(String.format(
                                        "Fetch partition fail for hive table %s.",
                                        tableFullPath))));
            }
            return partValueList;
        };
    }
    PartitionReader<HiveTablePartition, RowData> partitionReader =
            new HiveInputFormatPartitionReader(
                    flinkConf,
                    jobConf,
                    hiveVersion,
                    tablePath,
                    getProducedTableSchema().getFieldDataTypes(),
                    getProducedTableSchema().getFieldNames(),
                    catalogTable.getPartitionKeys(),
                    projectedFields,
                    flinkConf.get(HiveOptions.TABLE_EXEC_HIVE_FALLBACK_MAPRED_READER));
    return new FileSystemLookupFunction<>(
            partitionFetcher,
            fetcherContext,
            partitionReader,
            (RowType) getProducedTableSchema().toRowDataType().getLogicalType(),
            keys,
            hiveTableReloadInterval);
}
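The streaming branch above boils down to: sort the comparable partition values in descending order and keep the first one, i.e. the latest partition. Below is a minimal, self-contained sketch of that selection step, using a hypothetical PartitionValue record in place of Flink's ComparablePartitionValue (all names are illustrative, not Flink API):

import java.util.Comparator;
import java.util.List;
import java.util.Optional;

public class LatestPartitionSketch {

    // Hypothetical stand-in for PartitionFetcher.Context.ComparablePartitionValue:
    // the partition's values plus a comparable key, e.g. a day string for a day partition.
    record PartitionValue(List<String> values, String comparator) {}

    // Equivalent to sorting in descending order by comparator and taking element 0.
    static Optional<PartitionValue> latest(List<PartitionValue> partitions) {
        return partitions.stream().max(Comparator.comparing(PartitionValue::comparator));
    }

    public static void main(String[] args) {
        List<PartitionValue> parts = List.of(
                new PartitionValue(List.of("2024-01-01"), "2024-01-01"),
                new PartitionValue(List.of("2024-01-02"), "2024-01-02"));
        // Prints the 2024-01-02 partition, the latest by comparator.
        System.out.println(latest(parts).orElseThrow());
    }
}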
Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.
The class HiveParserRexNodeConverter, method convertConstant.
public static RexNode convertConstant(ExprNodeConstantDesc literal, RelOptCluster cluster)
        throws SemanticException {
    RexBuilder rexBuilder = cluster.getRexBuilder();
    RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
    PrimitiveTypeInfo hiveType = (PrimitiveTypeInfo) literal.getTypeInfo();
    RelDataType calciteDataType = HiveParserTypeConverter.convert(hiveType, dtFactory);
    PrimitiveObjectInspector.PrimitiveCategory hiveTypeCategory = hiveType.getPrimitiveCategory();
    ConstantObjectInspector coi = literal.getWritableObjectInspector();
    Object value = ObjectInspectorUtils.copyToStandardJavaObject(coi.getWritableConstantValue(), coi);
    RexNode calciteLiteral;
    HiveShim hiveShim = HiveParserUtils.getSessionHiveShim();
    // If the value is null, the type should also be VOID.
    if (value == null) {
        hiveTypeCategory = PrimitiveObjectInspector.PrimitiveCategory.VOID;
    }
    // TODO: Verify if we need to use ConstantObjectInspector to unwrap data
    switch (hiveTypeCategory) {
        case BOOLEAN:
            calciteLiteral = rexBuilder.makeLiteral((Boolean) value);
            break;
        case BYTE:
            calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Byte) value), calciteDataType);
            break;
        case SHORT:
            calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Short) value), calciteDataType);
            break;
        case INT:
            calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Integer) value));
            break;
        case LONG:
            calciteLiteral = rexBuilder.makeBigintLiteral(new BigDecimal((Long) value));
            break;
        // TODO: is Decimal an exact numeric or approximate numeric?
        case DECIMAL:
            if (value instanceof HiveDecimal) {
                value = ((HiveDecimal) value).bigDecimalValue();
            } else if (value instanceof Decimal128) {
                value = ((Decimal128) value).toBigDecimal();
            }
            if (value == null) {
                // For now, we will not run CBO in the presence of invalid decimal literals.
                throw new SemanticException(
                        "Expression " + literal.getExprString() + " is not a valid decimal");
                // TODO: return createNullLiteral(literal);
            }
            BigDecimal bd = (BigDecimal) value;
            BigInteger unscaled = bd.unscaledValue();
            if (unscaled.compareTo(MIN_LONG_BI) >= 0 && unscaled.compareTo(MAX_LONG_BI) <= 0) {
                calciteLiteral = rexBuilder.makeExactLiteral(bd);
            } else {
                // CBO doesn't support unlimited-precision decimals. In practice, this will work.
                // An alternative would be to throw CboSemanticException and fall back to no CBO.
                RelDataType relType = cluster.getTypeFactory()
                        .createSqlType(SqlTypeName.DECIMAL, unscaled.toString().length(), bd.scale());
                calciteLiteral = rexBuilder.makeExactLiteral(bd, relType);
            }
            break;
        case FLOAT:
            calciteLiteral = rexBuilder.makeApproxLiteral(
                    new BigDecimal(Float.toString((Float) value)), calciteDataType);
            break;
        case DOUBLE:
            // TODO: The best solution is to support NaN in expression reduction.
            if (Double.isNaN((Double) value)) {
                throw new SemanticException("NaN");
            }
            calciteLiteral = rexBuilder.makeApproxLiteral(
                    new BigDecimal(Double.toString((Double) value)), calciteDataType);
            break;
        case CHAR:
            if (value instanceof HiveChar) {
                value = ((HiveChar) value).getValue();
            }
            calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
            break;
        case VARCHAR:
            if (value instanceof HiveVarchar) {
                value = ((HiveVarchar) value).getValue();
            }
            calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
            break;
        case STRING:
            Object constantDescVal = literal.getValue();
            constantDescVal = constantDescVal instanceof NlsString
                    ? constantDescVal
                    : asUnicodeString((String) value);
            // Calcite treats a string literal as CHAR; treat it as STRING instead, as Hive does.
            RelDataType type = HiveParserTypeConverter.convert(hiveType, dtFactory);
            // If we get here, the value is not null.
            type = dtFactory.createTypeWithNullability(type, false);
            calciteLiteral = rexBuilder.makeLiteral(constantDescVal, type, true);
            break;
        case DATE:
            LocalDate localDate = HiveParserUtils.getSessionHiveShim().toFlinkDate(value);
            DateString dateString = new DateString(
                    localDate.getYear(), localDate.getMonthValue(), localDate.getDayOfMonth());
            calciteLiteral = rexBuilder.makeDateLiteral(dateString);
            break;
        case TIMESTAMP:
            TimestampString timestampString;
            if (value instanceof Calendar) {
                timestampString = TimestampString.fromCalendarFields((Calendar) value);
            } else {
                LocalDateTime localDateTime =
                        HiveParserUtils.getSessionHiveShim().toFlinkTimestamp(value);
                timestampString = new TimestampString(
                        localDateTime.getYear(),
                        localDateTime.getMonthValue(),
                        localDateTime.getDayOfMonth(),
                        localDateTime.getHour(),
                        localDateTime.getMinute(),
                        localDateTime.getSecond());
                timestampString = timestampString.withNanos(localDateTime.getNano());
            }
            // Hive always treats timestamps as having precision 9.
            calciteLiteral = rexBuilder.makeTimestampLiteral(timestampString, 9);
            break;
        case VOID:
            calciteLiteral = cluster.getRexBuilder()
                    .makeLiteral(null, dtFactory.createSqlType(SqlTypeName.NULL), true);
            break;
        case BINARY:
        case UNKNOWN:
        default:
            if (hiveShim.isIntervalYearMonthType(hiveTypeCategory)) {
                // The Calcite year-month literal value is the total months as a BigDecimal.
                BigDecimal totalMonths =
                        BigDecimal.valueOf(((HiveParserIntervalYearMonth) value).getTotalMonths());
                calciteLiteral = rexBuilder.makeIntervalLiteral(
                        totalMonths,
                        new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1, 1)));
            } else if (hiveShim.isIntervalDayTimeType(hiveTypeCategory)) {
                // The Calcite day-time interval value is milliseconds as a BigDecimal.
                // Seconds converted to millis.
                BigDecimal secsValueBd =
                        BigDecimal.valueOf(((HiveParserIntervalDayTime) value).getTotalSeconds() * 1000);
                // Nanos converted to millis.
                BigDecimal nanosValueBd =
                        BigDecimal.valueOf(((HiveParserIntervalDayTime) value).getNanos(), 6);
                calciteLiteral = rexBuilder.makeIntervalLiteral(
                        secsValueBd.add(nanosValueBd),
                        new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new SqlParserPos(1, 1)));
            } else {
                throw new RuntimeException("Unsupported literal type " + hiveTypeCategory);
            }
    }
    return calciteLiteral;
}
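The DECIMAL branch turns on whether the literal's unscaled value fits in a long; only then can it go straight to makeExactLiteral(BigDecimal) with the default type. Here is a standalone illustration of that check, where MIN_LONG_BI and MAX_LONG_BI mirror the like-named constants in the Flink source (assumed to be BigInteger.valueOf(Long.MIN_VALUE) and BigInteger.valueOf(Long.MAX_VALUE)):

import java.math.BigDecimal;
import java.math.BigInteger;

public class DecimalRangeSketch {

    private static final BigInteger MIN_LONG_BI = BigInteger.valueOf(Long.MIN_VALUE);
    private static final BigInteger MAX_LONG_BI = BigInteger.valueOf(Long.MAX_VALUE);

    // True when the unscaled value fits in a long; otherwise convertConstant builds a
    // widened DECIMAL type whose precision is the number of unscaled digits.
    static boolean fitsInLong(BigDecimal bd) {
        BigInteger unscaled = bd.unscaledValue();
        return unscaled.compareTo(MIN_LONG_BI) >= 0 && unscaled.compareTo(MAX_LONG_BI) <= 0;
    }

    public static void main(String[] args) {
        System.out.println(fitsInLong(new BigDecimal("12345.6789")));           // true
        System.out.println(fitsInLong(new BigDecimal("12345678901234567890"))); // false: 20 digits
    }
}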
Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.
The class HiveParser, method parse.
@Override
public List<Operation> parse(String statement) {
    CatalogManager catalogManager = getCatalogManager();
    Catalog currentCatalog =
            catalogManager.getCatalog(catalogManager.getCurrentCatalog()).orElse(null);
    if (!(currentCatalog instanceof HiveCatalog)) {
        LOG.warn("Current catalog is not HiveCatalog. Falling back to Flink's planner.");
        return super.parse(statement);
    }
    HiveConf hiveConf = new HiveConf(((HiveCatalog) currentCatalog).getHiveConf());
    hiveConf.setVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
    hiveConf.set("hive.allow.udf.load.on.demand", "false");
    hiveConf.setVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "mr");
    HiveShim hiveShim =
            HiveShimLoader.loadHiveShim(((HiveCatalog) currentCatalog).getHiveVersion());
    try {
        // creates SessionState
        startSessionState(hiveConf, catalogManager);
        // We override Hive's grouping function. Refer to the implementation for more details.
        hiveShim.registerTemporaryFunction("grouping", HiveGenericUDFGrouping.class);
        return processCmd(statement, hiveConf, hiveShim, (HiveCatalog) currentCatalog);
    } finally {
        clearSessionState();
    }
}
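For context, this parse method is only reached when the session uses the Hive dialect, and, as the guard above shows, it still falls back to Flink's planner unless the current catalog is a HiveCatalog. A minimal setup sketch; the catalog name, default database, and hive-conf path are placeholders:

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.SqlDialect;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.catalog.hive.HiveCatalog;

public class HiveDialectSetup {
    public static void main(String[] args) {
        TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inBatchMode());

        // Placeholders: point hiveConfDir at a directory containing hive-site.xml.
        String name = "myhive";
        String defaultDatabase = "default";
        String hiveConfDir = "/opt/hive-conf";

        tEnv.registerCatalog(name, new HiveCatalog(name, defaultDatabase, hiveConfDir));
        // Make the HiveCatalog current; otherwise parse() falls back to Flink's planner.
        tEnv.useCatalog(name);
        // Route statements through HiveParser by switching the dialect.
        tEnv.getConfig().setSqlDialect(SqlDialect.HIVE);

        tEnv.executeSql("SHOW TABLES").print();
    }
}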
Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.
The class HiveStatsUtil, method getColumnStatisticsData.
/**
 * Converts Flink ColumnStats to Hive ColumnStatisticsData according to the Hive column type.
 * Note that we currently assume the max and min of a ColumnStats have the same type as the
 * Flink column. For example, for SHORT and LONG columns, the max and min of their ColumnStats
 * should be of type SHORT and LONG, respectively.
 */
private static ColumnStatisticsData getColumnStatisticsData(
        DataType colType, CatalogColumnStatisticsDataBase colStat, String hiveVersion) {
    LogicalTypeRoot type = colType.getLogicalType().getTypeRoot();
    if (type.equals(LogicalTypeRoot.CHAR) || type.equals(LogicalTypeRoot.VARCHAR)) {
        if (colStat instanceof CatalogColumnStatisticsDataString) {
            CatalogColumnStatisticsDataString stringColStat =
                    (CatalogColumnStatisticsDataString) colStat;
            StringColumnStatsData hiveStringColumnStats = new StringColumnStatsData();
            hiveStringColumnStats.clear();
            if (null != stringColStat.getMaxLength()) {
                hiveStringColumnStats.setMaxColLen(stringColStat.getMaxLength());
            }
            if (null != stringColStat.getAvgLength()) {
                hiveStringColumnStats.setAvgColLen(stringColStat.getAvgLength());
            }
            if (null != stringColStat.getNullCount()) {
                hiveStringColumnStats.setNumNulls(stringColStat.getNullCount());
            }
            if (null != stringColStat.getNdv()) {
                hiveStringColumnStats.setNumDVs(stringColStat.getNdv());
            }
            return ColumnStatisticsData.stringStats(hiveStringColumnStats);
        }
    } else if (type.equals(LogicalTypeRoot.BOOLEAN)) {
        if (colStat instanceof CatalogColumnStatisticsDataBoolean) {
            CatalogColumnStatisticsDataBoolean booleanColStat =
                    (CatalogColumnStatisticsDataBoolean) colStat;
            BooleanColumnStatsData hiveBoolStats = new BooleanColumnStatsData();
            hiveBoolStats.clear();
            if (null != booleanColStat.getTrueCount()) {
                hiveBoolStats.setNumTrues(booleanColStat.getTrueCount());
            }
            if (null != booleanColStat.getFalseCount()) {
                hiveBoolStats.setNumFalses(booleanColStat.getFalseCount());
            }
            if (null != booleanColStat.getNullCount()) {
                hiveBoolStats.setNumNulls(booleanColStat.getNullCount());
            }
            return ColumnStatisticsData.booleanStats(hiveBoolStats);
        }
    } else if (type.equals(LogicalTypeRoot.TINYINT)
            || type.equals(LogicalTypeRoot.SMALLINT)
            || type.equals(LogicalTypeRoot.INTEGER)
            || type.equals(LogicalTypeRoot.BIGINT)
            || type.equals(LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE)
            || type.equals(LogicalTypeRoot.TIME_WITHOUT_TIME_ZONE)
            || type.equals(LogicalTypeRoot.TIMESTAMP_WITH_TIME_ZONE)) {
        if (colStat instanceof CatalogColumnStatisticsDataLong) {
            CatalogColumnStatisticsDataLong longColStat = (CatalogColumnStatisticsDataLong) colStat;
            LongColumnStatsData hiveLongColStats = new LongColumnStatsData();
            hiveLongColStats.clear();
            if (null != longColStat.getMax()) {
                hiveLongColStats.setHighValue(longColStat.getMax());
            }
            if (null != longColStat.getMin()) {
                hiveLongColStats.setLowValue(longColStat.getMin());
            }
            if (null != longColStat.getNdv()) {
                hiveLongColStats.setNumDVs(longColStat.getNdv());
            }
            if (null != longColStat.getNullCount()) {
                hiveLongColStats.setNumNulls(longColStat.getNullCount());
            }
            return ColumnStatisticsData.longStats(hiveLongColStats);
        }
    } else if (type.equals(LogicalTypeRoot.FLOAT) || type.equals(LogicalTypeRoot.DOUBLE)) {
        if (colStat instanceof CatalogColumnStatisticsDataDouble) {
            CatalogColumnStatisticsDataDouble doubleColumnStatsData =
                    (CatalogColumnStatisticsDataDouble) colStat;
            DoubleColumnStatsData hiveFloatStats = new DoubleColumnStatsData();
            hiveFloatStats.clear();
            if (null != doubleColumnStatsData.getMax()) {
                hiveFloatStats.setHighValue(doubleColumnStatsData.getMax());
            }
            if (null != doubleColumnStatsData.getMin()) {
                hiveFloatStats.setLowValue(doubleColumnStatsData.getMin());
            }
            if (null != doubleColumnStatsData.getNullCount()) {
                hiveFloatStats.setNumNulls(doubleColumnStatsData.getNullCount());
            }
            if (null != doubleColumnStatsData.getNdv()) {
                hiveFloatStats.setNumDVs(doubleColumnStatsData.getNdv());
            }
            return ColumnStatisticsData.doubleStats(hiveFloatStats);
        }
    } else if (type.equals(LogicalTypeRoot.DATE)) {
        if (colStat instanceof CatalogColumnStatisticsDataDate) {
            HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveVersion);
            return hiveShim.toHiveDateColStats((CatalogColumnStatisticsDataDate) colStat);
        }
    } else if (type.equals(LogicalTypeRoot.VARBINARY) || type.equals(LogicalTypeRoot.BINARY)) {
        if (colStat instanceof CatalogColumnStatisticsDataBinary) {
            CatalogColumnStatisticsDataBinary binaryColumnStatsData =
                    (CatalogColumnStatisticsDataBinary) colStat;
            BinaryColumnStatsData hiveBinaryColumnStats = new BinaryColumnStatsData();
            hiveBinaryColumnStats.clear();
            if (null != binaryColumnStatsData.getMaxLength()) {
                hiveBinaryColumnStats.setMaxColLen(binaryColumnStatsData.getMaxLength());
            }
            if (null != binaryColumnStatsData.getAvgLength()) {
                hiveBinaryColumnStats.setAvgColLen(binaryColumnStatsData.getAvgLength());
            }
            if (null != binaryColumnStatsData.getNullCount()) {
                hiveBinaryColumnStats.setNumNulls(binaryColumnStatsData.getNullCount());
            }
            return ColumnStatisticsData.binaryStats(hiveBinaryColumnStats);
        }
    } else if (type.equals(LogicalTypeRoot.DECIMAL)) {
        if (colStat instanceof CatalogColumnStatisticsDataDouble) {
            CatalogColumnStatisticsDataDouble flinkStats = (CatalogColumnStatisticsDataDouble) colStat;
            DecimalColumnStatsData hiveStats = new DecimalColumnStatsData();
            if (flinkStats.getMax() != null) {
                // in older versions we cannot create HiveDecimal from Double, so convert
                // Double to BigDecimal first
                hiveStats.setHighValue(
                        toThriftDecimal(HiveDecimal.create(BigDecimal.valueOf(flinkStats.getMax()))));
            }
            if (flinkStats.getMin() != null) {
                hiveStats.setLowValue(
                        toThriftDecimal(HiveDecimal.create(BigDecimal.valueOf(flinkStats.getMin()))));
            }
            if (flinkStats.getNdv() != null) {
                hiveStats.setNumDVs(flinkStats.getNdv());
            }
            if (flinkStats.getNullCount() != null) {
                hiveStats.setNumNulls(flinkStats.getNullCount());
            }
            return ColumnStatisticsData.decimalStats(hiveStats);
        }
    }
    throw new CatalogException(String.format(
            "Flink does not support converting ColumnStats '%s' for Hive column type '%s' yet",
            colStat, colType));
}
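For reference, here is a sketch of what the Flink-side input to this conversion looks like for a BIGINT column. Every field of the Flink stats may be null, which is why each branch above guards its setters; the (min, max, ndv, nullCount) constructor order is an assumption based on Flink's catalog stats API:

import java.util.HashMap;
import java.util.Map;
import org.apache.flink.table.catalog.stats.CatalogColumnStatistics;
import org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBase;
import org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong;

public class ColumnStatsSketch {
    public static void main(String[] args) {
        // Stats for a BIGINT column "user_id": min 0, max 1000, 42 distinct values,
        // unknown null count (null fields are simply skipped during conversion).
        Map<String, CatalogColumnStatisticsDataBase> colStats = new HashMap<>();
        colStats.put("user_id", new CatalogColumnStatisticsDataLong(0L, 1_000L, 42L, null));

        CatalogColumnStatistics stats = new CatalogColumnStatistics(colStats);
        // Such stats would typically be written via Catalog#alterTableColumnStatistics,
        // which routes through getColumnStatisticsData above for a HiveCatalog.
        System.out.println(stats.getColumnStatisticsData().keySet());
    }
}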
Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.
The class HiveGenericUDTF, method open.
@Override
public void open(FunctionContext context) throws Exception {
    // Instantiate the wrapped Hive UDTF and forward every row it emits to Flink's collector.
    function = hiveFunctionWrapper.createFunction();
    function.setCollector(input -> {
        Row row = (Row) HiveInspectors.toFlinkObject(returnInspector, input, hiveShim);
        HiveGenericUDTF.this.collect(row);
    });
    // Initialize the UDTF with inspectors derived from the (possibly constant) arguments.
    ObjectInspector[] argumentInspectors =
            HiveInspectors.toInspectors(hiveShim, constantArguments, argTypes);
    returnInspector = function.initialize(argumentInspectors);
    isArgsSingleArray = HiveFunctionUtil.isSingleBoxedArray(argTypes);
    // Pre-compute one Flink-to-Hive conversion per argument; if all of them are identity
    // conversions, evaluation can skip converting arguments altogether.
    conversions = new HiveObjectConversion[argumentInspectors.length];
    for (int i = 0; i < argumentInspectors.length; i++) {
        conversions[i] = HiveInspectors.getConversion(
                argumentInspectors[i], argTypes[i].getLogicalType(), hiveShim);
    }
    allIdentityConverter =
            Arrays.stream(conversions).allMatch(conv -> conv instanceof IdentityConversion);
}
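A usage sketch: loading Flink's HiveModule makes Hive's built-in functions resolvable, and calling a Hive UDTF such as explode goes through the HiveGenericUDTF wrapper opened above. The Hive version string and the source table are placeholders:

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;
import org.apache.flink.table.module.hive.HiveModule;

public class HiveUdtfUsage {
    public static void main(String[] args) {
        TableEnvironment tEnv = TableEnvironment.create(EnvironmentSettings.inBatchMode());

        // "2.3.9" is a placeholder -- match it to the Hive version on the classpath.
        tEnv.loadModule("hive", new HiveModule("2.3.9"));

        // Assumes a table `src` with an ARRAY<STRING> column `arr` exists (placeholder).
        tEnv.executeSql("SELECT item FROM src, LATERAL TABLE(explode(arr)) AS T(item)").print();
    }
}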