Example 1 with HiveShim

Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.

The class HiveLookupTableSource, method getLookupFunction:

private TableFunction<RowData> getLookupFunction(int[] keys) {
    final String defaultPartitionName = JobConfUtils.getDefaultPartitionName(jobConf);
    PartitionFetcher.Context<HiveTablePartition> fetcherContext =
            new HiveTablePartitionFetcherContext(
                    tablePath,
                    hiveShim,
                    new JobConfWrapper(jobConf),
                    catalogTable.getPartitionKeys(),
                    getProducedTableSchema().getFieldDataTypes(),
                    getProducedTableSchema().getFieldNames(),
                    configuration,
                    defaultPartitionName);
    final PartitionFetcher<HiveTablePartition> partitionFetcher;
    // avoid lambda capture
    final ObjectPath tableFullPath = tablePath;
    if (catalogTable.getPartitionKeys().isEmpty()) {
        // non-partitioned table, the fetcher fetches the partition which represents the given
        // table.
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            partValueList.add(context.getPartition(new ArrayList<>()).orElseThrow(() -> new IllegalArgumentException(String.format("Fetch partition fail for hive table %s.", tableFullPath))));
            return partValueList;
        };
    } else if (isStreamingSource()) {
        // streaming-read partitioned table, the fetcher fetches the latest partition of the
        // given table.
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            List<PartitionFetcher.Context.ComparablePartitionValue> comparablePartitionValues = context.getComparablePartitionValueList();
            // fetch latest partitions for partitioned table
            if (comparablePartitionValues.size() > 0) {
                // sort in desc order
                comparablePartitionValues.sort((o1, o2) -> o2.getComparator().compareTo(o1.getComparator()));
                PartitionFetcher.Context.ComparablePartitionValue maxPartition = comparablePartitionValues.get(0);
                partValueList.add(context.getPartition((List<String>) maxPartition.getPartitionValue()).orElseThrow(() -> new IllegalArgumentException(String.format("Fetch partition fail for hive table %s.", tableFullPath))));
            } else {
                throw new IllegalArgumentException(
                        String.format(
                                "At least one partition is required when set '%s' to 'latest' in temporal join,"
                                        + " but actual partition number is '%s' for hive table %s",
                                STREAMING_SOURCE_PARTITION_INCLUDE.key(),
                                comparablePartitionValues.size(),
                                tableFullPath));
            }
            return partValueList;
        };
    } else {
        // bounded-read partitioned table, the fetcher fetches all partitions of the given
        // filesystem table.
        partitionFetcher = context -> {
            List<HiveTablePartition> partValueList = new ArrayList<>();
            List<PartitionFetcher.Context.ComparablePartitionValue> comparablePartitionValues = context.getComparablePartitionValueList();
            for (PartitionFetcher.Context.ComparablePartitionValue comparablePartitionValue : comparablePartitionValues) {
                partValueList.add(context.getPartition((List<String>) comparablePartitionValue.getPartitionValue()).orElseThrow(() -> new IllegalArgumentException(String.format("Fetch partition fail for hive table %s.", tableFullPath))));
            }
            return partValueList;
        };
    }
    PartitionReader<HiveTablePartition, RowData> partitionReader =
            new HiveInputFormatPartitionReader(
                    flinkConf,
                    jobConf,
                    hiveVersion,
                    tablePath,
                    getProducedTableSchema().getFieldDataTypes(),
                    getProducedTableSchema().getFieldNames(),
                    catalogTable.getPartitionKeys(),
                    projectedFields,
                    flinkConf.get(HiveOptions.TABLE_EXEC_HIVE_FALLBACK_MAPRED_READER));
    return new FileSystemLookupFunction<>(
            partitionFetcher,
            fetcherContext,
            partitionReader,
            (RowType) getProducedTableSchema().toRowDataType().getLogicalType(),
            keys,
            hiveTableReloadInterval);
}
Also used : HivePartitionUtils(org.apache.flink.connectors.hive.util.HivePartitionUtils) TableFunction(org.apache.flink.table.functions.TableFunction) PartitionReader(org.apache.flink.connector.file.table.PartitionReader) DataType(org.apache.flink.table.types.DataType) CatalogTable(org.apache.flink.table.catalog.CatalogTable) LoggerFactory(org.slf4j.LoggerFactory) STREAMING_SOURCE_PARTITION_INCLUDE(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_PARTITION_INCLUDE) HiveInputFormatPartitionReader(org.apache.flink.connectors.hive.read.HiveInputFormatPartitionReader) JobConfUtils(org.apache.flink.connectors.hive.util.JobConfUtils) RowType(org.apache.flink.table.types.logical.RowType) ObjectPath(org.apache.flink.table.catalog.ObjectPath) Partition(org.apache.hadoop.hive.metastore.api.Partition) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim) ArrayList(java.util.ArrayList) LookupTableSource(org.apache.flink.table.connector.source.LookupTableSource) ReadableConfig(org.apache.flink.configuration.ReadableConfig) Duration(java.time.Duration) HivePartitionFetcherContextBase(org.apache.flink.connectors.hive.read.HivePartitionFetcherContextBase) RowData(org.apache.flink.table.data.RowData) Logger(org.slf4j.Logger) STREAMING_SOURCE_CONSUME_START_OFFSET(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_CONSUME_START_OFFSET) PartitionFetcher(org.apache.flink.connector.file.table.PartitionFetcher) Configuration(org.apache.flink.configuration.Configuration) Preconditions(org.apache.flink.util.Preconditions) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) JobConf(org.apache.hadoop.mapred.JobConf) LOOKUP_JOIN_CACHE_TTL(org.apache.flink.connectors.hive.HiveOptions.LOOKUP_JOIN_CACHE_TTL) List(java.util.List) Optional(java.util.Optional) TableFunctionProvider(org.apache.flink.table.connector.source.TableFunctionProvider) STREAMING_SOURCE_MONITOR_INTERVAL(org.apache.flink.connectors.hive.HiveOptions.STREAMING_SOURCE_MONITOR_INTERVAL) NoSuchObjectException(org.apache.hadoop.hive.metastore.api.NoSuchObjectException)
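
The streaming branch above fetches only the latest partition by sorting the ComparablePartitionValue entries in descending comparator order and taking the head. Below is a minimal, self-contained sketch of that selection logic; the PartitionValue record is a hypothetical stand-in for Flink's PartitionFetcher.Context.ComparablePartitionValue.

import java.util.Comparator;
import java.util.List;

public class LatestPartitionSketch {

    // Hypothetical stand-in: the partition values plus a comparable ordering key.
    record PartitionValue(List<String> values, String comparator) {}

    // Pick the partition with the largest comparator, failing on an empty list,
    // mirroring the streaming-read branch of getLookupFunction.
    static PartitionValue latest(List<PartitionValue> candidates) {
        return candidates.stream()
                .max(Comparator.comparing(PartitionValue::comparator))
                .orElseThrow(() -> new IllegalArgumentException("At least one partition is required"));
    }

    public static void main(String[] args) {
        System.out.println(latest(List.of(
                new PartitionValue(List.of("2021-01-01"), "2021-01-01"),
                new PartitionValue(List.of("2021-01-02"), "2021-01-02"))));
    }
}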

Example 2 with HiveShim

Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.

The class HiveParserRexNodeConverter, method convertConstant:

public static RexNode convertConstant(ExprNodeConstantDesc literal, RelOptCluster cluster) throws SemanticException {
    RexBuilder rexBuilder = cluster.getRexBuilder();
    RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
    PrimitiveTypeInfo hiveType = (PrimitiveTypeInfo) literal.getTypeInfo();
    RelDataType calciteDataType = HiveParserTypeConverter.convert(hiveType, dtFactory);
    PrimitiveObjectInspector.PrimitiveCategory hiveTypeCategory = hiveType.getPrimitiveCategory();
    ConstantObjectInspector coi = literal.getWritableObjectInspector();
    Object value = ObjectInspectorUtils.copyToStandardJavaObject(coi.getWritableConstantValue(), coi);
    RexNode calciteLiteral;
    HiveShim hiveShim = HiveParserUtils.getSessionHiveShim();
    // If value is null, the type should also be VOID.
    if (value == null) {
        hiveTypeCategory = PrimitiveObjectInspector.PrimitiveCategory.VOID;
    }
    // TODO: Verify if we need to use ConstantObjectInspector to unwrap data
    switch(hiveTypeCategory) {
        case BOOLEAN:
            calciteLiteral = rexBuilder.makeLiteral((Boolean) value);
            break;
        case BYTE:
            calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Byte) value), calciteDataType);
            break;
        case SHORT:
            calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Short) value), calciteDataType);
            break;
        case INT:
            calciteLiteral = rexBuilder.makeExactLiteral(new BigDecimal((Integer) value));
            break;
        case LONG:
            calciteLiteral = rexBuilder.makeBigintLiteral(new BigDecimal((Long) value));
            break;
        // TODO: is Decimal an exact numeric or approximate numeric?
        case DECIMAL:
            if (value instanceof HiveDecimal) {
                value = ((HiveDecimal) value).bigDecimalValue();
            } else if (value instanceof Decimal128) {
                value = ((Decimal128) value).toBigDecimal();
            }
            if (value == null) {
                // For now, we will not run CBO in the presence of invalid decimal literals.
                throw new SemanticException("Expression " + literal.getExprString() + " is not a valid decimal");
            // TODO: return createNullLiteral(literal);
            }
            BigDecimal bd = (BigDecimal) value;
            BigInteger unscaled = bd.unscaledValue();
            if (unscaled.compareTo(MIN_LONG_BI) >= 0 && unscaled.compareTo(MAX_LONG_BI) <= 0) {
                calciteLiteral = rexBuilder.makeExactLiteral(bd);
            } else {
                // CBO doesn't support unlimited precision decimals. In practice, this
                // will work...
                // An alternative would be to throw CboSemanticException and fall back
                // to no CBO.
                RelDataType relType = cluster.getTypeFactory().createSqlType(SqlTypeName.DECIMAL, unscaled.toString().length(), bd.scale());
                calciteLiteral = rexBuilder.makeExactLiteral(bd, relType);
            }
            break;
        case FLOAT:
            calciteLiteral = rexBuilder.makeApproxLiteral(new BigDecimal(Float.toString((Float) value)), calciteDataType);
            break;
        case DOUBLE:
            // TODO: The best solution is to support NaN in expression reduction.
            if (Double.isNaN((Double) value)) {
                throw new SemanticException("NaN");
            }
            calciteLiteral = rexBuilder.makeApproxLiteral(new BigDecimal(Double.toString((Double) value)), calciteDataType);
            break;
        case CHAR:
            if (value instanceof HiveChar) {
                value = ((HiveChar) value).getValue();
            }
            calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
            break;
        case VARCHAR:
            if (value instanceof HiveVarchar) {
                value = ((HiveVarchar) value).getValue();
            }
            calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value));
            break;
        case STRING:
            Object constantDescVal = literal.getValue();
            constantDescVal = constantDescVal instanceof NlsString ? constantDescVal : asUnicodeString((String) value);
            // Calcite treats string literals as CHAR type; we treat them as STRING here,
            // just like Hive does.
            RelDataType type = HiveParserTypeConverter.convert(hiveType, dtFactory);
            // if we get here, the value is not null
            type = dtFactory.createTypeWithNullability(type, false);
            calciteLiteral = rexBuilder.makeLiteral(constantDescVal, type, true);
            break;
        case DATE:
            LocalDate localDate = HiveParserUtils.getSessionHiveShim().toFlinkDate(value);
            DateString dateString = new DateString(localDate.getYear(), localDate.getMonthValue(), localDate.getDayOfMonth());
            calciteLiteral = rexBuilder.makeDateLiteral(dateString);
            break;
        case TIMESTAMP:
            TimestampString timestampString;
            if (value instanceof Calendar) {
                timestampString = TimestampString.fromCalendarFields((Calendar) value);
            } else {
                LocalDateTime localDateTime = HiveParserUtils.getSessionHiveShim().toFlinkTimestamp(value);
                timestampString = new TimestampString(localDateTime.getYear(), localDateTime.getMonthValue(), localDateTime.getDayOfMonth(), localDateTime.getHour(), localDateTime.getMinute(), localDateTime.getSecond());
                timestampString = timestampString.withNanos(localDateTime.getNano());
            }
            // Hive always treats timestamps as having precision 9
            calciteLiteral = rexBuilder.makeTimestampLiteral(timestampString, 9);
            break;
        case VOID:
            calciteLiteral = cluster.getRexBuilder().makeLiteral(null, dtFactory.createSqlType(SqlTypeName.NULL), true);
            break;
        case BINARY:
        case UNKNOWN:
        default:
            if (hiveShim.isIntervalYearMonthType(hiveTypeCategory)) {
                // Calcite year-month literal value is months as BigDecimal
                BigDecimal totalMonths = BigDecimal.valueOf(((HiveParserIntervalYearMonth) value).getTotalMonths());
                calciteLiteral = rexBuilder.makeIntervalLiteral(totalMonths, new SqlIntervalQualifier(TimeUnit.YEAR, TimeUnit.MONTH, new SqlParserPos(1, 1)));
            } else if (hiveShim.isIntervalDayTimeType(hiveTypeCategory)) {
                // Calcite day-time interval is millis value as BigDecimal
                // Seconds converted to millis
                BigDecimal secsValueBd = BigDecimal.valueOf(((HiveParserIntervalDayTime) value).getTotalSeconds() * 1000);
                // Nanos converted to millis
                BigDecimal nanosValueBd = BigDecimal.valueOf(((HiveParserIntervalDayTime) value).getNanos(), 6);
                calciteLiteral = rexBuilder.makeIntervalLiteral(secsValueBd.add(nanosValueBd), new SqlIntervalQualifier(TimeUnit.MILLISECOND, null, new SqlParserPos(1, 1)));
            } else {
                throw new RuntimeException("UnSupported Literal type " + hiveTypeCategory);
            }
    }
    return calciteLiteral;
}
Also used : LocalDateTime(java.time.LocalDateTime) HiveChar(org.apache.hadoop.hive.common.type.HiveChar) RelDataType(org.apache.calcite.rel.type.RelDataType) LocalDate(java.time.LocalDate) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) RelDataTypeFactory(org.apache.calcite.rel.type.RelDataTypeFactory) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) RexBuilder(org.apache.calcite.rex.RexBuilder) SemanticException(org.apache.hadoop.hive.ql.parse.SemanticException) SqlParserPos(org.apache.calcite.sql.parser.SqlParserPos) SqlIntervalQualifier(org.apache.calcite.sql.SqlIntervalQualifier) Calendar(java.util.Calendar) Decimal128(org.apache.hadoop.hive.common.type.Decimal128) HiveVarchar(org.apache.hadoop.hive.common.type.HiveVarchar) BigDecimal(java.math.BigDecimal) DateString(org.apache.calcite.util.DateString) BigInteger(java.math.BigInteger) NlsString(org.apache.calcite.util.NlsString) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ConstantObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector) TimestampString(org.apache.calcite.util.TimestampString) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim) HiveParserIntervalDayTime(org.apache.flink.table.planner.delegation.hive.copy.HiveParserIntervalDayTime) RexNode(org.apache.calcite.rex.RexNode)
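
The DECIMAL branch above only calls makeExactLiteral without an explicit type when the literal's unscaled value fits in a long. Below is a minimal sketch of that bound check, assuming MIN_LONG_BI and MAX_LONG_BI are BigInteger.valueOf(Long.MIN_VALUE) and BigInteger.valueOf(Long.MAX_VALUE):

import java.math.BigDecimal;
import java.math.BigInteger;

public class DecimalBoundSketch {

    // Assumed definitions of the bounds referenced in convertConstant.
    private static final BigInteger MIN_LONG_BI = BigInteger.valueOf(Long.MIN_VALUE);
    private static final BigInteger MAX_LONG_BI = BigInteger.valueOf(Long.MAX_VALUE);

    // True if the unscaled value fits in a long, i.e. the literal can use the default
    // exact-literal path; otherwise an explicit DECIMAL(precision, scale) type is built.
    static boolean fitsInLongUnscaled(BigDecimal bd) {
        BigInteger unscaled = bd.unscaledValue();
        return unscaled.compareTo(MIN_LONG_BI) >= 0 && unscaled.compareTo(MAX_LONG_BI) <= 0;
    }

    public static void main(String[] args) {
        System.out.println(fitsInLongUnscaled(new BigDecimal("123.45")));                    // true
        System.out.println(fitsInLongUnscaled(new BigDecimal("123456789012345678901.23")));  // false
    }
}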

Example 3 with HiveShim

Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.

The class HiveParser, method parse:

@Override
public List<Operation> parse(String statement) {
    CatalogManager catalogManager = getCatalogManager();
    Catalog currentCatalog = catalogManager.getCatalog(catalogManager.getCurrentCatalog()).orElse(null);
    if (!(currentCatalog instanceof HiveCatalog)) {
        LOG.warn("Current catalog is not HiveCatalog. Falling back to Flink's planner.");
        return super.parse(statement);
    }
    HiveConf hiveConf = new HiveConf(((HiveCatalog) currentCatalog).getHiveConf());
    hiveConf.setVar(HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
    hiveConf.set("hive.allow.udf.load.on.demand", "false");
    hiveConf.setVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "mr");
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(((HiveCatalog) currentCatalog).getHiveVersion());
    try {
        // creates SessionState
        startSessionState(hiveConf, catalogManager);
        // We override Hive's grouping function. Refer to the implementation for more details.
        hiveShim.registerTemporaryFunction("grouping", HiveGenericUDFGrouping.class);
        return processCmd(statement, hiveConf, hiveShim, (HiveCatalog) currentCatalog);
    } finally {
        clearSessionState();
    }
}
Also used : HiveCatalog(org.apache.flink.table.catalog.hive.HiveCatalog) HiveConf(org.apache.hadoop.hive.conf.HiveConf) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim) CatalogManager(org.apache.flink.table.catalog.CatalogManager) Catalog(org.apache.flink.table.catalog.Catalog)
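
HiveParser obtains the shim via HiveShimLoader.loadHiveShim with the catalog's Hive version. Below is a minimal sketch of loading a shim outside the parser; HiveShimLoader.getHiveVersion() is assumed here as the way to detect the Hive version on the classpath.

import org.apache.flink.table.catalog.hive.client.HiveShim;
import org.apache.flink.table.catalog.hive.client.HiveShimLoader;

public class HiveShimLoadingSketch {
    public static void main(String[] args) {
        // Detect the Hive version on the classpath (assumed helper) and load the matching shim.
        String hiveVersion = HiveShimLoader.getHiveVersion();
        HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveVersion);
        System.out.println("Loaded " + hiveShim.getClass().getSimpleName() + " for Hive " + hiveVersion);
    }
}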

Example 4 with HiveShim

Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.

The class HiveStatsUtil, method getColumnStatisticsData:

/**
 * Converts Flink ColumnStats to Hive ColumnStatisticsData according to the Hive column type.
 * Note that we currently assume that, in Flink, the max and min of ColumnStats are of the same
 * type as the Flink column. For example, for SHORT and LONG columns, the max and min of their
 * ColumnStats should be of type SHORT and LONG, respectively.
 */
private static ColumnStatisticsData getColumnStatisticsData(DataType colType, CatalogColumnStatisticsDataBase colStat, String hiveVersion) {
    LogicalTypeRoot type = colType.getLogicalType().getTypeRoot();
    if (type.equals(LogicalTypeRoot.CHAR) || type.equals(LogicalTypeRoot.VARCHAR)) {
        if (colStat instanceof CatalogColumnStatisticsDataString) {
            CatalogColumnStatisticsDataString stringColStat = (CatalogColumnStatisticsDataString) colStat;
            StringColumnStatsData hiveStringColumnStats = new StringColumnStatsData();
            hiveStringColumnStats.clear();
            if (null != stringColStat.getMaxLength()) {
                hiveStringColumnStats.setMaxColLen(stringColStat.getMaxLength());
            }
            if (null != stringColStat.getAvgLength()) {
                hiveStringColumnStats.setAvgColLen(stringColStat.getAvgLength());
            }
            if (null != stringColStat.getNullCount()) {
                hiveStringColumnStats.setNumNulls(stringColStat.getNullCount());
            }
            if (null != stringColStat.getNdv()) {
                hiveStringColumnStats.setNumDVs(stringColStat.getNdv());
            }
            return ColumnStatisticsData.stringStats(hiveStringColumnStats);
        }
    } else if (type.equals(LogicalTypeRoot.BOOLEAN)) {
        if (colStat instanceof CatalogColumnStatisticsDataBoolean) {
            CatalogColumnStatisticsDataBoolean booleanColStat = (CatalogColumnStatisticsDataBoolean) colStat;
            BooleanColumnStatsData hiveBoolStats = new BooleanColumnStatsData();
            hiveBoolStats.clear();
            if (null != booleanColStat.getTrueCount()) {
                hiveBoolStats.setNumTrues(booleanColStat.getTrueCount());
            }
            if (null != booleanColStat.getFalseCount()) {
                hiveBoolStats.setNumFalses(booleanColStat.getFalseCount());
            }
            if (null != booleanColStat.getNullCount()) {
                hiveBoolStats.setNumNulls(booleanColStat.getNullCount());
            }
            return ColumnStatisticsData.booleanStats(hiveBoolStats);
        }
    } else if (type.equals(LogicalTypeRoot.TINYINT)
            || type.equals(LogicalTypeRoot.SMALLINT)
            || type.equals(LogicalTypeRoot.INTEGER)
            || type.equals(LogicalTypeRoot.BIGINT)
            || type.equals(LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE)
            || type.equals(LogicalTypeRoot.TIME_WITHOUT_TIME_ZONE)
            || type.equals(LogicalTypeRoot.TIMESTAMP_WITH_TIME_ZONE)) {
        if (colStat instanceof CatalogColumnStatisticsDataLong) {
            CatalogColumnStatisticsDataLong longColStat = (CatalogColumnStatisticsDataLong) colStat;
            LongColumnStatsData hiveLongColStats = new LongColumnStatsData();
            hiveLongColStats.clear();
            if (null != longColStat.getMax()) {
                hiveLongColStats.setHighValue(longColStat.getMax());
            }
            if (null != longColStat.getMin()) {
                hiveLongColStats.setLowValue(longColStat.getMin());
            }
            if (null != longColStat.getNdv()) {
                hiveLongColStats.setNumDVs(longColStat.getNdv());
            }
            if (null != longColStat.getNullCount()) {
                hiveLongColStats.setNumNulls(longColStat.getNullCount());
            }
            return ColumnStatisticsData.longStats(hiveLongColStats);
        }
    } else if (type.equals(LogicalTypeRoot.FLOAT) || type.equals(LogicalTypeRoot.DOUBLE)) {
        if (colStat instanceof CatalogColumnStatisticsDataDouble) {
            CatalogColumnStatisticsDataDouble doubleColumnStatsData = (CatalogColumnStatisticsDataDouble) colStat;
            DoubleColumnStatsData hiveFloatStats = new DoubleColumnStatsData();
            hiveFloatStats.clear();
            if (null != doubleColumnStatsData.getMax()) {
                hiveFloatStats.setHighValue(doubleColumnStatsData.getMax());
            }
            if (null != doubleColumnStatsData.getMin()) {
                hiveFloatStats.setLowValue(doubleColumnStatsData.getMin());
            }
            if (null != doubleColumnStatsData.getNullCount()) {
                hiveFloatStats.setNumNulls(doubleColumnStatsData.getNullCount());
            }
            if (null != doubleColumnStatsData.getNdv()) {
                hiveFloatStats.setNumDVs(doubleColumnStatsData.getNdv());
            }
            return ColumnStatisticsData.doubleStats(hiveFloatStats);
        }
    } else if (type.equals(LogicalTypeRoot.DATE)) {
        if (colStat instanceof CatalogColumnStatisticsDataDate) {
            HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveVersion);
            return hiveShim.toHiveDateColStats((CatalogColumnStatisticsDataDate) colStat);
        }
    } else if (type.equals(LogicalTypeRoot.VARBINARY) || type.equals(LogicalTypeRoot.BINARY)) {
        if (colStat instanceof CatalogColumnStatisticsDataBinary) {
            CatalogColumnStatisticsDataBinary binaryColumnStatsData = (CatalogColumnStatisticsDataBinary) colStat;
            BinaryColumnStatsData hiveBinaryColumnStats = new BinaryColumnStatsData();
            hiveBinaryColumnStats.clear();
            if (null != binaryColumnStatsData.getMaxLength()) {
                hiveBinaryColumnStats.setMaxColLen(binaryColumnStatsData.getMaxLength());
            }
            if (null != binaryColumnStatsData.getAvgLength()) {
                hiveBinaryColumnStats.setAvgColLen(binaryColumnStatsData.getAvgLength());
            }
            if (null != binaryColumnStatsData.getNullCount()) {
                hiveBinaryColumnStats.setNumNulls(binaryColumnStatsData.getNullCount());
            }
            return ColumnStatisticsData.binaryStats(hiveBinaryColumnStats);
        }
    } else if (type.equals(LogicalTypeRoot.DECIMAL)) {
        if (colStat instanceof CatalogColumnStatisticsDataDouble) {
            CatalogColumnStatisticsDataDouble flinkStats = (CatalogColumnStatisticsDataDouble) colStat;
            DecimalColumnStatsData hiveStats = new DecimalColumnStatsData();
            if (flinkStats.getMax() != null) {
                // in older versions we cannot create HiveDecimal from Double, so convert Double
                // to BigDecimal first
                hiveStats.setHighValue(toThriftDecimal(HiveDecimal.create(BigDecimal.valueOf(flinkStats.getMax()))));
            }
            if (flinkStats.getMin() != null) {
                hiveStats.setLowValue(toThriftDecimal(HiveDecimal.create(BigDecimal.valueOf(flinkStats.getMin()))));
            }
            if (flinkStats.getNdv() != null) {
                hiveStats.setNumDVs(flinkStats.getNdv());
            }
            if (flinkStats.getNullCount() != null) {
                hiveStats.setNumNulls(flinkStats.getNullCount());
            }
            return ColumnStatisticsData.decimalStats(hiveStats);
        }
    }
    throw new CatalogException(String.format("Flink does not support converting ColumnStats '%s' for Hive column " + "type '%s' yet", colStat, colType));
}
Also used : BooleanColumnStatsData(org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData) CatalogColumnStatisticsDataDate(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDate) CatalogException(org.apache.flink.table.catalog.exceptions.CatalogException) LogicalTypeRoot(org.apache.flink.table.types.logical.LogicalTypeRoot) StringColumnStatsData(org.apache.hadoop.hive.metastore.api.StringColumnStatsData) LongColumnStatsData(org.apache.hadoop.hive.metastore.api.LongColumnStatsData) CatalogColumnStatisticsDataBinary(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary) BinaryColumnStatsData(org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData) DoubleColumnStatsData(org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData) DecimalColumnStatsData(org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData) CatalogColumnStatisticsDataLong(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong) CatalogColumnStatisticsDataDouble(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble) CatalogColumnStatisticsDataBoolean(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBoolean) CatalogColumnStatisticsDataString(org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim)
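
For the BIGINT-like branch above, the conversion is a straight field-by-field copy from Flink's CatalogColumnStatisticsDataLong to Hive's LongColumnStatsData. Below is a minimal sketch of that mapping, assuming the Flink constructor takes (min, max, ndv, nullCount) in that order:

import org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong;
import org.apache.hadoop.hive.metastore.api.LongColumnStatsData;

public class LongStatsMappingSketch {
    public static void main(String[] args) {
        // Assumed constructor order: min, max, ndv, nullCount.
        CatalogColumnStatisticsDataLong flinkStats =
                new CatalogColumnStatisticsDataLong(1L, 100L, 42L, 0L);

        LongColumnStatsData hiveStats = new LongColumnStatsData();
        hiveStats.clear();
        // Flink min/max map to Hive low/high values; NDV and null counts map directly.
        hiveStats.setLowValue(flinkStats.getMin());
        hiveStats.setHighValue(flinkStats.getMax());
        hiveStats.setNumDVs(flinkStats.getNdv());
        hiveStats.setNumNulls(flinkStats.getNullCount());
        System.out.println(hiveStats);
    }
}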

Example 5 with HiveShim

Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.

The class HiveGenericUDTF, method open:

@Override
public void open(FunctionContext context) throws Exception {
    function = hiveFunctionWrapper.createFunction();
    function.setCollector(input -> {
        Row row = (Row) HiveInspectors.toFlinkObject(returnInspector, input, hiveShim);
        HiveGenericUDTF.this.collect(row);
    });
    ObjectInspector[] argumentInspectors = HiveInspectors.toInspectors(hiveShim, constantArguments, argTypes);
    returnInspector = function.initialize(argumentInspectors);
    isArgsSingleArray = HiveFunctionUtil.isSingleBoxedArray(argTypes);
    conversions = new HiveObjectConversion[argumentInspectors.length];
    for (int i = 0; i < argumentInspectors.length; i++) {
        conversions[i] = HiveInspectors.getConversion(argumentInspectors[i], argTypes[i].getLogicalType(), hiveShim);
    }
    allIdentityConverter = Arrays.stream(conversions).allMatch(conv -> conv instanceof IdentityConversion);
}
Also used : TableFunction(org.apache.flink.table.functions.TableFunction) DataType(org.apache.flink.table.types.DataType) UDFArgumentException(org.apache.hadoop.hive.ql.exec.UDFArgumentException) Arrays(java.util.Arrays) Logger(org.slf4j.Logger) GenericUDTF(org.apache.hadoop.hive.ql.udf.generic.GenericUDTF) HiveTypeUtil(org.apache.flink.table.catalog.hive.util.HiveTypeUtil) TypeInfoLogicalTypeConverter(org.apache.flink.table.runtime.types.TypeInfoLogicalTypeConverter) IdentityConversion(org.apache.flink.table.functions.hive.conversion.IdentityConversion) LoggerFactory(org.slf4j.LoggerFactory) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) HiveShim(org.apache.flink.table.catalog.hive.client.HiveShim) Collector(org.apache.hadoop.hive.ql.udf.generic.Collector) VisibleForTesting(org.apache.flink.annotation.VisibleForTesting) HiveInspectors(org.apache.flink.table.functions.hive.conversion.HiveInspectors) Preconditions.checkArgument(org.apache.flink.util.Preconditions.checkArgument) HiveObjectConversion(org.apache.flink.table.functions.hive.conversion.HiveObjectConversion) HiveFunctionUtil(org.apache.flink.table.functions.hive.util.HiveFunctionUtil) Internal(org.apache.flink.annotation.Internal) Row(org.apache.flink.types.Row) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) FunctionContext(org.apache.flink.table.functions.FunctionContext) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException)
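
The last line of open() checks whether every argument conversion is an identity conversion, so the per-record conversion loop can be skipped. Below is a minimal sketch of that pattern with a hypothetical Conversion interface (not Flink's HiveObjectConversion):

import java.util.Arrays;
import java.util.function.Function;

public class IdentityConversionSketch {

    // Hypothetical stand-ins for HiveObjectConversion / IdentityConversion.
    interface Conversion extends Function<Object, Object> {}

    static final class IdentityConversion implements Conversion {
        @Override
        public Object apply(Object o) {
            return o;
        }
    }

    public static void main(String[] args) {
        Conversion[] conversions = {new IdentityConversion(), new IdentityConversion()};
        // If all conversions are identities, arguments can be passed through unchanged.
        boolean allIdentityConverter =
                Arrays.stream(conversions).allMatch(conv -> conv instanceof IdentityConversion);
        System.out.println("skip per-argument conversion: " + allIdentityConverter);
    }
}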

Aggregations

HiveShim (org.apache.flink.table.catalog.hive.client.HiveShim) 13
DataType (org.apache.flink.table.types.DataType) 5
List (java.util.List) 4
ArrayList (java.util.ArrayList) 3
RelDataType (org.apache.calcite.rel.type.RelDataType) 3
VisibleForTesting (org.apache.flink.annotation.VisibleForTesting) 3
CatalogTable (org.apache.flink.table.catalog.CatalogTable) 3
CatalogException (org.apache.flink.table.catalog.exceptions.CatalogException) 3
JobConf (org.apache.hadoop.mapred.JobConf) 3
Logger (org.slf4j.Logger) 3
LoggerFactory (org.slf4j.LoggerFactory) 3
BigDecimal (java.math.BigDecimal) 2
Arrays (java.util.Arrays) 2
HashMap (java.util.HashMap) 2
Optional (java.util.Optional) 2
SqlIntervalQualifier (org.apache.calcite.sql.SqlIntervalQualifier) 2
SqlParserPos (org.apache.calcite.sql.parser.SqlParserPos) 2
Internal (org.apache.flink.annotation.Internal) 2
Configuration (org.apache.flink.configuration.Configuration) 2
ReadableConfig (org.apache.flink.configuration.ReadableConfig) 2