
Example 11 with HiveShim

Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.

The class HiveSimpleUDF, method openInternal:

@Override
public void openInternal() {
    LOG.info("Opening HiveSimpleUDF as '{}'", hiveFunctionWrapper.getClassName());
    function = hiveFunctionWrapper.createFunction();
    // Map each Flink argument type to its Hive TypeInfo equivalent.
    List<TypeInfo> typeInfos = new ArrayList<>();
    for (DataType arg : argTypes) {
        typeInfos.add(HiveTypeUtil.toHiveTypeInfo(arg, false));
    }
    try {
        // Resolve the concrete evaluate method for these argument types, then
        // derive the return type's ObjectInspector via reflection.
        method = function.getResolver().getEvalMethod(typeInfos);
        returnInspector = ObjectInspectorFactory.getReflectionObjectInspector(
                method.getGenericReturnType(), ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        ObjectInspector[] argInspectors = new ObjectInspector[typeInfos.size()];
        for (int i = 0; i < argTypes.length; i++) {
            argInspectors[i] = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfos.get(i));
        }
        conversionHelper = new GenericUDFUtils.ConversionHelper(method, argInspectors);
        // Build a per-argument conversion from Flink data to Hive objects.
        conversions = new HiveObjectConversion[argInspectors.length];
        for (int i = 0; i < argInspectors.length; i++) {
            conversions[i] = HiveInspectors.getConversion(argInspectors[i], argTypes[i].getLogicalType(), hiveShim);
        }
        // If every conversion is the identity, arguments can later be passed through as-is.
        allIdentityConverter = Arrays.stream(conversions).allMatch(conv -> conv instanceof IdentityConversion);
    } catch (Exception e) {
        throw new FlinkHiveUDFException(String.format("Failed to open HiveSimpleUDF from %s", hiveFunctionWrapper.getClassName()), e);
    }
}
Also used: DataType (org.apache.flink.table.types.DataType), TypeInfoUtils (org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils), UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException), Arrays (java.util.Arrays), Logger (org.slf4j.Logger), HiveTypeUtil (org.apache.flink.table.catalog.hive.util.HiveTypeUtil), IdentityConversion (org.apache.flink.table.functions.hive.conversion.IdentityConversion), LoggerFactory (org.slf4j.LoggerFactory), GenericUDFUtils (org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils), FunctionRegistry (org.apache.hadoop.hive.ql.exec.FunctionRegistry), HiveShim (org.apache.flink.table.catalog.hive.client.HiveShim), ArrayList (java.util.ArrayList), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), UDF (org.apache.hadoop.hive.ql.exec.UDF), HiveInspectors (org.apache.flink.table.functions.hive.conversion.HiveInspectors), List (java.util.List), ObjectInspectorFactory (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory), Preconditions.checkArgument (org.apache.flink.util.Preconditions.checkArgument), HiveObjectConversion (org.apache.flink.table.functions.hive.conversion.HiveObjectConversion), Internal (org.apache.flink.annotation.Internal), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), Method (java.lang.reflect.Method), HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)
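
For context, a hedged sketch of how the state initialized above might be consumed at call time. The convertArgs helper below is illustrative, not the actual Flink implementation; it assumes only the fields built in openInternal() and HiveObjectConversion's toHiveObject method.

// Illustrative only: applying the per-argument conversions built in openInternal().
private Object[] convertArgs(Object[] args) {
    if (allIdentityConverter) {
        // Every converter is an identity mapping, so pass arguments through untouched.
        return args;
    }
    Object[] converted = new Object[args.length];
    for (int i = 0; i < args.length; i++) {
        converted[i] = conversions[i].toHiveObject(args[i]);
    }
    return converted;
}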

Example 12 with HiveShim

Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.

The class HiveTablePartition, method ofPartition:

/**
 * Creates a HiveTablePartition to represent a Hive partition.
 *
 * @param hiveConf the HiveConf used to connect to HMS
 * @param hiveVersion the version of Hive in use; if null, the version is detected
 *     automatically
 * @param dbName name of the database
 * @param tableName name of the table
 * @param partitionSpec map from each partition column to its value. The map must contain
 *     exactly the partition columns, in the order in which they are defined
 */
public static HiveTablePartition ofPartition(HiveConf hiveConf, @Nullable String hiveVersion, String dbName, String tableName, LinkedHashMap<String, String> partitionSpec) {
    HiveShim hiveShim = getHiveShim(hiveVersion);
    // try-with-resources closes the metastore client automatically.
    try (HiveMetastoreClientWrapper client = new HiveMetastoreClientWrapper(hiveConf, hiveShim)) {
        Table hiveTable = client.getTable(dbName, tableName);
        // Partition values are read in iteration order, which the LinkedHashMap preserves.
        Partition hivePartition = client.getPartition(dbName, tableName, new ArrayList<>(partitionSpec.values()));
        return new HiveTablePartition(hivePartition.getSd(), partitionSpec, HiveReflectionUtils.getTableMetadata(hiveShim, hiveTable));
    } catch (TException e) {
        throw new FlinkHiveException(String.format("Failed to create HiveTablePartition for partition %s of hive table %s.%s", partitionSpec, dbName, tableName), e);
    }
}
Also used: TException (org.apache.thrift.TException), Partition (org.apache.hadoop.hive.metastore.api.Partition), Table (org.apache.hadoop.hive.metastore.api.Table), HiveMetastoreClientWrapper (org.apache.flink.table.catalog.hive.client.HiveMetastoreClientWrapper), HiveShim (org.apache.flink.table.catalog.hive.client.HiveShim)
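
A hedged usage sketch follows; the database, table, and partition column names are hypothetical. Because ofPartition reads the partition values in iteration order, the spec must be a LinkedHashMap populated in partition-column definition order.

// Hypothetical names; illustrates calling ofPartition with an ordered spec.
LinkedHashMap<String, String> partitionSpec = new LinkedHashMap<>();
partitionSpec.put("dt", "2024-01-01");   // first partition column
partitionSpec.put("region", "us");       // second partition column
HiveTablePartition partition = HiveTablePartition.ofPartition(
        hiveConf, null, "mydb", "orders", partitionSpec);  // null hiveVersion: auto-detect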

Example 13 with HiveShim

Use of org.apache.flink.table.catalog.hive.client.HiveShim in project flink by apache.

The class HiveStatsUtil, method createTableColumnStats:

/**
 * Creates Flink ColumnStats from Hive ColumnStatisticsData.
 */
private static CatalogColumnStatisticsDataBase createTableColumnStats(DataType colType, ColumnStatisticsData stats, String hiveVersion) {
    HiveShim hiveShim = HiveShimLoader.loadHiveShim(hiveVersion);
    // Thrift fields default to 0 when unset, so each statistic is mapped to null
    // unless its isSet flag is true.
    if (stats.isSetBinaryStats()) {
        BinaryColumnStatsData binaryStats = stats.getBinaryStats();
        return new CatalogColumnStatisticsDataBinary(binaryStats.isSetMaxColLen() ? binaryStats.getMaxColLen() : null, binaryStats.isSetAvgColLen() ? binaryStats.getAvgColLen() : null, binaryStats.isSetNumNulls() ? binaryStats.getNumNulls() : null);
    } else if (stats.isSetBooleanStats()) {
        BooleanColumnStatsData booleanStats = stats.getBooleanStats();
        return new CatalogColumnStatisticsDataBoolean(booleanStats.isSetNumTrues() ? booleanStats.getNumTrues() : null, booleanStats.isSetNumFalses() ? booleanStats.getNumFalses() : null, booleanStats.isSetNumNulls() ? booleanStats.getNumNulls() : null);
    } else if (hiveShim.isDateStats(stats)) {
        return hiveShim.toFlinkDateColStats(stats);
    } else if (stats.isSetDoubleStats()) {
        DoubleColumnStatsData doubleStats = stats.getDoubleStats();
        return new CatalogColumnStatisticsDataDouble(doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null, doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null, doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null, doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null);
    } else if (stats.isSetLongStats()) {
        LongColumnStatsData longColStats = stats.getLongStats();
        return new CatalogColumnStatisticsDataLong(longColStats.isSetLowValue() ? longColStats.getLowValue() : null, longColStats.isSetHighValue() ? longColStats.getHighValue() : null, longColStats.isSetNumDVs() ? longColStats.getNumDVs() : null, longColStats.isSetNumNulls() ? longColStats.getNumNulls() : null);
    } else if (stats.isSetStringStats()) {
        StringColumnStatsData stringStats = stats.getStringStats();
        // The null count must be guarded by isSetNumNulls(), not isSetNumDVs().
        return new CatalogColumnStatisticsDataString(stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null, stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null, stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null, stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null);
    } else if (stats.isSetDecimalStats()) {
        DecimalColumnStatsData decimalStats = stats.getDecimalStats();
        // for now, just return CatalogColumnStatisticsDataDouble for decimal columns
        Double max = null;
        if (decimalStats.isSetHighValue()) {
            max = toHiveDecimal(decimalStats.getHighValue()).doubleValue();
        }
        Double min = null;
        if (decimalStats.isSetLowValue()) {
            min = toHiveDecimal(decimalStats.getLowValue()).doubleValue();
        }
        Long ndv = decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null;
        Long nullCount = decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null;
        return new CatalogColumnStatisticsDataDouble(min, max, ndv, nullCount);
    } else {
        LOG.warn("Flink does not support converting ColumnStatisticsData '{}' for Hive column type '{}' yet.", stats, colType);
        return null;
    }
}
Also used: BooleanColumnStatsData (org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData), StringColumnStatsData (org.apache.hadoop.hive.metastore.api.StringColumnStatsData), CatalogColumnStatisticsDataBinary (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBinary), LongColumnStatsData (org.apache.hadoop.hive.metastore.api.LongColumnStatsData), CatalogColumnStatisticsDataDouble (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataDouble), BinaryColumnStatsData (org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData), DoubleColumnStatsData (org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData), DecimalColumnStatsData (org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData), CatalogColumnStatisticsDataLong (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataLong), CatalogColumnStatisticsDataBoolean (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataBoolean), CatalogColumnStatisticsDataString (org.apache.flink.table.catalog.stats.CatalogColumnStatisticsDataString), HiveShim (org.apache.flink.table.catalog.hive.client.HiveShim)
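
To make the isSet guards above concrete, a hedged miniature follows; the class comes from the snippet, the values are hypothetical.

// Hypothetical values; shows why the isSet guard matters for Thrift stats objects.
LongColumnStatsData longStats = new LongColumnStatsData();
longStats.setNumDVs(42);  // numNulls is deliberately left unset
Long ndv = longStats.isSetNumDVs() ? longStats.getNumDVs() : null;            // 42
Long nullCount = longStats.isSetNumNulls() ? longStats.getNumNulls() : null;  // null, not 0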

Aggregations

HiveShim (org.apache.flink.table.catalog.hive.client.HiveShim): 13 usages
DataType (org.apache.flink.table.types.DataType): 5 usages
List (java.util.List): 4 usages
ArrayList (java.util.ArrayList): 3 usages
RelDataType (org.apache.calcite.rel.type.RelDataType): 3 usages
VisibleForTesting (org.apache.flink.annotation.VisibleForTesting): 3 usages
CatalogTable (org.apache.flink.table.catalog.CatalogTable): 3 usages
CatalogException (org.apache.flink.table.catalog.exceptions.CatalogException): 3 usages
JobConf (org.apache.hadoop.mapred.JobConf): 3 usages
Logger (org.slf4j.Logger): 3 usages
LoggerFactory (org.slf4j.LoggerFactory): 3 usages
BigDecimal (java.math.BigDecimal): 2 usages
Arrays (java.util.Arrays): 2 usages
HashMap (java.util.HashMap): 2 usages
Optional (java.util.Optional): 2 usages
SqlIntervalQualifier (org.apache.calcite.sql.SqlIntervalQualifier): 2 usages
SqlParserPos (org.apache.calcite.sql.parser.SqlParserPos): 2 usages
Internal (org.apache.flink.annotation.Internal): 2 usages
Configuration (org.apache.flink.configuration.Configuration): 2 usages
ReadableConfig (org.apache.flink.configuration.ReadableConfig): 2 usages