Example 6 with LogicalType

Use of org.apache.flink.table.types.logical.LogicalType in project flink by apache.

From the class HiveParserUtils, method getGenericUDAFEvaluator.

// Returns the GenericUDAFEvaluator for the aggregation. This is called once for each GroupBy
// aggregation.
// TODO: Requiring a GenericUDAFEvaluator means we only support hive UDAFs. Need to avoid this
// to support flink UDAFs.
public static GenericUDAFEvaluator getGenericUDAFEvaluator(
        String aggName,
        ArrayList<ExprNodeDesc> aggParameters,
        HiveParserASTNode aggTree,
        boolean isDistinct,
        boolean isAllColumns,
        SqlOperatorTable opTable)
        throws SemanticException {
    ArrayList<ObjectInspector> originalParameterTypeInfos = getWritableObjectInspector(aggParameters);
    GenericUDAFEvaluator result = FunctionRegistry.getGenericUDAFEvaluator(aggName, originalParameterTypeInfos, isDistinct, isAllColumns);
    if (result == null) {
        // this happens for temp functions
        SqlOperator sqlOperator = getSqlOperator(aggName, opTable, SqlFunctionCategory.USER_DEFINED_FUNCTION);
        if (sqlOperator instanceof HiveAggSqlFunction) {
            HiveGenericUDAF hiveGenericUDAF = (HiveGenericUDAF) ((HiveAggSqlFunction) sqlOperator).makeFunction(new Object[0], new LogicalType[0]);
            result = hiveGenericUDAF.createEvaluator(originalParameterTypeInfos.toArray(new ObjectInspector[0]));
        }
    }
    if (null == result) {
        String reason = "Looking for UDAF Evaluator \"" + aggName + "\" with parameters " + originalParameterTypeInfos;
        throw new SemanticException(HiveParserErrorMsg.getMsg(ErrorMsg.INVALID_FUNCTION_SIGNATURE, aggTree.getChild(0), reason));
    }
    return result;
}
Also used: ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), GenericUDAFEvaluator (org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator), SqlOperator (org.apache.calcite.sql.SqlOperator), HiveGenericUDAF (org.apache.flink.table.functions.hive.HiveGenericUDAF), LogicalType (org.apache.flink.table.types.logical.LogicalType), NlsString (org.apache.calcite.util.NlsString), HiveAggSqlFunction (org.apache.flink.table.planner.functions.utils.HiveAggSqlFunction), SemanticException (org.apache.hadoop.hive.ql.parse.SemanticException)
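
As a usage sketch, a caller resolving a plain COUNT aggregate might invoke the helper as below. The wrapper class, argument names, and import paths are illustrative assumptions based on the Hive connector's layout, not code from the Flink repository.

import java.util.ArrayList;
import org.apache.calcite.sql.SqlOperatorTable;
import org.apache.flink.table.planner.delegation.hive.HiveParserUtils;
import org.apache.flink.table.planner.delegation.hive.copy.HiveParserASTNode;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;

class EvaluatorLookupSketch {

    // Resolve the evaluator for COUNT(col): neither DISTINCT nor COUNT(*),
    // so both boolean flags are false.
    static GenericUDAFEvaluator resolveCount(
            ArrayList<ExprNodeDesc> aggArgs,
            HiveParserASTNode aggAst,
            SqlOperatorTable opTable) throws SemanticException {
        return HiveParserUtils.getGenericUDAFEvaluator(
                "count", aggArgs, aggAst, false, false, opTable);
    }
}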

Example 7 with LogicalType

Use of org.apache.flink.table.types.logical.LogicalType in project flink by apache.

From the class HiveTypeUtil, method toHiveTypeInfo.

/**
 * Convert Flink DataType to Hive TypeInfo. For types with a precision parameter, e.g.
 * timestamp, the supported precisions in Hive and Flink can be different. Therefore the
 * conversion will fail for those types if the precision is not supported by Hive and
 * checkPrecision is true.
 *
 * @param dataType a Flink DataType
 * @param checkPrecision whether to fail the conversion if the precision of the DataType is not
 *     supported by Hive
 * @return the corresponding Hive data type
 */
public static TypeInfo toHiveTypeInfo(DataType dataType, boolean checkPrecision) {
    checkNotNull(dataType, "type cannot be null");
    LogicalType logicalType = dataType.getLogicalType();
    return logicalType.accept(new TypeInfoLogicalTypeVisitor(dataType, checkPrecision));
}
Also used: LogicalType (org.apache.flink.table.types.logical.LogicalType)
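
A minimal usage sketch: DECIMAL(10, 2) is representable in both systems (both cap decimal precision at 38), so the conversion succeeds even with checkPrecision = true; a precision Hive cannot represent, such as an unsupported timestamp precision, would instead make the call throw.

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.catalog.hive.util.HiveTypeUtil;
import org.apache.flink.table.types.DataType;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

class ToHiveTypeInfoSketch {
    public static void main(String[] args) {
        DataType flinkType = DataTypes.DECIMAL(10, 2);
        // checkPrecision = true: fail fast if Hive lacks the precision.
        TypeInfo hiveType = HiveTypeUtil.toHiveTypeInfo(flinkType, true);
        System.out.println(hiveType); // prints: decimal(10,2)
    }
}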

Example 8 with LogicalType

Use of org.apache.flink.table.types.logical.LogicalType in project flink by apache.

From the class ArrowUtils, method createArrowFieldWriterForArray.

private static ArrowFieldWriter<ArrayData> createArrowFieldWriterForArray(ValueVector vector, LogicalType fieldType) {
    if (vector instanceof TinyIntVector) {
        return TinyIntWriter.forArray((TinyIntVector) vector);
    } else if (vector instanceof SmallIntVector) {
        return SmallIntWriter.forArray((SmallIntVector) vector);
    } else if (vector instanceof IntVector) {
        return IntWriter.forArray((IntVector) vector);
    } else if (vector instanceof BigIntVector) {
        return BigIntWriter.forArray((BigIntVector) vector);
    } else if (vector instanceof BitVector) {
        return BooleanWriter.forArray((BitVector) vector);
    } else if (vector instanceof Float4Vector) {
        return FloatWriter.forArray((Float4Vector) vector);
    } else if (vector instanceof Float8Vector) {
        return DoubleWriter.forArray((Float8Vector) vector);
    } else if (vector instanceof VarCharVector) {
        return VarCharWriter.forArray((VarCharVector) vector);
    } else if (vector instanceof VarBinaryVector) {
        return VarBinaryWriter.forArray((VarBinaryVector) vector);
    } else if (vector instanceof DecimalVector) {
        DecimalVector decimalVector = (DecimalVector) vector;
        return DecimalWriter.forArray(decimalVector, getPrecision(decimalVector), decimalVector.getScale());
    } else if (vector instanceof DateDayVector) {
        return DateWriter.forArray((DateDayVector) vector);
    } else if (vector instanceof TimeSecVector || vector instanceof TimeMilliVector || vector instanceof TimeMicroVector || vector instanceof TimeNanoVector) {
        return TimeWriter.forArray(vector);
    } else if (vector instanceof TimeStampVector && ((ArrowType.Timestamp) vector.getField().getType()).getTimezone() == null) {
        int precision;
        if (fieldType instanceof LocalZonedTimestampType) {
            precision = ((LocalZonedTimestampType) fieldType).getPrecision();
        } else {
            precision = ((TimestampType) fieldType).getPrecision();
        }
        return TimestampWriter.forArray(vector, precision);
    } else if (vector instanceof ListVector) {
        ListVector listVector = (ListVector) vector;
        LogicalType elementType = ((ArrayType) fieldType).getElementType();
        return ArrayWriter.forArray(listVector, createArrowFieldWriterForArray(listVector.getDataVector(), elementType));
    } else if (vector instanceof StructVector) {
        RowType rowType = (RowType) fieldType;
        ArrowFieldWriter<RowData>[] fieldsWriters = new ArrowFieldWriter[rowType.getFieldCount()];
        for (int i = 0; i < fieldsWriters.length; i++) {
            fieldsWriters[i] = createArrowFieldWriterForRow(((StructVector) vector).getVectorById(i), rowType.getTypeAt(i));
        }
        return RowWriter.forArray((StructVector) vector, fieldsWriters);
    } else {
        throw new UnsupportedOperationException(String.format("Unsupported type %s.", fieldType));
    }
}
Also used: BitVector (org.apache.arrow.vector.BitVector), StructVector (org.apache.arrow.vector.complex.StructVector), Float4Vector (org.apache.arrow.vector.Float4Vector), TimeSecVector (org.apache.arrow.vector.TimeSecVector), TimeMilliVector (org.apache.arrow.vector.TimeMilliVector), LogicalType (org.apache.flink.table.types.logical.LogicalType), RowType (org.apache.flink.table.types.logical.RowType), ArrowFieldWriter (org.apache.flink.table.runtime.arrow.writers.ArrowFieldWriter), VarBinaryVector (org.apache.arrow.vector.VarBinaryVector), DateDayVector (org.apache.arrow.vector.DateDayVector), DecimalVector (org.apache.arrow.vector.DecimalVector), TimeNanoVector (org.apache.arrow.vector.TimeNanoVector), SmallIntVector (org.apache.arrow.vector.SmallIntVector), BigIntVector (org.apache.arrow.vector.BigIntVector), TinyIntVector (org.apache.arrow.vector.TinyIntVector), IntVector (org.apache.arrow.vector.IntVector), Float8Vector (org.apache.arrow.vector.Float8Vector), VarCharVector (org.apache.arrow.vector.VarCharVector), LocalZonedTimestampType (org.apache.flink.table.types.logical.LocalZonedTimestampType), TimeStampVector (org.apache.arrow.vector.TimeStampVector), TimeMicroVector (org.apache.arrow.vector.TimeMicroVector), ListVector (org.apache.arrow.vector.complex.ListVector)
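
The method above is private; it is reached through the public writer factories in the same class. A hedged end-to-end sketch, assuming the public ArrowUtils entry points toArrowSchema and createRowDataArrowWriter: building a writer for a single ARRAY<INT> column, which exercises the ListVector branch and the recursive call shown above.

import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.runtime.arrow.ArrowUtils;
import org.apache.flink.table.runtime.arrow.ArrowWriter;
import org.apache.flink.table.types.logical.ArrayType;
import org.apache.flink.table.types.logical.IntType;
import org.apache.flink.table.types.logical.RowType;

class ArrayWriterSketch {
    public static void main(String[] args) {
        // One column of type ARRAY<INT>.
        RowType rowType = RowType.of(new ArrayType(new IntType()));
        try (RootAllocator allocator = new RootAllocator();
                VectorSchemaRoot root = VectorSchemaRoot.create(
                        ArrowUtils.toArrowSchema(rowType), allocator)) {
            // For the list column this ends up in createArrowFieldWriterForArray
            // with the INT element type.
            ArrowWriter<RowData> writer = ArrowUtils.createRowDataArrowWriter(root, rowType);
            // writer.write(row); ... writer.finish();
        }
    }
}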

Example 9 with LogicalType

Use of org.apache.flink.table.types.logical.LogicalType in project flink by apache.

From the class ElasticsearchDynamicSinkFactoryBase, method getPrimaryKeyLogicalTypesWithIndex.

List<LogicalTypeWithIndex> getPrimaryKeyLogicalTypesWithIndex(Context context) {
    DataType physicalRowDataType = context.getPhysicalRowDataType();
    int[] primaryKeyIndexes = context.getPrimaryKeyIndexes();
    if (primaryKeyIndexes.length != 0) {
        DataType pkDataType = Projection.of(primaryKeyIndexes).project(physicalRowDataType);
        ElasticsearchValidationUtils.validatePrimaryKey(pkDataType);
    }
    ResolvedSchema resolvedSchema = context.getCatalogTable().getResolvedSchema();
    return Arrays.stream(primaryKeyIndexes).mapToObj(index -> {
        Optional<Column> column = resolvedSchema.getColumn(index);
        if (!column.isPresent()) {
            throw new IllegalStateException(String.format("No primary key column found with index '%s'.", index));
        }
        LogicalType logicalType = column.get().getDataType().getLogicalType();
        return new LogicalTypeWithIndex(index, logicalType);
    }).collect(Collectors.toList());
}
Also used: DataType (org.apache.flink.table.types.DataType), EncodingFormat (org.apache.flink.table.connector.format.EncodingFormat), Arrays (java.util.Arrays), BULK_FLUSH_MAX_SIZE_OPTION (org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.BULK_FLUSH_MAX_SIZE_OPTION), SerializationFormatFactory (org.apache.flink.table.factories.SerializationFormatFactory), HOSTS_OPTION (org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.HOSTS_OPTION), INDEX_OPTION (org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.INDEX_OPTION), Column (org.apache.flink.table.catalog.Column), ResolvedSchema (org.apache.flink.table.catalog.ResolvedSchema), USERNAME_OPTION (org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.USERNAME_OPTION), Supplier (java.util.function.Supplier), TableConfigOptions (org.apache.flink.table.api.config.TableConfigOptions), ReadableConfig (org.apache.flink.configuration.ReadableConfig), BULK_FLUSH_MAX_ACTIONS_OPTION (org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.BULK_FLUSH_MAX_ACTIONS_OPTION), SOCKET_TIMEOUT (org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.SOCKET_TIMEOUT), ConfigOption (org.apache.flink.configuration.ConfigOption), Preconditions.checkNotNull (org.apache.flink.util.Preconditions.checkNotNull), BULK_FLUSH_BACKOFF_TYPE_OPTION (org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.BULK_FLUSH_BACKOFF_TYPE_OPTION), PASSWORD_OPTION (org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.PASSWORD_OPTION), Nullable (javax.annotation.Nullable), Projection (org.apache.flink.table.connector.Projection), RowData (org.apache.flink.table.data.RowData), DynamicTableSink (org.apache.flink.table.connector.sink.DynamicTableSink), CONNECTION_REQUEST_TIMEOUT (org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.CONNECTION_REQUEST_TIMEOUT), KEY_DELIMITER_OPTION (org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.KEY_DELIMITER_OPTION), Strings.capitalize (org.elasticsearch.common.Strings.capitalize), DynamicTableSinkFactory (org.apache.flink.table.factories.DynamicTableSinkFactory), BULK_FLUSH_BACKOFF_MAX_RETRIES_OPTION (org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.BULK_FLUSH_BACKOFF_MAX_RETRIES_OPTION), Set (java.util.Set), SINK_PARALLELISM (org.apache.flink.table.factories.FactoryUtil.SINK_PARALLELISM), StringUtils (org.apache.flink.util.StringUtils), Collectors (java.util.stream.Collectors), ZoneId (java.time.ZoneId), CONNECTION_TIMEOUT (org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.CONNECTION_TIMEOUT), BULK_FLUSH_INTERVAL_OPTION (org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.BULK_FLUSH_INTERVAL_OPTION), List (java.util.List), Stream (java.util.stream.Stream), DELIVERY_GUARANTEE_OPTION (org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.DELIVERY_GUARANTEE_OPTION), FactoryUtil (org.apache.flink.table.factories.FactoryUtil), LogicalType (org.apache.flink.table.types.logical.LogicalType), FORMAT_OPTION (org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.FORMAT_OPTION), ValidationException (org.apache.flink.table.api.ValidationException), BULK_FLUSH_BACKOFF_DELAY_OPTION (org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.BULK_FLUSH_BACKOFF_DELAY_OPTION), Optional (java.util.Optional), Internal (org.apache.flink.annotation.Internal), SerializationSchema (org.apache.flink.api.common.serialization.SerializationSchema), CONNECTION_PATH_PREFIX_OPTION (org.apache.flink.connector.elasticsearch.table.ElasticsearchConnectorOptions.CONNECTION_PATH_PREFIX_OPTION)
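
For orientation, LogicalTypeWithIndex is a small holder that pairs a primary-key column's position with its resolved LogicalType. A sketch of such a holder, with field names inferred from the constructor call above (the actual class in the connector may differ in detail):

import org.apache.flink.table.types.logical.LogicalType;

class LogicalTypeWithIndex {

    public final int index; // position of the key column in the physical row
    public final LogicalType logicalType; // resolved type of that column

    LogicalTypeWithIndex(int index, LogicalType logicalType) {
        this.index = index;
        this.logicalType = logicalType;
    }
}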

Example 10 with LogicalType

Use of org.apache.flink.table.types.logical.LogicalType in project flink by apache.

From the class OrcNoHiveColumnarRowInputFormat, method createPartitionedFormat.

/**
 * Create a partitioned {@link OrcColumnarRowInputFormat} whose partition columns can be
 * generated from the split.
 */
public static <SplitT extends FileSourceSplit> OrcColumnarRowInputFormat<VectorizedRowBatch, SplitT> createPartitionedFormat(
        Configuration hadoopConfig,
        RowType tableType,
        List<String> partitionKeys,
        PartitionFieldExtractor<SplitT> extractor,
        int[] selectedFields,
        List<OrcFilters.Predicate> conjunctPredicates,
        int batchSize,
        Function<RowType, TypeInformation<RowData>> rowTypeInfoFactory) {
    // TODO FLINK-25113 all this partition keys code should be pruned from the orc format,
    // because now FileSystemTableSource uses FileInfoExtractorBulkFormat for reading partition
    // keys.
    String[] tableFieldNames = tableType.getFieldNames().toArray(new String[0]);
    LogicalType[] tableFieldTypes = tableType.getChildren().toArray(new LogicalType[0]);
    List<String> orcFieldNames = getNonPartNames(tableFieldNames, partitionKeys);
    int[] orcSelectedFields = getSelectedOrcFields(tableFieldNames, selectedFields, orcFieldNames);
    ColumnBatchFactory<VectorizedRowBatch, SplitT> batchGenerator = (SplitT split, VectorizedRowBatch rowBatch) -> {
        // create and initialize the row batch
        ColumnVector[] vectors = new ColumnVector[selectedFields.length];
        for (int i = 0; i < vectors.length; i++) {
            String name = tableFieldNames[selectedFields[i]];
            LogicalType type = tableFieldTypes[selectedFields[i]];
            vectors[i] =
                    partitionKeys.contains(name)
                            ? createFlinkVectorFromConstant(
                                    type, extractor.extract(split, name, type), batchSize)
                            : createFlinkVector(rowBatch.cols[orcFieldNames.indexOf(name)]);
        }
        return new VectorizedColumnBatch(vectors);
    };
    return new OrcColumnarRowInputFormat<>(
            new OrcNoHiveShim(),
            hadoopConfig,
            convertToOrcTypeWithPart(tableFieldNames, tableFieldTypes, partitionKeys),
            orcSelectedFields,
            conjunctPredicates,
            batchSize,
            batchGenerator,
            rowTypeInfoFactory.apply(
                    new RowType(
                            Arrays.stream(selectedFields)
                                    .mapToObj(i -> tableType.getFields().get(i))
                                    .collect(Collectors.toList()))));
}
Also used: LogicalType (org.apache.flink.table.types.logical.LogicalType), RowType (org.apache.flink.table.types.logical.RowType), VectorizedRowBatch (org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch), VectorizedColumnBatch (org.apache.flink.table.data.columnar.vector.VectorizedColumnBatch), OrcColumnarRowInputFormat (org.apache.flink.orc.OrcColumnarRowInputFormat), OrcNoHiveShim (org.apache.flink.orc.nohive.shim.OrcNoHiveShim)
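
A hedged usage sketch: building a format that reads an "id" column plus a "dt" partition key from a partitioned ORC table. The schema, option values, and extractor wiring are illustrative assumptions, and import paths assume a recent Flink release.

import java.util.Collections;
import org.apache.flink.connector.file.src.FileSourceSplit;
import org.apache.flink.connector.file.table.PartitionFieldExtractor;
import org.apache.flink.orc.OrcColumnarRowInputFormat;
import org.apache.flink.orc.nohive.OrcNoHiveColumnarRowInputFormat;
import org.apache.flink.table.runtime.typeutils.InternalTypeInfo;
import org.apache.flink.table.types.logical.IntType;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.table.types.logical.VarCharType;
import org.apache.hadoop.conf.Configuration;
import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;

class OrcFormatSketch {
    static OrcColumnarRowInputFormat<VectorizedRowBatch, FileSourceSplit> build() {
        RowType tableType = RowType.of(
                new LogicalType[] {new IntType(), new VarCharType(VarCharType.MAX_LENGTH)},
                new String[] {"id", "dt"}); // "dt" is the partition column
        return OrcNoHiveColumnarRowInputFormat.createPartitionedFormat(
                new Configuration(), // Hadoop configuration
                tableType,
                Collections.singletonList("dt"), // partition keys
                PartitionFieldExtractor.forFileSystem("__DEFAULT_PARTITION__"),
                new int[] {0, 1}, // project "id" and the partition key
                Collections.emptyList(), // no pushed-down ORC predicates
                2048, // rows per vectorized batch
                InternalTypeInfo::of); // TypeInformation<RowData> factory
    }
}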

Aggregations

LogicalType (org.apache.flink.table.types.logical.LogicalType): 192
DataType (org.apache.flink.table.types.DataType): 53
RowType (org.apache.flink.table.types.logical.RowType): 53
RowData (org.apache.flink.table.data.RowData): 45
List (java.util.List): 29
ArrayList (java.util.ArrayList): 28
TableException (org.apache.flink.table.api.TableException): 25
TimestampType (org.apache.flink.table.types.logical.TimestampType): 25
Internal (org.apache.flink.annotation.Internal): 21
IntType (org.apache.flink.table.types.logical.IntType): 21
Map (java.util.Map): 20
ValidationException (org.apache.flink.table.api.ValidationException): 20
ArrayType (org.apache.flink.table.types.logical.ArrayType): 19
DecimalType (org.apache.flink.table.types.logical.DecimalType): 19
LocalZonedTimestampType (org.apache.flink.table.types.logical.LocalZonedTimestampType): 17
Test (org.junit.Test): 17
BigIntType (org.apache.flink.table.types.logical.BigIntType): 16
LegacyTypeInformationType (org.apache.flink.table.types.logical.LegacyTypeInformationType): 16
GenericRowData (org.apache.flink.table.data.GenericRowData): 15
Arrays (java.util.Arrays): 14