Example 71 with DataType

Use of org.apache.flink.table.types.DataType in project flink by apache.

From class StreamExecLegacyTableSourceScan, method createConversionTransformationIfNeeded.

@SuppressWarnings("unchecked")
@Override
protected Transformation<RowData> createConversionTransformationIfNeeded(
        StreamExecutionEnvironment streamExecEnv,
        ExecNodeConfig config,
        Transformation<?> sourceTransform,
        @Nullable RexNode rowtimeExpression) {
    final RowType outputType = (RowType) getOutputType();
    final Transformation<RowData> transformation;
    final int[] fieldIndexes = computeIndexMapping(true);
    if (needInternalConversion(fieldIndexes)) {
        final String extractElement, resetElement;
        if (ScanUtil.hasTimeAttributeField(fieldIndexes)) {
            String elementTerm = OperatorCodeGenerator.ELEMENT();
            extractElement = String.format("ctx.%s = %s;", elementTerm, elementTerm);
            resetElement = String.format("ctx.%s = null;", elementTerm);
        } else {
            extractElement = "";
            resetElement = "";
        }
        final CodeGeneratorContext ctx = new CodeGeneratorContext(config.getTableConfig()).setOperatorBaseClass(TableStreamOperator.class);
        // The produced type may not carry the correct precision the user defined in the DDL,
        // because it may have been converted from a legacy type. Fix the precision using the
        // logical schema from the DDL; code generation requires the correct precision of
        // input fields.
        final DataType fixedProducedDataType = TableSourceUtil.fixPrecisionForProducedDataType(tableSource, outputType);
        transformation =
                ScanUtil.convertToInternalRow(
                        ctx,
                        (Transformation<Object>) sourceTransform,
                        fieldIndexes,
                        fixedProducedDataType,
                        outputType,
                        qualifiedName,
                        (detailName, simplifyName) ->
                                createFormattedTransformationName(detailName, simplifyName, config),
                        (description) ->
                                createFormattedTransformationDescription(description, config),
                        JavaScalaConversionUtil.toScala(Optional.ofNullable(rowtimeExpression)),
                        extractElement,
                        resetElement);
    } else {
        transformation = (Transformation<RowData>) sourceTransform;
    }
    final RelDataType relDataType = FlinkTypeFactory.INSTANCE().buildRelNodeRowType(outputType);
    final DataStream<RowData> ingestedTable = new DataStream<>(streamExecEnv, transformation);
    final Optional<RowtimeAttributeDescriptor> rowtimeDesc = JavaScalaConversionUtil.toJava(TableSourceUtil.getRowtimeAttributeDescriptor(tableSource, relDataType));
    final DataStream<RowData> withWatermarks = rowtimeDesc.map(desc -> {
        int rowtimeFieldIdx = relDataType.getFieldNames().indexOf(desc.getAttributeName());
        WatermarkStrategy strategy = desc.getWatermarkStrategy();
        if (strategy instanceof PeriodicWatermarkAssigner) {
            PeriodicWatermarkAssignerWrapper watermarkGenerator = new PeriodicWatermarkAssignerWrapper((PeriodicWatermarkAssigner) strategy, rowtimeFieldIdx);
            return ingestedTable.assignTimestampsAndWatermarks(watermarkGenerator);
        } else if (strategy instanceof PunctuatedWatermarkAssigner) {
            PunctuatedWatermarkAssignerWrapper watermarkGenerator = new PunctuatedWatermarkAssignerWrapper((PunctuatedWatermarkAssigner) strategy, rowtimeFieldIdx, tableSource.getProducedDataType());
            return ingestedTable.assignTimestampsAndWatermarks(watermarkGenerator);
        } else {
            // The watermarks have already been provided by the underlying DataStream.
            return ingestedTable;
        }
    }).orElse(ingestedTable); // no need to generate watermarks if no rowtime attribute is specified
    return withWatermarks.getTransformation();
}
Also used : TableStreamOperator(org.apache.flink.table.runtime.operators.TableStreamOperator) DataType(org.apache.flink.table.types.DataType) TableSourceUtil(org.apache.flink.table.planner.sources.TableSourceUtil) RowtimeAttributeDescriptor(org.apache.flink.table.sources.RowtimeAttributeDescriptor) TableSource(org.apache.flink.table.sources.TableSource) PeriodicWatermarkAssigner(org.apache.flink.table.sources.wmstrategies.PeriodicWatermarkAssigner) FlinkTypeFactory(org.apache.flink.table.planner.calcite.FlinkTypeFactory) RowType(org.apache.flink.table.types.logical.RowType) ExecNode(org.apache.flink.table.planner.plan.nodes.exec.ExecNode) ScanUtil(org.apache.flink.table.planner.plan.utils.ScanUtil) RexNode(org.apache.calcite.rex.RexNode) InputFormat(org.apache.flink.api.common.io.InputFormat) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) CodeGeneratorContext(org.apache.flink.table.planner.codegen.CodeGeneratorContext) Nullable(javax.annotation.Nullable) RelDataType(org.apache.calcite.rel.type.RelDataType) ExecNodeContext(org.apache.flink.table.planner.plan.nodes.exec.ExecNodeContext) RowData(org.apache.flink.table.data.RowData) InputSplit(org.apache.flink.core.io.InputSplit) ExecNodeConfig(org.apache.flink.table.planner.plan.nodes.exec.ExecNodeConfig) WatermarkStrategy(org.apache.flink.table.sources.wmstrategies.WatermarkStrategy) PunctuatedWatermarkAssigner(org.apache.flink.table.sources.wmstrategies.PunctuatedWatermarkAssigner) StreamTableSource(org.apache.flink.table.sources.StreamTableSource) DataStream(org.apache.flink.streaming.api.datastream.DataStream) OperatorCodeGenerator(org.apache.flink.table.planner.codegen.OperatorCodeGenerator) CommonExecLegacyTableSourceScan(org.apache.flink.table.planner.plan.nodes.exec.common.CommonExecLegacyTableSourceScan) List(java.util.List) PunctuatedWatermarkAssignerWrapper(org.apache.flink.table.runtime.operators.wmassigners.PunctuatedWatermarkAssignerWrapper) JavaScalaConversionUtil(org.apache.flink.table.planner.utils.JavaScalaConversionUtil) Optional(java.util.Optional) Transformation(org.apache.flink.api.dag.Transformation) PeriodicWatermarkAssignerWrapper(org.apache.flink.table.runtime.operators.wmassigners.PeriodicWatermarkAssignerWrapper) StreamExecutionEnvironment(org.apache.flink.streaming.api.environment.StreamExecutionEnvironment)
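
For context, the RowtimeAttributeDescriptor consumed above is declared by a legacy table source through the DefinedRowtimeAttributes interface. Below is a minimal, hedged sketch of such a declaration (the source name and field names are illustrative, and a real source would also implement StreamTableSource); a PeriodicWatermarkAssigner subclass such as BoundedOutOfOrderTimestamps takes the first branch of the dispatch above, while a PunctuatedWatermarkAssigner takes the second:

import java.util.Collections;
import java.util.List;
import org.apache.flink.table.sources.DefinedRowtimeAttributes;
import org.apache.flink.table.sources.RowtimeAttributeDescriptor;
import org.apache.flink.table.sources.tsextractors.ExistingField;
import org.apache.flink.table.sources.wmstrategies.BoundedOutOfOrderTimestamps;

// Illustrative legacy source: only the rowtime declaration is shown.
public abstract class MyLegacyTableSource implements DefinedRowtimeAttributes {

    @Override
    public List<RowtimeAttributeDescriptor> getRowtimeAttributeDescriptors() {
        return Collections.singletonList(
                new RowtimeAttributeDescriptor(
                        // attribute name that is looked up in the row type above
                        "rowtime",
                        // extract the timestamp from an existing field of the source
                        new ExistingField("ts"),
                        // periodic strategy: watermarks lag 5 seconds behind the max timestamp
                        new BoundedOutOfOrderTimestamps(5000L)));
    }
}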

Example 72 with DataType

Use of org.apache.flink.table.types.DataType in project flink by apache.

From class ClassDataTypeConverter, method addDefaultDataType.

private static void addDefaultDataType(Class<?> clazz, DataType rootType) {
    final DataType dataType;
    if (clazz.isPrimitive()) {
        dataType = rootType.notNull();
    } else {
        dataType = rootType.nullable();
    }
    defaultDataTypes.put(clazz.getName(), dataType.bridgedTo(clazz));
}
Also used : DataType(org.apache.flink.table.types.DataType) AtomicDataType(org.apache.flink.table.types.AtomicDataType)
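
A short, hedged sketch of the nullability convention this method encodes, using only the public DataTypes API (class and variable names are illustrative):

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.types.DataType;

public class DefaultDataTypeSketch {

    public static void main(String[] args) {
        // Primitives cannot hold null, so their default data type is NOT NULL.
        DataType forPrimitive = DataTypes.INT().notNull().bridgedTo(int.class);
        // Boxed classes can hold null, so they stay nullable.
        DataType forBoxed = DataTypes.INT().nullable().bridgedTo(Integer.class);

        System.out.println(forPrimitive); // INT NOT NULL
        System.out.println(forBoxed); // INT
    }
}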

Example 73 with DataType

Use of org.apache.flink.table.types.DataType in project flink by apache.

From class DataTypeUtils, method stripRowPrefix.

/**
 * Removes a string prefix from the fields of the given row data type.
 */
public static DataType stripRowPrefix(DataType dataType, String prefix) {
    Preconditions.checkArgument(dataType.getLogicalType().is(ROW), "Row data type expected.");
    final RowType rowType = (RowType) dataType.getLogicalType();
    final List<String> newFieldNames = rowType.getFieldNames().stream().map(s -> {
        if (s.startsWith(prefix)) {
            return s.substring(prefix.length());
        }
        return s;
    }).collect(Collectors.toList());
    final LogicalType newRowType = LogicalTypeUtils.renameRowFields(rowType, newFieldNames);
    return new FieldsDataType(newRowType, dataType.getConversionClass(), dataType.getChildren());
}
Also used : DataType(org.apache.flink.table.types.DataType) AtomicDataType(org.apache.flink.table.types.AtomicDataType) Arrays(java.util.Arrays) ResolvedSchema(org.apache.flink.table.catalog.ResolvedSchema) StructuredType(org.apache.flink.table.types.logical.StructuredType) MapType(org.apache.flink.table.types.logical.MapType) RowField(org.apache.flink.table.types.logical.RowType.RowField) StructuredAttribute(org.apache.flink.table.types.logical.StructuredType.StructuredAttribute) Projection(org.apache.flink.table.connector.Projection) LocalZonedTimestampType(org.apache.flink.table.types.logical.LocalZonedTimestampType) Preconditions(org.apache.flink.util.Preconditions) Collectors(java.util.stream.Collectors) LegacyTypeInformationType(org.apache.flink.table.types.logical.LegacyTypeInformationType) List(java.util.List) Stream(java.util.stream.Stream) CollectionDataType(org.apache.flink.table.types.CollectionDataType) DistinctType(org.apache.flink.table.types.logical.DistinctType) LogicalType(org.apache.flink.table.types.logical.LogicalType) ValidationException(org.apache.flink.table.api.ValidationException) Optional(java.util.Optional) DataTypeFactory(org.apache.flink.table.catalog.DataTypeFactory) IntStream(java.util.stream.IntStream) LogicalTypeUtils.toInternalConversionClass(org.apache.flink.table.types.logical.utils.LogicalTypeUtils.toInternalConversionClass) ObjectIdentifier(org.apache.flink.table.catalog.ObjectIdentifier) KeyValueDataType(org.apache.flink.table.types.KeyValueDataType) Column(org.apache.flink.table.catalog.Column) RowType(org.apache.flink.table.types.logical.RowType) TimestampKind(org.apache.flink.table.types.logical.TimestampKind) LogicalTypeFamily(org.apache.flink.table.types.logical.LogicalTypeFamily) FieldsDataType(org.apache.flink.table.types.FieldsDataType) DISTINCT_TYPE(org.apache.flink.table.types.logical.LogicalTypeRoot.DISTINCT_TYPE) LogicalTypeUtils.removeTimeAttributes(org.apache.flink.table.types.logical.utils.LogicalTypeUtils.removeTimeAttributes) LogicalTypeUtils(org.apache.flink.table.types.logical.utils.LogicalTypeUtils) Nullable(javax.annotation.Nullable) ROW(org.apache.flink.table.types.logical.LogicalTypeRoot.ROW) MultisetType(org.apache.flink.table.types.logical.MultisetType) LogicalTypeChecks.getFieldNames(org.apache.flink.table.types.logical.utils.LogicalTypeChecks.getFieldNames) LogicalTypeUtils.getAtomicName(org.apache.flink.table.types.logical.utils.LogicalTypeUtils.getAtomicName) CompositeType(org.apache.flink.api.common.typeutils.CompositeType) LogicalTypeChecks.isCompositeType(org.apache.flink.table.types.logical.utils.LogicalTypeChecks.isCompositeType) LogicalTypeDefaultVisitor(org.apache.flink.table.types.logical.utils.LogicalTypeDefaultVisitor) DataTypes(org.apache.flink.table.api.DataTypes) TypeTransformation(org.apache.flink.table.types.inference.TypeTransformation) ArrayType(org.apache.flink.table.types.logical.ArrayType) STRUCTURED_TYPE(org.apache.flink.table.types.logical.LogicalTypeRoot.STRUCTURED_TYPE) DataTypeVisitor(org.apache.flink.table.types.DataTypeVisitor) ExtractionUtils.primitiveToWrapper(org.apache.flink.table.types.extraction.ExtractionUtils.primitiveToWrapper) Internal(org.apache.flink.annotation.Internal) Collections(java.util.Collections) LogicalTypeChecks(org.apache.flink.table.types.logical.utils.LogicalTypeChecks)
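
A quick, hedged usage sketch of stripRowPrefix (the field names and the prefix are invented for illustration):

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.utils.DataTypeUtils;

public class StripRowPrefixSketch {

    public static void main(String[] args) {
        DataType rowType =
                DataTypes.ROW(
                        DataTypes.FIELD("meta_key", DataTypes.STRING()),
                        DataTypes.FIELD("meta_value", DataTypes.BIGINT()),
                        DataTypes.FIELD("payload", DataTypes.STRING()));

        // Fields starting with "meta_" are renamed; "payload" is left untouched.
        DataType stripped = DataTypeUtils.stripRowPrefix(rowType, "meta_");

        // Expected shape: ROW<`key` STRING, `value` BIGINT, `payload` STRING>
        System.out.println(stripped);
    }
}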

Example 74 with DataType

Use of org.apache.flink.table.types.DataType in project flink by apache.

From class TypeInfoDataTypeConverter, method convertToStructuredType.

private static DataType convertToStructuredType(
        DataTypeFactory dataTypeFactory, CompositeType<?> compositeType, boolean forceNullability) {
    final int arity = compositeType.getArity();
    final String[] fieldNames = compositeType.getFieldNames();
    final Class<?> typeClass = compositeType.getTypeClass();
    final Map<String, DataType> fieldDataTypes = new LinkedHashMap<>();
    IntStream.range(0, arity).forEachOrdered(pos -> fieldDataTypes.put(fieldNames[pos], toDataType(dataTypeFactory, compositeType.getTypeAt(pos))));
    final List<String> fieldNamesReordered;
    final boolean isNullable;
    // for POJOs and Avro records
    if (compositeType instanceof PojoTypeInfo) {
        final PojoTypeInfo<?> pojoTypeInfo = (PojoTypeInfo<?>) compositeType;
        final List<Field> pojoFields = IntStream.range(0, arity).mapToObj(pojoTypeInfo::getPojoFieldAt).map(PojoField::getField).collect(Collectors.toList());
        // POJO serializer supports top-level nulls
        isNullable = true;
        // based on type information all fields are boxed classes,
        // therefore we need to check the reflective field for more details
        fieldDataTypes.replaceAll((name, dataType) -> {
            final Class<?> fieldClass =
                    pojoFields.stream()
                            .filter(f -> f.getName().equals(name))
                            .findFirst()
                            .orElseThrow(IllegalStateException::new)
                            .getType();
            if (fieldClass.isPrimitive()) {
                return dataType.notNull().bridgedTo(fieldClass);
            }
            // serializer supports nullable fields
            return dataType.nullable();
        });
        // best effort extraction of the field order, if it fails we use the default order of
        // PojoTypeInfo which is alphabetical
        fieldNamesReordered = extractStructuredTypeFieldOrder(typeClass, pojoFields);
    } else {
        // for tuples and case classes
        // serializers don't support top-level nulls
        isNullable = forceNullability;
        // based on type information all fields are boxed classes,
        // but case classes might contain primitives
        fieldDataTypes.replaceAll((name, dataType) -> {
            try {
                final Class<?> fieldClass = getStructuredField(typeClass, name).getType();
                if (fieldClass.isPrimitive()) {
                    return dataType.notNull().bridgedTo(fieldClass);
                }
            } catch (Throwable t) {
                // ignore extraction errors and keep the original conversion class
            }
            return dataType;
        });
        // field order from type information is correct
        fieldNamesReordered = null;
    }
    final DataTypes.Field[] structuredFields;
    if (fieldNamesReordered != null) {
        structuredFields = fieldNamesReordered.stream().map(name -> DataTypes.FIELD(name, fieldDataTypes.get(name))).toArray(DataTypes.Field[]::new);
    } else {
        structuredFields = fieldDataTypes.entrySet().stream().map(e -> DataTypes.FIELD(e.getKey(), e.getValue())).toArray(DataTypes.Field[]::new);
    }
    final DataType structuredDataType = DataTypes.STRUCTURED(typeClass, structuredFields);
    if (isNullable) {
        return structuredDataType.nullable();
    } else {
        return structuredDataType.notNull();
    }
}
Also used : PojoTypeInfo(org.apache.flink.api.java.typeutils.PojoTypeInfo) DataTypeFactory(org.apache.flink.table.catalog.DataTypeFactory) DataTypeQueryable(org.apache.flink.table.types.DataTypeQueryable) IntStream(java.util.stream.IntStream) DataType(org.apache.flink.table.types.DataType) ExtractionUtils.getStructuredField(org.apache.flink.table.types.extraction.ExtractionUtils.getStructuredField) BasicArrayTypeInfo(org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo) StructuredType(org.apache.flink.table.types.logical.StructuredType) LocalDateTime(java.time.LocalDateTime) HashMap(java.util.HashMap) ExtractionUtils.extractAssigningConstructor(org.apache.flink.table.types.extraction.ExtractionUtils.extractAssigningConstructor) ObjectArrayTypeInfo(org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo) RowTypeInfo(org.apache.flink.api.java.typeutils.RowTypeInfo) RowType(org.apache.flink.table.types.logical.RowType) LinkedHashMap(java.util.LinkedHashMap) ListTypeInfo(org.apache.flink.api.java.typeutils.ListTypeInfo) BigDecimal(java.math.BigDecimal) RawType(org.apache.flink.table.types.logical.RawType) Map(java.util.Map) LocalTime(java.time.LocalTime) MapTypeInfo(org.apache.flink.api.java.typeutils.MapTypeInfo) TypeInformation(org.apache.flink.api.common.typeinfo.TypeInformation) Nullable(javax.annotation.Nullable) Types(org.apache.flink.api.common.typeinfo.Types) GenericTypeInfo(org.apache.flink.api.java.typeutils.GenericTypeInfo) CompositeType(org.apache.flink.api.common.typeutils.CompositeType) DataTypes(org.apache.flink.table.api.DataTypes) PojoField(org.apache.flink.api.java.typeutils.PojoField) Field(java.lang.reflect.Field) Instant(java.time.Instant) Collectors(java.util.stream.Collectors) PrimitiveArrayTypeInfo(org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo) List(java.util.List) LogicalType(org.apache.flink.table.types.logical.LogicalType) LocalDate(java.time.LocalDate) Internal(org.apache.flink.annotation.Internal) Row(org.apache.flink.types.Row) MultisetTypeInfo(org.apache.flink.api.java.typeutils.MultisetTypeInfo) ExtractionUtils(org.apache.flink.table.types.extraction.ExtractionUtils) TupleTypeInfoBase(org.apache.flink.api.java.typeutils.TupleTypeInfoBase)
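
To make the conversion concrete, here is a hedged sketch of roughly the structured data type the method above would derive for a simple POJO. The POJO and its fields are invented, and the expected type is built by hand with the public DataTypes.STRUCTURED factory rather than by calling the converter:

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.types.DataType;

public class StructuredTypeSketch {

    // A simple POJO as analyzed by PojoTypeInfo: the reference field stays
    // nullable, while the primitive field becomes NOT NULL. PojoTypeInfo's
    // default field order is alphabetical, hence "age" before "name".
    public static class User {
        public String name;
        public int age;
    }

    public static void main(String[] args) {
        DataType userType =
                DataTypes.STRUCTURED(
                                User.class,
                                DataTypes.FIELD("age", DataTypes.INT().notNull().bridgedTo(int.class)),
                                DataTypes.FIELD("name", DataTypes.STRING()))
                        // the POJO serializer supports top-level nulls, see isNullable above
                        .nullable();
        System.out.println(userType);
    }
}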

Example 75 with DataType

Use of org.apache.flink.table.types.DataType in project flink by apache.

From class TableSchemaTest, method testRowDataType.

@Test
public void testRowDataType() {
    final TableSchema schema =
            TableSchema.builder()
                    .add(TableColumn.physical("f0", DataTypes.BIGINT()))
                    .add(TableColumn.metadata("f1", DataTypes.BIGINT(), true))
                    .add(TableColumn.metadata("f2", DataTypes.BIGINT(), false))
                    .add(TableColumn.physical("f3", DataTypes.STRING()))
                    .add(TableColumn.computed("f4", DataTypes.BIGINT(), "f0 + 1"))
                    .add(TableColumn.metadata("f5", DataTypes.BIGINT(), false))
                    .build();
    final DataType expectedDataType =
            DataTypes.ROW(
                            DataTypes.FIELD("f0", DataTypes.BIGINT()),
                            DataTypes.FIELD("f1", DataTypes.BIGINT()),
                            DataTypes.FIELD("f2", DataTypes.BIGINT()),
                            DataTypes.FIELD("f3", DataTypes.STRING()),
                            DataTypes.FIELD("f4", DataTypes.BIGINT()),
                            DataTypes.FIELD("f5", DataTypes.BIGINT()))
                    .notNull();
    assertThat(schema.toRowDataType(), equalTo(expectedDataType));
}
Also used : DataType(org.apache.flink.table.types.DataType) Test(org.junit.Test)
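
As a hedged follow-up: the test shows that toRowDataType includes computed and metadata columns as well. Only f0 and f3 are physical columns, so narrowing to the physical row type (assuming TableSchema#toPhysicalRowDataType, which is available in recent Flink versions) would be expected to yield a smaller row:

    final DataType physicalDataType =
            DataTypes.ROW(
                            DataTypes.FIELD("f0", DataTypes.BIGINT()),
                            DataTypes.FIELD("f3", DataTypes.STRING()))
                    .notNull();
    assertThat(schema.toPhysicalRowDataType(), equalTo(physicalDataType));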

Aggregations

DataType (org.apache.flink.table.types.DataType): 260
Test (org.junit.Test): 72
RowType (org.apache.flink.table.types.logical.RowType): 59
LogicalType (org.apache.flink.table.types.logical.LogicalType): 58
RowData (org.apache.flink.table.data.RowData): 54
List (java.util.List): 38
FieldsDataType (org.apache.flink.table.types.FieldsDataType): 32
ValidationException (org.apache.flink.table.api.ValidationException): 31
ArrayList (java.util.ArrayList): 29
Collectors (java.util.stream.Collectors): 24
AtomicDataType (org.apache.flink.table.types.AtomicDataType): 24
Map (java.util.Map): 23
Internal (org.apache.flink.annotation.Internal): 23
TableException (org.apache.flink.table.api.TableException): 23
HashMap (java.util.HashMap): 22
GenericRowData (org.apache.flink.table.data.GenericRowData): 22
Row (org.apache.flink.types.Row): 22
TableSchema (org.apache.flink.table.api.TableSchema): 20
TypeConversions.fromLogicalToDataType (org.apache.flink.table.types.utils.TypeConversions.fromLogicalToDataType): 19
ResolvedSchema (org.apache.flink.table.catalog.ResolvedSchema): 18