Example 36 with RowType

Use of org.apache.flink.table.types.logical.RowType in project flink by apache.

The class ParquetColumnarRowInputFormat, method createPartitionedFormat:

/**
 * Create a partitioned {@link ParquetColumnarRowInputFormat}, the partition columns can be
 * generated by {@link Path}.
 */
public static <SplitT extends FileSourceSplit>
        ParquetColumnarRowInputFormat<SplitT> createPartitionedFormat(
                Configuration hadoopConfig,
                RowType producedRowType,
                TypeInformation<RowData> producedTypeInfo,
                List<String> partitionKeys,
                PartitionFieldExtractor<SplitT> extractor,
                int batchSize,
                boolean isUtcTimestamp,
                boolean isCaseSensitive) {
    // TODO FLINK-25113 all this partition keys code should be pruned from the parquet format,
    // because now FileSystemTableSource uses FileInfoExtractorBulkFormat for reading partition
    // keys.
    RowType projectedRowType =
            new RowType(
                    producedRowType.getFields().stream()
                            .filter(field -> !partitionKeys.contains(field.getName()))
                            .collect(Collectors.toList()));
    List<String> projectedNames = projectedRowType.getFieldNames();
    ColumnBatchFactory<SplitT> factory =
            (SplitT split, ColumnVector[] parquetVectors) -> {
                // create and initialize the row batch: partition columns are materialized
                // as constant vectors, all other columns come from the projected parquet read
                ColumnVector[] vectors = new ColumnVector[producedRowType.getFieldCount()];
                for (int i = 0; i < vectors.length; i++) {
                    RowType.RowField field = producedRowType.getFields().get(i);
                    vectors[i] =
                            partitionKeys.contains(field.getName())
                                    ? createVectorFromConstant(
                                            field.getType(),
                                            extractor.extract(
                                                    split, field.getName(), field.getType()),
                                            batchSize)
                                    : parquetVectors[projectedNames.indexOf(field.getName())];
                }
                return new VectorizedColumnBatch(vectors);
            };
    return new ParquetColumnarRowInputFormat<>(
            hadoopConfig,
            projectedRowType,
            producedTypeInfo,
            factory,
            batchSize,
            isUtcTimestamp,
            isCaseSensitive);
}
Also used: VectorizedColumnBatch (org.apache.flink.table.data.columnar.vector.VectorizedColumnBatch), RowType (org.apache.flink.table.types.logical.RowType), WritableColumnVector (org.apache.flink.table.data.columnar.vector.writable.WritableColumnVector), ColumnVector (org.apache.flink.table.data.columnar.vector.ColumnVector).
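
For context, a minimal sketch of how this factory method might be wired up. The schema, the partition key "dt", and the default partition name are illustrative, not taken from the Flink sources:

RowType producedRowType = RowType.of(
        new LogicalType[] {new IntType(), new VarCharType(10), new VarCharType(10)},
        new String[] {"id", "name", "dt"});
ParquetColumnarRowInputFormat<FileSourceSplit> format =
        ParquetColumnarRowInputFormat.createPartitionedFormat(
                new Configuration(),                  // Hadoop configuration
                producedRowType,
                InternalTypeInfo.of(producedRowType), // TypeInformation<RowData>
                Collections.singletonList("dt"),      // partition keys
                PartitionFieldExtractor.forFileSystem("__DEFAULT_PARTITION__"),
                500,                                  // batch size
                true,                                 // UTC timestamps
                true);                                // case sensitive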

Example 37 with RowType

Use of org.apache.flink.table.types.logical.RowType in project flink by apache.

The class ParquetRowDataWriter, method createWriter:

private FieldWriter createWriter(LogicalType t, Type type) {
    if (type.isPrimitive()) {
        switch (t.getTypeRoot()) {
            case CHAR:
            case VARCHAR:
                return new StringWriter();
            case BOOLEAN:
                return new BooleanWriter();
            case BINARY:
            case VARBINARY:
                return new BinaryWriter();
            case DECIMAL:
                DecimalType decimalType = (DecimalType) t;
                return createDecimalWriter(decimalType.getPrecision(), decimalType.getScale());
            case TINYINT:
                return new ByteWriter();
            case SMALLINT:
                return new ShortWriter();
            case DATE:
            case TIME_WITHOUT_TIME_ZONE:
            case INTEGER:
                return new IntWriter();
            case BIGINT:
                return new LongWriter();
            case FLOAT:
                return new FloatWriter();
            case DOUBLE:
                return new DoubleWriter();
            case TIMESTAMP_WITHOUT_TIME_ZONE:
                TimestampType timestampType = (TimestampType) t;
                return new TimestampWriter(timestampType.getPrecision());
            case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
                LocalZonedTimestampType localZonedTimestampType = (LocalZonedTimestampType) t;
                return new TimestampWriter(localZonedTimestampType.getPrecision());
            default:
                throw new UnsupportedOperationException("Unsupported type: " + type);
        }
    } else {
        GroupType groupType = type.asGroupType();
        LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation();
        if (t instanceof ArrayType && logicalType instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation) {
            return new ArrayWriter(((ArrayType) t).getElementType(), groupType);
        } else if (t instanceof MapType && logicalType instanceof LogicalTypeAnnotation.MapLogicalTypeAnnotation) {
            return new MapWriter(((MapType) t).getKeyType(), ((MapType) t).getValueType(), groupType);
        } else if (t instanceof RowType && type instanceof GroupType) {
            return new RowWriter((RowType) t, groupType);
        } else {
            throw new UnsupportedOperationException("Unsupported type: " + type);
        }
    }
}
Also used: LocalZonedTimestampType (org.apache.flink.table.types.logical.LocalZonedTimestampType), RowType (org.apache.flink.table.types.logical.RowType), MapType (org.apache.flink.table.types.logical.MapType), ArrayType (org.apache.flink.table.types.logical.ArrayType), GroupType (org.apache.parquet.schema.GroupType), LogicalTypeAnnotation (org.apache.parquet.schema.LogicalTypeAnnotation), DecimalType (org.apache.flink.table.types.logical.DecimalType), TimestampType (org.apache.flink.table.types.logical.TimestampType).
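
createWriter is private; this dispatch is normally reached through the public builder in flink-parquet. A minimal sketch, assuming a two-field schema (the field names are illustrative):

RowType rowType = RowType.of(
        new LogicalType[] {new DecimalType(10, 2), new TimestampType(3)},
        new String[] {"amount", "ts"});
ParquetWriterFactory<RowData> factory =
        ParquetRowDataBuilder.createWriterFactory(
                rowType, new Configuration(), /* utcTimestamp */ true);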

Example 38 with RowType

Use of org.apache.flink.table.types.logical.RowType in project flink by apache.

The class EmbeddedPythonScalarFunctionOperator, method open:

@SuppressWarnings("unchecked")
@Override
public void open() throws Exception {
    isOneArg = udfInputOffsets.length == 1;
    isOneFieldResult = udfOutputType.getFieldCount() == 1;
    super.open();
    rowDataWrapper = new StreamRecordRowDataWrappingCollector(output);
    reuseResultRowData = new GenericRowData(udfOutputType.getFieldCount());
    RowType userDefinedFunctionInputType =
            new RowType(
                    Arrays.stream(udfInputOffsets)
                            .mapToObj(i -> inputType.getFields().get(i))
                            .collect(Collectors.toList()));
    userDefinedFunctionInputConverters =
            userDefinedFunctionInputType.getFields().stream()
                    .map(RowType.RowField::getType)
                    .map(PythonTypeUtils::toDataConverter)
                    .toArray(PythonTypeUtils.DataConverter[]::new);
    userDefinedFunctionInputArgs = new Object[udfInputOffsets.length];
    userDefinedFunctionOutputConverters =
            udfOutputType.getFields().stream()
                    .map(RowType.RowField::getType)
                    .map(PythonTypeUtils::toDataConverter)
                    .toArray(PythonTypeUtils.DataConverter[]::new);
    if (forwardedFieldGeneratedProjection != null) {
        forwardedFieldProjection = forwardedFieldGeneratedProjection.newInstance(Thread.currentThread().getContextClassLoader());
    }
}
Also used: StreamRecordRowDataWrappingCollector (org.apache.flink.table.runtime.operators.python.utils.StreamRecordRowDataWrappingCollector), GenericRowData (org.apache.flink.table.data.GenericRowData), RowType (org.apache.flink.table.types.logical.RowType), PythonTypeUtils (org.apache.flink.table.runtime.typeutils.PythonTypeUtils).
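
The projection idiom in open() is worth isolating: it builds a new RowType containing only the UDF's input fields, selected by offset. A standalone sketch with made-up offsets and schema:

int[] udfInputOffsets = {2, 0};
RowType inputType = RowType.of(
        new LogicalType[] {new IntType(), new VarCharType(10), new BigIntType()},
        new String[] {"a", "b", "c"});
RowType udfInputType = new RowType(
        Arrays.stream(udfInputOffsets)
                .mapToObj(i -> inputType.getFields().get(i))
                .collect(Collectors.toList()));
// udfInputType is now ROW<c BIGINT, a INT>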

Example 39 with RowType

Use of org.apache.flink.table.types.logical.RowType in project flink by apache.

The class RegistryAvroFormatFactory, method createEncodingFormat:

@Override
public EncodingFormat<SerializationSchema<RowData>> createEncodingFormat(
        DynamicTableFactory.Context context, ReadableConfig formatOptions) {
    FactoryUtil.validateFactoryOptions(this, formatOptions);
    String schemaRegistryURL = formatOptions.get(URL);
    Optional<String> subject = formatOptions.getOptional(SUBJECT);
    Map<String, ?> optionalPropertiesMap = buildOptionalPropertiesMap(formatOptions);
    if (!subject.isPresent()) {
        throw new ValidationException(String.format("Option %s.%s is required for serialization", IDENTIFIER, SUBJECT.key()));
    }
    return new EncodingFormat<SerializationSchema<RowData>>() {

        @Override
        public SerializationSchema<RowData> createRuntimeEncoder(DynamicTableSink.Context context, DataType consumedDataType) {
            final RowType rowType = (RowType) consumedDataType.getLogicalType();
            return new AvroRowDataSerializationSchema(
                    rowType,
                    ConfluentRegistryAvroSerializationSchema.forGeneric(
                            subject.get(),
                            AvroSchemaConverter.convertToSchema(rowType),
                            schemaRegistryURL,
                            optionalPropertiesMap),
                    RowDataToAvroConverters.createConverter(rowType));
        }

        @Override
        public ChangelogMode getChangelogMode() {
            return ChangelogMode.insertOnly();
        }
    };
}
Also used: EncodingFormat (org.apache.flink.table.connector.format.EncodingFormat), RowData (org.apache.flink.table.data.RowData), AvroRowDataSerializationSchema (org.apache.flink.formats.avro.AvroRowDataSerializationSchema), ValidationException (org.apache.flink.table.api.ValidationException), DataType (org.apache.flink.table.types.DataType), RowType (org.apache.flink.table.types.logical.RowType).
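
For reference, a sketch of how a sink would obtain the runtime encoder from the format returned above. The helper name and row schema are hypothetical; in practice the context is supplied by the planner:

static SerializationSchema<RowData> buildEncoder(
        EncodingFormat<SerializationSchema<RowData>> format,
        DynamicTableSink.Context context) {
    DataType consumedDataType = DataTypes.ROW(
            DataTypes.FIELD("id", DataTypes.BIGINT()),
            DataTypes.FIELD("name", DataTypes.STRING()));
    return format.createRuntimeEncoder(context, consumedDataType);
}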

Example 40 with RowType

Use of org.apache.flink.table.types.logical.RowType in project flink by apache.

The class RegistryAvroFormatFactory, method createDecodingFormat:

@Override
public DecodingFormat<DeserializationSchema<RowData>> createDecodingFormat(
        DynamicTableFactory.Context context, ReadableConfig formatOptions) {
    FactoryUtil.validateFactoryOptions(this, formatOptions);
    String schemaRegistryURL = formatOptions.get(URL);
    Map<String, ?> optionalPropertiesMap = buildOptionalPropertiesMap(formatOptions);
    return new ProjectableDecodingFormat<DeserializationSchema<RowData>>() {

        @Override
        public DeserializationSchema<RowData> createRuntimeDecoder(DynamicTableSource.Context context, DataType producedDataType, int[][] projections) {
            producedDataType = Projection.of(projections).project(producedDataType);
            final RowType rowType = (RowType) producedDataType.getLogicalType();
            final TypeInformation<RowData> rowDataTypeInfo = context.createTypeInformation(producedDataType);
            return new AvroRowDataDeserializationSchema(
                    ConfluentRegistryAvroDeserializationSchema.forGeneric(
                            AvroSchemaConverter.convertToSchema(rowType),
                            schemaRegistryURL,
                            optionalPropertiesMap),
                    AvroToRowDataConverters.createRowConverter(rowType),
                    rowDataTypeInfo);
        }

        @Override
        public ChangelogMode getChangelogMode() {
            return ChangelogMode.insertOnly();
        }
    };
}
Also used: RowData (org.apache.flink.table.data.RowData), ProjectableDecodingFormat (org.apache.flink.table.connector.format.ProjectableDecodingFormat), DataType (org.apache.flink.table.types.DataType), RowType (org.apache.flink.table.types.logical.RowType), AvroRowDataDeserializationSchema (org.apache.flink.formats.avro.AvroRowDataDeserializationSchema).
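
The projection pushdown step in createRuntimeDecoder can be tried in isolation. A minimal sketch, with an illustrative schema and projected indexes:

DataType producedDataType = DataTypes.ROW(
        DataTypes.FIELD("id", DataTypes.BIGINT()),
        DataTypes.FIELD("name", DataTypes.STRING()),
        DataTypes.FIELD("ts", DataTypes.TIMESTAMP(3)));
int[][] projections = {{2}, {0}}; // keep "ts", then "id"
DataType projected = Projection.of(projections).project(producedDataType);
RowType rowType = (RowType) projected.getLogicalType(); // ROW<ts TIMESTAMP(3), id BIGINT>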

Aggregations

RowType (org.apache.flink.table.types.logical.RowType): 212 usages
RowData (org.apache.flink.table.data.RowData): 108
LogicalType (org.apache.flink.table.types.logical.LogicalType): 59
DataType (org.apache.flink.table.types.DataType): 57
Transformation (org.apache.flink.api.dag.Transformation): 50
ExecEdge (org.apache.flink.table.planner.plan.nodes.exec.ExecEdge): 46
TableException (org.apache.flink.table.api.TableException): 37
Test (org.junit.Test): 36
GenericRowData (org.apache.flink.table.data.GenericRowData): 33
ArrayList (java.util.ArrayList): 28
List (java.util.List): 28
OneInputTransformation (org.apache.flink.streaming.api.transformations.OneInputTransformation): 26
RowDataKeySelector (org.apache.flink.table.runtime.keyselector.RowDataKeySelector): 25
CodeGeneratorContext (org.apache.flink.table.planner.codegen.CodeGeneratorContext): 22
TableConfig (org.apache.flink.table.api.TableConfig): 19
ArrayType (org.apache.flink.table.types.logical.ArrayType): 19
TimestampType (org.apache.flink.table.types.logical.TimestampType): 19
DecimalType (org.apache.flink.table.types.logical.DecimalType): 17
Collections (java.util.Collections): 16
AggregateInfoList (org.apache.flink.table.planner.plan.utils.AggregateInfoList): 16