Use of org.apache.flink.table.types.logical.RowType in project flink by apache.
The class ParquetColumnarRowInputFormat, method createPartitionedFormat.
/**
 * Creates a partitioned {@link ParquetColumnarRowInputFormat} whose partition columns can be
 * generated from the {@link Path}.
 */
public static <SplitT extends FileSourceSplit> ParquetColumnarRowInputFormat<SplitT> createPartitionedFormat(
        Configuration hadoopConfig,
        RowType producedRowType,
        TypeInformation<RowData> producedTypeInfo,
        List<String> partitionKeys,
        PartitionFieldExtractor<SplitT> extractor,
        int batchSize,
        boolean isUtcTimestamp,
        boolean isCaseSensitive) {
    // TODO FLINK-25113 all this partition keys code should be pruned from the parquet format,
    // because now FileSystemTableSource uses FileInfoExtractorBulkFormat for reading partition
    // keys.
    RowType projectedRowType =
            new RowType(
                    producedRowType.getFields().stream()
                            .filter(field -> !partitionKeys.contains(field.getName()))
                            .collect(Collectors.toList()));
    List<String> projectedNames = projectedRowType.getFieldNames();
    ColumnBatchFactory<SplitT> factory =
            (SplitT split, ColumnVector[] parquetVectors) -> {
                // create and initialize the row batch: partition columns become constant
                // vectors filled from the path, all other columns come from the parquet readers
                ColumnVector[] vectors = new ColumnVector[producedRowType.getFieldCount()];
                for (int i = 0; i < vectors.length; i++) {
                    RowType.RowField field = producedRowType.getFields().get(i);
                    vectors[i] =
                            partitionKeys.contains(field.getName())
                                    ? createVectorFromConstant(
                                            field.getType(),
                                            extractor.extract(split, field.getName(), field.getType()),
                                            batchSize)
                                    : parquetVectors[projectedNames.indexOf(field.getName())];
                }
                return new VectorizedColumnBatch(vectors);
            };
    return new ParquetColumnarRowInputFormat<>(
            hadoopConfig,
            projectedRowType,
            producedTypeInfo,
            factory,
            batchSize,
            isUtcTimestamp,
            isCaseSensitive);
}
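A minimal call-site sketch, assuming a Hive-style layout where the partition column dt lives in the file path rather than in the Parquet files; the schema, default partition name, and numeric settings below are illustrative, not from the source:

    // Produced schema includes the partition column "dt", which is not stored in the files.
    RowType producedRowType =
            RowType.of(
                    new LogicalType[] {new IntType(), VarCharType.STRING_TYPE, VarCharType.STRING_TYPE},
                    new String[] {"id", "name", "dt"});
    ParquetColumnarRowInputFormat<FileSourceSplit> format =
            ParquetColumnarRowInputFormat.createPartitionedFormat(
                    new org.apache.hadoop.conf.Configuration(),     // Hadoop config for the readers
                    producedRowType,
                    InternalTypeInfo.of(producedRowType),
                    Collections.singletonList("dt"),                // partition keys taken from the path
                    PartitionFieldExtractor.forFileSystem("__DEFAULT__"), // hypothetical default partition name
                    2048,   // batchSize
                    true,   // isUtcTimestamp
                    true);  // isCaseSensitive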
Use of org.apache.flink.table.types.logical.RowType in project flink by apache.
The class ParquetRowDataWriter, method createWriter.
private FieldWriter createWriter(LogicalType t, Type type) {
    if (type.isPrimitive()) {
        switch (t.getTypeRoot()) {
            case CHAR:
            case VARCHAR:
                return new StringWriter();
            case BOOLEAN:
                return new BooleanWriter();
            case BINARY:
            case VARBINARY:
                return new BinaryWriter();
            case DECIMAL:
                DecimalType decimalType = (DecimalType) t;
                return createDecimalWriter(decimalType.getPrecision(), decimalType.getScale());
            case TINYINT:
                return new ByteWriter();
            case SMALLINT:
                return new ShortWriter();
            case DATE:
            case TIME_WITHOUT_TIME_ZONE:
            case INTEGER:
                return new IntWriter();
            case BIGINT:
                return new LongWriter();
            case FLOAT:
                return new FloatWriter();
            case DOUBLE:
                return new DoubleWriter();
            case TIMESTAMP_WITHOUT_TIME_ZONE:
                TimestampType timestampType = (TimestampType) t;
                return new TimestampWriter(timestampType.getPrecision());
            case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
                LocalZonedTimestampType localZonedTimestampType = (LocalZonedTimestampType) t;
                return new TimestampWriter(localZonedTimestampType.getPrecision());
            default:
                throw new UnsupportedOperationException("Unsupported type: " + type);
        }
    } else {
        GroupType groupType = type.asGroupType();
        LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation();
        if (t instanceof ArrayType
                && logicalType instanceof LogicalTypeAnnotation.ListLogicalTypeAnnotation) {
            return new ArrayWriter(((ArrayType) t).getElementType(), groupType);
        } else if (t instanceof MapType
                && logicalType instanceof LogicalTypeAnnotation.MapLogicalTypeAnnotation) {
            return new MapWriter(((MapType) t).getKeyType(), ((MapType) t).getValueType(), groupType);
        } else if (t instanceof RowType && type instanceof GroupType) {
            return new RowWriter((RowType) t, groupType);
        } else {
            throw new UnsupportedOperationException("Unsupported type: " + type);
        }
    }
}
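For orientation, a hedged sketch of what one of the leaf writers dispatched above might look like; the FieldWriter interface shape and the recordConsumer field are simplified assumptions about the surrounding class, not its verbatim code:

    private interface FieldWriter {
        void write(RowData row, int ordinal);
    }

    private class IntWriter implements FieldWriter {
        @Override
        public void write(RowData row, int ordinal) {
            // DATE, TIME_WITHOUT_TIME_ZONE and INTEGER are all backed by a 4-byte int,
            // which is why the switch above routes three type roots to this one writer.
            recordConsumer.addInteger(row.getInt(ordinal));
        }
    }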
Use of org.apache.flink.table.types.logical.RowType in project flink by apache.
The class EmbeddedPythonScalarFunctionOperator, method open.
@SuppressWarnings("unchecked")
@Override
public void open() throws Exception {
    isOneArg = udfInputOffsets.length == 1;
    isOneFieldResult = udfOutputType.getFieldCount() == 1;
    super.open();
    rowDataWrapper = new StreamRecordRowDataWrappingCollector(output);
    reuseResultRowData = new GenericRowData(udfOutputType.getFieldCount());
    RowType userDefinedFunctionInputType =
            new RowType(
                    Arrays.stream(udfInputOffsets)
                            .mapToObj(i -> inputType.getFields().get(i))
                            .collect(Collectors.toList()));
    userDefinedFunctionInputConverters =
            userDefinedFunctionInputType.getFields().stream()
                    .map(RowType.RowField::getType)
                    .map(PythonTypeUtils::toDataConverter)
                    .toArray(PythonTypeUtils.DataConverter[]::new);
    userDefinedFunctionInputArgs = new Object[udfInputOffsets.length];
    userDefinedFunctionOutputConverters =
            udfOutputType.getFields().stream()
                    .map(RowType.RowField::getType)
                    .map(PythonTypeUtils::toDataConverter)
                    .toArray(PythonTypeUtils.DataConverter[]::new);
    if (forwardedFieldGeneratedProjection != null) {
        forwardedFieldProjection =
                forwardedFieldGeneratedProjection.newInstance(
                        Thread.currentThread().getContextClassLoader());
    }
}
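A hedged sketch of how these per-field converters are typically applied once a record arrives; the converter call shape below is an assumption about PythonTypeUtils.DataConverter, not the operator's exact processElement code:

    // For each UDF argument, pick the projected field out of the input row and
    // convert it from Flink's internal representation to a Python-friendly object.
    for (int i = 0; i < udfInputOffsets.length; i++) {
        userDefinedFunctionInputArgs[i] =
                userDefinedFunctionInputConverters[i].toExternal(inputRow, udfInputOffsets[i]);
    }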
Use of org.apache.flink.table.types.logical.RowType in project flink by apache.
The class RegistryAvroFormatFactory, method createEncodingFormat.
@Override
public EncodingFormat<SerializationSchema<RowData>> createEncodingFormat(
        DynamicTableFactory.Context context, ReadableConfig formatOptions) {
    FactoryUtil.validateFactoryOptions(this, formatOptions);
    String schemaRegistryURL = formatOptions.get(URL);
    Optional<String> subject = formatOptions.getOptional(SUBJECT);
    Map<String, ?> optionalPropertiesMap = buildOptionalPropertiesMap(formatOptions);
    if (!subject.isPresent()) {
        throw new ValidationException(
                String.format(
                        "Option %s.%s is required for serialization", IDENTIFIER, SUBJECT.key()));
    }
    return new EncodingFormat<SerializationSchema<RowData>>() {
        @Override
        public SerializationSchema<RowData> createRuntimeEncoder(
                DynamicTableSink.Context context, DataType consumedDataType) {
            final RowType rowType = (RowType) consumedDataType.getLogicalType();
            return new AvroRowDataSerializationSchema(
                    rowType,
                    ConfluentRegistryAvroSerializationSchema.forGeneric(
                            subject.get(),
                            AvroSchemaConverter.convertToSchema(rowType),
                            schemaRegistryURL,
                            optionalPropertiesMap),
                    RowDataToAvroConverters.createConverter(rowType));
        }

        @Override
        public ChangelogMode getChangelogMode() {
            return ChangelogMode.insertOnly();
        }
    };
}
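A minimal sketch of exercising this factory directly; the contexts, data type, and option values are made up, and in a real job these options would arrive from the table DDL as avro-confluent.url and avro-confluent.subject:

    Configuration formatOptions = new Configuration();
    formatOptions.setString("url", "http://localhost:8081"); // schema registry endpoint
    formatOptions.setString("subject", "orders-value");      // mandatory for serialization
    EncodingFormat<SerializationSchema<RowData>> format =
            new RegistryAvroFormatFactory().createEncodingFormat(factoryContext, formatOptions);
    SerializationSchema<RowData> serializer =
            format.createRuntimeEncoder(sinkContext, consumedDataType);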
Use of org.apache.flink.table.types.logical.RowType in project flink by apache.
The class RegistryAvroFormatFactory, method createDecodingFormat.
@Override
public DecodingFormat<DeserializationSchema<RowData>> createDecodingFormat(
        DynamicTableFactory.Context context, ReadableConfig formatOptions) {
    FactoryUtil.validateFactoryOptions(this, formatOptions);
    String schemaRegistryURL = formatOptions.get(URL);
    Map<String, ?> optionalPropertiesMap = buildOptionalPropertiesMap(formatOptions);
    return new ProjectableDecodingFormat<DeserializationSchema<RowData>>() {
        @Override
        public DeserializationSchema<RowData> createRuntimeDecoder(
                DynamicTableSource.Context context, DataType producedDataType, int[][] projections) {
            producedDataType = Projection.of(projections).project(producedDataType);
            final RowType rowType = (RowType) producedDataType.getLogicalType();
            final TypeInformation<RowData> rowDataTypeInfo =
                    context.createTypeInformation(producedDataType);
            return new AvroRowDataDeserializationSchema(
                    ConfluentRegistryAvroDeserializationSchema.forGeneric(
                            AvroSchemaConverter.convertToSchema(rowType),
                            schemaRegistryURL,
                            optionalPropertiesMap),
                    AvroToRowDataConverters.createRowConverter(rowType),
                    rowDataTypeInfo);
        }

        @Override
        public ChangelogMode getChangelogMode() {
            return ChangelogMode.insertOnly();
        }
    };
}
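To illustrate the projection step that narrows producedDataType before the Avro reader schema is derived, a small sketch with made-up field names; the naming of projected nested fields is also an assumption:

    // Physical produced type: ROW<id INT, name STRING, address ROW<city STRING, zip STRING>>.
    // Each inner array is a field path: {2, 0} selects address.city, {0} selects id.
    int[][] projections = {{2, 0}, {0}};
    DataType projected = Projection.of(projections).project(physicalDataType);
    // The reader schema is then derived from this narrowed row type,
    // so columns the query never references are not decoded at all.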