use of org.apache.flink.table.types.DataType in project flink by apache.
the class KinesisDynamicTableFactory method createDynamicTableSource.
@Override
public DynamicTableSource createDynamicTableSource(Context context) {
    FactoryUtil.TableFactoryHelper helper = FactoryUtil.createTableFactoryHelper(this, context);
    ReadableConfig tableOptions = helper.getOptions();
    ResolvedCatalogTable catalogTable = context.getCatalogTable();
    DataType physicalDataType = catalogTable.getResolvedSchema().toPhysicalRowDataType();
    KinesisConnectorOptionsUtil optionsUtils =
            new KinesisConnectorOptionsUtil(catalogTable.getOptions(), tableOptions);
    // initialize the table format early in order to register its consumedOptionKeys
    // in the TableFactoryHelper, as those are needed for correct option validation
    DecodingFormat<DeserializationSchema<RowData>> decodingFormat =
            helper.discoverDecodingFormat(DeserializationFormatFactory.class, FORMAT);
    // validate the data types of the table options
    helper.validateExcept(optionsUtils.getNonValidatedPrefixes().toArray(new String[0]));
    Properties properties = optionsUtils.getValidatedSourceConfigurations();
    return new KinesisDynamicSource(
            physicalDataType, tableOptions.get(STREAM), properties, decodingFormat);
}
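For context, a table backed by this factory is normally declared through DDL rather than constructed directly. The sketch below is not taken from the Flink sources: the table name, stream name, region, and format are illustrative, and the option keys follow the documented Kinesis table connector but may vary between connector versions.

import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.TableEnvironment;

public class KinesisSourceDdlSketch {
    public static void main(String[] args) {
        TableEnvironment tEnv =
                TableEnvironment.create(EnvironmentSettings.newInstance().inStreamingMode().build());
        // hypothetical table: column names, stream, region and format are illustrative only
        tEnv.executeSql(
                "CREATE TABLE orders ("
                        + "  order_id BIGINT,"
                        + "  amount DOUBLE"
                        + ") WITH ("
                        + "  'connector' = 'kinesis',"    // resolves to KinesisDynamicTableFactory
                        + "  'stream' = 'orders-stream',"
                        + "  'aws.region' = 'us-east-1',"
                        + "  'format' = 'json'"           // picked up via discoverDecodingFormat(..., FORMAT)
                        + ")");
    }
}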
use of org.apache.flink.table.types.DataType in project flink by apache.
the class ParquetColumnarRowSplitReaderTest method innerTestPartitionValues.
private void innerTestPartitionValues(Path testPath, Map<String, Object> partSpec, boolean nullPartValue) throws IOException {
    LogicalType[] fieldTypes = new LogicalType[] {
            new VarCharType(VarCharType.MAX_LENGTH), new BooleanType(), new TinyIntType(),
            new SmallIntType(), new IntType(), new BigIntType(), new FloatType(),
            new DoubleType(), new TimestampType(9), new DecimalType(5, 0),
            new DecimalType(15, 0), new DecimalType(20, 0), new DecimalType(5, 0),
            new DecimalType(15, 0), new DecimalType(20, 0), new BooleanType(),
            new DateType(), new TimestampType(9), new DoubleType(), new TinyIntType(),
            new SmallIntType(), new IntType(), new BigIntType(), new FloatType(),
            new DecimalType(5, 0), new DecimalType(15, 0), new DecimalType(20, 0),
            new VarCharType(VarCharType.MAX_LENGTH) };
    ParquetColumnarRowSplitReader reader = ParquetSplitReaderUtil.genPartColumnarRowReader(
            false,
            true,
            new Configuration(),
            IntStream.range(0, 28).mapToObj(i -> "f" + i).toArray(String[]::new),
            Arrays.stream(fieldTypes).map(TypeConversions::fromLogicalToDataType).toArray(DataType[]::new),
            partSpec,
            new int[] { 7, 2, 4, 15, 19, 20, 21, 22, 23, 18, 16, 17, 24, 25, 26, 27 },
            rowGroupSize,
            new Path(testPath.getPath()),
            0,
            Long.MAX_VALUE);
    int i = 0;
    while (!reader.reachedEnd()) {
        ColumnarRowData row = reader.nextRecord();
        // common values
        assertEquals(i, row.getDouble(0), 0);
        assertEquals((byte) i, row.getByte(1));
        assertEquals(i, row.getInt(2));
        // partition values
        if (nullPartValue) {
            for (int j = 3; j < 16; j++) {
                assertTrue(row.isNullAt(j));
            }
        } else {
            assertTrue(row.getBoolean(3));
            assertEquals(9, row.getByte(4));
            assertEquals(10, row.getShort(5));
            assertEquals(11, row.getInt(6));
            assertEquals(12, row.getLong(7));
            assertEquals(13, row.getFloat(8), 0);
            assertEquals(6.6, row.getDouble(9), 0);
            assertEquals(DateTimeUtils.toInternal(Date.valueOf("2020-11-23")), row.getInt(10));
            assertEquals(LocalDateTime.of(1999, 1, 1, 1, 1), row.getTimestamp(11, 9).toLocalDateTime());
            assertEquals(DecimalData.fromBigDecimal(new BigDecimal(24), 5, 0), row.getDecimal(12, 5, 0));
            assertEquals(DecimalData.fromBigDecimal(new BigDecimal(25), 15, 0), row.getDecimal(13, 15, 0));
            assertEquals(DecimalData.fromBigDecimal(new BigDecimal(26), 20, 0), row.getDecimal(14, 20, 0));
            assertEquals("f27", row.getString(15).toString());
        }
        i++;
    }
    reader.close();
}
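The test builds the reader's DataType[] from LogicalType[] via TypeConversions::fromLogicalToDataType. A minimal standalone sketch of that conversion (not part of the test above) is:

import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.IntType;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.utils.TypeConversions;

public class TypeConversionSketch {
    public static void main(String[] args) {
        LogicalType logical = new IntType();
        // wraps the logical type in a DataType carrying its default conversion class
        DataType dataType = TypeConversions.fromLogicalToDataType(logical);
        System.out.println(dataType);                  // INT
        System.out.println(dataType.getLogicalType()); // the wrapped logical type
    }
}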
use of org.apache.flink.table.types.DataType in project flink by apache.
the class ParquetSplitReaderUtil method genPartColumnarRowReader.
/** Util for generating partitioned {@link ParquetColumnarRowSplitReader}. */
public static ParquetColumnarRowSplitReader genPartColumnarRowReader(
        boolean utcTimestamp, boolean caseSensitive, Configuration conf,
        String[] fullFieldNames, DataType[] fullFieldTypes, Map<String, Object> partitionSpec,
        int[] selectedFields, int batchSize, Path path, long splitStart, long splitLength)
        throws IOException {
    List<String> nonPartNames = Arrays.stream(fullFieldNames)
            .filter(n -> !partitionSpec.containsKey(n))
            .collect(Collectors.toList());
    List<String> selNonPartNames = Arrays.stream(selectedFields)
            .mapToObj(i -> fullFieldNames[i])
            .filter(nonPartNames::contains)
            .collect(Collectors.toList());
    int[] selParquetFields = selNonPartNames.stream().mapToInt(nonPartNames::indexOf).toArray();
    ParquetColumnarRowSplitReader.ColumnBatchGenerator gen = readVectors -> {
        // create and initialize the row batch
        ColumnVector[] vectors = new ColumnVector[selectedFields.length];
        for (int i = 0; i < vectors.length; i++) {
            String name = fullFieldNames[selectedFields[i]];
            LogicalType type = fullFieldTypes[selectedFields[i]].getLogicalType();
            vectors[i] = partitionSpec.containsKey(name)
                    ? createVectorFromConstant(type, partitionSpec.get(name), batchSize)
                    : readVectors[selNonPartNames.indexOf(name)];
        }
        return new VectorizedColumnBatch(vectors);
    };
    return new ParquetColumnarRowSplitReader(
            utcTimestamp, caseSensitive, conf,
            Arrays.stream(selParquetFields)
                    .mapToObj(i -> fullFieldTypes[i].getLogicalType())
                    .toArray(LogicalType[]::new),
            selNonPartNames.toArray(new String[0]),
            gen, batchSize, new org.apache.hadoop.fs.Path(path.toUri()), splitStart, splitLength);
}
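The index bookkeeping above is easy to misread. The following standalone sketch (made-up field names, no Flink dependencies) reproduces just the remapping: partition columns are dropped from the Parquet read and the remaining selected fields are re-indexed against the non-partition column list.

import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class SelectionRemappingSketch {
    public static void main(String[] args) {
        String[] fullFieldNames = {"f0", "f1", "f2", "f3"};
        Map<String, Object> partitionSpec = Collections.singletonMap("f3", 2020);
        int[] selectedFields = {3, 1};

        // columns that are actually stored in the Parquet file
        List<String> nonPartNames = Arrays.stream(fullFieldNames)
                .filter(n -> !partitionSpec.containsKey(n))
                .collect(Collectors.toList());                 // [f0, f1, f2]
        // selected columns that are not partition columns
        List<String> selNonPartNames = Arrays.stream(selectedFields)
                .mapToObj(i -> fullFieldNames[i])
                .filter(nonPartNames::contains)
                .collect(Collectors.toList());                 // [f1]; f3 comes from the partition spec
        // their positions within the non-partition columns, i.e. what Parquet actually reads
        int[] selParquetFields = selNonPartNames.stream()
                .mapToInt(nonPartNames::indexOf)
                .toArray();                                    // [1]

        System.out.println(nonPartNames + " " + selNonPartNames + " " + Arrays.toString(selParquetFields));
    }
}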
use of org.apache.flink.table.types.DataType in project flink by apache.
the class RegistryAvroFormatFactory method createEncodingFormat.
@Override
public EncodingFormat<SerializationSchema<RowData>> createEncodingFormat(
        DynamicTableFactory.Context context, ReadableConfig formatOptions) {
    FactoryUtil.validateFactoryOptions(this, formatOptions);
    String schemaRegistryURL = formatOptions.get(URL);
    Optional<String> subject = formatOptions.getOptional(SUBJECT);
    Map<String, ?> optionalPropertiesMap = buildOptionalPropertiesMap(formatOptions);
    if (!subject.isPresent()) {
        throw new ValidationException(String.format(
                "Option %s.%s is required for serialization", IDENTIFIER, SUBJECT.key()));
    }
    return new EncodingFormat<SerializationSchema<RowData>>() {

        @Override
        public SerializationSchema<RowData> createRuntimeEncoder(
                DynamicTableSink.Context context, DataType consumedDataType) {
            final RowType rowType = (RowType) consumedDataType.getLogicalType();
            return new AvroRowDataSerializationSchema(
                    rowType,
                    ConfluentRegistryAvroSerializationSchema.forGeneric(
                            subject.get(),
                            AvroSchemaConverter.convertToSchema(rowType),
                            schemaRegistryURL,
                            optionalPropertiesMap),
                    RowDataToAvroConverters.createConverter(rowType));
        }

        @Override
        public ChangelogMode getChangelogMode() {
            return ChangelogMode.insertOnly();
        }
    };
}
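The consumedDataType handed to createRuntimeEncoder is the sink's row type expressed as a DataType. A minimal sketch (with made-up field names) of building such a type and extracting the RowType that the Avro schema conversion works on:

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.RowType;

public class ConsumedDataTypeSketch {
    public static void main(String[] args) {
        DataType consumedDataType = DataTypes.ROW(
                DataTypes.FIELD("user_id", DataTypes.BIGINT()),
                DataTypes.FIELD("name", DataTypes.STRING()));
        // the format only needs the logical row structure for schema conversion
        RowType rowType = (RowType) consumedDataType.getLogicalType();
        System.out.println(rowType.getFieldNames()); // [user_id, name]
    }
}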
use of org.apache.flink.table.types.DataType in project flink by apache.
the class RegistryAvroFormatFactory method createDecodingFormat.
@Override
public DecodingFormat<DeserializationSchema<RowData>> createDecodingFormat(
        DynamicTableFactory.Context context, ReadableConfig formatOptions) {
    FactoryUtil.validateFactoryOptions(this, formatOptions);
    String schemaRegistryURL = formatOptions.get(URL);
    Map<String, ?> optionalPropertiesMap = buildOptionalPropertiesMap(formatOptions);
    return new ProjectableDecodingFormat<DeserializationSchema<RowData>>() {

        @Override
        public DeserializationSchema<RowData> createRuntimeDecoder(
                DynamicTableSource.Context context, DataType producedDataType, int[][] projections) {
            producedDataType = Projection.of(projections).project(producedDataType);
            final RowType rowType = (RowType) producedDataType.getLogicalType();
            final TypeInformation<RowData> rowDataTypeInfo =
                    context.createTypeInformation(producedDataType);
            return new AvroRowDataDeserializationSchema(
                    ConfluentRegistryAvroDeserializationSchema.forGeneric(
                            AvroSchemaConverter.convertToSchema(rowType),
                            schemaRegistryURL,
                            optionalPropertiesMap),
                    AvroToRowDataConverters.createRowConverter(rowType),
                    rowDataTypeInfo);
        }

        @Override
        public ChangelogMode getChangelogMode() {
            return ChangelogMode.insertOnly();
        }
    };
}
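Because this is a ProjectableDecodingFormat, the planner can push a field projection into the decoder, and the produced DataType is narrowed before the schema is derived. A small sketch (illustrative field names) of what Projection.of(projections).project(producedDataType) yields:

import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.connector.Projection;
import org.apache.flink.table.types.DataType;

public class ProjectionSketch {
    public static void main(String[] args) {
        DataType fullType = DataTypes.ROW(
                DataTypes.FIELD("id", DataTypes.BIGINT()),
                DataTypes.FIELD("name", DataTypes.STRING()),
                DataTypes.FIELD("price", DataTypes.DOUBLE()));
        // each inner array is a field path into the full row; order is preserved
        int[][] projections = {{2}, {0}};
        DataType producedDataType = Projection.of(projections).project(fullType);
        System.out.println(producedDataType); // a row type containing only price and id
    }
}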