Use of org.apache.flink.orc.vector.RowDataVectorizer in project flink by apache.
The class OrcFileFormatFactory, method createEncodingFormat.
@Override
public EncodingFormat<BulkWriter.Factory<RowData>> createEncodingFormat(
        DynamicTableFactory.Context context, ReadableConfig formatOptions) {
    return new EncodingFormat<BulkWriter.Factory<RowData>>() {

        @Override
        public BulkWriter.Factory<RowData> createRuntimeEncoder(
                DynamicTableSink.Context sinkContext, DataType consumedDataType) {
            RowType formatRowType = (RowType) consumedDataType.getLogicalType();
            LogicalType[] orcTypes = formatRowType.getChildren().toArray(new LogicalType[0]);
            TypeDescription typeDescription =
                    OrcSplitReaderUtil.logicalTypeToOrcType(formatRowType);
            return new OrcBulkWriterFactory<>(
                    new RowDataVectorizer(typeDescription.toString(), orcTypes),
                    getOrcProperties(formatOptions),
                    new Configuration());
        }

        @Override
        public ChangelogMode getChangelogMode() {
            return ChangelogMode.insertOnly();
        }
    };
}
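The factory above derives the vectorizer's ORC schema string from the data type consumed by the sink. A minimal standalone sketch of the same wiring, assuming a hypothetical two-column (id BIGINT, name STRING) row type instead of one supplied by the planner:

// Hypothetical standalone construction of the same writer factory,
// assuming a simple (id BIGINT, name STRING) row type.
RowType rowType =
        RowType.of(
                new LogicalType[] {new BigIntType(), new VarCharType(VarCharType.MAX_LENGTH)},
                new String[] {"id", "name"});
LogicalType[] orcTypes = rowType.getChildren().toArray(new LogicalType[0]);
// logicalTypeToOrcType maps the Flink row type to the equivalent ORC schema.
TypeDescription orcSchema = OrcSplitReaderUtil.logicalTypeToOrcType(rowType);
OrcBulkWriterFactory<RowData> factory =
        new OrcBulkWriterFactory<>(
                new RowDataVectorizer(orcSchema.toString(), orcTypes),
                new Properties(),
                new Configuration());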
Use of org.apache.flink.orc.vector.RowDataVectorizer in project flink by apache.
The class OrcBulkRowDataWriterTest, method testOrcBulkWriterWithRowData.
@Test
public void testOrcBulkWriterWithRowData() throws Exception {
    final File outDir = TEMPORARY_FOLDER.newFolder();
    final Properties writerProps = new Properties();
    writerProps.setProperty("orc.compress", "LZ4");
    final OrcBulkWriterFactory<RowData> writer =
            new OrcBulkWriterFactory<>(
                    new RowDataVectorizer(schema, fieldTypes), writerProps, new Configuration());
    StreamingFileSink<RowData> sink =
            StreamingFileSink.forBulkFormat(new Path(outDir.toURI()), writer)
                    .withBucketAssigner(new UniqueBucketAssigner<>("test"))
                    .withBucketCheckInterval(10000)
                    .build();
    try (OneInputStreamOperatorTestHarness<RowData, Object> testHarness =
            new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink), 1, 1, 0)) {
        testHarness.setup();
        testHarness.open();
        int time = 0;
        for (final RowData record : input) {
            testHarness.processElement(record, ++time);
        }
        testHarness.snapshot(1, ++time);
        testHarness.notifyOfCompletedCheckpoint(1);
        validate(outDir, input);
    }
}
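The fields schema, fieldTypes, and input are fixtures defined elsewhere in the test class and are not part of the snippet. A sketch of what such fixtures could look like for a flat (string, int) row, purely for illustration:

// Illustrative fixtures only; the real test class defines its own, richer fixtures elsewhere.
LogicalType[] fieldTypes =
        new LogicalType[] {new VarCharType(VarCharType.MAX_LENGTH), new IntType()};
String schema = "struct<_col0:string,_col1:int>";
List<RowData> input =
        Arrays.asList(
                GenericRowData.of(StringData.fromString("a"), 1),
                GenericRowData.of(StringData.fromString("b"), 2));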
Use of org.apache.flink.orc.vector.RowDataVectorizer in project flink by apache.
The class OrcFileSystemITCase, method initNestedTypesFile.
private String initNestedTypesFile(List<RowData> data) throws Exception {
    LogicalType[] fieldTypes = new LogicalType[4];
    fieldTypes[0] = new VarCharType();
    fieldTypes[1] = new IntType();
    List<RowType.RowField> arrayRowFieldList =
            Collections.singletonList(new RowType.RowField("_col2_col0", new VarCharType()));
    fieldTypes[2] = new ArrayType(new RowType(arrayRowFieldList));
    List<RowType.RowField> mapRowFieldList =
            Arrays.asList(
                    new RowType.RowField("_col3_col0", new VarCharType()),
                    new RowType.RowField("_col3_col1", new TimestampType()));
    fieldTypes[3] = new MapType(new VarCharType(), new RowType(mapRowFieldList));
    String schema =
            "struct<_col0:string,_col1:int,_col2:array<struct<_col2_col0:string>>,"
                    + "_col3:map<string,struct<_col3_col0:string,_col3_col1:timestamp>>>";
    File outDir = TEMPORARY_FOLDER.newFolder();
    Properties writerProps = new Properties();
    writerProps.setProperty("orc.compress", "LZ4");
    final OrcBulkWriterFactory<RowData> writer =
            new OrcBulkWriterFactory<>(
                    new RowDataVectorizer(schema, fieldTypes), writerProps, new Configuration());
    StreamingFileSink<RowData> sink =
            StreamingFileSink.forBulkFormat(new org.apache.flink.core.fs.Path(outDir.toURI()), writer)
                    .withBucketCheckInterval(10000)
                    .build();
    try (OneInputStreamOperatorTestHarness<RowData, Object> testHarness =
            new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink), 1, 1, 0)) {
        testHarness.setup();
        testHarness.open();
        int time = 0;
        for (final RowData record : data) {
            testHarness.processElement(record, ++time);
        }
        testHarness.snapshot(1, ++time);
        testHarness.notifyOfCompletedCheckpoint(1);
    }
    return outDir.getAbsolutePath();
}
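The records passed in as data must match the nested schema declared above. A sketch of how one such record could be assembled with the generic org.apache.flink.table.data classes (the actual test builds its data list elsewhere; all field values below are made up for illustration):

// Illustrative record matching
// struct<_col0:string,_col1:int,_col2:array<struct<string>>,_col3:map<string,struct<string,timestamp>>>.
Map<StringData, RowData> mapField = new HashMap<>();
mapField.put(
        StringData.fromString("key"),
        GenericRowData.of(
                StringData.fromString("value"),
                TimestampData.fromLocalDateTime(LocalDateTime.parse("2024-01-01T00:00:00"))));
RowData record =
        GenericRowData.of(
                StringData.fromString("_col0_value"),
                1,
                new GenericArrayData(
                        new Object[] {GenericRowData.of(StringData.fromString("_col2_value"))}),
                new GenericMapData(mapField));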