Use of org.apache.flink.orc.vector.RecordVectorizer in project flink by apache: class OrcBulkWriterITCase, method testOrcBulkWriter.
@Test
public void testOrcBulkWriter() throws Exception {
    final File outDir = TEMPORARY_FOLDER.newFolder();
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    final Properties writerProps = new Properties();
    writerProps.setProperty("orc.compress", "LZ4");

    // Bundle the Vectorizer, the ORC writer properties, and the Hadoop configuration
    // into a factory that the streaming file sink uses to create bulk writers.
    final OrcBulkWriterFactory<Record> factory =
            new OrcBulkWriterFactory<>(new RecordVectorizer(schema), writerProps, new Configuration());

    env.setParallelism(1);
    env.enableCheckpointing(100);

    DataStream<Record> stream =
            env.addSource(new FiniteTestSource<>(testData), TypeInformation.of(Record.class));
    stream.map(str -> str)
            .addSink(
                    StreamingFileSink.forBulkFormat(new Path(outDir.toURI()), factory)
                            .withBucketAssigner(new UniqueBucketAssigner<>("test"))
                            .build());

    env.execute();

    // Read the written ORC files back and compare them against the input data.
    OrcBulkWriterTestUtil.validate(outDir, testData);
}
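The RecordVectorizer referenced above is a test helper whose body this page does not show. Flink's ORC bulk writer only requires a subclass of org.apache.flink.orc.vector.Vectorizer<T> that writes each element into the current VectorizedRowBatch. Below is a minimal sketch of such a class; the getName()/getAge() accessors on Record are assumptions inferred from the struct<_col0:string,_col1:int> schema used in the next example.

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.flink.orc.vector.Vectorizer;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

// Sketch: maps one Record onto the next row of the ORC row batch.
public class RecordVectorizer extends Vectorizer<Record> {

    public RecordVectorizer(String schema) {
        super(schema); // e.g. "struct<_col0:string,_col1:int>"
    }

    @Override
    public void vectorize(Record element, VectorizedRowBatch batch) throws IOException {
        BytesColumnVector stringVector = (BytesColumnVector) batch.cols[0];
        LongColumnVector intVector = (LongColumnVector) batch.cols[1];
        int row = batch.size++; // claim the next free row in the batch
        // Assumed accessors: getName() -> String, getAge() -> int.
        stringVector.setVal(row, element.getName().getBytes(StandardCharsets.UTF_8));
        intVector.vector[row] = element.getAge();
    }
}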
Use of org.apache.flink.orc.vector.RecordVectorizer in project flink by apache: class OrcBulkWriterFactoryTest, method testNotOverrideInMemoryManager.
@Test
public void testNotOverrideInMemoryManager() throws IOException {
    TestMemoryManager memoryManager = new TestMemoryManager();
    OrcBulkWriterFactory<Record> factory =
            new TestOrcBulkWriterFactory<>(
                    new RecordVectorizer("struct<_col0:string,_col1:int>"), memoryManager);

    // Create two writers backed by the same memory manager.
    factory.create(new LocalDataOutputStream(temporaryFolder.newFile()));
    factory.create(new LocalDataOutputStream(temporaryFolder.newFile()));

    // Each writer must register its own distinct path: creating a second writer
    // must not overwrite the first one's entry in the shared memory manager.
    List<Path> addedWriterPath = memoryManager.getAddedWriterPath();
    assertEquals(2, addedWriterPath.size());
    assertNotEquals(addedWriterPath.get(0), addedWriterPath.get(1));
}
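The Record type that all of these examples serialize is also not shown on this page. Going by the struct<_col0:string,_col1:int> schema above, a plausible minimal POJO could look like the following sketch; the field names are hypothetical. equals() and hashCode() are included because validation helpers typically compare read-back records against the input.

import java.io.Serializable;
import java.util.Objects;

// Sketch of the Record POJO assumed by the tests: one string column and one
// int column, matching the ORC schema "struct<_col0:string,_col1:int>".
public class Record implements Serializable {

    private final String name;
    private final int age;

    public Record(String name, int age) {
        this.name = name;
        this.age = age;
    }

    public String getName() {
        return name;
    }

    public int getAge() {
        return age;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof Record)) {
            return false;
        }
        Record that = (Record) o;
        return age == that.age && Objects.equals(name, that.name);
    }

    @Override
    public int hashCode() {
        return Objects.hash(name, age);
    }
}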
Use of org.apache.flink.orc.vector.RecordVectorizer in project flink by apache: class OrcBulkWriterTest, method testOrcBulkWriter.
@Test
public void testOrcBulkWriter() throws Exception {
    final File outDir = TEMPORARY_FOLDER.newFolder();

    final Properties writerProps = new Properties();
    writerProps.setProperty("orc.compress", "LZ4");

    final OrcBulkWriterFactory<Record> writer =
            new OrcBulkWriterFactory<>(new RecordVectorizer(schema), writerProps, new Configuration());

    StreamingFileSink<Record> sink =
            StreamingFileSink.forBulkFormat(new Path(outDir.toURI()), writer)
                    .withBucketAssigner(new UniqueBucketAssigner<>("test"))
                    .withBucketCheckInterval(10000)
                    .build();

    // Drive the sink through an operator test harness instead of running a full job.
    try (OneInputStreamOperatorTestHarness<Record, Object> testHarness =
            new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink), 1, 1, 0)) {
        testHarness.setup();
        testHarness.open();

        int time = 0;
        for (final Record record : input) {
            testHarness.processElement(record, ++time);
        }

        // Completing a checkpoint finalizes the in-progress part files,
        // so they can be read back and validated afterwards.
        testHarness.snapshot(1, ++time);
        testHarness.notifyOfCompletedCheckpoint(1);
        OrcBulkWriterTestUtil.validate(outDir, input);
    }
}
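OrcBulkWriterTestUtil.validate is another helper that is not shown here. As an illustration of what such a read-back check can do, the sketch below opens a finished part file with the plain ORC reader (org.apache.orc) and inspects its compression and row count; the class name and the path handling are assumptions, not the helper's actual code.

import java.io.File;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.orc.CompressionKind;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;

// Sketch: open one written ORC part file and check what the tests configured.
public class OrcReadBackSketch {

    public static void main(String[] args) throws Exception {
        File partFile = new File(args[0]); // path to a finished ORC part file

        try (Reader reader = OrcFile.createReader(
                new Path(partFile.toURI()),
                OrcFile.readerOptions(new Configuration()))) {

            // The tests set "orc.compress" to LZ4, so a read-back should report it.
            if (reader.getCompressionKind() != CompressionKind.LZ4) {
                throw new IllegalStateException(
                        "unexpected compression: " + reader.getCompressionKind());
            }
            System.out.println("rows written: " + reader.getNumberOfRows());
            System.out.println("schema: " + reader.getSchema());
        }
    }
}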