Use of org.apache.flink.orc.data.Record in project flink by apache.
The class OrcBulkWriterITCase, method testOrcBulkWriter.
@Test
public void testOrcBulkWriter() throws Exception {
    final File outDir = TEMPORARY_FOLDER.newFolder();
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    final Properties writerProps = new Properties();
    writerProps.setProperty("orc.compress", "LZ4");
    final OrcBulkWriterFactory<Record> factory =
            new OrcBulkWriterFactory<>(new RecordVectorizer(schema), writerProps, new Configuration());
    env.setParallelism(1);
    env.enableCheckpointing(100);
    DataStream<Record> stream =
            env.addSource(new FiniteTestSource<>(testData), TypeInformation.of(Record.class));
    stream.map(str -> str)
            .addSink(
                    StreamingFileSink.forBulkFormat(new Path(outDir.toURI()), factory)
                            .withBucketAssigner(new UniqueBucketAssigner<>("test"))
                            .build());
    env.execute();
    OrcBulkWriterTestUtil.validate(outDir, testData);
}
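Record and RecordVectorizer are not shown on this page. From getResults below, Record is evidently a (name, age) pair, and the factory test passes the schema string struct<_col0:string,_col1:int> to RecordVectorizer. The following is a minimal sketch under those assumptions, using Flink's org.apache.flink.orc.vector.Vectorizer base class; the addUserMetadata call is a guess made so the metadata assertions in validate below can pass, not a confirmed detail of the original sources.

// A sketch, not the Flink sources; each class would live in its own file.
import java.io.IOException;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.util.Objects;

import org.apache.flink.orc.vector.Vectorizer;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

class Record implements Serializable {

    private final String name;
    private final int age;

    Record(String name, int age) {
        this.name = name;
        this.age = age;
    }

    String getName() {
        return name;
    }

    int getAge() {
        return age;
    }

    // equals/hashCode are needed for the list assertion in validate() to pass.
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof Record)) {
            return false;
        }
        Record other = (Record) o;
        return age == other.age && Objects.equals(name, other.name);
    }

    @Override
    public int hashCode() {
        return Objects.hash(name, age);
    }
}

class RecordVectorizer extends Vectorizer<Record> implements Serializable {

    RecordVectorizer(String schema) {
        super(schema);
    }

    @Override
    public void vectorize(Record element, VectorizedRowBatch batch) throws IOException {
        // Fill one batch row per record: column 0 is the name, column 1 the age.
        BytesColumnVector stringVector = (BytesColumnVector) batch.cols[0];
        LongColumnVector intVector = (LongColumnVector) batch.cols[1];
        int row = batch.size++;
        stringVector.setVal(row, element.getName().getBytes(StandardCharsets.UTF_8));
        intVector.vector[row] = element.getAge();
        // Assumption: register the user metadata that validate() asserts on.
        addUserMetadata(OrcBulkWriterTestUtil.USER_METADATA_KEY, OrcBulkWriterTestUtil.USER_METADATA_VALUE);
    }
}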
Use of org.apache.flink.orc.data.Record in project flink by apache.
The class OrcBulkWriterFactoryTest, method testNotOverrideInMemoryManager.
@Test
public void testNotOverrideInMemoryManager() throws IOException {
    TestMemoryManager memoryManager = new TestMemoryManager();
    OrcBulkWriterFactory<Record> factory =
            new TestOrcBulkWriterFactory<>(
                    new RecordVectorizer("struct<_col0:string,_col1:int>"), memoryManager);
    factory.create(new LocalDataOutputStream(temporaryFolder.newFile()));
    factory.create(new LocalDataOutputStream(temporaryFolder.newFile()));
    List<Path> addedWriterPath = memoryManager.getAddedWriterPath();
    assertEquals(2, addedWriterPath.size());
    assertNotEquals(addedWriterPath.get(0), addedWriterPath.get(1));
}
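TestMemoryManager and TestOrcBulkWriterFactory are test doubles that are not shown on this page. Below is a plausible sketch of the memory-manager half, assuming it extends ORC's MemoryManagerImpl and simply records the Path of every writer registered through addWriter; every member name beyond addWriter is inferred from the assertions above, not taken from the original sources.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.orc.MemoryManager;
import org.apache.orc.impl.MemoryManagerImpl;

// Sketch only: records writer registrations instead of managing memory.
class TestMemoryManager extends MemoryManagerImpl {

    private final List<Path> addedWriterPath = new ArrayList<>();

    TestMemoryManager() {
        super(new Configuration());
    }

    @Override
    public void addWriter(Path path, long requestedAllocation, MemoryManager.Callback callback) {
        // The test asserts that each factory.create(...) call registers a distinct path.
        addedWriterPath.add(path);
    }

    List<Path> getAddedWriterPath() {
        return addedWriterPath;
    }
}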
Use of org.apache.flink.orc.data.Record in project flink by apache.
The class OrcBulkWriterTestUtil, method validate.
public static void validate(File files, List<Record> expected) throws IOException {
    final File[] buckets = files.listFiles();
    assertNotNull(buckets);
    assertEquals(1, buckets.length);
    final File[] partFiles = buckets[0].listFiles();
    assertNotNull(partFiles);
    for (File partFile : partFiles) {
        assertTrue(partFile.length() > 0);
        OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(new Configuration());
        Reader reader =
                OrcFile.createReader(new org.apache.hadoop.fs.Path(partFile.toURI()), readerOptions);
        assertEquals(3, reader.getNumberOfRows());
        assertEquals(2, reader.getSchema().getFieldNames().size());
        assertSame(CompressionKind.LZ4, reader.getCompressionKind());
        assertTrue(reader.hasMetadataValue(USER_METADATA_KEY));
        assertTrue(reader.getMetadataKeys().contains(USER_METADATA_KEY));
        List<Record> results = getResults(reader);
        assertEquals(3, results.size());
        assertEquals(expected, results);
    }
}
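USER_METADATA_KEY (and a matching value) would be constants on this util, registered on the writer side via the vectorizer. A minimal sketch; both the key and the value are assumptions:

// Assumed constants; the writer must have registered this pair via addUserMetadata.
public static final String USER_METADATA_KEY = "userKey";
public static final ByteBuffer USER_METADATA_VALUE =
        ByteBuffer.wrap("hello".getBytes(StandardCharsets.UTF_8));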
Use of org.apache.flink.orc.data.Record in project flink by apache.
The class OrcBulkWriterTestUtil, method getResults.
private static List<Record> getResults(Reader reader) throws IOException {
    List<Record> results = new ArrayList<>();
    RecordReader recordReader = reader.rows();
    VectorizedRowBatch batch = reader.getSchema().createRowBatch();
    while (recordReader.nextBatch(batch)) {
        BytesColumnVector stringVector = (BytesColumnVector) batch.cols[0];
        LongColumnVector intVector = (LongColumnVector) batch.cols[1];
        for (int r = 0; r < batch.size; r++) {
            String name =
                    new String(stringVector.vector[r], stringVector.start[r], stringVector.length[r]);
            int age = (int) intVector.vector[r];
            results.add(new Record(name, age));
        }
    }
    // Close only after all batches have been consumed; closing inside the loop
    // would fail as soon as a file spans more than one batch.
    recordReader.close();
    return results;
}
Use of org.apache.flink.orc.data.Record in project flink by apache.
The class OrcBulkWriterTest, method testOrcBulkWriter.
@Test
public void testOrcBulkWriter() throws Exception {
    final File outDir = TEMPORARY_FOLDER.newFolder();
    final Properties writerProps = new Properties();
    writerProps.setProperty("orc.compress", "LZ4");
    final OrcBulkWriterFactory<Record> writer =
            new OrcBulkWriterFactory<>(new RecordVectorizer(schema), writerProps, new Configuration());
    StreamingFileSink<Record> sink =
            StreamingFileSink.forBulkFormat(new Path(outDir.toURI()), writer)
                    .withBucketAssigner(new UniqueBucketAssigner<>("test"))
                    .withBucketCheckInterval(10000)
                    .build();
    try (OneInputStreamOperatorTestHarness<Record, Object> testHarness =
            new OneInputStreamOperatorTestHarness<>(new StreamSink<>(sink), 1, 1, 0)) {
        testHarness.setup();
        testHarness.open();
        int time = 0;
        for (final Record record : input) {
            testHarness.processElement(record, ++time);
        }
        testHarness.snapshot(1, ++time);
        testHarness.notifyOfCompletedCheckpoint(1);
        OrcBulkWriterTestUtil.validate(outDir, input);
    }
}
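Both test classes also reference a shared schema string and a three-record fixture (input here, testData in the ITCase), neither of which is shown on this page. A fixture consistent with the assertions in validate (three rows, two fields, (name, age) records) could look like the following; the record values are hypothetical.

// Hypothetical fixtures; only the shape (3 records, struct<_col0:string,_col1:int>)
// is implied by the assertions in validate().
private static final String schema = "struct<_col0:string,_col1:int>";
private static final List<Record> input =
        Arrays.asList(new Record("Shiv", 44), new Record("Jesse", 23), new Record("Walt", 50));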