Use of org.apache.orc.Reader in project flink by apache.
In class OrcNoHiveShim, method createRecordReader:
@Override
public RecordReader createRecordReader(
        Configuration conf,
        TypeDescription schema,
        int[] selectedFields,
        List<OrcFilters.Predicate> conjunctPredicates,
        org.apache.flink.core.fs.Path path,
        long splitStart,
        long splitLength) throws IOException {
    // open ORC file and create reader
    org.apache.hadoop.fs.Path hPath = new org.apache.hadoop.fs.Path(path.toUri());
    Reader orcReader = OrcFile.createReader(hPath, OrcFile.readerOptions(conf));
    // get offset and length for the stripes that start in the split
    Tuple2<Long, Long> offsetAndLength =
            getOffsetAndLengthForSplit(splitStart, splitLength, orcReader.getStripes());
    // create ORC row reader configuration
    Reader.Options options =
            new Reader.Options()
                    .schema(schema)
                    .range(offsetAndLength.f0, offsetAndLength.f1)
                    .useZeroCopy(OrcConf.USE_ZEROCOPY.getBoolean(conf))
                    .skipCorruptRecords(OrcConf.SKIP_CORRUPT_DATA.getBoolean(conf))
                    .tolerateMissingSchema(OrcConf.TOLERATE_MISSING_SCHEMA.getBoolean(conf));
    // TODO configure filters
    // configure selected fields
    options.include(computeProjectionMask(schema, selectedFields));
    // create ORC row reader
    RecordReader orcRowsReader = orcReader.rows(options);
    // assign ids (calling getId() forces lazy column id assignment on the schema)
    schema.getId();
    return orcRowsReader;
}
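For context, here is a minimal sketch (not taken from the Flink shim) of how a RecordReader like the one returned above might be consumed with the core ORC batch API. The file path, the two-column schema, and the column names are assumptions introduced only for illustration.

// Hedged sketch: drain an ORC RecordReader batch by batch.
// The path and the schema below are hypothetical, not from the Flink code above.
TypeDescription schema = TypeDescription.fromString("struct<id:bigint,name:string>");
Reader orcReader =
        OrcFile.createReader(
                new org.apache.hadoop.fs.Path("/tmp/example.orc"),
                OrcFile.readerOptions(new Configuration()));
RecordReader rows = orcReader.rows(new Reader.Options().schema(schema));
VectorizedRowBatch batch = schema.createRowBatch();
while (rows.nextBatch(batch)) {
    LongColumnVector ids = (LongColumnVector) batch.cols[0];
    BytesColumnVector names = (BytesColumnVector) batch.cols[1];
    for (int row = 0; row < batch.size; row++) {
        long id = ids.vector[row];
        String name = names.toString(row);
        // process (id, name) ...
    }
}
rows.close();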
Use of org.apache.orc.Reader in project flink by apache.
In class OrcShimV200, method createReader:
protected Reader createReader(Path path, Configuration conf) throws IOException {
    try {
        Class orcFileClass = Class.forName("org.apache.hadoop.hive.ql.io.orc.OrcFile");
        Object readerOptions = invokeStaticMethod(orcFileClass, "readerOptions", conf);
        Class readerClass = Class.forName("org.apache.hadoop.hive.ql.io.orc.ReaderImpl");
        // noinspection unchecked
        return (Reader) invokeConstructor(readerClass, path, readerOptions);
    } catch (ClassNotFoundException
            | NoSuchMethodException
            | IllegalAccessException
            | InstantiationException
            | InvocationTargetException e) {
        throw new IOException(e);
    }
}
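The invokeStaticMethod and invokeConstructor calls above are Flink's internal reflection helpers. Below is a minimal sketch of what such helpers might look like; the real Flink helpers may resolve parameter types differently, so this is an illustration, not a copy of the shim.

// Hedged sketch of reflection helpers similar to those used above. Looking up
// parameter types from the runtime classes of the arguments is a simplification.
private static Object invokeStaticMethod(Class<?> clazz, String methodName, Object... args)
        throws NoSuchMethodException, IllegalAccessException, InvocationTargetException {
    Class<?>[] argTypes = Arrays.stream(args).map(Object::getClass).toArray(Class[]::new);
    return clazz.getMethod(methodName, argTypes).invoke(null, args);
}

private static Object invokeConstructor(Class<?> clazz, Object... args)
        throws NoSuchMethodException, IllegalAccessException, InstantiationException,
                InvocationTargetException {
    Class<?>[] argTypes = Arrays.stream(args).map(Object::getClass).toArray(Class[]::new);
    return clazz.getConstructor(argTypes).newInstance(args);
}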
Use of org.apache.orc.Reader in project flink by apache.
In class OrcFileSystemITCase, method testNonPartition:
@Override
public void testNonPartition() {
    super.testNonPartition();
    // test configure success
    File directory = new File(URI.create(resultPath()).getPath());
    File[] files =
            directory.listFiles((dir, name) -> !name.startsWith(".") && !name.startsWith("_"));
    Assert.assertNotNull(files);
    Path path = new Path(URI.create(files[0].getAbsolutePath()));
    try {
        Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(new Configuration()));
        if (configure) {
            Assert.assertEquals("SNAPPY", reader.getCompressionKind().toString());
        } else {
            Assert.assertEquals("ZLIB", reader.getCompressionKind().toString());
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
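For comparison, here is a minimal sketch (using the plain ORC writer API, not the Flink connector that the test exercises) of how a compression codec such as the SNAPPY or ZLIB values asserted above is chosen when a file is written. The path and schema are assumptions.

// Hedged sketch: write an ORC file with SNAPPY compression via the core ORC API.
// Path and schema are hypothetical; ZLIB is the codec used when none is set explicitly.
TypeDescription schema = TypeDescription.fromString("struct<x:int>");
Writer writer =
        OrcFile.createWriter(
                new org.apache.hadoop.fs.Path("/tmp/snappy-example.orc"),
                OrcFile.writerOptions(new Configuration())
                        .setSchema(schema)
                        .compress(CompressionKind.SNAPPY));
writer.close();
// Reading the file back, reader.getCompressionKind() would then report SNAPPY.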
Use of org.apache.orc.Reader in project flink by apache.
In class OrcShimV200, method createRecordReader:
@Override
public RecordReader createRecordReader(
        Configuration conf,
        TypeDescription schema,
        int[] selectedFields,
        List<Predicate> conjunctPredicates,
        org.apache.flink.core.fs.Path path,
        long splitStart,
        long splitLength) throws IOException {
    // open ORC file and create reader
    Path hPath = new Path(path.toUri());
    Reader orcReader = createReader(hPath, conf);
    // get offset and length for the stripes that start in the split
    Tuple2<Long, Long> offsetAndLength =
            getOffsetAndLengthForSplit(splitStart, splitLength, orcReader.getStripes());
    // create ORC row reader configuration
    Reader.Options options =
            readOrcConf(
                    new Reader.Options()
                            .schema(schema)
                            .range(offsetAndLength.f0, offsetAndLength.f1),
                    conf);
    // configure filters
    if (!conjunctPredicates.isEmpty()) {
        SearchArgument.Builder b = SearchArgumentFactory.newBuilder();
        b = b.startAnd();
        for (Predicate predicate : conjunctPredicates) {
            predicate.add(b);
        }
        b = b.end();
        options.searchArgument(b.build(), new String[] {});
    }
    // configure selected fields
    options.include(computeProjectionMask(schema, selectedFields));
    // create ORC row reader
    RecordReader orcRowsReader = createRecordReader(orcReader, options);
    // assign ids (calling getId() forces lazy column id assignment on the schema)
    schema.getId();
    return orcRowsReader;
}
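To make the filter branch above concrete, here is a minimal hand-written SearchArgument for a single predicate. The column name and type are assumptions, and in the real code the leaves are produced by Flink's Predicate objects rather than written out like this; depending on the Hive/ORC version, the builder methods may or may not take a PredicateLeaf.Type argument.

// Hedged sketch: the kind of search argument the loop above assembles, written out
// by hand for one "age < 30" leaf. Column name and type are hypothetical.
SearchArgument sarg =
        SearchArgumentFactory.newBuilder()
                .startAnd()
                .lessThan("age", PredicateLeaf.Type.LONG, 30L)
                .end()
                .build();
Reader.Options opts = new Reader.Options().searchArgument(sarg, new String[] {"age"});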
Use of org.apache.orc.Reader in project flink by apache.
In class OrcBulkRowDataWriterTest, method validate:
private void validate(File files, List<RowData> expected) throws IOException {
    final File[] buckets = files.listFiles();
    assertNotNull(buckets);
    assertEquals(1, buckets.length);
    final File[] partFiles = buckets[0].listFiles();
    assertNotNull(partFiles);
    for (File partFile : partFiles) {
        assertTrue(partFile.length() > 0);
        OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(new Configuration());
        Reader reader =
                OrcFile.createReader(new org.apache.hadoop.fs.Path(partFile.toURI()), readerOptions);
        assertEquals(2, reader.getNumberOfRows());
        assertEquals(4, reader.getSchema().getFieldNames().size());
        assertSame(CompressionKind.LZ4, reader.getCompressionKind());
        List<RowData> results = getResults(reader);
        assertEquals(2, results.size());
        assertEquals(expected, results);
    }
}
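For context, here is a minimal sketch of how the LZ4 compression asserted by this test could be requested when the bulk writer is created. The schema string, the field types, and the exact RowDataVectorizer and OrcBulkWriterFactory arguments are assumptions rather than a copy of the test's setup.

// Hedged sketch: request LZ4 compression through ORC writer properties when
// building a Flink OrcBulkWriterFactory. Schema and field types are hypothetical.
Properties writerProps = new Properties();
writerProps.setProperty("orc.compress", "LZ4");
OrcBulkWriterFactory<RowData> factory =
        new OrcBulkWriterFactory<>(
                new RowDataVectorizer(
                        "struct<a:int,b:string>",
                        new LogicalType[] {new IntType(), new VarCharType()}),
                writerProps,
                new Configuration());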