Use of org.apache.flink.orc.OrcFilters.Predicate in the Apache Flink project.
Class OrcColumnarRowInputFormatTest, method testReadFileAndRestoreWithFilter.
/**
 * Runs {@code innerTestRestore} against the flat ORC file using a format that projects fields
 * 0 and 1 and filters {@code _col0} with three OR-ed alternatives (two ranges and one exact
 * value).
 */
@Test
public void testReadFileAndRestoreWithFilter() throws IOException {
    // The three alternatives on _col0; they are OR-ed into one predicate below.
    Between lowerRange = new Between("_col0", PredicateLeaf.Type.LONG, 0L, 975000L);
    Equals singleValue = new Equals("_col0", PredicateLeaf.Type.LONG, 980001L);
    Between upperRange = new Between("_col0", PredicateLeaf.Type.LONG, 990000L, 1800000L);
    List<Predicate> filter =
            Collections.singletonList(new Or(lowerRange, singleValue, upperRange));

    OrcColumnarRowInputFormat<?, FileSourceSplit> format =
            createFormat(FLAT_FILE_TYPE, new int[] {0, 1}, filter);

    // Use the first split returned by createSplits(flatFile, 1).
    // NOTE(review): presumably this is the only split of the file; confirm against createSplits.
    FileSourceSplit split = createSplits(flatFile, 1).get(0);

    // Break point and expected results handed to the shared restore helper.
    final int breakAfter = 975001;
    final int expectedCount = 1795000;
    final long expectedF0Sum = 1615113397500L;

    innerTestRestore(format, split, breakAfter, expectedCount, expectedF0Sum);
}
Use of org.apache.flink.orc.OrcFilters.Predicate in the Apache Flink project.
Class OrcShimV200, method createRecordReader.
/**
 * Creates an ORC {@code RecordReader} for one split of the file at {@code path}.
 *
 * <p>The reader options are built from {@code schema} and the offset/length pair returned by
 * {@code getOffsetAndLengthForSplit}, then passed through {@code readOrcConf}. If
 * {@code conjunctPredicates} is non-empty, all predicates are added inside a single AND group
 * and installed as the search argument. Column selection is set from
 * {@code computeProjectionMask(schema, selectedFields)}.
 *
 * @param conf configuration used to open the file and handed to {@code readOrcConf}
 * @param schema schema set on the reader options and used for the projection mask
 * @param selectedFields fields handed to {@code computeProjectionMask}
 * @param conjunctPredicates predicates to AND together; no search argument is set when empty
 * @param path location of the ORC file
 * @param splitStart start of the split, handed to {@code getOffsetAndLengthForSplit}
 * @param splitLength length of the split, handed to {@code getOffsetAndLengthForSplit}
 * @return the row reader created from the opened file and the configured options
 * @throws IOException declared by this method; presumably raised when opening or reading the
 *     file fails
 */
@Override
public RecordReader createRecordReader(Configuration conf, TypeDescription schema, int[] selectedFields, List<Predicate> conjunctPredicates, org.apache.flink.core.fs.Path path, long splitStart, long splitLength) throws IOException {
    // Open the ORC file through its Hadoop path.
    Reader orcReader = createReader(new Path(path.toUri()), conf);

    // Offset (f0) and length (f1) covering the stripes that start in the split.
    Tuple2<Long, Long> stripeRange =
            getOffsetAndLengthForSplit(splitStart, splitLength, orcReader.getStripes());

    // Row reader configuration: schema and range first, then settings from conf.
    Reader.Options baseOptions =
            new Reader.Options().schema(schema).range(stripeRange.f0, stripeRange.f1);
    Reader.Options options = readOrcConf(baseOptions, conf);

    // Filters: wrap every predicate in one AND group.
    if (!conjunctPredicates.isEmpty()) {
        SearchArgument.Builder sargBuilder = SearchArgumentFactory.newBuilder().startAnd();
        conjunctPredicates.forEach(predicate -> predicate.add(sargBuilder));
        options.searchArgument(sargBuilder.end().build(), new String[] {});
    }

    // Projection: restrict the reader to the selected fields.
    options.include(computeProjectionMask(schema, selectedFields));

    RecordReader rowReader = createRecordReader(orcReader, options);

    // Return value intentionally unused; per the original comment this call assigns ids.
    schema.getId();

    return rowReader;
}
Aggregations