Example use of com.facebook.presto.orc.OrcReader.MAX_BATCH_SIZE in the presto project by prestodb.
From the class TestOrcReaderPositions, method testFilterFunctionWithAppendRowNumber.
/**
 * Writes a sequential file of {@code rowCount} rows, reads it back through a selective
 * reader configured with an {@code IsOddPredicate}-based filter function, and hands the
 * reader to {@code verifyAppendNumber} together with the odd numbers in
 * {@code [0, rowCount)}.
 *
 * @throws Exception if creating, writing or reading the temporary file fails
 */
@Test
public void testFilterFunctionWithAppendRowNumber() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        int rowCount = 100;
        createSequentialFile(tempFile.getFile(), rowCount);

        // Derive the expectation from rowCount instead of repeating the literal, so the
        // file size and the expected values cannot drift apart.
        List<Long> expectedValues = LongStream.range(0, rowCount)
                .boxed()
                .filter(input -> input % 2 != 0)
                .collect(ArrayList::new, List::add, List::addAll);

        ConnectorSession session = new TestingConnectorSession(ImmutableList.of());
        FilterFunction filter = new FilterFunction(session.getSqlFunctionProperties(), true, new IsOddPredicate());

        // The reader holds the underlying data source; close it even when verification fails.
        // NOTE(review): the trailing 'true' presumably enables row-number appending - confirm
        // against createCustomOrcSelectiveRecordReader.
        try (OrcSelectiveRecordReader reader = createCustomOrcSelectiveRecordReader(
                tempFile.getFile(),
                ORC,
                OrcPredicate.TRUE,
                ImmutableList.of(BIGINT),
                MAX_BATCH_SIZE,
                ImmutableMap.of(),
                ImmutableList.of(filter),
                ImmutableMap.of(0, 0),
                ImmutableMap.of(),
                ImmutableMap.of(),
                ImmutableMap.of(),
                ImmutableMap.of(0, BIGINT),
                ImmutableList.of(0),
                false,
                new TestingHiveOrcAggregatedMemoryContext(),
                true)) {
            verifyAppendNumber(expectedValues, reader);
        }
    }
}
Example use of com.facebook.presto.orc.OrcReader.MAX_BATCH_SIZE in the presto project by prestodb.
From the class TestOrcReaderPositions, method testRowGroupSkipping.
/**
 * Reads a sequential single-stripe file through a batch reader whose predicate only
 * accepts statistics with a minimum of 50_000 or 60_000, and asserts that the values and
 * reported positions run contiguously from 50_000 up to 70_000.
 *
 * @throws Exception if creating, writing or reading the temporary file fails
 */
@Test
public void testRowGroupSkipping() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        // create single stripe file with multiple row groups
        int rowCount = 142_000;
        createSequentialFile(tempFile.getFile(), rowCount);

        // test reading two row groups from middle of file
        OrcPredicate predicate = (numberOfRows, statisticsByColumnIndex) -> {
            // Statistics covering every row are always accepted; anything smaller is
            // matched on its minimum value.
            if (numberOfRows != rowCount) {
                IntegerStatistics integerStatistics = statisticsByColumnIndex.get(0).getIntegerStatistics();
                return (integerStatistics.getMin() == 50_000) || (integerStatistics.getMin() == 60_000);
            }
            return true;
        };

        try (OrcBatchRecordReader reader = createCustomOrcRecordReader(tempFile, ORC, predicate, BIGINT, MAX_BATCH_SIZE, false, false)) {
            // Before the first batch the reader reports the full row count and position zero.
            assertEquals(reader.getFileRowCount(), rowCount);
            assertEquals(reader.getReaderRowCount(), rowCount);
            assertEquals(reader.getFilePosition(), 0);
            assertEquals(reader.getReaderPosition(), 0);

            long expectedPosition = 50_000;
            for (int batchSize = reader.nextBatch(); batchSize != -1; batchSize = reader.nextBatch()) {
                Block values = reader.readBlock(0);
                for (int offset = 0; offset < batchSize; offset++) {
                    assertEquals(BIGINT.getLong(values, offset), expectedPosition + offset);
                }
                // Positions are checked after the block has been read, as in the original flow.
                assertEquals(reader.getFilePosition(), expectedPosition);
                assertEquals(reader.getReaderPosition(), expectedPosition);
                expectedPosition += batchSize;
            }

            // Exactly the rows 50_000..69_999 were produced, and the reader ends at end of file.
            assertEquals(expectedPosition, 70_000);
            assertEquals(reader.getFilePosition(), rowCount);
            assertEquals(reader.getReaderPosition(), rowCount);
        }
    }
}
Example use of com.facebook.presto.orc.OrcReader.MAX_BATCH_SIZE in the presto project by prestodb.
From the class TestOrcReaderPositions, method testRowGroupSkippingWithAppendRowNumber.
/**
 * Reads a sequential single-stripe file through a selective reader whose predicate only
 * accepts statistics with a minimum of 50_000 or 70_000, and hands the reader to
 * {@code verifyAppendNumber} together with the values 50_000..59_999 and 70_000..79_999.
 *
 * @throws Exception if creating, writing or reading the temporary file fails
 */
@Test
public void testRowGroupSkippingWithAppendRowNumber() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        // create single stripe file with multiple row groups
        int rowCount = 142_000;
        createSequentialFile(tempFile.getFile(), rowCount);

        // test reading two non-adjacent row groups from the middle of the file
        OrcPredicate predicate = (numberOfRows, statisticsByColumnIndex) -> {
            // Statistics covering every row are always accepted.
            if (numberOfRows == rowCount) {
                return true;
            }
            IntegerStatistics stats = statisticsByColumnIndex.get(0).getIntegerStatistics();
            return (stats.getMin() == 50_000) || (stats.getMin() == 70_000);
        };

        // NOTE(review): the expectation implies row groups of 10_000 rows - confirm against
        // the writer settings used by createSequentialFile.
        List<Long> expectedValues = new ArrayList<>();
        expectedValues.addAll(LongStream.range(50_000, 60_000).collect(ArrayList::new, List::add, List::addAll));
        expectedValues.addAll(LongStream.range(70_000, 80_000).collect(ArrayList::new, List::add, List::addAll));

        // The reader holds the underlying data source; close it even when verification fails.
        try (OrcSelectiveRecordReader reader = createCustomOrcSelectiveRecordReader(tempFile, ORC, predicate, BIGINT, MAX_BATCH_SIZE, false, true)) {
            verifyAppendNumber(expectedValues, reader);
        }
    }
}
Example use of com.facebook.presto.orc.OrcReader.MAX_BATCH_SIZE in the presto project by prestodb.
From the class TestOrcReaderPositions, method testStripeSkippingWithAppendNumber.
/**
 * Reads a multi-stripe file through a selective reader whose predicate only accepts two
 * stripes (matched by their min/max statistics), collects the long values of block 1 of
 * every returned page, and asserts they equal 20..39 followed by 60..79.
 *
 * @throws Exception if creating, writing or reading the temporary file fails
 */
@Test
public void testStripeSkippingWithAppendNumber() throws Exception {
    try (TempFile tempFile = new TempFile()) {
        createMultiStripeFile(tempFile.getFile());

        // Every stripe has 20 rows and there are a total of 5 stripes
        // test reading second and fourth stripes
        OrcPredicate predicate = (numberOfRows, statisticsByColumnIndex) -> {
            // Statistics covering all 100 rows are always accepted.
            if (numberOfRows == 100) {
                return true;
            }
            IntegerStatistics stats = statisticsByColumnIndex.get(0).getIntegerStatistics();
            // NOTE(review): the bounds look like 3 * row index (60..117 for rows 20..39,
            // 180..237 for rows 60..79) - confirm against createMultiStripeFile.
            return ((stats.getMin() == 60) && (stats.getMax() == 117)) || ((stats.getMin() == 180) && (stats.getMax() == 237));
        };

        List<Long> expectedValues = new ArrayList<>();
        expectedValues.addAll(LongStream.range(20, 40).collect(ArrayList::new, List::add, List::addAll));
        expectedValues.addAll(LongStream.range(60, 80).collect(ArrayList::new, List::add, List::addAll));

        List<Long> actualValues = new ArrayList<>();
        // The reader holds the underlying data source; close it even when an assertion fails.
        try (OrcSelectiveRecordReader reader = createCustomOrcSelectiveRecordReader(tempFile, ORC, predicate, BIGINT, MAX_BATCH_SIZE, false, true)) {
            assertNotNull(reader);
            // A null page ends the read loop.
            for (Page page = reader.getNextPage(); page != null; page = reader.getNextPage()) {
                // Block 1 is presumably the appended row-number column - TODO confirm.
                Block rowNumberBlock = page.getBlock(1);
                for (int i = 0; i < page.getPositionCount(); i++) {
                    actualValues.add(rowNumberBlock.getLong(i));
                }
            }
        }
        assertEquals(actualValues, expectedValues);
    }
}
Aggregations