use of io.trino.orc.OrcReader.MAX_BATCH_SIZE in project trino by trinodb.
the class TestOrcReaderPositions method testRowGroupSkipping.
@Test
public void testRowGroupSkipping() throws Exception {
try (TempFile tempFile = new TempFile()) {
// create single strip file with multiple row groups
int rowCount = 142_000;
createSequentialFile(tempFile.getFile(), rowCount);
// test reading two row groups from middle of file
OrcPredicate predicate = (numberOfRows, allColumnStatistics) -> {
if (numberOfRows == rowCount) {
return true;
}
IntegerStatistics stats = allColumnStatistics.get(new OrcColumnId(1)).getIntegerStatistics();
return (stats.getMin() == 50_000) || (stats.getMin() == 60_000);
};
try (OrcRecordReader reader = createCustomOrcRecordReader(tempFile, predicate, BIGINT, MAX_BATCH_SIZE)) {
assertEquals(reader.getFileRowCount(), rowCount);
assertEquals(reader.getReaderRowCount(), rowCount);
assertEquals(reader.getFilePosition(), 0);
assertEquals(reader.getReaderPosition(), 0);
long position = 50_000;
while (true) {
Page page = reader.nextPage();
if (page == null) {
break;
}
page = page.getLoadedPage();
Block block = page.getBlock(0);
for (int i = 0; i < block.getPositionCount(); i++) {
assertEquals(BIGINT.getLong(block, i), position + i);
}
assertEquals(reader.getFilePosition(), position);
assertEquals(reader.getReaderPosition(), position);
position += page.getPositionCount();
}
assertEquals(position, 70_000);
assertEquals(reader.getFilePosition(), rowCount);
assertEquals(reader.getReaderPosition(), rowCount);
}
}
}
use of io.trino.orc.OrcReader.MAX_BATCH_SIZE in project trino by trinodb.
the class TestOrcReaderPositions method testStripeSkipping.
@Test
public void testStripeSkipping() throws Exception {
try (TempFile tempFile = new TempFile()) {
createMultiStripeFile(tempFile.getFile());
// test reading second and fourth stripes
OrcPredicate predicate = (numberOfRows, allColumnStatistics) -> {
if (numberOfRows == 100) {
return true;
}
IntegerStatistics stats = allColumnStatistics.get(new OrcColumnId(1)).getIntegerStatistics();
return ((stats.getMin() == 60) && (stats.getMax() == 117)) || ((stats.getMin() == 180) && (stats.getMax() == 237));
};
try (OrcRecordReader reader = createCustomOrcRecordReader(tempFile, predicate, BIGINT, MAX_BATCH_SIZE)) {
assertEquals(reader.getFileRowCount(), 100);
assertEquals(reader.getReaderRowCount(), 40);
assertEquals(reader.getFilePosition(), 0);
assertEquals(reader.getReaderPosition(), 0);
// second stripe
Page page = reader.nextPage().getLoadedPage();
assertEquals(page.getPositionCount(), 20);
assertEquals(reader.getReaderPosition(), 0);
assertEquals(reader.getFilePosition(), 20);
assertCurrentBatch(page, 1);
// fourth stripe
page = reader.nextPage().getLoadedPage();
assertEquals(page.getPositionCount(), 20);
assertEquals(reader.getReaderPosition(), 20);
assertEquals(reader.getFilePosition(), 60);
assertCurrentBatch(page, 3);
page = reader.nextPage();
assertNull(page);
assertEquals(reader.getReaderPosition(), 40);
assertEquals(reader.getFilePosition(), 100);
}
}
}
Aggregations