use of com.facebook.presto.common.Page in project presto by prestodb.
the class TestOrcFileRewriter method testRewriterDropThenAddDifferentColumns.
/**
 * Rewrites one ORC file through a chain of schema changes and checks the outcome of each step.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>add column 10 - the rewritten file must be byte-identical to its input;</li>
 * <li>drop column 3 - only column 7 is expected to remain in the file;</li>
 * <li>add column 13 - the rewritten file must be byte-identical to its input;</li>
 * <li>drop column 10, add column 18 and delete rows 0, 1 and 3 - one row must survive and be
 * readable through the storage manager;</li>
 * <li>drop column 7 - no rows remain and no output file is produced.</li>
 * </ol>
 *
 * @throws Exception if any file, database, or ORC operation fails
 */
@Test
public void testRewriterDropThenAddDifferentColumns() throws Exception {
    FunctionAndTypeManager functionAndTypeManager = createTestFunctionAndTypeManager();
    DBI dbi = new DBI("jdbc:h2:mem:test" + System.nanoTime() + "_" + ThreadLocalRandom.current().nextInt());
    dbi.registerMapper(new TableColumn.Mapper(functionAndTypeManager));
    Handle dummyHandle = dbi.open();
    File dataDir = Files.createTempDir();
    try {
        StorageManager storageManager = createOrcStorageManager(dbi, dataDir);
        List<Long> columnIds = ImmutableList.of(3L, 7L);
        List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(20));
        File file = new File(temporary, randomUUID().toString());
        try (FileWriter writer = createFileWriter(columnIds, columnTypes, file, false)) {
            List<Page> pages = rowPagesBuilder(columnTypes).row(1L, "1").row(2L, "2").row(3L, "3").row(4L, "4").build();
            writer.appendPages(pages);
        }

        // Add a column
        File newFile1 = new File(temporary, randomUUID().toString());
        FileSystem fileSystem = new LocalOrcDataEnvironment().getFileSystem(DEFAULT_RAPTOR_CONTEXT);
        OrcFileInfo info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(3L, 7L, 10L), ImmutableList.of(BIGINT, createVarcharType(20), DOUBLE)), path(file), path(newFile1), new BitSet(5));
        assertEquals(info.getRowCount(), 4);
        assertEquals(readAllBytes(file.toPath()), readAllBytes(newFile1.toPath()));

        // Drop a column
        File newFile2 = new File(temporary, randomUUID().toString());
        info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(7L, 10L), ImmutableList.of(createVarcharType(20), DOUBLE)), path(newFile1), path(newFile2), new BitSet(5));
        assertEquals(info.getRowCount(), 4);

        // Optimized writer will keep the only column
        OrcReader orcReader = new OrcReader(fileOrcDataSource(newFile2), ORC, new StorageOrcFileTailSource(), new StorageStripeMetadataSource(), new RaptorOrcAggregatedMemoryContext(), OrcTestingUtil.createDefaultTestConfig(), false, NO_ENCRYPTION, DwrfKeyProvider.EMPTY, new RuntimeStats());
        // The comparison result used to be discarded; assert it so the check can actually fail.
        assertEquals(orcReader.getColumnNames(), ImmutableList.of("7"));

        // Add a column with the different ID with different type
        File newFile3 = new File(temporary, randomUUID().toString());
        info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(7L, 10L, 13L), ImmutableList.of(createVarcharType(20), DOUBLE, createVarcharType(5))), path(newFile2), path(newFile3), new BitSet(5));
        assertEquals(info.getRowCount(), 4);
        assertEquals(readAllBytes(newFile2.toPath()), readAllBytes(newFile3.toPath()));

        // Get prepared for the final file; make sure it is accessible from storage manager
        UUID uuid = randomUUID();
        File newFile4 = getFileSystemPath(new File(dataDir, "data/storage"), uuid);
        // Optimized ORC writer does not create the file itself
        newFile4.getParentFile().mkdirs();
        newFile4.createNewFile();

        // Drop a column and add a column; also delete 3 rows
        BitSet rowsToDelete = new BitSet(5);
        rowsToDelete.set(0);
        rowsToDelete.set(1);
        rowsToDelete.set(3);
        info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(7L, 13L, 18L), ImmutableList.of(createVarcharType(20), createVarcharType(5), INTEGER)), path(newFile3), path(newFile4), rowsToDelete);
        assertEquals(info.getRowCount(), 1);

        ConnectorPageSource source = storageManager.getPageSource(DEFAULT_RAPTOR_CONTEXT, DEFAULT_HIVE_FILE_CONTEXT, uuid, Optional.empty(), false, OptionalInt.empty(), ImmutableList.of(13L, 7L, 18L), ImmutableList.of(createVarcharType(5), createVarcharType(20), INTEGER), TupleDomain.all(), READER_ATTRIBUTES);
        // Poll until a page arrives, but stop once the source reports it is finished so that a
        // regression fails the test instead of hanging it.
        Page page = null;
        while (page == null && !source.isFinished()) {
            page = source.getNextPage();
        }
        assertTrue(page != null);
        assertEquals(page.getPositionCount(), 1);

        // Column 13L
        Block column0 = page.getBlock(0);
        assertTrue(column0.isNull(0));

        // Column 7L
        Block column1 = page.getBlock(1);
        assertEquals(createVarcharType(20).getSlice(column1, 0), utf8Slice("3"));

        // Column 18L
        Block column2 = page.getBlock(2);
        assertTrue(column2.isNull(0));

        // Remove all the columns
        File newFile5 = new File(temporary, randomUUID().toString());
        info = createFileRewriter().rewrite(fileSystem, getColumnTypes(ImmutableList.of(13L, 18L), ImmutableList.of(createVarcharType(5), INTEGER)), path(newFile4), path(newFile5), new BitSet(5));
        // Optimized writer will drop the file
        assertEquals(info.getRowCount(), 0);
        assertFalse(newFile5.exists());
    } finally {
        // Release the handle and the temporary data directory even when an assertion fails.
        try {
            dummyHandle.close();
        } finally {
            deleteRecursively(dataDir.toPath(), ALLOW_INSECURE);
        }
    }
}
use of com.facebook.presto.common.Page in project presto by prestodb.
the class OrcTester method writeOrcColumnsPresto.
/**
 * Writes the supplied column values to {@code outputFile} as one page, closes the writer and then
 * runs the writer's validation against the file that was produced.
 *
 * @param outputFile file the ORC data is written to and later re-read from for validation
 * @param format supplies the ORC encoding through {@code format.orcEncoding}
 * @param compression compression kind handed to the writer
 * @param dwrfWriterEncryption optional encryption settings handed to the writer
 * @param types column types; {@code types.get(i)} describes {@code values.get(i)}
 * @param values one list of values per column, in the same order as {@code types}
 * @param stats writer statistics sink handed to the writer
 * @throws Exception if writing or validating the file fails
 */
public static void writeOrcColumnsPresto(File outputFile, Format format, CompressionKind compression, Optional<DwrfWriterEncryption> dwrfWriterEncryption, List<Type> types, List<List<?>> values, WriterStats stats) throws Exception {
    OrcWriter writer = createOrcWriter(outputFile, format.orcEncoding, compression, dwrfWriterEncryption, types, OrcWriterOptions.builder().build(), stats);
    Block[] blocks = new Block[types.size()];
    for (int i = 0; i < types.size(); i++) {
        Type type = types.get(i);
        List<?> columnValues = values.get(i);
        // Size the builder by the number of rows in this column; values.size() is the number of
        // columns and was passed here by mistake.
        BlockBuilder blockBuilder = type.createBlockBuilder(null, columnValues.size());
        for (Object value : columnValues) {
            writeValue(type, blockBuilder, value);
        }
        blocks[i] = blockBuilder.build();
    }
    writer.write(new Page(blocks));
    writer.close();
    // Validation runs after close() and re-reads the file from disk.
    writer.validate(new FileOrcDataSource(outputFile, new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), true));
}
use of com.facebook.presto.common.Page in project presto by prestodb.
the class TestSelectiveOrcReader method testHiddenConstantColumns.
/**
 * Reads a single-column BIGINT file together with a hidden constant VARCHAR column (negative
 * column index) and checks that, with a filter keeping only the value 1, the one returned row
 * carries both the constant and the filtered BIGINT value.
 *
 * @throws Exception if writing or reading the ORC file fails
 */
@Test
public void testHiddenConstantColumns() throws Exception {
    Type type = BIGINT;
    List<Type> types = ImmutableList.of(type);
    List<List<?>> values = ImmutableList.of(ImmutableList.of(1L, 2L));

    // Hidden columns like partition columns use negative indices (-13).
    int hiddenColumnIndex = -13;
    Map<Integer, Type> includedColumns = ImmutableMap.of(hiddenColumnIndex, VARCHAR, 0, BIGINT);
    List<Integer> outputColumns = ImmutableList.of(hiddenColumnIndex, 0);
    Slice constantSlice = Slices.utf8Slice("partition_value");
    Map<Integer, Object> constantValues = ImmutableMap.of(hiddenColumnIndex, constantSlice);
    OrcAggregatedMemoryContext systemMemoryUsage = new TestingHiveOrcAggregatedMemoryContext();
    TupleDomainFilter filter = BigintRange.of(1, 1, false);
    Map<Subfield, TupleDomainFilter> subFieldFilter = toSubfieldFilter(filter);
    OrcReaderSettings readerSettings = OrcTester.OrcReaderSettings.builder().setColumnFilters(ImmutableMap.of(0, subFieldFilter)).build();

    // Close the temp file as well as the reader so nothing is left behind when an assertion fails.
    try (TempFile tempFile = new TempFile()) {
        writeOrcColumnsPresto(tempFile.getFile(), DWRF, ZSTD, Optional.empty(), types, values, new OrcWriterStats());
        try (OrcSelectiveRecordReader recordReader = createCustomOrcSelectiveRecordReader(tempFile.getFile(), DWRF.getOrcEncoding(), OrcPredicate.TRUE, types, 1, readerSettings.getColumnFilters(), readerSettings.getFilterFunctions(), readerSettings.getFilterFunctionInputMapping(), readerSettings.getRequiredSubfields(), constantValues, ImmutableMap.of(), includedColumns, outputColumns, false, systemMemoryUsage, false)) {
            Page page = recordReader.getNextPage();
            // Fail with a clear assertion rather than a NullPointerException if no page is produced.
            assertNotNull(page);
            assertEquals(page.getPositionCount(), 1);

            // Output column 0 is the hidden constant column.
            Block partitionValueBlock = page.getBlock(0);
            int length = partitionValueBlock.getSliceLength(0);
            Slice varcharSlice = partitionValueBlock.getSlice(0, 0, length);
            assertEquals(varcharSlice, constantSlice);

            // Output column 1 is the BIGINT column read from the file.
            Block bigintBlock = page.getBlock(1);
            assertEquals(bigintBlock.getLong(0), 1);

            assertNull(recordReader.getNextPage());
        }
    }
}
use of com.facebook.presto.common.Page in project presto by prestodb.
the class TestSelectiveOrcReader method testAdaptiveBatchSizes.
/**
 * Writes a VARCHAR column whose first {@code MAX_BATCH_SIZE} rows are long strings and whose
 * remaining rows are empty, then checks the batch sizes the selective reader produces: the first
 * batch must have {@code MAX_BATCH_SIZE} positions, and later batches read before the end of the
 * file must have the size derived from {@code MAX_BLOCK_SIZE} and the long-string row width.
 *
 * @throws Exception if writing or reading the ORC file fails
 */
@Test
public void testAdaptiveBatchSizes() throws Exception {
    Type type = VARCHAR;
    List<Type> types = ImmutableList.of(type);
    int rowCount = 10000;
    int longStringLength = 5000;

    List<String> values = new ArrayList<>(rowCount);
    Random random = new Random();
    for (int i = 0; i < rowCount; ++i) {
        if (i < MAX_BATCH_SIZE) {
            // Long rows: a string of longStringLength random decimal digits.
            StringBuilder builder = new StringBuilder(longStringLength);
            for (int j = 0; j < longStringLength; ++j) {
                builder.append(random.nextInt(10));
            }
            values.add(builder.toString());
        } else {
            values.add("");
        }
    }

    // Close the temp file as well as the reader so nothing is left behind when an assertion fails.
    try (TempFile tempFile = new TempFile()) {
        writeOrcColumnsPresto(tempFile.getFile(), DWRF, NONE, Optional.empty(), types, ImmutableList.of(values), new OrcWriterStats());
        try (OrcSelectiveRecordReader recordReader = createCustomOrcSelectiveRecordReader(tempFile, OrcEncoding.DWRF, OrcPredicate.TRUE, type, MAX_BATCH_SIZE, false, false)) {
            assertEquals(recordReader.getFileRowCount(), rowCount);
            assertEquals(recordReader.getReaderRowCount(), rowCount);
            assertEquals(recordReader.getFilePosition(), 0);
            assertEquals(recordReader.getReaderPosition(), 0);

            // Size of the first batch should equal to the initial batch size (set to MAX_BATCH_SIZE)
            Page page = recordReader.getNextPage();
            assertNotNull(page);
            page = page.getLoadedPage();
            assertEquals(page.getPositionCount(), MAX_BATCH_SIZE);

            // Later batches should be adjusted based on maxCombinedBytesPerRow collected during the first batch read
            while (true) {
                page = recordReader.getNextPage();
                assertNotNull(page);
                page = page.getLoadedPage();
                if (recordReader.getReadPositions() < rowCount) {
                    assertEquals(page.getPositionCount(), MAX_BLOCK_SIZE.toBytes() / (longStringLength + Integer.BYTES + Byte.BYTES));
                } else {
                    // The batch that reaches the end of the file is not size-checked.
                    break;
                }
            }
        }
    }
}
use of com.facebook.presto.common.Page in project presto by prestodb.
the class TestOrcWriter method testStreamOrder.
/**
 * For every {@link OrcWriteValidationMode}, writes five VARCHAR columns to a fresh temp file and
 * walks the streams of each stripe footer: all index streams must come before the first data
 * stream, and every data stream is passed to a consumer obtained from
 * {@code streamConsumerFactory}.
 *
 * @param encoding ORC encoding used for writing and for reading the stripe footers back
 * @param kind compression kind handed to the writer
 * @param level compression level placed into the writer options
 * @param streamLayoutFactory stream layout factory placed into the writer options
 * @param streamConsumerFactory supplies one consumer per stripe; the consumer receives that
 *        stripe's non-index streams in footer order
 * @throws IOException if writing or reading the file fails
 */
private void testStreamOrder(OrcEncoding encoding, CompressionKind kind, OptionalInt level, StreamLayoutFactory streamLayoutFactory, Supplier<Consumer<Stream>> streamConsumerFactory) throws IOException {
    OrcWriterOptions orcWriterOptions = OrcWriterOptions.builder().withFlushPolicy(DefaultOrcWriterFlushPolicy.builder().withStripeMinSize(new DataSize(0, MEGABYTE)).withStripeMaxSize(new DataSize(32, MEGABYTE)).withStripeMaxRowCount(ORC_STRIPE_SIZE).build()).withRowGroupMaxRowCount(ORC_ROW_GROUP_SIZE).withDictionaryMaxMemory(new DataSize(32, MEGABYTE)).withCompressionLevel(level).withStreamLayoutFactory(streamLayoutFactory).build();
    for (OrcWriteValidationMode validationMode : OrcWriteValidationMode.values()) {
        // One temp file per validation mode; close each so it does not outlive its iteration.
        try (TempFile tempFile = new TempFile()) {
            OrcWriter writer = new OrcWriter(new OutputStreamDataSink(new FileOutputStream(tempFile.getFile())), ImmutableList.of("test1", "test2", "test3", "test4", "test5"), ImmutableList.of(VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR), encoding, kind, Optional.empty(), NO_ENCRYPTION, orcWriterOptions, ImmutableMap.of(), HIVE_STORAGE_TIME_ZONE, true, validationMode, new OrcWriterStats());

            // write down some data with unsorted streams
            String[] data = new String[] { "a", "bbbbb", "ccc", "dd", "eeee" };
            Block[] blocks = new Block[data.length];
            int entries = 65536;
            BlockBuilder blockBuilder = VARCHAR.createBlockBuilder(null, entries);
            for (int i = 0; i < data.length; i++) {
                // Explicit charset so the bytes do not depend on the platform default.
                byte[] bytes = data[i].getBytes(java.nio.charset.StandardCharsets.UTF_8);
                for (int j = 0; j < entries; j++) {
                    // force to write different data
                    bytes[0] = (byte) ((bytes[0] + 1) % 128);
                    blockBuilder.writeBytes(Slices.wrappedBuffer(bytes, 0, bytes.length), 0, bytes.length);
                    blockBuilder.closeEntry();
                }
                blocks[i] = blockBuilder.build();
                blockBuilder = blockBuilder.newBlockBuilderLike(null);
            }
            writer.write(new Page(blocks));
            writer.close();

            for (StripeFooter stripeFooter : OrcTester.getStripes(tempFile.getFile(), encoding)) {
                Consumer<Stream> streamConsumer = streamConsumerFactory.get();
                boolean dataStreamStarted = false;
                for (Stream stream : stripeFooter.getStreams()) {
                    if (isIndexStream(stream)) {
                        // An index stream after a data stream means the ordering is broken.
                        assertFalse(dataStreamStarted);
                        continue;
                    }
                    dataStreamStarted = true;
                    streamConsumer.accept(stream);
                }
            }
        }
    }
}
Aggregations