Use of com.facebook.presto.common.io.OutputStreamDataSink in project presto by prestodb.
In class OrcTester, the method createOrcWriter:
public static OrcWriter createOrcWriter(
        File outputFile,
        OrcEncoding encoding,
        CompressionKind compression,
        Optional<DwrfWriterEncryption> dwrfWriterEncryption,
        List<Type> types,
        OrcWriterOptions writerOptions,
        WriterStats stats)
        throws FileNotFoundException
{
    List<String> columnNames = makeColumnNames(types.size());

    // Hive-style schema metadata; note that the writer below is given an empty user-metadata map
    ImmutableMap.Builder<String, String> metadata = ImmutableMap.builder();
    metadata.put("columns", String.join(", ", columnNames));
    metadata.put("columns.types", createSettableStructObjectInspector(types).getTypeName());

    return new OrcWriter(
            new OutputStreamDataSink(new FileOutputStream(outputFile)),
            columnNames,
            types,
            encoding,
            compression,
            dwrfWriterEncryption,
            new DwrfEncryptionProvider(new UnsupportedEncryptionLibrary(), new TestingEncryptionLibrary()),
            writerOptions,
            ImmutableMap.of(),
            HIVE_STORAGE_TIME_ZONE,
            true,
            BOTH,
            stats);
}
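For context, a caller obtains the writer, writes one or more Pages, and closes it to flush the final stripe and footer. A minimal sketch, assuming the usual static imports (ORC, ZLIB, BIGINT); the file name, schema, and values are illustrative, not from the original:

// Minimal usage sketch; the single-BIGINT schema and values are illustrative assumptions.
File file = File.createTempFile("orc-tester", ".orc");
OrcWriter writer = OrcTester.createOrcWriter(
        file,
        ORC,                                // OrcEncoding
        ZLIB,                               // CompressionKind
        Optional.empty(),                   // no DWRF encryption
        ImmutableList.of(BIGINT),
        OrcWriterOptions.builder().build(),
        new OrcWriterStats());
BlockBuilder builder = BIGINT.createBlockBuilder(null, 3);
for (long value = 0; value < 3; value++) {
    BIGINT.writeLong(builder, value);
}
writer.write(new Page(builder.build()));
writer.close(); // flushes the last stripe and writes the footer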
Use of com.facebook.presto.common.io.OutputStreamDataSink in project presto by prestodb.
In class TestOrcWriter, the method testStreamOrder:
private void testStreamOrder(OrcEncoding encoding, CompressionKind kind, OptionalInt level, StreamLayoutFactory streamLayoutFactory, Supplier<Consumer<Stream>> streamConsumerFactory)
        throws IOException
{
    OrcWriterOptions orcWriterOptions = OrcWriterOptions.builder()
            .withFlushPolicy(DefaultOrcWriterFlushPolicy.builder()
                    .withStripeMinSize(new DataSize(0, MEGABYTE))
                    .withStripeMaxSize(new DataSize(32, MEGABYTE))
                    .withStripeMaxRowCount(ORC_STRIPE_SIZE)
                    .build())
            .withRowGroupMaxRowCount(ORC_ROW_GROUP_SIZE)
            .withDictionaryMaxMemory(new DataSize(32, MEGABYTE))
            .withCompressionLevel(level)
            .withStreamLayoutFactory(streamLayoutFactory)
            .build();
    for (OrcWriteValidationMode validationMode : OrcWriteValidationMode.values()) {
        TempFile tempFile = new TempFile();
        OrcWriter writer = new OrcWriter(
                new OutputStreamDataSink(new FileOutputStream(tempFile.getFile())),
                ImmutableList.of("test1", "test2", "test3", "test4", "test5"),
                ImmutableList.of(VARCHAR, VARCHAR, VARCHAR, VARCHAR, VARCHAR),
                encoding,
                kind,
                Optional.empty(),
                NO_ENCRYPTION,
                orcWriterOptions,
                ImmutableMap.of(),
                HIVE_STORAGE_TIME_ZONE,
                true,
                validationMode,
                new OrcWriterStats());
        // write enough varied data that the streams are produced in an unsorted order
        String[] data = new String[] {"a", "bbbbb", "ccc", "dd", "eeee"};
        Block[] blocks = new Block[data.length];
        int entries = 65536;
        BlockBuilder blockBuilder = VARCHAR.createBlockBuilder(null, entries);
        for (int i = 0; i < data.length; i++) {
            byte[] bytes = data[i].getBytes();
            for (int j = 0; j < entries; j++) {
                // mutate the first byte so each row carries different data
                bytes[0] = (byte) ((bytes[0] + 1) % 128);
                blockBuilder.writeBytes(Slices.wrappedBuffer(bytes, 0, bytes.length), 0, bytes.length);
                blockBuilder.closeEntry();
            }
            blocks[i] = blockBuilder.build();
            blockBuilder = blockBuilder.newBlockBuilderLike(null);
        }
        writer.write(new Page(blocks));
        writer.close();
        // verify that in every stripe all index streams precede all data streams,
        // and hand each data stream to the caller's consumer
        for (StripeFooter stripeFooter : OrcTester.getStripes(tempFile.getFile(), encoding)) {
            Consumer<Stream> streamConsumer = streamConsumerFactory.get();
            boolean dataStreamStarted = false;
            for (Stream stream : stripeFooter.getStreams()) {
                if (isIndexStream(stream)) {
                    assertFalse(dataStreamStarted);
                    continue;
                }
                dataStreamStarted = true;
                streamConsumer.accept(stream);
            }
        }
    }
}
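The streamConsumerFactory parameter lets each caller assert the data-stream ordering its StreamLayoutFactory should produce. A hypothetical consumer, assuming the caller wants to verify ordering by Stream.getColumn(); the real tests supply consumers matched to the layout under test:

// Hypothetical consumer: asserts that data streams appear in
// non-decreasing column order. Illustration only.
Supplier<Consumer<Stream>> byColumnOrder = () -> new Consumer<Stream>()
{
    private int previousColumn = -1;

    @Override
    public void accept(Stream stream)
    {
        assertTrue(stream.getColumn() >= previousColumn, "streams not ordered by column");
        previousColumn = stream.getColumn();
    }
};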
Use of com.facebook.presto.common.io.OutputStreamDataSink in project presto by prestodb.
In class TestTempStorageSingleStreamSpiller, the method assertSpill:
private void assertSpill(boolean compression, boolean encryption)
        throws Exception
{
    File spillPath = new File(tempDirectory, UUID.randomUUID().toString());
    // the executor won't be closed, because destroy() is never called on the spiller factory
    TempStorageSingleStreamSpillerFactory spillerFactory = new TempStorageSingleStreamSpillerFactory(
            new TestingTempStorageManager(spillPath.toString()),
            executor,
            new BlockEncodingManager(),
            new SpillerStats(),
            compression,
            encryption,
            LocalTempStorage.NAME);
    LocalMemoryContext memoryContext = newSimpleAggregatedMemoryContext().newLocalMemoryContext("test");
    SingleStreamSpiller singleStreamSpiller = spillerFactory.create(TYPES, new TestingSpillContext(), memoryContext);
    assertTrue(singleStreamSpiller instanceof TempStorageSingleStreamSpiller);
    TempStorageSingleStreamSpiller spiller = (TempStorageSingleStreamSpiller) singleStreamSpiller;
    Page page = buildPage();
    // the spiller reserves memory for its data sink in the constructor
    int retainedSizeForEmptyDataSink = toIntExact(new OutputStreamDataSink(new DynamicSliceOutput(0)).getRetainedSizeInBytes());
    assertEquals(memoryContext.getBytes(), retainedSizeForEmptyDataSink);
    spiller.spill(page).get();
    spiller.spill(Iterators.forArray(page, page, page)).get();
    assertEquals(listFiles(spillPath.toPath()).size(), 1);
    // the spiller releases its memory reservation only when it is closed,
    // so at this point the reservation is still non-zero
    // assertEquals(memoryContext.getBytes(), 0);
    Iterator<Page> spilledPagesIterator = spiller.getSpilledPages();
    assertEquals(memoryContext.getBytes(), retainedSizeForEmptyDataSink);
    ImmutableList<Page> spilledPages = ImmutableList.copyOf(spilledPagesIterator);
    // the reservation is likewise still held after reading the pages back
    // assertEquals(memoryContext.getBytes(), 0);
    assertEquals(4, spilledPages.size());
    for (int i = 0; i < 4; ++i) {
        PageAssertions.assertPageEquals(TYPES, page, spilledPages.get(i));
    }
    // assert that the page codec markers in the spill file match the expected configuration
    try (InputStream is = newInputStream(listFiles(spillPath.toPath()).get(0))) {
        Iterator<SerializedPage> serializedPages = PagesSerdeUtil.readSerializedPages(new InputStreamSliceInput(is));
        assertTrue(serializedPages.hasNext(), "at least one page should be successfully read back");
        byte markers = serializedPages.next().getPageCodecMarkers();
        assertEquals(PageCodecMarker.COMPRESSED.isSet(markers), compression);
        assertEquals(PageCodecMarker.ENCRYPTED.isSet(markers), encryption);
    }
    spiller.close();
    assertEquals(listFiles(spillPath.toPath()).size(), 0);
    assertEquals(memoryContext.getBytes(), 0);
}
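This helper is presumably driven across all four codec combinations so both PageCodecMarker checks are exercised in each state; a plausible sketch (the test method name is hypothetical):

@Test
public void testSpillCodecCombinations()
        throws Exception
{
    // cover every combination of the COMPRESSED and ENCRYPTED markers
    assertSpill(false, false);
    assertSpill(true, false);
    assertSpill(false, true);
    assertSpill(true, true);
}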
Use of com.facebook.presto.common.io.OutputStreamDataSink in project presto by prestodb.
In class IcebergFileWriterFactory, the method createOrcWriter:
private IcebergFileWriter createOrcWriter(Path outputPath, Schema icebergSchema, JobConf jobConf, ConnectorSession session)
{
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), outputPath, jobConf);
        DataSink orcDataSink = hdfsEnvironment.doAs(session.getUser(), () -> new OutputStreamDataSink(fileSystem.create(outputPath)));
        // deletes the partially written file if the write is aborted
        Callable<Void> rollbackAction = () -> {
            hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.delete(outputPath, false));
            return null;
        };
        List<Types.NestedField> columnFields = icebergSchema.columns();
        List<String> fileColumnNames = columnFields.stream()
                .map(Types.NestedField::name)
                .collect(toImmutableList());
        List<Type> fileColumnTypes = columnFields.stream()
                .map(Types.NestedField::type)
                .map(type -> toPrestoType(type, typeManager))
                .collect(toImmutableList());
        Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
        if (isOrcOptimizedWriterValidate(session)) {
            // re-open the written file as an OrcDataSource so the writer can validate its own output
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsOrcDataSource(
                            new OrcDataSourceId(outputPath.toString()),
                            hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.getFileStatus(outputPath).getLen()),
                            getOrcMaxMergeDistance(session),
                            getOrcMaxBufferSize(session),
                            getOrcStreamBufferSize(session),
                            false,
                            hdfsEnvironment.doAs(session.getUser(), () -> fileSystem.open(outputPath)),
                            readStats);
                }
                catch (IOException e) {
                    throw new PrestoException(ICEBERG_WRITE_VALIDATION_FAILED, e);
                }
            });
        }
        return new IcebergOrcFileWriter(
                icebergSchema,
                orcDataSink,
                rollbackAction,
                ORC,
                fileColumnNames,
                fileColumnTypes,
                toOrcType(icebergSchema),
                getCompressionCodec(session).getOrcCompressionKind(),
                orcFileWriterConfig.toOrcWriterOptionsBuilder()
                        .withFlushPolicy(DefaultOrcWriterFlushPolicy.builder()
                                .withStripeMinSize(HiveSessionProperties.getOrcOptimizedWriterMinStripeSize(session))
                                .withStripeMaxSize(HiveSessionProperties.getOrcOptimizedWriterMaxStripeSize(session))
                                .withStripeMaxRowCount(HiveSessionProperties.getOrcOptimizedWriterMaxStripeRows(session))
                                .build())
                        .withDictionaryMaxMemory(HiveSessionProperties.getOrcOptimizedWriterMaxDictionaryMemory(session))
                        .withMaxStringStatisticsLimit(HiveSessionProperties.getOrcStringStatisticsLimit(session))
                        .build(),
                IntStream.range(0, fileColumnNames.size()).toArray(),
                ImmutableMap.<String, String>builder()
                        .put(PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(PRESTO_QUERY_ID_NAME, session.getQueryId())
                        .build(),
                UTC,
                validationInputFactory,
                getOrcOptimizedWriterValidateMode(session),
                orcWriterStats,
                dwrfEncryptionProvider,
                Optional.empty());
    }
    catch (IOException e) {
        throw new PrestoException(ICEBERG_WRITER_OPEN_ERROR, "Error creating ORC file", e);
    }
}
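The rollbackAction exists so that an aborted write can remove the partially written file. A hedged sketch of the calling pattern; the fileWriter and page variables and the surrounding commit/abort flow are illustrative, not the connector's actual protocol:

// Illustrative abort handling only.
try {
    fileWriter.appendRows(page);
    fileWriter.commit();
}
catch (Exception e) {
    try {
        rollbackAction.call(); // deletes outputPath (non-recursive)
    }
    catch (Exception rollbackFailure) {
        e.addSuppressed(rollbackFailure);
    }
    throw e;
}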
Use of com.facebook.presto.common.io.OutputStreamDataSink in project presto by prestodb.
In class AbstractTestDwrfStripeCaching, the method writeOrcFile:
/**
 * Creates a file with 3 INT columns and 4 stripes of 100 rows each, with the
 * following values:
 * Column 0: row number
 * Column 1: Integer.MAX_VALUE
 * Column 2: row number * 10
 */
private static TempFile writeOrcFile(boolean cacheEnabled, DwrfStripeCacheMode cacheMode, DataSize cacheMaxSize)
{
    TempFile outputFile = new TempFile();
    try {
        Type type = INTEGER;
        List<Type> types = ImmutableList.of(type, type, type);
        OrcWriterOptions writerOptions = OrcWriterOptions.builder()
                .withFlushPolicy(DefaultOrcWriterFlushPolicy.builder()
                        .withStripeMaxRowCount(100)
                        .build())
                .withDwrfStripeCacheEnabled(cacheEnabled)
                .withDwrfStripeCacheMode(cacheMode)
                .withDwrfStripeCacheMaxSize(cacheMaxSize)
                .build();
        OrcWriter writer = new OrcWriter(
                new OutputStreamDataSink(new FileOutputStream(outputFile.getFile())),
                ImmutableList.of("Int1", "Int2", "Int3"),
                types,
                DWRF,
                ZLIB,
                Optional.empty(),
                NO_ENCRYPTION,
                writerOptions,
                ImmutableMap.of(),
                HIVE_STORAGE_TIME_ZONE,
                true,
                BOTH,
                new OrcWriterStats());
        // write 4 stripes with 100 values each
        int count = 0;
        for (int stripe = 0; stripe < 4; stripe++) {
            BlockBuilder[] blockBuilders = new BlockBuilder[3];
            for (int i = 0; i < blockBuilders.length; i++) {
                blockBuilders[i] = type.createBlockBuilder(null, 100);
            }
            for (int row = 0; row < 100; row++) {
                blockBuilders[0].writeInt(count);
                blockBuilders[1].writeInt(Integer.MAX_VALUE);
                blockBuilders[2].writeInt(count * 10);
                count++;
            }
            Block[] blocks = new Block[blockBuilders.length];
            for (int i = 0; i < blocks.length; i++) {
                blocks[i] = blockBuilders[i].build();
            }
            writer.write(new Page(blocks));
        }
        writer.close();
    }
    catch (IOException e) {
        throw new UncheckedIOException(e);
    }
    return outputFile;
}
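Tests in the class can then exercise different stripe-cache configurations against this helper; an illustrative invocation, assuming a DwrfStripeCacheMode value such as INDEX_AND_FOOTER and an example size cap:

// Illustrative: write a DWRF file whose stripe cache holds both index and
// footer sections, capped at 8MB. Mode and size are example values.
TempFile file = writeOrcFile(true, DwrfStripeCacheMode.INDEX_AND_FOOTER, new DataSize(8, MEGABYTE));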