Use of io.trino.orc.metadata.CompressionKind in project trino by trinodb.
From the class OrcFileWriterFactory, method getCompression:
private static CompressionKind getCompression(Properties schema, JobConf configuration) {
    String compressionName = OrcConf.COMPRESS.getString(schema, configuration);
    if (compressionName == null) {
        return CompressionKind.ZLIB;
    }
    CompressionKind compression;
    try {
        compression = CompressionKind.valueOf(compressionName.toUpperCase(ENGLISH));
    } catch (IllegalArgumentException e) {
        throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, "Unknown ORC compression type " + compressionName);
    }
    return compression;
}
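The compression name is read through OrcConf.COMPRESS from the table schema or the job configuration, and ZLIB is the default when neither supplies a value. Below is a minimal standalone sketch (hypothetical code, not from the Trino sources) that mirrors this lookup, assuming OrcConf.COMPRESS corresponds to the "orc.compress" table property:

import java.util.Locale;
import java.util.Properties;

import io.trino.orc.metadata.CompressionKind;

// Hypothetical sketch mirroring getCompression above; class and method names are illustrative.
public class CompressionResolutionSketch {
    static CompressionKind resolve(Properties schema) {
        String name = schema.getProperty("orc.compress"); // assumed key behind OrcConf.COMPRESS
        if (name == null) {
            return CompressionKind.ZLIB; // same default the factory falls back to
        }
        // case-insensitive match, as in the factory; unknown names throw IllegalArgumentException
        return CompressionKind.valueOf(name.toUpperCase(Locale.ENGLISH));
    }

    public static void main(String[] args) {
        Properties snappy = new Properties();
        snappy.setProperty("orc.compress", "snappy");
        System.out.println(resolve(snappy)); // SNAPPY
        System.out.println(resolve(new Properties())); // ZLIB
    }
}

The real method additionally consults the JobConf and translates the IllegalArgumentException into a TrinoException with HIVE_UNSUPPORTED_FORMAT.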
Use of io.trino.orc.metadata.CompressionKind in project trino by trinodb.
From the class OrcTester, method assertRoundTrip:
private void assertRoundTrip(Type writeType, Type readType, List<?> writeValues, List<?> readValues) throws Exception {
    OrcWriterStats stats = new OrcWriterStats();
    for (CompressionKind compression : compressions) {
        boolean hiveSupported = (compression != LZ4) && (compression != ZSTD) && !isTimestampTz(writeType) && !isTimestampTz(readType);
        for (Format format : formats) {
            // write Hive, read Trino
            if (hiveSupported) {
                try (TempFile tempFile = new TempFile()) {
                    writeOrcColumnHive(tempFile.getFile(), format, compression, writeType, writeValues.iterator());
                    assertFileContentsTrino(readType, tempFile, readValues, false, false);
                }
            }
        }
        // write Trino, read Hive and Trino
        try (TempFile tempFile = new TempFile()) {
            writeOrcColumnTrino(tempFile.getFile(), compression, writeType, writeValues.iterator(), stats);
            if (hiveSupported) {
                assertFileContentsHive(readType, tempFile, readValues);
            }
            assertFileContentsTrino(readType, tempFile, readValues, false, false);
            if (skipBatchTestsEnabled) {
                assertFileContentsTrino(readType, tempFile, readValues, true, false);
            }
            if (skipStripeTestsEnabled) {
                assertFileContentsTrino(readType, tempFile, readValues, false, true);
            }
        }
    }
    assertEquals(stats.getWriterSizeInBytes(), 0);
}
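The hiveSupported flag gates the Hive leg of the round trip: files compressed with LZ4 or ZSTD, and columns failing the isTimestampTz check, are exercised only through the Trino writer and reader. A small hypothetical helper (not part of OrcTester, reusing its isTimestampTz predicate) restating that rule:

// Hypothetical helper (not part of OrcTester) restating the compatibility check above.
private boolean isHiveReadable(CompressionKind compression, Type writeType, Type readType) {
    return compression != CompressionKind.LZ4
            && compression != CompressionKind.ZSTD
            && !isTimestampTz(writeType)
            && !isTimestampTz(readType);
}

Inlining the expression, as the test does, keeps the loop self-describing; a named helper like this only pays off if more excluded combinations are added later.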
Use of io.trino.orc.metadata.CompressionKind in project trino by trinodb.
From the class OrcFileWriterFactory, method createFileWriter:
@Override
public Optional<FileWriter> createFileWriter(
        Path path,
        List<String> inputColumnNames,
        StorageFormat storageFormat,
        Properties schema,
        JobConf configuration,
        ConnectorSession session,
        OptionalInt bucketNumber,
        AcidTransaction transaction,
        boolean useAcidSchema,
        WriterKind writerKind) {
    if (!OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }
    CompressionKind compression = getCompression(schema, configuration);
    // existing tables and partitions may have columns in a different order than the writer is providing, so build
    // an index to rearrange columns in the proper order
    List<String> fileColumnNames = getColumnNames(schema);
    List<Type> fileColumnTypes = getColumnTypes(schema).stream()
            .map(hiveType -> hiveType.getType(typeManager, getTimestampPrecision(session)))
            .collect(toList());
    int[] fileInputColumnIndexes = fileColumnNames.stream()
            .mapToInt(inputColumnNames::indexOf)
            .toArray();
    if (transaction.isAcidDeleteOperation(writerKind)) {
        // For delete, set the "row" column to -1
        fileInputColumnIndexes[fileInputColumnIndexes.length - 1] = -1;
    }
    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, configuration);
        OrcDataSink orcDataSink = createOrcDataSink(fileSystem, path);
        Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
        if (isOrcOptimizedWriterValidate(session)) {
            validationInputFactory = Optional.of(() -> {
                try {
                    return new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), fileSystem.getFileStatus(path).getLen(), new OrcReaderOptions(), fileSystem.open(path), readStats);
                } catch (IOException e) {
                    throw new TrinoException(HIVE_WRITE_VALIDATION_FAILED, e);
                }
            });
        }
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };
        if (transaction.isInsert() && useAcidSchema) {
            // Only add the ACID columns if the request is for insert-type operations -- for delete operations,
            // the columns are added by the caller. This is because the ACID columns for delete operations
            // depend on the rows being deleted, whereas the ACID columns for INSERT are completely determined
            // by bucket and writeId.
            Type rowType = createRowType(fileColumnNames, fileColumnTypes);
            fileColumnNames = ACID_COLUMN_NAMES;
            fileColumnTypes = createAcidColumnPrestoTypes(rowType);
        }
        return Optional.of(new OrcFileWriter(
                orcDataSink,
                writerKind,
                transaction,
                useAcidSchema,
                bucketNumber,
                rollbackAction,
                fileColumnNames,
                fileColumnTypes,
                createRootOrcType(fileColumnNames, fileColumnTypes),
                compression,
                getOrcWriterOptions(schema, orcWriterOptions)
                        .withStripeMinSize(getOrcOptimizedWriterMinStripeSize(session))
                        .withStripeMaxSize(getOrcOptimizedWriterMaxStripeSize(session))
                        .withStripeMaxRowCount(getOrcOptimizedWriterMaxStripeRows(session))
                        .withDictionaryMaxMemory(getOrcOptimizedWriterMaxDictionaryMemory(session))
                        .withMaxStringStatisticsLimit(getOrcStringStatisticsLimit(session)),
                fileInputColumnIndexes,
                ImmutableMap.<String, String>builder()
                        .put(PRESTO_VERSION_NAME, nodeVersion.toString())
                        .put(PRESTO_QUERY_ID_NAME, session.getQueryId())
                        .buildOrThrow(),
                validationInputFactory,
                getOrcOptimizedWriterValidateMode(session),
                stats));
    } catch (IOException e) {
        throw new TrinoException(HIVE_WRITER_OPEN_ERROR, "Error creating ORC file", e);
    }
}
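The fileInputColumnIndexes array built above maps each column of the file schema to its position in the writer's input; -1 marks a file column the input does not supply, which is also the value forced onto the ACID "row" column for deletes. A self-contained sketch (hypothetical, not Trino code) of that mapping:

import java.util.Arrays;
import java.util.List;

// Hypothetical sketch of the index rearrangement built in createFileWriter above.
public class ColumnIndexSketch {
    public static void main(String[] args) {
        List<String> fileColumnNames = List.of("id", "name", "ds"); // order stored in the file schema
        List<String> inputColumnNames = List.of("name", "id", "ds"); // order the writer receives values in

        int[] fileInputColumnIndexes = fileColumnNames.stream()
                .mapToInt(inputColumnNames::indexOf)
                .toArray();

        // For each file column, the index of the matching input column; -1 would mean "not provided".
        System.out.println(Arrays.toString(fileInputColumnIndexes)); // [1, 0, 2]
    }
}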