use of io.trino.plugin.hive.metastore.StorageFormat in project trino by trinodb.
the class HiveMetadata method extractHiveStorageFormat.
private static HiveStorageFormat extractHiveStorageFormat(Table table) {
StorageFormat storageFormat = table.getStorage().getStorageFormat();
String outputFormat = storageFormat.getOutputFormat();
String serde = storageFormat.getSerde();
for (HiveStorageFormat format : HiveStorageFormat.values()) {
if (format.getOutputFormat().equals(outputFormat) && format.getSerde().equals(serde)) {
return format;
}
}
throw new TrinoException(HIVE_UNSUPPORTED_FORMAT, format("Output format %s with SerDe %s is not supported", outputFormat, serde));
}
use of io.trino.plugin.hive.metastore.StorageFormat in project trino by trinodb.
the class RcFileFileWriterFactory method createFileWriter.
@Override
public Optional<FileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session, OptionalInt bucketNumber, AcidTransaction transaction, boolean useAcidSchema, WriterKind writerKind) {
if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
return Optional.empty();
}
RcFileEncoding rcFileEncoding;
if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerde())) {
rcFileEncoding = new BinaryRcFileEncoding(timeZone);
} else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerde())) {
rcFileEncoding = createTextVectorEncoding(schema);
} else {
return Optional.empty();
}
Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));
// existing tables and partitions may have columns in a different order than the writer is providing, so build
// an index to rearrange columns in the proper order
List<String> fileColumnNames = getColumnNames(schema);
List<Type> fileColumnTypes = getColumnTypes(schema).stream().map(hiveType -> hiveType.getType(typeManager, getTimestampPrecision(session))).collect(toList());
int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
try {
FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getIdentity(), path, configuration);
OutputStream outputStream = fileSystem.create(path, false);
Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
if (isRcfileOptimizedWriterValidate(session)) {
validationInputFactory = Optional.of(() -> {
try {
return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path), fileSystem.getFileStatus(path).getLen(), stats);
} catch (IOException e) {
throw new TrinoException(HIVE_WRITE_VALIDATION_FAILED, e);
}
});
}
Callable<Void> rollbackAction = () -> {
fileSystem.delete(path, false);
return null;
};
return Optional.of(new RcFileFileWriter(outputStream, rollbackAction, rcFileEncoding, fileColumnTypes, codecName, fileInputColumnIndexes, ImmutableMap.<String, String>builder().put(PRESTO_VERSION_NAME, nodeVersion.toString()).put(PRESTO_QUERY_ID_NAME, session.getQueryId()).buildOrThrow(), validationInputFactory));
} catch (Exception e) {
throw new TrinoException(HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
}
}
use of io.trino.plugin.hive.metastore.StorageFormat in project trino by trinodb.
the class AbstractTestHiveFileFormats method createTestFileTrino.
public static FileSplit createTestFileTrino(String filePath, HiveStorageFormat storageFormat, HiveCompressionCodec compressionCodec, List<TestColumn> testColumns, ConnectorSession session, int numRows, HiveFileWriterFactory fileWriterFactory) {
// filter out partition keys, which are not written to the file
testColumns = testColumns.stream().filter(column -> !column.isPartitionKey()).collect(toImmutableList());
List<Type> types = testColumns.stream().map(TestColumn::getType).map(HiveType::valueOf).map(type -> type.getType(TESTING_TYPE_MANAGER)).collect(toList());
PageBuilder pageBuilder = new PageBuilder(types);
for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
pageBuilder.declarePosition();
for (int columnNumber = 0; columnNumber < testColumns.size(); columnNumber++) {
serializeObject(types.get(columnNumber), pageBuilder.getBlockBuilder(columnNumber), testColumns.get(columnNumber).getWriteValue(), testColumns.get(columnNumber).getObjectInspector(), false);
}
}
Page page = pageBuilder.build();
JobConf jobConf = new JobConf();
configureCompression(jobConf, compressionCodec);
Properties tableProperties = new Properties();
tableProperties.setProperty("columns", testColumns.stream().map(TestColumn::getName).collect(Collectors.joining(",")));
tableProperties.setProperty("columns.types", testColumns.stream().map(TestColumn::getType).collect(Collectors.joining(",")));
Optional<FileWriter> fileWriter = fileWriterFactory.createFileWriter(new Path(filePath), testColumns.stream().map(TestColumn::getName).collect(toList()), StorageFormat.fromHiveStorageFormat(storageFormat), tableProperties, jobConf, session, OptionalInt.empty(), NO_ACID_TRANSACTION, false, WriterKind.INSERT);
FileWriter hiveFileWriter = fileWriter.orElseThrow(() -> new IllegalArgumentException("fileWriterFactory"));
hiveFileWriter.appendRows(page);
hiveFileWriter.commit();
return new FileSplit(new Path(filePath), 0, new File(filePath).length(), new String[0]);
}
use of io.trino.plugin.hive.metastore.StorageFormat in project trino by trinodb.
the class TestProtoUtils method testStorageFormat.
@Test
public void testStorageFormat() {
alluxio.grpc.table.layout.hive.StorageFormat.Builder storageFormat = TestingAlluxioMetastoreObjects.getTestingStorageFormat();
StorageFormat fmt = ProtoUtils.fromProto(storageFormat.build());
assertEquals(storageFormat.getSerde(), fmt.getSerde());
assertEquals(storageFormat.getInputFormat(), fmt.getInputFormat());
assertEquals(storageFormat.getOutputFormat(), fmt.getOutputFormat());
}
use of io.trino.plugin.hive.metastore.StorageFormat in project trino by trinodb.
the class AbstractTestHive method doTestTransactionDeleteInsert.
private void doTestTransactionDeleteInsert(HiveStorageFormat storageFormat, SchemaTableName tableName, Domain domainToDrop, MaterializedResult insertData, MaterializedResult expectedData, TransactionDeleteInsertTestTag tag, boolean expectQuerySucceed, Optional<ConflictTrigger> conflictTrigger) throws Exception {
Path writePath = null;
Path targetPath = null;
try (Transaction transaction = newTransaction()) {
try {
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
ConnectorSession session;
rollbackIfEquals(tag, ROLLBACK_RIGHT_AWAY);
// Query 1: delete
session = newSession();
HiveColumnHandle dsColumnHandle = (HiveColumnHandle) metadata.getColumnHandles(session, tableHandle).get("pk2");
TupleDomain<ColumnHandle> tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of(dsColumnHandle, domainToDrop));
Constraint constraint = new Constraint(tupleDomain, tupleDomain.asPredicate(), tupleDomain.getDomains().orElseThrow().keySet());
tableHandle = applyFilter(metadata, tableHandle, constraint);
tableHandle = metadata.applyDelete(session, tableHandle).get();
metadata.executeDelete(session, tableHandle);
rollbackIfEquals(tag, ROLLBACK_AFTER_DELETE);
// Query 2: insert
session = newSession();
ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle, ImmutableList.of(), NO_RETRIES);
rollbackIfEquals(tag, ROLLBACK_AFTER_BEGIN_INSERT);
writePath = getStagingPathRoot(insertTableHandle);
targetPath = getTargetPathRoot(insertTableHandle);
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
sink.appendPage(insertData.toPage());
rollbackIfEquals(tag, ROLLBACK_AFTER_APPEND_PAGE);
Collection<Slice> fragments = getFutureValue(sink.finish());
rollbackIfEquals(tag, ROLLBACK_AFTER_SINK_FINISH);
metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
rollbackIfEquals(tag, ROLLBACK_AFTER_FINISH_INSERT);
assertEquals(tag, COMMIT);
if (conflictTrigger.isPresent()) {
JsonCodec<PartitionUpdate> partitionUpdateCodec = JsonCodec.jsonCodec(PartitionUpdate.class);
List<PartitionUpdate> partitionUpdates = fragments.stream().map(Slice::getBytes).map(partitionUpdateCodec::fromJson).collect(toList());
conflictTrigger.get().triggerConflict(session, tableName, insertTableHandle, partitionUpdates);
}
transaction.commit();
if (conflictTrigger.isPresent()) {
assertTrue(expectQuerySucceed);
conflictTrigger.get().verifyAndCleanup(session, tableName);
}
} catch (TestingRollbackException e) {
transaction.rollback();
} catch (TrinoException e) {
assertFalse(expectQuerySucceed);
if (conflictTrigger.isPresent()) {
conflictTrigger.get().verifyAndCleanup(newSession(), tableName);
}
}
}
// check that temporary files are removed
if (writePath != null && !writePath.equals(targetPath)) {
HdfsContext context = new HdfsContext(newSession());
FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, writePath);
assertFalse(fileSystem.exists(writePath));
}
try (Transaction transaction = newTransaction()) {
// verify partitions
List<String> partitionNames = transaction.getMetastore().getPartitionNames(tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
assertEqualsIgnoreOrder(partitionNames, expectedData.getMaterializedRows().stream().map(row -> format("pk1=%s/pk2=%s", row.getField(1), row.getField(2))).distinct().collect(toImmutableList()));
// load the new table
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
// verify the data
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
assertEqualsIgnoreOrder(result.getMaterializedRows(), expectedData.getMaterializedRows());
}
}
Aggregations