use of io.prestosql.plugin.hive.metastore.StorageFormat in project hetu-core by openlookeng.
the class RcFileFileWriterFactory method createFileWriter.
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session, Optional<AcidOutputFormat.Options> acidOptions, Optional<HiveACIDWriteType> acidWriteType) {
if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
return Optional.empty();
}
RcFileEncoding rcFileEncoding;
if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
rcFileEncoding = new BinaryRcFileEncoding(timeZone);
} else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
rcFileEncoding = RcFilePageSourceFactory.createTextVectorEncoding(schema);
} else {
return Optional.empty();
}
Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));
// existing tables and partitions may have columns in a different order than the writer is providing, so build
// an index to rearrange columns in the proper order
List<String> fileColumnNames = getColumnNames(schema);
List<Type> fileColumnTypes = getColumnTypes(schema).stream().map(hiveType -> hiveType.getType(typeManager)).collect(toList());
int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
try {
FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
OutputStream outputStream = fileSystem.create(path);
Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
validationInputFactory = Optional.of(() -> {
try {
return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path), fileSystem.getFileStatus(path).getLen(), stats);
} catch (IOException e) {
throw new PrestoException(HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED, e);
}
});
}
Callable<Void> rollbackAction = () -> {
fileSystem.delete(path, false);
return null;
};
return Optional.of(new RcFileFileWriter(outputStream, rollbackAction, rcFileEncoding, fileColumnTypes, codecName, fileInputColumnIndexes, ImmutableMap.<String, String>builder().put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString()).put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(), validationInputFactory));
} catch (Exception e) {
throw new PrestoException(HiveErrorCode.HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
}
}
use of io.prestosql.plugin.hive.metastore.StorageFormat in project hetu-core by openlookeng.
the class OrcFileWriterFactory method createFileWriter.
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session, Optional<AcidOutputFormat.Options> acidOptions, Optional<HiveACIDWriteType> acidWriteType) {
if (!OrcOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
return Optional.empty();
}
CompressionKind compression = getCompression(schema, configuration);
// existing tables and partitions may have columns in a different order than the writer is providing, so build
// an index to rearrange columns in the proper order
List<String> fileColumnNames = getColumnNames(schema);
List<Type> fileColumnTypes = getColumnTypes(schema).stream().map(hiveType -> hiveType.getType(typeManager)).collect(toList());
List<Type> dataFileColumnTypes = fileColumnTypes;
int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
Optional<HiveFileWriter> deleteDeltaWriter = Optional.empty();
if (AcidUtils.isTablePropertyTransactional(schema) && !AcidUtils.isInsertOnlyTable(schema)) {
ImmutableList<String> orcFileColumnNames = ImmutableList.of(OrcPageSourceFactory.ACID_COLUMN_OPERATION, OrcPageSourceFactory.ACID_COLUMN_ORIGINAL_TRANSACTION, OrcPageSourceFactory.ACID_COLUMN_BUCKET, OrcPageSourceFactory.ACID_COLUMN_ROW_ID, OrcPageSourceFactory.ACID_COLUMN_CURRENT_TRANSACTION, OrcPageSourceFactory.ACID_COLUMN_ROW_STRUCT);
ImmutableList.Builder<RowType.Field> fieldsBuilder = ImmutableList.builder();
for (int i = 0; i < fileColumnNames.size(); i++) {
fieldsBuilder.add(new RowType.Field(Optional.of(fileColumnNames.get(i)), fileColumnTypes.get(i)));
}
ImmutableList<Type> orcFileColumnTypes = ImmutableList.of(INTEGER, BIGINT, INTEGER, BIGINT, BIGINT, RowType.from(fieldsBuilder.build()));
fileColumnNames = orcFileColumnNames;
fileColumnTypes = orcFileColumnTypes;
if (acidWriteType.isPresent() && acidWriteType.get() == HiveACIDWriteType.UPDATE) {
AcidOutputFormat.Options deleteOptions = acidOptions.get().clone().writingDeleteDelta(true);
Path deleteDeltaPath = AcidUtils.createFilename(path.getParent().getParent(), deleteOptions);
deleteDeltaWriter = createFileWriter(deleteDeltaPath, inputColumnNames, storageFormat, schema, configuration, session, Optional.of(deleteOptions), Optional.of(HiveACIDWriteType.DELETE));
}
}
try {
FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
OrcDataSink orcDataSink = createOrcDataSink(session, fileSystem, path);
Optional<Supplier<OrcDataSource>> validationInputFactory = Optional.empty();
if (HiveSessionProperties.isOrcOptimizedWriterValidate(session)) {
validationInputFactory = Optional.of(() -> {
try {
FileStatus fileStatus = fileSystem.getFileStatus(path);
return new HdfsOrcDataSource(new OrcDataSourceId(path.toString()), fileStatus.getLen(), HiveSessionProperties.getOrcMaxMergeDistance(session), HiveSessionProperties.getOrcMaxBufferSize(session), HiveSessionProperties.getOrcStreamBufferSize(session), false, fileSystem.open(path), readStats, fileStatus.getModificationTime());
} catch (IOException e) {
throw new PrestoException(HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED, e);
}
});
}
Callable<Void> rollbackAction = () -> {
fileSystem.delete(path, false);
return null;
};
return Optional.of(new OrcFileWriter(orcDataSink, rollbackAction, fileColumnNames, fileColumnTypes, dataFileColumnTypes, compression, orcWriterOptions.withStripeMinSize(HiveSessionProperties.getOrcOptimizedWriterMinStripeSize(session)).withStripeMaxSize(HiveSessionProperties.getOrcOptimizedWriterMaxStripeSize(session)).withStripeMaxRowCount(HiveSessionProperties.getOrcOptimizedWriterMaxStripeRows(session)).withDictionaryMaxMemory(HiveSessionProperties.getOrcOptimizedWriterMaxDictionaryMemory(session)).withMaxStringStatisticsLimit(HiveSessionProperties.getOrcStringStatisticsLimit(session)), writeLegacyVersion, fileInputColumnIndexes, ImmutableMap.<String, String>builder().put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString()).put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).put("hive.acid.version", String.valueOf(AcidUtils.OrcAcidVersion.ORC_ACID_VERSION)).build(), validationInputFactory, HiveSessionProperties.getOrcOptimizedWriterValidateMode(session), stats, acidOptions, acidWriteType, deleteDeltaWriter, path));
} catch (IOException e) {
throw new PrestoException(HiveErrorCode.HIVE_WRITER_OPEN_ERROR, "Error creating ORC file", e);
}
}
use of io.prestosql.plugin.hive.metastore.StorageFormat in project hetu-core by openlookeng.
the class TestHiveWriterFactory method testSortingPath.
@Test
public void testSortingPath() {
setUp();
String targetPath = "/tmp";
String writePath = "/tmp/table";
Optional<WriteIdInfo> writeIdInfo = Optional.of(new WriteIdInfo(1, 1, 0));
StorageFormat storageFormat = StorageFormat.fromHiveStorageFormat(ORC);
Storage storage = new Storage(storageFormat, "", Optional.empty(), false, ImmutableMap.of());
Table table = new Table("schema", "table", "user", "MANAGED_TABLE", storage, ImmutableList.of(new Column("col_1", HiveType.HIVE_INT, Optional.empty())), ImmutableList.of(), ImmutableMap.of("transactional", "true"), Optional.of("original"), Optional.of("expanded"));
HiveConfig hiveConfig = getHiveConfig();
HivePageSinkMetadata hivePageSinkMetadata = new HivePageSinkMetadata(new SchemaTableName("schema", "table"), Optional.of(table), ImmutableMap.of());
PageSorter pageSorter = new PagesIndexPageSorter(new PagesIndex.TestingFactory(false));
Metadata metadata = createTestMetadataManager();
TypeManager typeManager = new InternalTypeManager(metadata.getFunctionAndTypeManager());
HdfsConfiguration hdfsConfiguration = new HiveHdfsConfiguration(new HdfsConfigurationInitializer(hiveConfig), ImmutableSet.of());
HdfsEnvironment hdfsEnvironment = new HdfsEnvironment(hdfsConfiguration, hiveConfig, new NoHdfsAuthentication());
LocationService locationService = new HiveLocationService(hdfsEnvironment);
ConnectorSession session = newSession();
HiveWriterFactory hiveWriterFactory = new HiveWriterFactory(getDefaultHiveFileWriterFactories(hiveConfig), "schema", "table", false, HiveACIDWriteType.DELETE, ImmutableList.of(new HiveColumnHandle("col_1", HiveType.HIVE_INT, new TypeSignature("integer", ImmutableList.of()), 0, HiveColumnHandle.ColumnType.REGULAR, Optional.empty())), ORC, ORC, ImmutableMap.of(), OptionalInt.empty(), ImmutableList.of(), new LocationHandle(targetPath, writePath, false, LocationHandle.WriteMode.STAGE_AND_MOVE_TO_TARGET_DIRECTORY, writeIdInfo), locationService, session.getQueryId(), new HivePageSinkMetadataProvider(hivePageSinkMetadata, CachingHiveMetastore.memoizeMetastore(metastore, 1000), new HiveIdentity(session)), typeManager, hdfsEnvironment, pageSorter, hiveConfig.getWriterSortBufferSize(), hiveConfig.getMaxOpenSortFiles(), false, UTC, session, new TestingNodeManager("fake-environment"), new HiveEventClient(), new HiveSessionProperties(hiveConfig, new OrcFileWriterConfig(), new ParquetFileWriterConfig()), new HiveWriterStats(), getDefaultOrcFileWriterFactory(hiveConfig));
HiveWriter hiveWriter = hiveWriterFactory.createWriter(ImmutableList.of(), OptionalInt.empty(), Optional.empty());
assertEquals(((SortingFileWriter) hiveWriter.getFileWriter()).getTempFilePrefix().getName(), ".tmp-sort.bucket_00000");
}
use of io.prestosql.plugin.hive.metastore.StorageFormat in project hetu-core by openlookeng.
the class AbstractTestHive method doTestMetadataDelete.
private void doTestMetadataDelete(HiveStorageFormat storageFormat, SchemaTableName tableName) throws Exception {
// creating the table
doCreateEmptyTable(tableName, storageFormat, CREATE_TABLE_COLUMNS_PARTITIONED);
insertData(tableName, CREATE_TABLE_PARTITIONED_DATA);
MaterializedResult.Builder expectedResultBuilder = MaterializedResult.resultBuilder(SESSION, CREATE_TABLE_PARTITIONED_DATA.getTypes());
expectedResultBuilder.rows(CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows());
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
// verify partitions were created
List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName()).getPartitionNames(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
assertEqualsIgnoreOrder(partitionNames, CREATE_TABLE_PARTITIONED_DATA.getMaterializedRows().stream().map(row -> "ds=" + row.getField(CREATE_TABLE_PARTITIONED_DATA.getTypes().size() - 1)).collect(toList()));
// verify table directory is not empty
Set<String> filesAfterInsert = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
assertFalse(filesAfterInsert.isEmpty());
// verify the data
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
assertEqualsIgnoreOrder(result.getMaterializedRows(), expectedResultBuilder.build().getMaterializedRows());
}
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
// get ds column handle
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
HiveColumnHandle dsColumnHandle = (HiveColumnHandle) metadata.getColumnHandles(session, tableHandle).get("ds");
// delete ds=2015-07-03
session = newSession();
TupleDomain<ColumnHandle> tupleDomain = TupleDomain.fromFixedValues(ImmutableMap.of(dsColumnHandle, NullableValue.of(createUnboundedVarcharType(), utf8Slice("2015-07-03"))));
Constraint constraint = new Constraint(tupleDomain, convertToPredicate(tupleDomain));
tableHandle = applyFilter(metadata, tableHandle, constraint);
tableHandle = metadata.applyDelete(session, tableHandle).get();
metadata.executeDelete(session, tableHandle);
transaction.commit();
}
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
HiveColumnHandle dsColumnHandle = (HiveColumnHandle) metadata.getColumnHandles(session, tableHandle).get("ds");
int dsColumnOrdinalPosition = columnHandles.indexOf(dsColumnHandle);
// verify the data
ImmutableList<MaterializedRow> expectedRows = expectedResultBuilder.build().getMaterializedRows().stream().filter(row -> !"2015-07-03".equals(row.getField(dsColumnOrdinalPosition))).collect(toImmutableList());
MaterializedResult actualAfterDelete = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
assertEqualsIgnoreOrder(actualAfterDelete.getMaterializedRows(), expectedRows);
}
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
HiveColumnHandle dsColumnHandle = (HiveColumnHandle) metadata.getColumnHandles(session, tableHandle).get("ds");
// delete ds=2015-07-01 and 2015-07-02
session = newSession();
TupleDomain<ColumnHandle> tupleDomain2 = TupleDomain.withColumnDomains(ImmutableMap.of(dsColumnHandle, Domain.create(ValueSet.ofRanges(Range.range(createUnboundedVarcharType(), utf8Slice("2015-07-01"), true, utf8Slice("2015-07-02"), true)), false)));
Constraint constraint2 = new Constraint(tupleDomain2, convertToPredicate(tupleDomain2));
tableHandle = applyFilter(metadata, tableHandle, constraint2);
tableHandle = metadata.applyDelete(session, tableHandle).get();
metadata.executeDelete(session, tableHandle);
transaction.commit();
}
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = ImmutableList.copyOf(metadata.getColumnHandles(session, tableHandle).values());
// verify the data
session = newSession();
MaterializedResult actualAfterDelete2 = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
assertEqualsIgnoreOrder(actualAfterDelete2.getMaterializedRows(), ImmutableList.of());
// verify table directory is empty
Set<String> filesAfterDelete = listAllDataFiles(transaction, tableName.getSchemaName(), tableName.getTableName());
assertTrue(filesAfterDelete.isEmpty());
}
}
use of io.prestosql.plugin.hive.metastore.StorageFormat in project hetu-core by openlookeng.
the class AbstractTestHive method doTestTransactionDeleteInsert.
private void doTestTransactionDeleteInsert(HiveStorageFormat storageFormat, SchemaTableName tableName, Domain domainToDrop, MaterializedResult insertData, MaterializedResult expectedData, TransactionDeleteInsertTestTag tag, boolean expectQuerySucceed, Optional<ConflictTrigger> conflictTrigger) throws Exception {
Path writePath = null;
Path targetPath = null;
try (Transaction transaction = newTransaction()) {
try {
ConnectorMetadata metadata = transaction.getMetadata();
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
ConnectorSession session;
rollbackIfEquals(tag, ROLLBACK_RIGHT_AWAY);
// Query 1: delete
session = newSession();
HiveColumnHandle dsColumnHandle = (HiveColumnHandle) metadata.getColumnHandles(session, tableHandle).get("pk2");
TupleDomain<ColumnHandle> tupleDomain = TupleDomain.withColumnDomains(ImmutableMap.of(dsColumnHandle, domainToDrop));
Constraint constraint = new Constraint(tupleDomain, convertToPredicate(tupleDomain));
tableHandle = applyFilter(metadata, tableHandle, constraint);
tableHandle = metadata.applyDelete(session, tableHandle).get();
metadata.executeDelete(session, tableHandle);
rollbackIfEquals(tag, ROLLBACK_AFTER_DELETE);
// Query 2: insert
session = newSession();
ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
rollbackIfEquals(tag, ROLLBACK_AFTER_BEGIN_INSERT);
writePath = getStagingPathRoot(insertTableHandle);
targetPath = getTargetPathRoot(insertTableHandle);
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
sink.appendPage(insertData.toPage());
rollbackIfEquals(tag, ROLLBACK_AFTER_APPEND_PAGE);
Collection<Slice> fragments = getFutureValue(sink.finish());
rollbackIfEquals(tag, ROLLBACK_AFTER_SINK_FINISH);
metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
rollbackIfEquals(tag, ROLLBACK_AFTER_FINISH_INSERT);
assertEquals(tag, COMMIT);
if (conflictTrigger.isPresent()) {
JsonCodec<PartitionUpdate> partitionUpdateCodec = JsonCodec.jsonCodec(PartitionUpdate.class);
List<PartitionUpdate> partitionUpdates = fragments.stream().map(Slice::getBytes).map(partitionUpdateCodec::fromJson).collect(toList());
conflictTrigger.get().triggerConflict(session, tableName, insertTableHandle, partitionUpdates);
}
transaction.commit();
if (conflictTrigger.isPresent()) {
assertTrue(expectQuerySucceed);
conflictTrigger.get().verifyAndCleanup(session, tableName);
}
} catch (TestingRollbackException e) {
transaction.rollback();
} catch (PrestoException e) {
assertFalse(expectQuerySucceed);
if (conflictTrigger.isPresent()) {
conflictTrigger.get().verifyAndCleanup(newSession(), tableName);
}
}
}
// check that temporary files are removed
if (writePath != null && !writePath.equals(targetPath)) {
HdfsContext context = new HdfsContext(newSession(), tableName.getSchemaName(), tableName.getTableName());
FileSystem fileSystem = hdfsEnvironment.getFileSystem(context, writePath);
assertFalse(fileSystem.exists(writePath));
}
try (Transaction transaction = newTransaction()) {
// verify partitions
List<String> partitionNames = transaction.getMetastore(tableName.getSchemaName()).getPartitionNames(new HiveIdentity(newSession()), tableName.getSchemaName(), tableName.getTableName()).orElseThrow(() -> new AssertionError("Table does not exist: " + tableName));
assertEqualsIgnoreOrder(partitionNames, expectedData.getMaterializedRows().stream().map(row -> format("pk1=%s/pk2=%s", row.getField(1), row.getField(2))).distinct().collect(toList()));
// load the new table
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
// verify the data
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
assertEqualsIgnoreOrder(result.getMaterializedRows(), expectedData.getMaterializedRows());
}
}
Aggregations