Use of io.prestosql.plugin.hive.metastore.StorageFormat in project hetu-core by openlookeng.
In the class CarbondataMetadata, the method beginCreateTable:
@Override
public CarbondataOutputTableHandle beginCreateTable(ConnectorSession session, ConnectorTableMetadata tableMetadata, Optional<ConnectorNewTableLayout> layout) {
// get the root directory for the database
SchemaTableName finalSchemaTableName = tableMetadata.getTable();
String finalSchemaName = finalSchemaTableName.getSchemaName();
String finalTableName = finalSchemaTableName.getTableName();
this.user = session.getUser();
this.schemaName = finalSchemaName;
currentState = State.CREATE_TABLE_AS;
List<String> partitionedBy = new ArrayList<String>();
List<SortingColumn> sortBy = new ArrayList<SortingColumn>();
List<HiveColumnHandle> columnHandles = new ArrayList<HiveColumnHandle>();
Map<String, String> tableProperties = new HashMap<String, String>();
getParametersForCreateTable(session, tableMetadata, partitionedBy, sortBy, columnHandles, tableProperties);
metastore.getDatabase(finalSchemaName).orElseThrow(() -> new SchemaNotFoundException(finalSchemaName));
// To avoid a type mismatch between HiveStorageFormat and Carbondata's StorageFormat, this workaround is used; there is no cleaner option.
HiveStorageFormat tableStorageFormat = HiveStorageFormat.valueOf("CARBON");
HiveStorageFormat partitionStorageFormat = tableStorageFormat;
Map<String, HiveColumnHandle> columnHandlesByName = Maps.uniqueIndex(columnHandles, HiveColumnHandle::getName);
List<Column> partitionColumns = partitionedBy.stream().map(columnHandlesByName::get).map(column -> new Column(column.getName(), column.getHiveType(), column.getComment())).collect(toList());
checkPartitionTypesSupported(partitionColumns);
// Resolve the final path at which the carbon table will be created.
LocationHandle locationHandle = getCarbonDataTableCreationPath(session, tableMetadata, HiveWriteUtils.OpertionType.CREATE_TABLE_AS);
Path targetPath = locationService.getTableWriteInfo(locationHandle, false).getTargetPath();
AbsoluteTableIdentifier finalAbsoluteTableIdentifier = AbsoluteTableIdentifier.from(targetPath.toString(), new CarbonTableIdentifier(finalSchemaName, finalTableName, UUID.randomUUID().toString()));
hdfsEnvironment.doAs(session.getUser(), () -> {
initialConfiguration = ConfigurationUtils.toJobConf(this.hdfsEnvironment.getConfiguration(new HdfsEnvironment.HdfsContext(session, finalSchemaName, finalTableName), new Path(locationHandle.getJsonSerializableTargetPath())));
// Create Carbondata metadata folder and Schema file
CarbondataMetadataUtils.createMetaDataFolderSchemaFile(hdfsEnvironment, session, columnHandles, finalAbsoluteTableIdentifier, partitionedBy, sortBy.stream().map(s -> s.getColumnName().toLowerCase(Locale.ENGLISH)).collect(toList()), targetPath.toString(), initialConfiguration);
this.tableStorageLocation = Optional.of(targetPath.toString());
Path outputPath = new Path(locationHandle.getJsonSerializableTargetPath());
Properties schema = readSchemaForCarbon(finalSchemaName, finalTableName, targetPath, columnHandles, partitionColumns);
// Create committer object
setupCommitWriter(schema, outputPath, initialConfiguration, false);
});
try {
CarbondataOutputTableHandle result = new CarbondataOutputTableHandle(finalSchemaName, finalTableName, columnHandles, metastore.generatePageSinkMetadata(new HiveIdentity(session), finalSchemaTableName), locationHandle, tableStorageFormat, partitionStorageFormat, partitionedBy, Optional.empty(), session.getUser(), tableProperties, ImmutableMap.<String, String>of(EncodedLoadModel, jobContext.getConfiguration().get(LOAD_MODEL)));
LocationService.WriteInfo writeInfo = locationService.getQueryWriteInfo(locationHandle);
metastore.declareIntentionToWrite(session, writeInfo.getWriteMode(), writeInfo.getWritePath(), finalSchemaTableName);
return result;
} catch (RuntimeException ex) {
throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Error creating table: %s", ex.getMessage()), ex);
}
}
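The workaround above hinges on converting between the two format types. Below is a minimal sketch of that conversion; the helper class name is illustrative and not part of hetu-core, and it assumes only that the requested format name (such as CARBON here) is registered in the HiveStorageFormat enum.
import io.prestosql.plugin.hive.HiveStorageFormat;
import io.prestosql.plugin.hive.metastore.StorageFormat;

public final class StorageFormatConversionSketch
{
    private StorageFormatConversionSketch() {}

    // Resolve a format by name and convert it to the metastore's StorageFormat representation.
    public static StorageFormat toMetastoreFormat(String formatName)
    {
        // valueOf throws IllegalArgumentException if the name is not in the enum,
        // which is why the connector above relies on CARBON being registered there.
        HiveStorageFormat hiveFormat = HiveStorageFormat.valueOf(formatName);
        return StorageFormat.fromHiveStorageFormat(hiveFormat);
    }
}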
Use of io.prestosql.plugin.hive.metastore.StorageFormat in project hetu-core by openlookeng.
In the class HiveWriterFactory, the method createWriter:
private HiveWriter createWriter(List<String> partitionValues, OptionalInt bucketNumber, Optional<Options> vacuumOptions, boolean forMerge) {
boolean isTxnTable = isTxnTable();
if (bucketCount.isPresent()) {
checkArgument(bucketNumber.isPresent(), "Bucket not provided for bucketed table");
checkArgument(bucketNumber.getAsInt() < bucketCount.getAsInt(), "Bucket number %s must be less than bucket count %s", bucketNumber, bucketCount);
} else {
checkArgument(isTxnTable || !bucketNumber.isPresent(), "Bucket number provided for table that is not bucketed");
}
String fileName;
if (bucketNumber.isPresent()) {
fileName = computeBucketedFileName(queryId, bucketNumber.getAsInt());
} else {
// Snapshot: don't use UUID. File name needs to be deterministic.
if (isSnapshotEnabled) {
fileName = String.format(ENGLISH, "%s_%d_%d_%d", queryId, session.getTaskId().getAsInt(), session.getPipelineId().getAsInt(), session.getDriverId().getAsInt());
} else {
fileName = queryId + "_" + randomUUID();
}
}
Optional<String> partitionName;
if (!partitionColumnNames.isEmpty()) {
partitionName = Optional.of(FileUtils.makePartName(partitionColumnNames, partitionValues));
} else {
partitionName = Optional.empty();
}
// attempt to get the existing partition (if this is an existing partitioned table)
Optional<Partition> partition = Optional.empty();
if (!partitionValues.isEmpty() && table != null) {
partition = pageSinkMetadataProvider.getPartition(partitionValues);
}
UpdateMode updateMode;
Properties schema;
WriteInfo writeInfo;
StorageFormat outputStorageFormat;
if (!partition.isPresent()) {
if (table == null) {
// Write to: a new partition in a new partitioned table,
// or a new unpartitioned table.
updateMode = UpdateMode.NEW;
schema = new Properties();
schema.setProperty(IOConstants.COLUMNS, dataColumns.stream().map(DataColumn::getName).collect(joining(",")));
schema.setProperty(IOConstants.COLUMNS_TYPES, dataColumns.stream().map(DataColumn::getHiveType).map(HiveType::getHiveTypeName).map(HiveTypeName::toString).collect(joining(":")));
setAdditionalSchemaProperties(schema);
if (!partitionName.isPresent()) {
// new unpartitioned table
writeInfo = locationService.getTableWriteInfo(locationHandle, false);
} else {
// a new partition in a new partitioned table
writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition, partitionName.get());
if (!writeInfo.getWriteMode().isWritePathSameAsTargetPath()) {
// verify that the target directory for the partition does not already exist
if (HiveWriteUtils.pathExists(new HdfsContext(session, schemaName, tableName), hdfsEnvironment, writeInfo.getTargetPath())) {
throw new PrestoException(HIVE_PATH_ALREADY_EXISTS, format("Target directory for new partition '%s' of table '%s.%s' already exists: %s", partitionName, schemaName, tableName, writeInfo.getTargetPath()));
}
}
}
} else {
// Write to: a new partition in an existing partitioned table, or an existing unpartitioned table
if (partitionName.isPresent()) {
// a new partition in an existing partitioned table
updateMode = UpdateMode.NEW;
writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition, partitionName.get());
} else {
switch(insertExistingPartitionsBehavior) {
case APPEND:
checkState(!immutablePartitions);
updateMode = UpdateMode.APPEND;
writeInfo = locationService.getTableWriteInfo(locationHandle, false);
break;
case OVERWRITE:
updateMode = UpdateMode.OVERWRITE;
writeInfo = locationService.getTableWriteInfo(locationHandle, true);
break;
case ERROR:
throw new PrestoException(HIVE_TABLE_READ_ONLY, "Unpartitioned Hive tables are immutable");
default:
throw new IllegalArgumentException("Unsupported insert existing table behavior: " + insertExistingPartitionsBehavior);
}
}
schema = getHiveSchema(table);
}
if (partitionName.isPresent()) {
// Write to a new partition
outputStorageFormat = fromHiveStorageFormat(partitionStorageFormat);
} else {
// Write to a new/existing unpartitioned table
outputStorageFormat = fromHiveStorageFormat(tableStorageFormat);
}
} else {
// Write to: an existing partition in an existing partitioned table
if (insertExistingPartitionsBehavior == InsertExistingPartitionsBehavior.APPEND) {
// Append to an existing partition
checkState(!immutablePartitions);
updateMode = UpdateMode.APPEND;
// Check the column types in partition schema match the column types in table schema
List<Column> tableColumns = table.getDataColumns();
List<Column> existingPartitionColumns = partition.get().getColumns();
for (int i = 0; i < min(existingPartitionColumns.size(), tableColumns.size()); i++) {
HiveType tableType = tableColumns.get(i).getType();
HiveType partitionType = existingPartitionColumns.get(i).getType();
if (!tableType.equals(partitionType)) {
throw new PrestoException(HIVE_PARTITION_SCHEMA_MISMATCH, format("" + "You are trying to write into an existing partition in a table. " + "The table schema has changed since the creation of the partition. " + "Inserting rows into such partition is not supported. " + "The column '%s' in table '%s' is declared as type '%s', " + "but partition '%s' declared column '%s' as type '%s'.", tableColumns.get(i).getName(), tableName, tableType, partitionName, existingPartitionColumns.get(i).getName(), partitionType));
}
}
HiveWriteUtils.checkPartitionIsWritable(partitionName.get(), partition.get());
outputStorageFormat = partition.get().getStorage().getStorageFormat();
schema = getHiveSchema(partition.get(), table);
writeInfo = locationService.getPartitionWriteInfo(locationHandle, partition, partitionName.get());
} else if (insertExistingPartitionsBehavior == InsertExistingPartitionsBehavior.OVERWRITE) {
// Overwrite an existing partition
//
// Overwrite behaves as if the partition were first dropped and a new partition then inserted, thus:
// * No partition writable check is required.
// * The table schema and storage format are used for the new partition (instead of the existing partition's schema and storage format).
updateMode = UpdateMode.OVERWRITE;
outputStorageFormat = fromHiveStorageFormat(partitionStorageFormat);
schema = getHiveSchema(table);
writeInfo = locationService.getPartitionWriteInfo(locationHandle, Optional.empty(), partitionName.get());
checkWriteMode(writeInfo);
} else if (insertExistingPartitionsBehavior == InsertExistingPartitionsBehavior.ERROR) {
throw new PrestoException(HIVE_PARTITION_READ_ONLY, "Cannot insert into an existing partition of Hive table: " + partitionName.get());
} else {
throw new IllegalArgumentException(format("Unsupported insert existing partitions behavior: %s", insertExistingPartitionsBehavior));
}
}
schema.putAll(additionalTableParameters);
if (acidWriteType != HiveACIDWriteType.DELETE) {
validateSchema(partitionName, schema);
}
Path path;
Optional<AcidOutputFormat.Options> acidOptions;
String fileNameWithExtension;
if (isTxnTable) {
WriteIdInfo writeIdInfo = locationHandle.getJsonSerializablewriteIdInfo().get();
AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).minimumWriteId(writeIdInfo.getMinWriteId()).maximumWriteId(writeIdInfo.getMaxWriteId()).statementId(writeIdInfo.getStatementId()).bucket(bucketNumber.isPresent() ? bucketNumber.getAsInt() : 0);
if (acidWriteType == HiveACIDWriteType.DELETE) {
// to support delete as insert
options.writingDeleteDelta(true);
} else if (acidWriteType == HiveACIDWriteType.INSERT_OVERWRITE) {
// In case of ACID txn tables, don't delete old data; just create a new base in the same partition.
options.writingBase(true);
}
if (vacuumOptions.isPresent() && HiveACIDWriteType.isVacuum(acidWriteType)) {
Options vOptions = vacuumOptions.get();
// Use the original bucket file number itself.
// Compacted delta directories will not have statementId
options.maximumWriteId(vOptions.getMaximumWriteId()).minimumWriteId(vOptions.getMinimumWriteId()).writingBase(vOptions.isWritingBase()).writingDeleteDelta(vOptions.isWritingDeleteDelta()).bucket(vOptions.getBucketId()).statementId(-1);
}
if (AcidUtils.isInsertOnlyTable(schema)) {
String subdir;
if (options.isWritingBase()) {
subdir = AcidUtils.baseDir(options.getMaximumWriteId());
} else if (HiveACIDWriteType.isVacuum(acidWriteType)) {
// Only minor-compacted deltas will not have a statement ID.
subdir = AcidUtils.deltaSubdir(options.getMinimumWriteId(), options.getMaximumWriteId());
} else {
subdir = AcidUtils.deltaSubdir(options.getMinimumWriteId(), options.getMaximumWriteId(), options.getStatementId());
}
Path parentDir = new Path(writeInfo.getWritePath(), subdir);
fileName = String.format("%06d", options.getBucketId()) + "_0" + getFileExtension(conf, outputStorageFormat);
path = new Path(parentDir, fileName);
Properties properties = new Properties();
properties.setProperty("transactional_properties", "insert_only");
options.tableProperties(properties);
} else {
path = AcidUtils.createFilename(writeInfo.getWritePath(), options);
}
// In the ACID case the entire delta directory should be renamed from the staging directory.
fileNameWithExtension = path.getParent().getName();
acidOptions = Optional.of(options);
} else {
fileNameWithExtension = fileName + getFileExtension(conf, outputStorageFormat);
path = new Path(writeInfo.getWritePath(), fileNameWithExtension);
acidOptions = Optional.empty();
}
FileSystem fileSystem;
try {
fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, conf);
} catch (IOException e) {
throw new PrestoException(HIVE_WRITER_OPEN_ERROR, e);
}
if (isSnapshotEnabled) {
// Snapshot: use a recognizable name pattern, in case they need to be deleted/renamed
String oldFileName = path.getName();
String newFileName = toSnapshotFileName(oldFileName, queryId);
path = new Path(path.getParent(), newFileName);
if (fileNameWithExtension.equals(oldFileName)) {
fileNameWithExtension = newFileName;
}
}
HiveFileWriter hiveFileWriter = null;
if (isSnapshotEnabled && !forMerge) {
// Add a suffix to file name for sub files
String oldFileName = path.getName();
String newFileName = toSnapshotSubFile(oldFileName);
path = new Path(path.getParent(), newFileName);
if (fileNameWithExtension.equals(oldFileName)) {
fileNameWithExtension = newFileName;
}
// Always create a simple ORC writer for snapshot files. These will be merged in the end.
logContainingFolderInfo(fileSystem, path, "Creating SnapshotTempFileWriter for %s", path);
try {
Path finalPath = path;
hiveFileWriter = new SnapshotTempFileWriter(orcFileWriterFactory.createOrcDataSink(session, fileSystem, path), dataColumns.stream().map(column -> column.getHiveType().getType(typeManager)).collect(Collectors.toList()));
} catch (IOException e) {
throw new PrestoException(HiveErrorCode.HIVE_WRITER_OPEN_ERROR, "Error creating ORC file", e);
}
} else {
conf.set("table.write.path", writeInfo.getWritePath().toString());
for (HiveFileWriterFactory fileWriterFactory : fileWriterFactories) {
Optional<HiveFileWriter> fileWriter = fileWriterFactory.createFileWriter(path, dataColumns.stream().map(DataColumn::getName).collect(toList()), outputStorageFormat, schema, conf, session, acidOptions, Optional.of(acidWriteType));
if (fileWriter.isPresent()) {
hiveFileWriter = fileWriter.get();
break;
}
}
if (isSnapshotEnabled) {
// TODO-cp-I2BZ0A: assuming all files to be of ORC type
checkState(hiveFileWriter instanceof OrcFileWriter, "Only support ORC format with snapshot");
logContainingFolderInfo(fileSystem, path, "Creating file writer for final result: %s", path);
}
if (hiveFileWriter == null) {
hiveFileWriter = new RecordFileWriter(path, dataColumns.stream().map(DataColumn::getName).collect(toList()), outputStorageFormat, schema, partitionStorageFormat.getEstimatedWriterSystemMemoryUsage(), conf, typeManager, parquetTimeZone, session);
}
if (isTxnTable) {
hiveFileWriter.initWriter(true, path, fileSystem);
}
}
Path finalPath = path;
String writerImplementation = hiveFileWriter.getClass().getName();
Consumer<HiveWriter> onCommit;
if (isSnapshotEnabled && !forMerge) {
// Only send "commit" event for the merged file
onCommit = hiveWriter -> {
};
} else {
onCommit = hiveWriter -> {
Optional<Long> size;
try {
size = Optional.of(hdfsEnvironment.getFileSystem(session.getUser(), finalPath, conf).getFileStatus(finalPath).getLen());
} catch (IOException | RuntimeException e) {
// Do not fail the query if file system is not available
size = Optional.empty();
}
eventClient.post(new WriteCompletedEvent(session.getQueryId(), finalPath.toString(), schemaName, tableName, partitionName.orElse(null), outputStorageFormat.getOutputFormat(), writerImplementation, nodeManager.getCurrentNode().getVersion(), nodeManager.getCurrentNode().getHost(), session.getIdentity().getPrincipal().map(Principal::getName).orElse(null), nodeManager.getEnvironment(), sessionProperties, size.orElse(null), hiveWriter.getRowCount()));
};
}
if (!sortedBy.isEmpty() || (isTxnTable() && HiveACIDWriteType.isUpdateOrDelete(acidWriteType))) {
List<Type> types = dataColumns.stream().map(column -> column.getHiveType().getType(typeManager)).collect(Collectors.toList());
Map<String, Integer> columnIndexes = new HashMap<>();
for (int i = 0; i < dataColumns.size(); i++) {
columnIndexes.put(dataColumns.get(i).getName(), i);
}
if (sortedBy.isEmpty() && isTxnTable() && HiveACIDWriteType.isUpdateOrDelete(acidWriteType)) {
// Add $rowId column as the last column in the page
types.add(HiveColumnHandle.updateRowIdHandle().getHiveType().getType(typeManager));
columnIndexes.put(HiveColumnHandle.UPDATE_ROW_ID_COLUMN_NAME, dataColumns.size());
}
List<Integer> sortFields = new ArrayList<>();
List<SortOrder> sortOrders = new ArrayList<>();
List<SortingColumn> sortingColumns = this.sortedBy;
if (sortedBy.isEmpty() && isTxnTable() && HiveACIDWriteType.isUpdateOrDelete(acidWriteType)) {
sortingColumns = ImmutableList.of(new SortingColumn(HiveColumnHandle.UPDATE_ROW_ID_COLUMN_NAME, SortingColumn.Order.ASCENDING));
}
for (SortingColumn column : sortingColumns) {
Integer index = columnIndexes.get(column.getColumnName());
if (index == null) {
throw new PrestoException(HIVE_INVALID_METADATA, format("Sorting column '%s' does not exist in table '%s.%s'", column.getColumnName(), schemaName, tableName));
}
sortFields.add(index);
sortOrders.add(column.getOrder().getSortOrder());
}
FileSystem sortFileSystem = fileSystem;
String child = ".tmp-sort." + path.getName();
Path tempFilePrefix = new Path(path.getParent(), child);
hiveFileWriter = new SortingFileWriter(sortFileSystem, tempFilePrefix, hiveFileWriter, sortBufferSize, maxOpenSortFiles, types, sortFields, sortOrders, pageSorter, (fs, p) -> orcFileWriterFactory.createOrcDataSink(session, fs, p));
}
// Snapshot: only update stats when merging files
return new HiveWriter(hiveFileWriter, partitionName, updateMode, fileNameWithExtension, writeInfo.getWritePath().toString(), writeInfo.getTargetPath().toString(), path.toString(), onCommit, isSnapshotEnabled && !forMerge ? null : hiveWriterStats, hiveFileWriter.getExtraPartitionFiles());
}
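The createWriter method above picks outputStorageFormat in three ways: an appended existing partition keeps its stored format, a new or overwritten partition uses partitionStorageFormat, and an unpartitioned write uses tableStorageFormat. Below is a minimal sketch of that decision pulled out into an illustrative helper (not the project's API):
import io.prestosql.plugin.hive.HiveStorageFormat;
import io.prestosql.plugin.hive.metastore.Partition;
import io.prestosql.plugin.hive.metastore.StorageFormat;
import java.util.Optional;
import static io.prestosql.plugin.hive.metastore.StorageFormat.fromHiveStorageFormat;

final class OutputFormatChoice
{
    private OutputFormatChoice() {}

    static StorageFormat choose(Optional<Partition> existingPartition, Optional<String> partitionName, boolean overwriteExistingPartition, HiveStorageFormat tableStorageFormat, HiveStorageFormat partitionStorageFormat)
    {
        if (existingPartition.isPresent() && !overwriteExistingPartition) {
            // Appending to an existing partition reuses whatever format that partition already has.
            return existingPartition.get().getStorage().getStorageFormat();
        }
        // New or overwritten partitions use the partition format; unpartitioned writes use the table format.
        return partitionName.isPresent() ? fromHiveStorageFormat(partitionStorageFormat) : fromHiveStorageFormat(tableStorageFormat);
    }
}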
Use of io.prestosql.plugin.hive.metastore.StorageFormat in project hetu-core by openlookeng.
In the class AbstractTestHiveFileFormats, the method createTestFilePresto:
public static FileSplit createTestFilePresto(String filePath, HiveStorageFormat storageFormat, HiveCompressionCodec compressionCodec, List<TestColumn> columns, ConnectorSession session, int numRows, HiveFileWriterFactory fileWriterFactory) {
List<TestColumn> testColumns = columns;
// filter out partition keys, which are not written to the file
testColumns = ImmutableList.copyOf(filter(testColumns, not(TestColumn::isPartitionKey)));
List<Type> types = testColumns.stream().map(TestColumn::getType).map(HiveType::valueOf).map(type -> type.getType(TYPE_MANAGER)).collect(toList());
PageBuilder pageBuilder = new PageBuilder(types);
for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
pageBuilder.declarePosition();
for (int columnNumber = 0; columnNumber < testColumns.size(); columnNumber++) {
serializeObject(types.get(columnNumber), pageBuilder.getBlockBuilder(columnNumber), testColumns.get(columnNumber).getWriteValue(), testColumns.get(columnNumber).getObjectInspector(), false);
}
}
Page page = pageBuilder.build();
JobConf jobConf = new JobConf();
configureCompression(jobConf, compressionCodec);
Properties tableProperties = new Properties();
tableProperties.setProperty("columns", Joiner.on(',').join(transform(testColumns, TestColumn::getName)));
tableProperties.setProperty("columns.types", Joiner.on(',').join(transform(testColumns, TestColumn::getType)));
Optional<HiveFileWriter> fileWriter = fileWriterFactory.createFileWriter(new Path(filePath), testColumns.stream().map(TestColumn::getName).collect(toList()), StorageFormat.fromHiveStorageFormat(storageFormat), tableProperties, jobConf, session, Optional.empty(), Optional.empty());
HiveFileWriter hiveFileWriter = fileWriter.orElseThrow(() -> new IllegalArgumentException("fileWriterFactory"));
hiveFileWriter.appendRows(page);
hiveFileWriter.commit();
return new FileSplit(new Path(filePath), 0, new File(filePath).length(), new String[0]);
}
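A small companion sketch of the serde table properties this test builds before calling createFileWriter; the helper name is illustrative, and the keys and comma separators mirror the ones set above:
import com.google.common.base.Joiner;
import java.util.List;
import java.util.Properties;

final class SerdePropertiesSketch
{
    private SerdePropertiesSketch() {}

    // Build the "columns" / "columns.types" properties a HiveFileWriterFactory expects for a flat schema.
    static Properties forColumns(List<String> columnNames, List<String> hiveTypeNames)
    {
        Properties tableProperties = new Properties();
        tableProperties.setProperty("columns", Joiner.on(',').join(columnNames));
        tableProperties.setProperty("columns.types", Joiner.on(',').join(hiveTypeNames));
        return tableProperties;
    }
}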
Use of io.prestosql.plugin.hive.metastore.StorageFormat in project hetu-core by openlookeng.
In the class AbstractTestHive, the method doTestMismatchSchemaTable:
protected void doTestMismatchSchemaTable(SchemaTableName schemaTableName, HiveStorageFormat storageFormat, List<ColumnMetadata> tableBefore, MaterializedResult dataBefore, List<ColumnMetadata> tableAfter, MaterializedResult dataAfter) throws Exception {
String schemaName = schemaTableName.getSchemaName();
String tableName = schemaTableName.getTableName();
doCreateEmptyTable(schemaTableName, storageFormat, tableBefore);
// insert the data
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName);
ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
sink.appendPage(dataBefore.toPage());
Collection<Slice> fragments = getFutureValue(sink.finish());
metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
transaction.commit();
}
// load the table and verify the data
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName);
List<ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream().filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden()).collect(toList());
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
assertEqualsIgnoreOrder(result.getMaterializedRows(), dataBefore.getMaterializedRows());
transaction.commit();
}
// alter the table schema
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
PrincipalPrivileges principalPrivileges = testingPrincipalPrivilege(session);
Table oldTable = transaction.getMetastore(schemaName).getTable(new HiveIdentity(session), schemaName, tableName).get();
HiveTypeTranslator hiveTypeTranslator = new HiveTypeTranslator();
List<Column> dataColumns = tableAfter.stream().filter(columnMetadata -> !columnMetadata.getName().equals("ds")).map(columnMetadata -> new Column(columnMetadata.getName(), toHiveType(hiveTypeTranslator, columnMetadata.getType()), Optional.empty())).collect(toList());
Table.Builder newTable = Table.builder(oldTable).setDataColumns(dataColumns);
transaction.getMetastore(schemaName).replaceView(new HiveIdentity(session), schemaName, tableName, newTable.build(), principalPrivileges);
transaction.commit();
}
// load the altered table and verify the data
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName);
List<ColumnHandle> columnHandles = metadata.getColumnHandles(session, tableHandle).values().stream().filter(columnHandle -> !((HiveColumnHandle) columnHandle).isHidden()).collect(toList());
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.empty());
assertEqualsIgnoreOrder(result.getMaterializedRows(), dataAfter.getMaterializedRows());
transaction.commit();
}
// inserting into partitions with a type mismatch should fail
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
ConnectorTableHandle tableHandle = getTableHandle(metadata, schemaTableName);
ConnectorInsertTableHandle insertTableHandle = metadata.beginInsert(session, tableHandle);
ConnectorPageSink sink = pageSinkProvider.createPageSink(transaction.getTransactionHandle(), session, insertTableHandle);
sink.appendPage(dataAfter.toPage());
Collection<Slice> fragments = getFutureValue(sink.finish());
metadata.finishInsert(session, insertTableHandle, fragments, ImmutableList.of());
transaction.commit();
fail("expected exception");
} catch (PrestoException e) {
// expected
assertEquals(e.getErrorCode(), HiveErrorCode.HIVE_PARTITION_SCHEMA_MISMATCH.toErrorCode());
}
}
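The failure this test expects comes from the column-type check shown in createWriter above. Below is a minimal standalone sketch of that rule (illustrative helper, not part of the test classes):
import io.prestosql.plugin.hive.HiveType;
import io.prestosql.plugin.hive.metastore.Column;
import java.util.List;
import static java.lang.Math.min;

final class PartitionSchemaCheckSketch
{
    private PartitionSchemaCheckSketch() {}

    // Returns true only if every column position shared by the table and the existing
    // partition declares the same HiveType; a mismatch maps to HIVE_PARTITION_SCHEMA_MISMATCH.
    static boolean schemasCompatible(List<Column> tableColumns, List<Column> partitionColumns)
    {
        for (int i = 0; i < min(tableColumns.size(), partitionColumns.size()); i++) {
            HiveType tableType = tableColumns.get(i).getType();
            HiveType partitionType = partitionColumns.get(i).getType();
            if (!tableType.equals(partitionType)) {
                return false;
            }
        }
        return true;
    }
}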
Use of io.prestosql.plugin.hive.metastore.StorageFormat in project hetu-core by openlookeng.
In the class AbstractTestHive, the method doCreateEmptyTable:
protected void doCreateEmptyTable(SchemaTableName tableName, HiveStorageFormat storageFormat, List<ColumnMetadata> createTableColumns) throws Exception {
List<String> partitionedBy = createTableColumns.stream().filter(column -> column.getName().equals("ds")).map(ColumnMetadata::getName).collect(toList());
String queryId;
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
queryId = session.getQueryId();
ConnectorTableMetadata tableMetadata = new ConnectorTableMetadata(tableName, createTableColumns, createTableProperties(storageFormat, partitionedBy));
metadata.createTable(session, tableMetadata, false);
transaction.commit();
}
try (Transaction transaction = newTransaction()) {
ConnectorSession session = newSession();
ConnectorMetadata metadata = transaction.getMetadata();
metadata.beginQuery(session);
// load the new table
ConnectorTableHandle tableHandle = getTableHandle(metadata, tableName);
// verify the metadata
ConnectorTableMetadata tableMetadata = metadata.getTableMetadata(session, getTableHandle(metadata, tableName));
List<ColumnMetadata> expectedColumns = createTableColumns.stream().map(column -> new ColumnMetadata(column.getName(), column.getType(), true, column.getComment(), columnExtraInfo(partitionedBy.contains(column.getName())), false, emptyMap(), partitionedBy.contains(column.getName()))).collect(toList());
assertEquals(filterNonHiddenColumnMetadata(tableMetadata.getColumns()), expectedColumns);
// verify table format
Table table = transaction.getMetastore(tableName.getSchemaName()).getTable(new HiveIdentity(session), tableName.getSchemaName(), tableName.getTableName()).get();
assertEquals(table.getStorage().getStorageFormat().getInputFormat(), storageFormat.getInputFormat());
// verify the node version and query ID
assertEquals(table.getParameters().get(PRESTO_VERSION_NAME), TEST_SERVER_VERSION);
assertEquals(table.getParameters().get(PRESTO_QUERY_ID_NAME), queryId);
// verify the table is empty
List<ColumnHandle> columnHandles = filterNonHiddenColumnHandles(metadata.getColumnHandles(session, tableHandle).values());
MaterializedResult result = readTable(transaction, tableHandle, columnHandles, session, TupleDomain.all(), OptionalInt.empty(), Optional.of(storageFormat));
assertEquals(result.getRowCount(), 0);
// verify basic statistics
if (partitionedBy.isEmpty()) {
HiveBasicStatistics statistics = getBasicStatisticsForTable(session, transaction, tableName);
assertEquals(statistics.getRowCount().getAsLong(), 0L);
assertEquals(statistics.getFileCount().getAsLong(), 0L);
assertEquals(statistics.getInMemoryDataSizeInBytes().getAsLong(), 0L);
assertEquals(statistics.getOnDiskDataSizeInBytes().getAsLong(), 0L);
}
}
}
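doCreateEmptyTable only compares input formats when verifying the table. Below is a slightly broader sketch of the same round-trip check, assuming the usual StorageFormat accessors (getSerDe, getInputFormat, getOutputFormat); the helper is illustrative, not part of the test suite:
import io.prestosql.plugin.hive.HiveStorageFormat;
import io.prestosql.plugin.hive.metastore.StorageFormat;
import io.prestosql.plugin.hive.metastore.Table;

final class StorageFormatAssertionSketch
{
    private StorageFormatAssertionSketch() {}

    // The StorageFormat persisted in the metastore should report the same serde and
    // input/output formats as the HiveStorageFormat used at CREATE TABLE time.
    static boolean formatMatches(Table table, HiveStorageFormat expected)
    {
        StorageFormat actual = table.getStorage().getStorageFormat();
        return actual.getSerDe().equals(expected.getSerDe())
                && actual.getInputFormat().equals(expected.getInputFormat())
                && actual.getOutputFormat().equals(expected.getOutputFormat());
    }
}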