Use of io.prestosql.plugin.hive.metastore.Partition in project boostkit-bigdata by kunpengcompute.
From the class TestBackgroundHiveSplitLoader, method testPartitionedTableWithDynamicFilter.
@Test
public void testPartitionedTableWithDynamicFilter()
        throws Exception
{
    TypeManager typeManager = new TestingTypeManager();
    List<HivePartitionMetadata> hivePartitionMetadatas = ImmutableList.of(new HivePartitionMetadata(
            new HivePartition(new SchemaTableName("testSchema", "table_name")),
            Optional.of(new Partition(
                    "testSchema",
                    "table_name",
                    ImmutableList.of("1"),
                    TABLE_STORAGE,
                    ImmutableList.of(TABLE_COLUMN),
                    ImmutableMap.of("param", "value"))),
            ImmutableMap.of()));
    ConnectorSession connectorSession = new TestingConnectorSession(new HiveSessionProperties(
            new HiveConfig()
                    .setMaxSplitSize(new DataSize(1.0, GIGABYTE))
                    .setDynamicFilterPartitionFilteringEnabled(true),
            new OrcFileWriterConfig(),
            new ParquetFileWriterConfig()).getSessionProperties());
    BackgroundHiveSplitLoader backgroundHiveSplitLoader = new BackgroundHiveSplitLoader(
            PARTITIONED_TABLE,
            hivePartitionMetadatas,
            TupleDomain.all(),
            BackgroundHiveSplitLoader.BucketSplitInfo.createBucketSplitInfo(Optional.empty(), Optional.empty()),
            connectorSession,
            new TestingHdfsEnvironment(TEST_FILES),
            new NamenodeStats(),
            new CachingDirectoryLister(new HiveConfig()),
            directExecutor(),
            2,
            false,
            Optional.empty(),
            createTestDynamicFilterSupplier("partitionColumn", ImmutableList.of(0L, 2L, 3L)),
            Optional.empty(),
            ImmutableMap.of(),
            typeManager);
    HiveSplitSource hiveSplitSource = hiveSplitSource(backgroundHiveSplitLoader);
    backgroundHiveSplitLoader.start(hiveSplitSource);
    List<HiveSplit> splits = drainSplits(hiveSplitSource);
    assertEquals(splits.size(), 0, "Splits should be filtered");
}
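The zero-split assertion holds because the partition's key value ("1") is not among the values the test's dynamic filter supplies (0, 2, 3). A minimal, self-contained sketch of that pruning decision, using a hypothetical isFilteredOut helper in place of the loader's internal isPartitionFiltered check:

// Illustrative sketch only; isFilteredOut is a hypothetical stand-in, not loader API.
import java.util.Set;

final class PartitionPruningSketch {
    // A partition is pruned when its key value is absent from the values admitted by the dynamic filter.
    static boolean isFilteredOut(long partitionKeyValue, Set<Long> admittedValues) {
        return !admittedValues.contains(partitionKeyValue);
    }

    public static void main(String[] args) {
        // The test's partition value is "1", while the dynamic filter admits 0, 2 and 3.
        System.out.println(isFilteredOut(1L, Set.of(0L, 2L, 3L))); // prints true, so no splits are generated
    }
}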
Use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.
From the class CarbondataMetadata, method finishInsertInNewPartition.
@Override
protected void finishInsertInNewPartition(ConnectorSession session, HiveInsertTableHandle handle, Table table, Map<String, Type> columnTypes, PartitionUpdate partitionUpdate, Map<List<String>, ComputedStatistics> partitionComputedStatistics, HiveACIDWriteType acidWriteType)
{
    // insert into new partition or overwrite existing partition
    if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.OVERWRITE) {
        List<String> partitionValues = toPartitionValues(partitionUpdate.getName());
        PartitionStatistics partitionStatistics = createPartitionStatistics(
                session,
                partitionUpdate.getStatistics(),
                columnTypes,
                getColumnStatistics(partitionComputedStatistics, partitionValues));
        metastore.finishInsertIntoExistingPartition(
                session,
                handle.getSchemaName(),
                handle.getTableName(),
                partitionValues,
                partitionUpdate.getWritePath(),
                partitionUpdate.getFileNames(),
                partitionStatistics,
                acidWriteType);
    }
    else if (partitionUpdate.getUpdateMode() == PartitionUpdate.UpdateMode.APPEND) {
        Partition partition = buildPartitionObject(session, table, partitionUpdate);
        if (!partition.getStorage().getStorageFormat().getInputFormat().equals(handle.getPartitionStorageFormat().getInputFormat())
                && HiveSessionProperties.isRespectTableFormat(session)) {
            throw new PrestoException(HiveErrorCode.HIVE_CONCURRENT_MODIFICATION_DETECTED, "Partition format changed during insert");
        }
        PartitionStatistics partitionStatistics = createPartitionStatistics(
                session,
                partitionUpdate.getStatistics(),
                columnTypes,
                getColumnStatistics(partitionComputedStatistics, partition.getValues()));
        metastore.addPartition(
                session,
                handle.getSchemaName(),
                handle.getTableName(),
                partition,
                partitionUpdate.getWritePath(),
                partitionStatistics,
                acidWriteType);
    }
}
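In the OVERWRITE branch, toPartitionValues turns the partition name back into the value list used to address the existing partition. A rough, hypothetical illustration of that name-to-values mapping (the real utility also handles escaping, which is omitted here); the example name ds=2020-01-01/country=US is assumed:

// Simplified sketch of a Hive-style partition-name parser; not the actual metastore utility.
import java.util.ArrayList;
import java.util.List;

final class PartitionNameSketch {
    static List<String> toValues(String partitionName) {
        List<String> values = new ArrayList<>();
        for (String piece : partitionName.split("/")) {
            int eq = piece.indexOf('=');
            // Keep only the value part of each "column=value" component.
            values.add(eq < 0 ? piece : piece.substring(eq + 1));
        }
        return values;
    }

    public static void main(String[] args) {
        System.out.println(toValues("ds=2020-01-01/country=US")); // [2020-01-01, US]
    }
}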
Use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.
From the class CarbondataMetadata, method updateSchemaInfoDropColumn.
private SchemaEvolutionEntry updateSchemaInfoDropColumn(ColumnHandle column)
{
    HiveColumnHandle columnHandle = (HiveColumnHandle) column;
    TableSchema tableSchema = tableInfo.getFactTable();
    List<ColumnSchema> tableColumns = tableSchema.getListOfColumns();
    int currentSchemaOrdinal = tableColumns.stream()
            .max(Comparator.comparing(ColumnSchema::getSchemaOrdinal))
            .orElseThrow(NoSuchElementException::new)
            .getSchemaOrdinal() + 1;
    TableSchemaBuilder schemaBuilder = new TableSchemaBuilder();
    List<ColumnSchema> columnSchemas = new ArrayList<>();
    ColumnSchema newColumn = schemaBuilder.addColumn(
            new StructField(columnHandle.getColumnName(), CarbondataHetuFilterUtil.spi2CarbondataTypeMapper(columnHandle)),
            null,
            false,
            false);
    newColumn.setSchemaOrdinal(currentSchemaOrdinal);
    columnSchemas.add(newColumn);
    PartitionInfo partitionInfo = tableInfo.getFactTable().getPartitionInfo();
    if (partitionInfo != null) {
        List<String> partitionColumnSchemaList = tableInfo.getFactTable().getPartitionInfo().getColumnSchemaList().stream()
                .map(cols -> cols.getColumnName())
                .collect(toList());
        if (partitionColumnSchemaList.stream().anyMatch(partitionColumn -> partitionColumn.equals(newColumn.getColumnName()))) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, "Partition columns cannot be dropped");
        }
        // When the table has two columns, dropping the unpartitioned column would leave only partition columns.
        // Collect the remaining column names before comparing; a raw Stream never equals a List.
        List<String> remainingColumns = tableColumns.stream()
                .filter(cols -> !cols.getColumnName().equals(newColumn.getColumnName()))
                .map(cols -> cols.getColumnName())
                .collect(toList());
        if (remainingColumns.equals(partitionColumnSchemaList)) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, "Cannot have table with all columns as partition columns");
        }
    }
    if (!tableColumns.stream().filter(cols -> cols.getColumnName().equals(newColumn.getColumnName())).collect(toList()).isEmpty()) {
        if (newColumn.isComplexColumn()) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, "Complex column cannot be dropped");
        }
    }
    else {
        throw new PrestoException(GENERIC_INTERNAL_ERROR, "Column " + newColumn.getColumnName() + " does not exist in the table");
    }
    tableInfo.setLastUpdatedTime(System.currentTimeMillis());
    tableInfo.setFactTable(tableSchema);
    SchemaEvolutionEntry schemaEvolutionEntry = new SchemaEvolutionEntry();
    schemaEvolutionEntry.setTimeStamp(timeStamp);
    schemaEvolutionEntry.setRemoved(columnSchemas);
    return schemaEvolutionEntry;
}
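To make the guard concrete: with a two-column table whose only other column is the partition column, the remaining-column list equals the partition-column list and the drop has to be rejected. A tiny stand-alone illustration with made-up column names (id, ds):

// Self-contained illustration of the "all remaining columns are partition columns" guard; column names are hypothetical.
import java.util.List;
import java.util.stream.Collectors;

final class DropColumnGuardSketch {
    public static void main(String[] args) {
        List<String> tableColumns = List.of("id", "ds");    // "ds" is the partition column
        List<String> partitionColumns = List.of("ds");
        String columnToDrop = "id";
        List<String> remaining = tableColumns.stream()
                .filter(name -> !name.equals(columnToDrop))
                .collect(Collectors.toList());
        // remaining is [ds], which equals the partition column list, so this drop must be rejected.
        System.out.println(remaining.equals(partitionColumns)); // prints true
    }
}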
Use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.
From the class BackgroundHiveSplitLoader, method loadPartition.
private ListenableFuture<?> loadPartition(HivePartitionMetadata partition)
        throws IOException
{
    HivePartition hivePartition = partition.getHivePartition();
    String partitionName = hivePartition.getPartitionId();
    Properties schema = getPartitionSchema(table, partition.getPartition());
    List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition());
    TupleDomain<HiveColumnHandle> effectivePredicate = (TupleDomain<HiveColumnHandle>) compactEffectivePredicate;
    if (dynamicFilterSupplier != null && isDynamicFilteringSplitFilteringEnabled(session)) {
        if (isPartitionFiltered(partitionKeys, dynamicFilterSupplier.get(), typeManager)) {
            // Avoid listing files and creating splits from a partition if it has been pruned due to dynamic filters
            return COMPLETED_FUTURE;
        }
    }
    Path path = new Path(getPartitionLocation(table, partition.getPartition()));
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false, jobConf);
    FileSystem fs = hdfsEnvironment.getFileSystem(hdfsContext, path);
    boolean s3SelectPushdownEnabled = shouldEnablePushdownForTable(session, table, path.toString(), partition.getPartition());
    if (inputFormat instanceof SymlinkTextInputFormat) {
        if (tableBucketInfo.isPresent()) {
            throw new PrestoException(NOT_SUPPORTED, "Bucketed table in SymlinkTextInputFormat is not yet supported");
        }
        // TODO: This should use an iterator like the HiveFileIterator
        ListenableFuture<?> lastResult = COMPLETED_FUTURE;
        for (Path targetPath : getTargetPathsFromSymlink(fs, path)) {
            // The input should be in TextInputFormat.
            TextInputFormat targetInputFormat = new TextInputFormat();
            // the splits must be generated using the file system for the target path
            // get the configuration for the target path -- it may be a different hdfs instance
            FileSystem targetFilesystem = hdfsEnvironment.getFileSystem(hdfsContext, targetPath);
            jobConf.setInputFormat(TextInputFormat.class);
            targetInputFormat.configure(jobConf);
            FileInputFormat.setInputPaths(jobConf, targetPath);
            InputSplit[] targetSplits = targetInputFormat.getSplits(jobConf, 0);
            InternalHiveSplitFactory splitFactory = new InternalHiveSplitFactory(
                    targetFilesystem,
                    partitionName,
                    inputFormat,
                    schema,
                    partitionKeys,
                    effectivePredicate,
                    partition.getColumnCoercions(),
                    Optional.empty(),
                    isForceLocalScheduling(session),
                    s3SelectPushdownEnabled);
            lastResult = addSplitsToSource(targetSplits, splitFactory);
            if (stopped) {
                return COMPLETED_FUTURE;
            }
        }
        return lastResult;
    }
    Optional<BucketConversion> bucketConversion = Optional.empty();
    boolean bucketConversionRequiresWorkerParticipation = false;
    if (partition.getPartition().isPresent()) {
        Optional<HiveBucketProperty> partitionBucketProperty = partition.getPartition().get().getStorage().getBucketProperty();
        if (tableBucketInfo.isPresent() && partitionBucketProperty.isPresent()) {
            int readBucketCount = tableBucketInfo.get().getReadBucketCount();
            // TODO can partition's bucketing_version be different from table's?
            BucketingVersion bucketingVersion = partitionBucketProperty.get().getBucketingVersion();
            int partitionBucketCount = partitionBucketProperty.get().getBucketCount();
            // Here, it is only checking whether a BucketConversion is needed.
            if (readBucketCount != partitionBucketCount) {
                bucketConversion = Optional.of(new BucketConversion(bucketingVersion, readBucketCount, partitionBucketCount, tableBucketInfo.get().getBucketColumns()));
                if (readBucketCount > partitionBucketCount) {
                    bucketConversionRequiresWorkerParticipation = true;
                }
            }
        }
    }
    InternalHiveSplitFactory splitFactory = new InternalHiveSplitFactory(
            fs,
            partitionName,
            inputFormat,
            schema,
            partitionKeys,
            effectivePredicate,
            partition.getColumnCoercions(),
            bucketConversionRequiresWorkerParticipation ? bucketConversion : Optional.empty(),
            isForceLocalScheduling(session),
            s3SelectPushdownEnabled);
    // For input formats annotated with UseFileSplitsFromInputFormat, rely on the input format to obtain file splits.
    if (!isHudiParquetInputFormat(inputFormat) && shouldUseFileSplitsFromInputFormat(inputFormat)) {
        if (tableBucketInfo.isPresent()) {
            throw new PrestoException(NOT_SUPPORTED, "Presto cannot read bucketed partition in an input format with UseFileSplitsFromInputFormat annotation: " + inputFormat.getClass().getSimpleName());
        }
        if (AcidUtils.isTransactionalTable(table.getParameters())) {
            throw new PrestoException(NOT_SUPPORTED, "Hive transactional tables in an input format with UseFileSplitsFromInputFormat annotation are not supported: " + inputFormat.getClass().getSimpleName());
        }
        FileInputFormat.setInputPaths(jobConf, path);
        InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
        return addSplitsToSource(splits, splitFactory);
    }
    PathFilter pathFilter = isHudiParquetInputFormat(inputFormat) ? hoodiePathFilterSupplier.get() : path1 -> true;
    // S3 Select pushdown works at the granularity of individual S3 objects,
    // therefore we must not split files when it is enabled.
    boolean splittable = getHeaderCount(schema) == 0 && getFooterCount(schema) == 0 && !s3SelectPushdownEnabled;
    List<Path> readPaths;
    Optional<DeleteDeltaLocations> deleteDeltaLocations;
    long min = Long.MAX_VALUE;
    long max = Long.MIN_VALUE;
    if (AcidUtils.isTransactionalTable(table.getParameters())) {
        boolean isVacuum = queryType.map(type -> type == QueryType.VACUUM).orElse(false);
        AcidUtils.Directory directory = hdfsEnvironment.doAs(hdfsContext.getIdentity().getUser(), () -> {
            ValidWriteIdList writeIdList = validWriteIds.orElseThrow(() -> new IllegalStateException("No validWriteIds present"));
            if (isVacuum) {
                writeIdList = new ValidCompactorWriteIdList(writeIdList.writeToString()) {
                    @Override
                    public RangeResponse isWriteIdRangeValid(long minWriteId, long maxWriteId) {
                        // For unknown reasons, ValidCompactorWriteIdList#isWriteIdRangeValid() does not check for
                        // aborted transactions, and AcidUtils.getAcidState() adds aborted transactions to both the
                        // aborted and working lists. Avoid this by overriding.
                        RangeResponse writeIdRangeValid = super.isWriteIdRangeValid(minWriteId, maxWriteId);
                        if (writeIdRangeValid == RangeResponse.NONE) {
                            return RangeResponse.NONE;
                        }
                        else if (super.isWriteIdRangeAborted(minWriteId, maxWriteId) == RangeResponse.ALL) {
                            return RangeResponse.NONE;
                        }
                        return writeIdRangeValid;
                    }
                };
            }
            return AcidUtils.getAcidState(path, configuration, writeIdList, Ref.from(false), true, table.getParameters());
        });
        if (AcidUtils.isFullAcidTable(table.getParameters())) {
            // From Hive version >= 3.0, delta/base files will always have the file '_orc_acid_version' with value >= '2'.
            Path baseOrDeltaPath = directory.getBaseDirectory() != null
                    ? directory.getBaseDirectory()
                    : (directory.getCurrentDirectories().size() > 0 ? directory.getCurrentDirectories().get(0).getPath() : null);
            if (baseOrDeltaPath != null && AcidUtils.OrcAcidVersion.getAcidVersionFromMetaFile(baseOrDeltaPath, fs) < 2) {
                throw new PrestoException(NOT_SUPPORTED, "Hive transactional tables are supported with Hive 3.0 and only after a major compaction has been run");
            }
        }
        readPaths = new ArrayList<>();
        boolean isFullVacuum = isVacuum ? Boolean.valueOf(queryInfo.get("FULL").toString()) : false;
        if (isFullVacuum) {
            // Base will contain everything
            min = 0;
        }
        // In case of vacuum, include the base directory only for a full vacuum.
        if (directory.getBaseDirectory() != null && (!isVacuum || isFullVacuum)) {
            readPaths.add(directory.getBaseDirectory());
            if (isVacuum) {
                min = 0;
                max = AcidUtils.parseBase(directory.getBaseDirectory());
            }
        }
        // delta directories
        for (AcidUtils.ParsedDelta delta : directory.getCurrentDirectories()) {
            if (!delta.isDeleteDelta()) {
                readPaths.add(delta.getPath());
            }
            // In case of minor compaction, all delete_delta files should be compacted separately.
            else if (isVacuum && !isFullVacuum) {
                readPaths.add(delta.getPath());
            }
            if (isVacuum) {
                min = Math.min(delta.getMinWriteId(), min);
                max = Math.max(delta.getMaxWriteId(), max);
            }
        }
        // Create a registry of delete_delta directories for the partition
        DeleteDeltaLocations.Builder deleteDeltaLocationsBuilder = DeleteDeltaLocations.builder(path);
        for (AcidUtils.ParsedDelta delta : directory.getCurrentDirectories()) {
            // In case of minor compaction, delete_delta directories should not be used for masking.
            if (delta.isDeleteDelta() && (!isVacuum || isFullVacuum)) {
                // For unknown reasons, ParsedDelta.getStatementId() returns 0 even though the parsed statement is -1,
                // which creates an issue while trying to locate the delete_delta directory.
                // So parse the statement id again here.
                OptionalInt statementId = getStatementId(delta.getPath().getName());
                int stmtId = statementId.orElse(0);
                deleteDeltaLocationsBuilder.addDeleteDelta(delta.getPath(), delta.getMinWriteId(), delta.getMaxWriteId(), stmtId);
            }
        }
        deleteDeltaLocations = deleteDeltaLocationsBuilder.build();
        if (!directory.getOriginalFiles().isEmpty()) {
            LOG.info("Now supporting read from non-ACID files in ACID reader");
            // non-ACID file
            int numberOfBuckets = Integer.parseInt(schema.getProperty("bucket_count"));
            long[] bucketStartRowOffset = new long[Integer.max(numberOfBuckets, 1)];
            for (HadoopShims.HdfsFileStatusWithId f : directory.getOriginalFiles()) {
                Path currFilePath = f.getFileStatus().getPath();
                int currBucketNumber = getBucketNumber(currFilePath.getName()).getAsInt();
                fileIterators.addLast(createInternalHiveSplitIterator(currFilePath, fs, splitFactory, splittable, deleteDeltaLocations, Optional.of(bucketStartRowOffset[currBucketNumber]), pathFilter));
                try {
                    Reader copyReader = OrcFile.createReader(f.getFileStatus().getPath(), OrcFile.readerOptions(configuration));
                    bucketStartRowOffset[currBucketNumber] += copyReader.getNumberOfRows();
                }
                catch (Exception e) {
                    throw new PrestoException(NOT_SUPPORTED, e.getMessage());
                }
            }
        }
        if (isVacuum && !readPaths.isEmpty()) {
            Object vacuumHandle = queryInfo.get("vacuumHandle");
            if (vacuumHandle != null && vacuumHandle instanceof HiveVacuumTableHandle) {
                HiveVacuumTableHandle hiveVacuumTableHandle = (HiveVacuumTableHandle) vacuumHandle;
                hiveVacuumTableHandle.addRange(partitionName, new Range(min, max));
            }
        }
    }
    else {
        readPaths = ImmutableList.of(path);
        deleteDeltaLocations = Optional.empty();
    }
    // Bucketed partitions are fully loaded immediately since all files must be loaded to determine the file to bucket mapping
    if (tableBucketInfo.isPresent()) {
        // TODO document in addToQueue() that it is sufficient to hold on to last returned future
        ListenableFuture<?> lastResult = immediateFuture(null);
        for (Path readPath : readPaths) {
            lastResult = hiveSplitSource.addToQueue(getBucketedSplits(readPath, fs, splitFactory, tableBucketInfo.get(), bucketConversion, getDeleteDeltaLocationFor(readPath, deleteDeltaLocations), pathFilter));
        }
        return lastResult;
    }
    for (Path readPath : readPaths) {
        fileIterators.addLast(createInternalHiveSplitIterator(readPath, fs, splitFactory, splittable, getDeleteDeltaLocationFor(readPath, deleteDeltaLocations), Optional.empty(), pathFilter));
    }
    return COMPLETED_FUTURE;
}
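One detail worth isolating: a BucketConversion is only set up when the table's read bucket count differs from the partition's bucket count, and worker participation is only required when readBucketCount is larger. A small stand-alone sketch of that decision (hypothetical helper methods, not loader API):

// Illustrative helpers mirroring the bucket-count comparison above; not part of BackgroundHiveSplitLoader.
final class BucketConversionSketch {
    static boolean needsConversion(int readBucketCount, int partitionBucketCount) {
        return readBucketCount != partitionBucketCount;
    }

    static boolean requiresWorkerParticipation(int readBucketCount, int partitionBucketCount) {
        return needsConversion(readBucketCount, partitionBucketCount) && readBucketCount > partitionBucketCount;
    }

    public static void main(String[] args) {
        System.out.println(needsConversion(32, 16));              // true: counts differ, a conversion is recorded
        System.out.println(requiresWorkerParticipation(32, 16));  // true: reading more buckets than the partition has
        System.out.println(requiresWorkerParticipation(16, 32));  // false
    }
}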
Use of io.prestosql.plugin.hive.metastore.Partition in project hetu-core by openlookeng.
From the class FileHiveMetastore, method alterPartition.
@Override
public synchronized void alterPartition(HiveIdentity identity, String databaseName, String tableName, PartitionWithStatistics partitionWithStatistics)
{
    Table table = getRequiredTable(databaseName, tableName);
    Partition partition = partitionWithStatistics.getPartition();
    verifiedPartition(table, partition);
    Path partitionMetadataDirectory = getPartitionMetadataDirectory(table, partition.getValues());
    writeSchemaFile("partition", partitionMetadataDirectory, partitionCodec, new PartitionMetadata(table, partitionWithStatistics), true);
}
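alterPartition rewrites the partition's schema file under a directory derived from the partition's values. A hypothetical sketch of that kind of path derivation; the "column=value" directory layout shown here is an assumption for illustration, not taken from the real getPartitionMetadataDirectory:

// Assumed layout: "<tableMetadataDir>/<col1=val1/col2=val2>", with columns and values paired positionally.
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;

final class PartitionDirectorySketch {
    static Path partitionMetadataDirectory(Path tableMetadataDirectory, List<String> partitionColumns, List<String> partitionValues) {
        StringBuilder name = new StringBuilder();
        for (int i = 0; i < partitionColumns.size(); i++) {
            if (i > 0) {
                name.append('/');
            }
            name.append(partitionColumns.get(i)).append('=').append(partitionValues.get(i));
        }
        return tableMetadataDirectory.resolve(name.toString());
    }

    public static void main(String[] args) {
        System.out.println(partitionMetadataDirectory(
                Paths.get("/metastore/testSchema/table_name"),
                List.of("ds"),
                List.of("2020-01-01")));
        // /metastore/testSchema/table_name/ds=2020-01-01
    }
}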