Search in sources :

Example 1 with TypeTranslator

Use of io.prestosql.plugin.hive.TypeTranslator in the project hetu-core by openLooKeng.

The method updateSchemaInfoAddColumn of the class CarbondataMetadata.

/**
 * Adds one column to the in-memory table schema and returns the matching
 * schema-evolution entry.
 *
 * <p>The column list of the fact table is rebuilt in this order:
 * <ol>
 *   <li>existing dimension columns that are neither complex, VARCHAR, nor of schema ordinal -1,
 *       followed by the new column when it is a non-VARCHAR dimension;</li>
 *   <li>existing VARCHAR dimension columns;</li>
 *   <li>the new column when it is VARCHAR;</li>
 *   <li>existing dimension columns that are complex or have schema ordinal -1;</li>
 *   <li>existing non-dimension (measure) columns;</li>
 *   <li>the new column when it is not a dimension.</li>
 * </ol>
 * When the fact table has partition info, its partition columns are moved to the end.
 * The rebuilt list is written back to {@code tableInfo}, whose last-updated time is set
 * to {@code timeStamp}.
 *
 * @param column metadata of the column to add
 * @return a {@link SchemaEvolutionEntry} stamped with {@code timeStamp} that lists the new column as added
 * @throws PrestoException if a visible column name occurs more than once, if the new column
 *         is a complex column, or if two columns share the same unique id
 * @throws NoSuchElementException if the table currently has no columns, so no maximum
 *         schema ordinal can be determined
 */
private SchemaEvolutionEntry updateSchemaInfoAddColumn(ColumnMetadata column) {
    HiveColumnHandle columnHandle = new HiveColumnHandle(column.getName(), HiveType.toHiveType(typeTranslator, column.getType()), column.getType().getTypeSignature(), tableInfo.getFactTable().getListOfColumns().size(), HiveColumnHandle.ColumnType.REGULAR, Optional.empty());
    TableSchema tableSchema = tableInfo.getFactTable();
    List<ColumnSchema> tableColumns = tableSchema.getListOfColumns();
    // The new column takes the next schema ordinal after the current maximum.
    int currentSchemaOrdinal = tableColumns.stream().max(Comparator.comparing(ColumnSchema::getSchemaOrdinal)).orElseThrow(NoSuchElementException::new).getSchemaOrdinal() + 1;
    List<ColumnSchema> longStringColumns = new ArrayList<>();
    // Start with the existing "plain" dimension columns.
    List<ColumnSchema> allColumns = tableColumns.stream().filter(cols -> cols.isDimensionColumn() && !cols.getDataType().isComplexType() && cols.getSchemaOrdinal() != -1 && (cols.getDataType() != DataTypes.VARCHAR)).collect(toList());
    TableSchemaBuilder schemaBuilder = new TableSchemaBuilder();
    List<ColumnSchema> columnSchemas = new ArrayList<>();
    ColumnSchema newColumn = schemaBuilder.addColumn(new StructField(columnHandle.getName(), CarbondataHetuFilterUtil.spi2CarbondataTypeMapper(columnHandle)), null, false, false);
    newColumn.setSchemaOrdinal(currentSchemaOrdinal);
    columnSchemas.add(newColumn);
    if (newColumn.getDataType() == DataTypes.VARCHAR) {
        longStringColumns.add(newColumn);
    } else if (newColumn.isDimensionColumn()) {
        // add the column which is not long string
        allColumns.add(newColumn);
    }
    // put the old long string columns
    allColumns.addAll(tableColumns.stream().filter(cols -> cols.isDimensionColumn() && (cols.getDataType() == DataTypes.VARCHAR)).collect(toList()));
    // and the new long string column after old long string columns
    allColumns.addAll(longStringColumns);
    // put complex type columns at the end of dimension columns
    allColumns.addAll(tableColumns.stream().filter(cols -> cols.isDimensionColumn() && (cols.isComplexColumn() || cols.getSchemaOrdinal() == -1)).collect(toList()));
    // original measure columns
    allColumns.addAll(tableColumns.stream().filter(cols -> !cols.isDimensionColumn()).collect(toList()));
    // add new measure column
    if (!newColumn.isDimensionColumn()) {
        allColumns.add(newColumn);
    }
    // Two visible columns with the same name already constitute a duplicate,
    // so the threshold is "more than one", not "more than two".
    allColumns.stream().filter(cols -> !cols.isInvisible()).collect(Collectors.groupingBy(ColumnSchema::getColumnName)).forEach((columnName, schemaList) -> {
        if (schemaList.size() > 1) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Duplicate columns found"));
        }
    });
    if (newColumn.isComplexColumn()) {
        throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Complex column cannot be added"));
    }
    // Single pass: Set.add returns false when the id has been seen before.
    Set<String> seenColumnIds = new HashSet<>();
    for (ColumnSchema columnSchema : allColumns) {
        if (!seenColumnIds.add(columnSchema.getColumnUniqueId())) {
            throw new PrestoException(GENERIC_INTERNAL_ERROR, format("Two columns can not have same columnId"));
        }
    }
    if (tableSchema.getPartitionInfo() != null) {
        // Partition columns are kept at the very end of the column list.
        List<ColumnSchema> par = tableSchema.getPartitionInfo().getColumnSchemaList();
        allColumns = allColumns.stream().filter(cols -> !par.contains(cols)).collect(toList());
        allColumns.addAll(par);
    }
    tableSchema.setListOfColumns(allColumns);
    tableInfo.setLastUpdatedTime(timeStamp);
    tableInfo.setFactTable(tableSchema);
    SchemaEvolutionEntry schemaEvolutionEntry = new SchemaEvolutionEntry();
    schemaEvolutionEntry.setTimeStamp(timeStamp);
    schemaEvolutionEntry.setAdded(columnSchemas);
    return schemaEvolutionEntry;
}
Also used : Arrays(java.util.Arrays) StorageFormat(io.prestosql.plugin.hive.metastore.StorageFormat) BaseStorageFormat(io.prestosql.plugin.hive.BaseStorageFormat) HiveTableHandle(io.prestosql.plugin.hive.HiveTableHandle) FileSystem(org.apache.hadoop.fs.FileSystem) HiveWriteUtils(io.prestosql.plugin.hive.HiveWriteUtils) HiveUtil.hiveColumnHandles(io.prestosql.plugin.hive.HiveUtil.hiveColumnHandles) MetastoreUtil(io.prestosql.plugin.hive.metastore.MetastoreUtil) TableAlreadyExistsException(io.prestosql.spi.connector.TableAlreadyExistsException) ConnectorVacuumTableHandle(io.prestosql.spi.connector.ConnectorVacuumTableHandle) StringUtils(org.apache.commons.lang3.StringUtils) CarbonLockFactory(org.apache.carbondata.core.locks.CarbonLockFactory) HiveUtil.getPartitionKeyColumnHandles(io.prestosql.plugin.hive.HiveUtil.getPartitionKeyColumnHandles) ConnectorDeleteAsInsertTableHandle(io.prestosql.spi.connector.ConnectorDeleteAsInsertTableHandle) CarbonCommonConstants(org.apache.carbondata.core.constants.CarbonCommonConstants) Future(java.util.concurrent.Future) TableNotFoundException(io.prestosql.spi.connector.TableNotFoundException) ConnectorUpdateTableHandle(io.prestosql.spi.connector.ConnectorUpdateTableHandle) Configuration(org.apache.hadoop.conf.Configuration) Map(java.util.Map) HIVE_STRING(io.prestosql.plugin.hive.HiveType.HIVE_STRING) StringEscapeUtils(org.apache.commons.lang3.StringEscapeUtils) HiveErrorCode(io.prestosql.plugin.hive.HiveErrorCode) ThriftWrapperSchemaConverterImpl(org.apache.carbondata.core.metadata.converter.ThriftWrapperSchemaConverterImpl) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) Set(java.util.Set) LOCATION_PROPERTY(io.prestosql.plugin.hive.HiveTableProperties.LOCATION_PROPERTY) HiveTableProperties.getTransactionalValue(io.prestosql.plugin.hive.HiveTableProperties.getTransactionalValue) HiveOutputTableHandle(io.prestosql.plugin.hive.HiveOutputTableHandle) Collectors.joining(java.util.stream.Collectors.joining) 
BlockMappingVO(org.apache.carbondata.core.mutate.data.BlockMappingVO) CarbonLoadModel(org.apache.carbondata.processing.loading.model.CarbonLoadModel) META_TABLE_NAME(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_NAME) Table(io.prestosql.plugin.hive.metastore.Table) GENERIC_INTERNAL_ERROR(io.prestosql.spi.StandardErrorCode.GENERIC_INTERNAL_ERROR) AccessControlMetadata(io.prestosql.plugin.hive.security.AccessControlMetadata) TableOptionConstant(org.apache.carbondata.processing.util.TableOptionConstant) SortingColumn(io.prestosql.plugin.hive.metastore.SortingColumn) PartitionInfo(org.apache.carbondata.core.metadata.schema.PartitionInfo) TypeTranslator(io.prestosql.plugin.hive.TypeTranslator) ConnectorVacuumTableInfo(io.prestosql.spi.connector.ConnectorVacuumTableInfo) MapredCarbonOutputFormat(org.apache.carbondata.hive.MapredCarbonOutputFormat) StructField(org.apache.carbondata.core.metadata.datatype.StructField) CarbonUtil(org.apache.carbondata.core.util.CarbonUtil) CarbondataTableProperties.getCarbondataLocation(io.hetu.core.plugin.carbondata.CarbondataTableProperties.getCarbondataLocation) HiveWriterFactory(io.prestosql.plugin.hive.HiveWriterFactory) Database(io.prestosql.plugin.hive.metastore.Database) SchemaEvolutionEntry(org.apache.carbondata.core.metadata.schema.SchemaEvolutionEntry) Slice(io.airlift.slice.Slice) Partition(io.prestosql.plugin.hive.metastore.Partition) TRANSACTIONAL(io.prestosql.plugin.hive.HiveTableProperties.TRANSACTIONAL) DataTypes(org.apache.carbondata.core.metadata.datatype.DataTypes) CarbonDataMergerUtil(org.apache.carbondata.processing.merger.CarbonDataMergerUtil) SimpleDateFormat(java.text.SimpleDateFormat) ComputedStatistics(io.prestosql.spi.statistics.ComputedStatistics) CarbondataTableReader(io.hetu.core.plugin.carbondata.impl.CarbondataTableReader) ArrayList(java.util.ArrayList) HdfsEnvironment(io.prestosql.plugin.hive.HdfsEnvironment) 
ThreadLocalSessionInfo(org.apache.carbondata.core.util.ThreadLocalSessionInfo) ScheduledExecutorService(java.util.concurrent.ScheduledExecutorService) LocationService(io.prestosql.plugin.hive.LocationService) LockUsage(org.apache.carbondata.core.locks.LockUsage) CarbonUpdateUtil(org.apache.carbondata.core.mutate.CarbonUpdateUtil) SemiTransactionalHiveMetastore(io.prestosql.plugin.hive.metastore.SemiTransactionalHiveMetastore) SegmentStatusManager(org.apache.carbondata.core.statusmanager.SegmentStatusManager) ConnectorOutputTableHandle(io.prestosql.spi.connector.ConnectorOutputTableHandle) Properties(java.util.Properties) PartitionStatistics(io.prestosql.plugin.hive.PartitionStatistics) CarbonFile(org.apache.carbondata.core.datastore.filesystem.CarbonFile) CarbonOutputCommitter(org.apache.carbondata.hadoop.api.CarbonOutputCommitter) HiveStorageFormat(io.prestosql.plugin.hive.HiveStorageFormat) CarbonTablePath(org.apache.carbondata.core.util.path.CarbonTablePath) HiveInsertTableHandle(io.prestosql.plugin.hive.HiveInsertTableHandle) HiveTableProperties(io.prestosql.plugin.hive.HiveTableProperties) TypeManager(io.prestosql.spi.type.TypeManager) ICarbonLock(org.apache.carbondata.core.locks.ICarbonLock) IOException(java.io.IOException) CarbonTableIdentifier(org.apache.carbondata.core.metadata.CarbonTableIdentifier) PrincipalPrivileges(io.prestosql.plugin.hive.metastore.PrincipalPrivileges) ConnectorTableMetadata(io.prestosql.spi.connector.ConnectorTableMetadata) File(java.io.File) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) ExecutionException(java.util.concurrent.ExecutionException) OutputCommitter(org.apache.hadoop.mapreduce.OutputCommitter) TreeMap(java.util.TreeMap) ColumnHandle(io.prestosql.spi.connector.ColumnHandle) AbsoluteTableIdentifier(org.apache.carbondata.core.metadata.AbsoluteTableIdentifier) HiveWrittenPartitions(io.prestosql.plugin.hive.HiveWrittenPartitions) TableType(org.apache.hadoop.hive.metastore.TableType) 
META_TABLE_LOCATION(org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.META_TABLE_LOCATION) ConfigurationUtils(io.prestosql.plugin.hive.util.ConfigurationUtils) LocationHandle(io.prestosql.plugin.hive.LocationHandle) CarbonTableOutputFormat(org.apache.carbondata.hadoop.api.CarbonTableOutputFormat) CarbonMetadata(org.apache.carbondata.core.metadata.CarbonMetadata) HiveBasicStatistics(io.prestosql.plugin.hive.HiveBasicStatistics) HivePartitionManager(io.prestosql.plugin.hive.HivePartitionManager) ThriftWriter(org.apache.carbondata.core.writer.ThriftWriter) HiveTypeName(io.prestosql.plugin.hive.HiveTypeName) Date(java.util.Date) SYNTHESIZED(io.prestosql.plugin.hive.HiveColumnHandle.ColumnType.SYNTHESIZED) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) Duration(io.airlift.units.Duration) SegmentFileStore(org.apache.carbondata.core.metadata.SegmentFileStore) TaskType(org.apache.hadoop.mapreduce.TaskType) Logger(org.apache.log4j.Logger) ConnectorSession(io.prestosql.spi.connector.ConnectorSession) Gson(com.google.gson.Gson) Locale(java.util.Locale) HiveCarbonUtil(org.apache.carbondata.hive.util.HiveCarbonUtil) Path(org.apache.hadoop.fs.Path) SegmentUpdateStatusManager(org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager) Type(io.prestosql.spi.type.Type) CarbonTable(org.apache.carbondata.core.metadata.schema.table.CarbonTable) EncodedLoadModel(io.hetu.core.plugin.carbondata.CarbondataConstants.EncodedLoadModel) HiveBucketing(io.prestosql.plugin.hive.HiveBucketing) PrestoException(io.prestosql.spi.PrestoException) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) FileInputFormat(org.apache.hadoop.mapred.FileInputFormat) Collection(java.util.Collection) UUID(java.util.UUID) TableSchema(org.apache.carbondata.core.metadata.schema.table.TableSchema) HiveType(io.prestosql.plugin.hive.HiveType) CarbonLockUtil(org.apache.carbondata.core.locks.CarbonLockUtil) 
Collectors(java.util.stream.Collectors) String.format(java.lang.String.format) CarbonTableInputFormat(org.apache.carbondata.hadoop.api.CarbonTableInputFormat) List(java.util.List) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) Job(org.apache.hadoop.mapreduce.Job) TableSchemaBuilder(org.apache.carbondata.core.metadata.schema.table.TableSchemaBuilder) Optional(java.util.Optional) NOT_SUPPORTED(io.prestosql.spi.StandardErrorCode.NOT_SUPPORTED) HiveStatisticsProvider(io.prestosql.plugin.hive.statistics.HiveStatisticsProvider) JsonCodec(io.airlift.json.JsonCodec) HiveBucketProperty(io.prestosql.plugin.hive.HiveBucketProperty) ConnectorOutputMetadata(io.prestosql.spi.connector.ConnectorOutputMetadata) Segment(org.apache.carbondata.core.index.Segment) HiveSessionProperties(io.prestosql.plugin.hive.HiveSessionProperties) ConnectorNewTableLayout(io.prestosql.spi.connector.ConnectorNewTableLayout) TableInfo(org.apache.carbondata.core.metadata.schema.table.TableInfo) HashMap(java.util.HashMap) TableOperation(org.apache.carbondata.core.features.TableOperation) CompactionType(org.apache.carbondata.processing.merger.CompactionType) IOConstants(org.apache.hadoop.hive.ql.io.IOConstants) HiveUtil.toPartitionValues(io.prestosql.plugin.hive.HiveUtil.toPartitionValues) FileFactory(org.apache.carbondata.core.datastore.impl.FileFactory) SegmentStatus(org.apache.carbondata.core.statusmanager.SegmentStatus) LoadMetadataDetails(org.apache.carbondata.core.statusmanager.LoadMetadataDetails) Function(java.util.function.Function) ObjectSerializationUtil(org.apache.carbondata.core.util.ObjectSerializationUtil) JobContextImpl(org.apache.hadoop.mapreduce.task.JobContextImpl) HashSet(java.util.HashSet) JobStatus(org.apache.hadoop.mapred.JobStatus) SchemaTableName(io.prestosql.spi.connector.SchemaTableName) ImmutableList(com.google.common.collect.ImmutableList) FileWriteOperation(org.apache.carbondata.core.fileoperations.FileWriteOperation) 
Objects.requireNonNull(java.util.Objects.requireNonNull) CarbonLoaderUtil(org.apache.carbondata.processing.util.CarbonLoaderUtil) HiveACIDWriteType(io.prestosql.plugin.hive.HiveACIDWriteType) HiveMetadata(io.prestosql.plugin.hive.HiveMetadata) LogServiceFactory(org.apache.carbondata.common.logging.LogServiceFactory) JobID(org.apache.hadoop.mapreduce.JobID) NoSuchElementException(java.util.NoSuchElementException) SegmentUpdateDetails(org.apache.carbondata.core.mutate.SegmentUpdateDetails) HiveIdentity(io.prestosql.plugin.hive.authentication.HiveIdentity) HiveUpdateTableHandle(io.prestosql.plugin.hive.HiveUpdateTableHandle) TableProcessingOperations(org.apache.carbondata.processing.loading.TableProcessingOperations) ColumnMetadata(io.prestosql.spi.connector.ColumnMetadata) ConnectorTableHandle(io.prestosql.spi.connector.ConnectorTableHandle) NON_INHERITABLE_PROPERTIES(io.prestosql.plugin.hive.HiveTableProperties.NON_INHERITABLE_PROPERTIES) SchemaNotFoundException(io.prestosql.spi.connector.SchemaNotFoundException) NoSuchMVException(org.apache.carbondata.common.exceptions.sql.NoSuchMVException) Maps(com.google.common.collect.Maps) HiveDeleteAsInsertTableHandle(io.prestosql.plugin.hive.HiveDeleteAsInsertTableHandle) RowCountDetailsVO(org.apache.carbondata.core.mutate.data.RowCountDetailsVO) CarbondataTableCacheModel(io.hetu.core.plugin.carbondata.impl.CarbondataTableCacheModel) PartitionUpdate(io.prestosql.plugin.hive.PartitionUpdate) JobConf(org.apache.hadoop.mapred.JobConf) TimeUnit(java.util.concurrent.TimeUnit) Collectors.toList(java.util.stream.Collectors.toList) ConcurrentSkipListSet(java.util.concurrent.ConcurrentSkipListSet) Column(io.prestosql.plugin.hive.metastore.Column) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator) ConnectorInsertTableHandle(io.prestosql.spi.connector.ConnectorInsertTableHandle) SchemaConverter(org.apache.carbondata.core.metadata.converter.SchemaConverter) 
SchemaEvolutionEntry(org.apache.carbondata.core.metadata.schema.SchemaEvolutionEntry) StructField(org.apache.carbondata.core.metadata.datatype.StructField) TableSchema(org.apache.carbondata.core.metadata.schema.table.TableSchema) ArrayList(java.util.ArrayList) TableSchemaBuilder(org.apache.carbondata.core.metadata.schema.table.TableSchemaBuilder) ColumnSchema(org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema) PrestoException(io.prestosql.spi.PrestoException) HiveColumnHandle(io.prestosql.plugin.hive.HiveColumnHandle) NoSuchElementException(java.util.NoSuchElementException)

Aggregations

VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 ImmutableList (com.google.common.collect.ImmutableList)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 ImmutableSet (com.google.common.collect.ImmutableSet)1 Maps (com.google.common.collect.Maps)1 Gson (com.google.gson.Gson)1 JsonCodec (io.airlift.json.JsonCodec)1 Slice (io.airlift.slice.Slice)1 Duration (io.airlift.units.Duration)1 EncodedLoadModel (io.hetu.core.plugin.carbondata.CarbondataConstants.EncodedLoadModel)1 CarbondataTableProperties.getCarbondataLocation (io.hetu.core.plugin.carbondata.CarbondataTableProperties.getCarbondataLocation)1 CarbondataTableCacheModel (io.hetu.core.plugin.carbondata.impl.CarbondataTableCacheModel)1 CarbondataTableReader (io.hetu.core.plugin.carbondata.impl.CarbondataTableReader)1 BaseStorageFormat (io.prestosql.plugin.hive.BaseStorageFormat)1 HdfsEnvironment (io.prestosql.plugin.hive.HdfsEnvironment)1 HiveACIDWriteType (io.prestosql.plugin.hive.HiveACIDWriteType)1 HiveBasicStatistics (io.prestosql.plugin.hive.HiveBasicStatistics)1 HiveBucketProperty (io.prestosql.plugin.hive.HiveBucketProperty)1 HiveBucketing (io.prestosql.plugin.hive.HiveBucketing)1 HiveColumnHandle (io.prestosql.plugin.hive.HiveColumnHandle)1